nyx/src/taint/tests.rs

use super::*;
use crate::cfg::FileCfg;
use crate::interop::InteropEdge;
use crate::labels::Cap;
use crate::symbol::FuncKey;

// ── SSA-specific taint tests ─────────────────────────────────────────────

/// Helper: run SSA taint analysis on Rust source.
/// Uses the first function body if one exists, otherwise top-level.
fn ssa_analyse_rust(src: &[u8]) -> Vec<Finding> {
    use crate::cfg::build_cfg;
    use crate::state::symbol::SymbolInterner;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src, None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let body = if file_cfg.bodies.len() > 1 {
        &file_cfg.bodies[1]
    } else {
        file_cfg.first_body()
    };
    let cfg = &body.graph;
    let entry = body.entry;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(cfg);
    let ssa =
        crate::ssa::lower_to_ssa(cfg, entry, None, true).expect("SSA lowering should succeed");

    let transfer = ssa_transfer::SsaTaintTransfer {
        lang: Lang::Rust,
        namespace: "test.rs",
        interner: &interner,
        local_summaries: summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };
    let events = ssa_transfer::run_ssa_taint(&ssa, cfg, &transfer);
    let mut findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, cfg);
    findings.sort_by_key(|f| (f.sink.index(), f.source.index()));
    findings.dedup_by_key(|f| (f.sink, f.source));
    findings
}

#[test]
fn ssa_linear_source_to_sink() {
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS_ARG").unwrap();
            Command::new("sh").arg(x).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert_eq!(
        findings.len(),
        1,
        "SSA: linear source→sink should produce 1 finding"
    );
}

#[test]
fn ssa_linear_sanitized_no_finding() {
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let clean = shell_escape::unix::escape(&x);
            Command::new("sh").arg(clean).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert!(
        findings.is_empty(),
        "SSA: matching sanitizer should eliminate finding"
    );
}

#[test]
fn ssa_reassignment_kills_taint() {
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let x = "safe_constant";
            Command::new("sh").arg(x).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert!(
        findings.is_empty(),
        "SSA: reassignment to constant should kill taint"
    );
}

#[test]
fn ssa_taint_through_branch_merge() {
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let safe = html_escape::encode_safe(&x);
            if x.len() > 5 {
                Command::new("sh").arg(&x).status().unwrap();
            } else {
                Command::new("sh").arg(&safe).status().unwrap();
            }
        }"#;
    let findings = ssa_analyse_rust(src);
    assert!(
        !findings.is_empty(),
        "SSA: taint through branch should produce at least 1 finding"
    );
}

#[test]
fn ssa_taint_through_loop() {
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let mut x = env::var("DANGEROUS").unwrap();
            while x.len() < 100 {
                x.push_str("a");
            }
            Command::new("sh").arg(x).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert_eq!(
        findings.len(),
        1,
        "SSA: taint through loop should produce 1 finding"
    );
}

#[test]
fn ssa_multi_variable_independence() {
    // Independent variables should not interfere
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("TAINTED").unwrap();
            let y = "safe";
            Command::new("sh").arg(y).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert!(
        findings.is_empty(),
        "SSA: untainted variable at sink should produce no finding"
    );
}

#[test]
fn env_to_arg_is_flagged() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS_ARG").unwrap();
            Command::new("sh").arg(x).status().unwrap();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert_eq!(findings.len(), 1); // exactly one unsanitised Source→Sink
}

#[test]
fn taint_through_if_else() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let safe = html_escape::encode_safe(&x);

            if x.len() > 5 {
                Command::new("sh").arg(&x).status().unwrap();   // UNSAFE
            } else {
                Command::new("sh").arg(&safe).status().unwrap(); // SAFE
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Both branches have findings: the true branch uses unsanitized `x`,
    // the else branch uses `safe` which was sanitized with HTML_ESCAPE
    // but the sink requires SHELL_ESCAPE (wrong sanitizer → still tainted).
    assert_eq!(findings.len(), 2);
}

#[test]
fn taint_through_while_loop() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let mut x = env::var("DANGEROUS").unwrap();
            while x.len() < 100 {                       // Loop header (Loop)
                x.push_str("a");
            }
            Command::new("sh").arg(x).status().unwrap(); // Should be flagged
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    assert_eq!(findings.len(), 1);
}

#[test]
fn taint_killed_by_matching_sanitizer() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // shell_escape sanitizer strips SHELL_ESCAPE → Command sink checks
    // SHELL_ESCAPE → the matching bit is gone → no finding.
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let clean = shell_escape::unix::escape(&x);
            Command::new("sh").arg(clean).status().unwrap();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    assert!(
        findings.is_empty(),
        "matching sanitizer should kill the taint"
    );
}

#[test]
fn wrong_sanitizer_preserves_taint() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // html_escape sanitizer strips HTML_ESCAPE, but Command sink checks
    // SHELL_ESCAPE → the wrong bit was stripped → finding persists.
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let clean = html_escape::encode_safe(&x);
            Command::new("sh").arg(clean).status().unwrap();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "wrong sanitizer should NOT kill the taint"
    );
}

#[test]
fn taint_breaks_out_of_loop() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            loop {
                let x = env::var("DANGEROUS").unwrap();
                Command::new("sh").arg(&x).status().unwrap(); // vulnerable
                break;
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    assert_eq!(findings.len(), 1);
}

#[test]
fn test_two_sources_one_sanitised() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Two env sources, one properly sanitised with the MATCHING sanitiser.
    // x → unsanitised → Command = FINDING
    // y → shell_escape → Command = safe
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let y = env::var("ANOTHER").unwrap();
            let clean = shell_escape::unix::escape(&y);
            Command::new("sh").arg(x).status().unwrap();
            Command::new("sh").arg(clean).status().unwrap();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "only the unsanitised source should be flagged"
    );
}

#[test]
fn test_two_sources_wrong_sanitiser_both_flagged() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Two env sources, one "sanitised" with the WRONG sanitiser.
    // x → unsanitised → Command = FINDING
    // y → html_escape → Command = FINDING (wrong sanitiser for shell sink)
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let y = env::var("ANOTHER").unwrap();
            let clean = html_escape::encode_safe(&y);
            Command::new("sh").arg(x).status().unwrap();
            Command::new("sh").arg(clean).status().unwrap();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    assert_eq!(
        findings.len(),
        2,
        "both should be flagged — wrong sanitiser"
    );
}

#[test]
fn test_should_not_panic_on_empty_function() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let src = br#"
        use std::{env, process::Command};
        fn f() {
            if cond() {
                return;
            }
            do_something();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    assert!(findings.is_empty());
}

#[test]
fn cross_file_source_resolved_via_global_summaries() {
    use crate::summary::FuncSummary;

    // Simulate file B calling `get_dangerous()` which is defined in file A.
    // File A's summary says get_dangerous is a Source(all).
    let src = br#"
        use std::process::Command;
        fn main() {
            let x = get_dangerous();
            Command::new("sh").arg(x).status().unwrap();
        }"#;

    let file_cfg = parse_rust(src);
    let local_summaries = &file_cfg.summaries;

    // Build global summaries as if file A exported get_dangerous
    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "get_dangerous".into(),
        arity: Some(0),
        ..Default::default()
    };
    global.insert(
        key,
        FuncSummary {
            name: "get_dangerous".into(),
            file_path: "file_a.rs".into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let findings = analyse_file(
        &file_cfg,
        local_summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(findings.len(), 1, "cross-file source should be detected");
}

#[test]
fn cross_file_sanitizer_resolved_via_global_summaries() {
    use crate::summary::FuncSummary;

    // File B gets tainted data and passes it through `my_sanitize()` from file A.
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let clean = my_sanitize(x);
            Command::new("sh").arg(clean).status().unwrap();
        }"#;

    let file_cfg = parse_rust(src);
    let local_summaries = &file_cfg.summaries;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "my_sanitize".into(),
        arity: Some(1),
        ..Default::default()
    };
    global.insert(
        key,
        FuncSummary {
            name: "my_sanitize".into(),
            file_path: "file_a.rs".into(),
            lang: "rust".into(),
            param_count: 1,
            param_names: vec!["input".into()],
            source_caps: 0,
            sanitizer_caps: Cap::all().bits(),
            sink_caps: 0,
            propagating_params: vec![0],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let findings = analyse_file(
        &file_cfg,
        local_summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "cross-file sanitizer should neutralise taint"
    );
}

// ─────────────────────────────────────────────────────────────────────────────
//  Shared test helpers
// ─────────────────────────────────────────────────────────────────────────────

/// Parse Rust source bytes → FileCfg
fn parse_rust(src: &[u8]) -> FileCfg {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src, None).unwrap();
    build_cfg(&tree, src, "rust", "test.rs", None)
}

/// Parse Rust source bytes, build CFG, and export cross-file summaries.
fn extract_summaries_from_bytes(src: &[u8], path: &str) -> Vec<crate::summary::FuncSummary> {
    use crate::cfg::export_summaries;
    let file_cfg = parse_rust(src);
    export_summaries(&file_cfg.summaries, path, "rust")
}

#[test]
fn cross_file_sink_resolved_via_global_summaries() {
    use crate::summary::FuncSummary;

    // File B calls `dangerous_exec(x)` from file A which is a sink.
    let src = br#"
        use std::env;
        fn main() {
            let x = env::var("INPUT").unwrap();
            dangerous_exec(x);
        }"#;

    let file_cfg = parse_rust(src);
    let local_summaries = &file_cfg.summaries;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "dangerous_exec".into(),
        arity: Some(1),
        ..Default::default()
    };
    global.insert(
        key,
        FuncSummary {
            name: "dangerous_exec".into(),
            file_path: "file_a.rs".into(),
            lang: "rust".into(),
            param_count: 1,
            param_names: vec!["cmd".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: Cap::SHELL_ESCAPE.bits(),
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![0],
            callees: vec!["Command::new".into()],
            ..Default::default()
        },
    );

    let findings = analyse_file(
        &file_cfg,
        local_summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(findings.len(), 1, "cross-file sink should be detected");
}

#[test]
fn cross_file_sink_finding_carries_primary_location() {
    // Primary sink-location attribution: when a callee summary carries a
    // [`SinkSite`] with resolved coordinates, the emitted Finding must
    // expose those coordinates via `primary_location`.  This guards the
    // event→finding plumbing independent of any CFG/label changes.
    use crate::summary::{FuncSummary, SinkSite};
    use smallvec::smallvec;

    let src = br#"
        use std::env;
        fn main() {
            let x = env::var("INPUT").unwrap();
            dangerous_exec(x);
        }"#;

    let file_cfg = parse_rust(src);
    let local_summaries = &file_cfg.summaries;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "dangerous_exec".into(),
        arity: Some(1),
        ..Default::default()
    };
    // Summary: param 0 (`cmd`) flows to a shell-exec sink at file_a.rs:42:5.
    let sink_site = SinkSite {
        file_rel: "file_a.rs".into(),
        line: 42,
        col: 5,
        snippet: "Command::new(\"sh\").arg(cmd).status().unwrap();".into(),
        cap: Cap::SHELL_ESCAPE,
    };
    global.insert(
        key,
        FuncSummary {
            name: "dangerous_exec".into(),
            file_path: "file_a.rs".into(),
            lang: "rust".into(),
            param_count: 1,
            param_names: vec!["cmd".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: Cap::SHELL_ESCAPE.bits(),
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![0],
            param_to_sink: vec![(0, smallvec![sink_site.clone()])],
            callees: vec!["Command::new".into()],
            ..Default::default()
        },
    );

    let findings = analyse_file(
        &file_cfg,
        local_summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "cross-file sink should still be detected",
    );
    let finding = &findings[0];
    // Note: `uses_summary == false` here because the source (env::var) is
    // local, only the *sink* was summary-resolved.  That's the case the
    // `primary_location` / `uses_summary` independence comment on
    // [`super::Finding::primary_location`] documents.
    let loc = finding
        .primary_location
        .as_ref()
        .expect("summary-resolved sink with SinkSite must carry primary_location");
    assert_eq!(loc.file_rel, "file_a.rs");
    assert_eq!(loc.line, 42);
    assert_eq!(loc.col, 5);
}

#[test]
fn cross_file_sink_cap_only_site_leaves_primary_location_none() {
    // Cap-only SinkSites (line == 0) must not surface as Finding.primary_location,
    // otherwise the formatter would claim a (0, 0) position as authoritative.
    use crate::summary::FuncSummary;

    let src = br#"
        use std::env;
        fn main() {
            let x = env::var("INPUT").unwrap();
            dangerous_exec(x);
        }"#;

    let file_cfg = parse_rust(src);
    let local_summaries = &file_cfg.summaries;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "file_a.rs".into(),
        name: "dangerous_exec".into(),
        arity: Some(1),
        ..Default::default()
    };
    global.insert(
        key,
        FuncSummary {
            name: "dangerous_exec".into(),
            file_path: "file_a.rs".into(),
            lang: "rust".into(),
            param_count: 1,
            param_names: vec!["cmd".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: Cap::SHELL_ESCAPE.bits(),
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![0],
            // No param_to_sink: falls back to cap-only summary (no SinkSite).
            callees: vec!["Command::new".into()],
            ..Default::default()
        },
    );

    let findings = analyse_file(
        &file_cfg,
        local_summaries,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(findings.len(), 1, "cross-file sink should be detected");
    assert!(
        findings[0].primary_location.is_none(),
        "cap-only summary must not produce a primary_location",
    );
}

// ─────────────────────────────────────────────────────────────────────────────
//  Multi-file integration tests (real parsing, full pass-1 → pass-2 pipeline)
// ─────────────────────────────────────────────────────────────────────────────

#[test]
fn multi_file_source_to_sink_detected() {
    use crate::summary::merge_summaries;

    // File A: defines get_dangerous() which calls env::var (a source).
    let lib_src = br#"
        use std::env;
        fn get_dangerous() -> String {
            env::var("SECRET").unwrap()
        }
    "#;

    // File B: calls get_dangerous() then passes result to Command (a sink).
    let caller_src = br#"
        use std::process::Command;
        fn main() {
            let x = get_dangerous();
            Command::new("sh").arg(x).status().unwrap();
        }
    "#;

    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
    let global = merge_summaries(summaries, None);

    let file_cfg = parse_rust(caller_src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings.len(),
        1,
        "cross-file source → inline sink should produce 1 finding"
    );
}

#[test]
fn multi_file_sanitizer_neutralises_cross_file_source() {
    use crate::summary::merge_summaries;

    // File A: source + matching shell sanitizer.
    // NOTE: function name avoids `sanitize_` prefix which triggers
    //       the inline HTML sanitizer label rule.
    let lib_src = br#"
        use std::env;
        fn get_input() -> String {
            env::var("INPUT").unwrap()
        }
        fn clean_shell(s: &str) -> String {
            shell_escape::unix::escape(s).to_string()
        }
    "#;

    // File B: source → clean_shell → shell sink.
    let caller_src = br#"
        use std::process::Command;
        fn main() {
            let x = get_input();
            let clean = clean_shell(&x);
            Command::new("sh").arg(clean).status().unwrap();
        }
    "#;

    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
    let global = merge_summaries(summaries, None);

    let file_cfg = parse_rust(caller_src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert!(
        findings.is_empty(),
        "matching cross-file sanitizer should neutralise taint, got {} findings",
        findings.len()
    );
}

#[test]
fn multi_file_wrong_sanitizer_preserves_taint() {
    use crate::summary::merge_summaries;

    // File A: source + HTML sanitizer (wrong for shell sink).
    let lib_src = br#"
        use std::env;
        fn get_input() -> String {
            env::var("INPUT").unwrap()
        }
        fn clean_html(s: &str) -> String {
            html_escape::encode_safe(s).to_string()
        }
    "#;

    // File B: source → HTML sanitize → shell sink → should still flag.
    let caller_src = br#"
        use std::process::Command;
        fn main() {
            let x = get_input();
            let clean = clean_html(&x);
            Command::new("sh").arg(clean).status().unwrap();
        }
    "#;

    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
    let global = merge_summaries(summaries, None);

    let file_cfg = parse_rust(caller_src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings.len(),
        1,
        "wrong sanitizer (HTML for shell sink) should NOT neutralise taint"
    );
}

#[test]
fn multi_file_sink_in_another_file() {
    use crate::summary::merge_summaries;

    // File A: defines exec_cmd() which internally calls Command::new (a sink).
    let lib_src = br#"
        use std::process::Command;
        fn exec_cmd(cmd: &str) {
            Command::new("sh").arg(cmd).status().unwrap();
        }
    "#;

    // File B: env::var → exec_cmd(), sink is cross-file.
    let caller_src = br#"
        use std::env;
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            exec_cmd(&x);
        }
    "#;

    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
    let global = merge_summaries(summaries, None);

    let file_cfg = parse_rust(caller_src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(findings.len(), 1, "cross-file sink should be detected");
}

#[test]
fn multi_file_passthrough_preserves_taint() {
    use crate::summary::FuncSummary;

    // identity() just returns its argument, it propagates taint but has no
    // source/sanitizer/sink caps of its own.
    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "identity".into(),
        arity: Some(1),
        ..Default::default()
    };
    global.insert(
        key,
        FuncSummary {
            name: "identity".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 1,
            param_names: vec!["s".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![0],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let caller_src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let y = identity(&x);
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(caller_src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings.len(),
        1,
        "taint should propagate through passthrough function"
    );
}

#[test]
fn multi_file_chain_source_sanitize_sink_across_files() {
    use crate::summary::merge_summaries;

    // Library file defines all three roles: source, sanitizer, sink.
    let lib_src = br#"
        use std::env;
        use std::process::Command;
        fn get_input() -> String {
            env::var("INPUT").unwrap()
        }
        fn clean_shell(s: &str) -> String {
            shell_escape::unix::escape(s).to_string()
        }
        fn exec_cmd(cmd: &str) {
            Command::new("sh").arg(cmd).status().unwrap();
        }
    "#;

    // Caller: source → correct sanitizer → sink.
    let caller_src = br#"
        fn main() {
            let x = get_input();
            let clean = clean_shell(&x);
            exec_cmd(&clean);
        }
    "#;

    let summaries = extract_summaries_from_bytes(lib_src, "lib.rs");
    let global = merge_summaries(summaries, None);

    let file_cfg = parse_rust(caller_src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert!(
        findings.is_empty(),
        "source → matching sanitizer → sink should produce 0 findings, got {}",
        findings.len()
    );
}

// ─────────────────────────────────────────────────────────────────────────────
//  Edge-case unit tests
// ─────────────────────────────────────────────────────────────────────────────

#[test]
fn sanitizer_strips_only_matching_bits() {
    // Source(ALL) → shell_escape → sink_html (HTML sink).
    // shell_escape strips SHELL_ESCAPE but not HTML_ESCAPE.
    // sink_html is an HTML sink, HTML_ESCAPE bit is still set → 1 finding.
    let src = br#"
        use std::env;
        fn sink_html(s: &str) {}
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let clean = shell_escape::unix::escape(&x);
            sink_html(&clean);
        }
    "#;

    let file_cfg = parse_rust(src);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert_eq!(
        findings.len(),
        1,
        "shell sanitizer should NOT strip HTML_ESCAPE bit; HTML sink should still fire"
    );
}

#[test]
fn multiple_sanitizers_strip_all_bits() {
    // Source → shell_escape → html_escape → Command (shell sink).
    // shell_escape strips SHELL_ESCAPE; html_escape strips HTML_ESCAPE.
    // After both, the remaining taint bits relevant to SHELL_ESCAPE are gone.
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let a = shell_escape::unix::escape(&x);
            let b = html_escape::encode_safe(&a);
            Command::new("sh").arg(b).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert!(
        findings.is_empty(),
        "both sanitizers together should strip all relevant bits"
    );
}

#[test]
fn taint_through_variable_reassignment() {
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let y = x;
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert_eq!(
        findings.len(),
        1,
        "taint should flow through simple variable reassignment"
    );
}

#[test]
fn untainted_variable_at_sink_is_safe() {
    // A string literal (not from a source) passed to Command, no finding.
    let src = br#"
        use std::process::Command;
        fn main() {
            let x = "harmless";
            Command::new("sh").arg(x).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert!(
        findings.is_empty(),
        "untainted literal should not trigger a finding"
    );
}

#[test]
fn local_summary_takes_precedence_over_global() {
    use crate::summary::FuncSummary;

    // The caller file defines my_func locally as a source.
    // Global says my_func is a sanitizer.
    // Local should win → finding expected.
    let caller_src = br#"
        use std::{env, process::Command};
        fn my_func() -> String {
            env::var("SECRET").unwrap()
        }
        fn main() {
            let x = my_func();
            Command::new("sh").arg(x).status().unwrap();
        }
    "#;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "other.rs".into(),
        name: "my_func".into(),
        arity: Some(0),
        ..Default::default()
    };
    global.insert(
        key,
        FuncSummary {
            name: "my_func".into(),
            file_path: "other.rs".into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: 0,
            sanitizer_caps: Cap::all().bits(),
            sink_caps: 0,
            propagating_params: vec![0],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let file_cfg = parse_rust(caller_src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings.len(),
        1,
        "local summary (source) should take precedence over global (sanitizer)"
    );
}

#[test]
fn empty_global_summaries_same_as_none() {
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            Command::new("sh").arg(x).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let summaries = &file_cfg.summaries;

    let findings_none = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
    let empty = GlobalSummaries::new();
    let findings_empty = analyse_file(
        &file_cfg,
        summaries,
        Some(&empty),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings_none.len(),
        findings_empty.len(),
        "empty GlobalSummaries should behave identically to None"
    );
}

#[test]
fn taint_not_introduced_by_non_source_function() {
    // Call an unknown function (no summary anywhere), assign to var, pass to sink.
    // Unknown calls should NOT introduce taint.
    let src = br#"
        use std::process::Command;
        fn main() {
            let x = totally_unknown_func();
            Command::new("sh").arg(x).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert!(
        findings.is_empty(),
        "unknown function call should not introduce taint"
    );
}

#[test]
fn source_and_sink_on_same_function() {
    use crate::summary::FuncSummary;

    // Cross-file function that is both source AND sink.
    // Tainted arg hits sink → 1 finding.
    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "source_and_sink".into(),
        arity: Some(1),
        ..Default::default()
    };
    global.insert(
        key,
        FuncSummary {
            name: "source_and_sink".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 1,
            param_names: vec!["input".into()],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: Cap::SHELL_ESCAPE.bits(),
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![0],
            callees: vec![],
            ..Default::default()
        },
    );

    // Pass tainted data from env::var into source_and_sink.
    let src = br#"
        use std::env;
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            source_and_sink(x);
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings.len(),
        1,
        "function that is both source and sink should detect tainted arg as finding"
    );
}

#[test]
fn multiple_cross_file_sources_one_sanitised() {
    use crate::summary::FuncSummary;

    let mut global = GlobalSummaries::new();
    // Two cross-file sources
    let key1 = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "get_secret".into(),
        arity: Some(0),
        ..Default::default()
    };
    global.insert(
        key1,
        FuncSummary {
            name: "get_secret".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );
    let key2 = FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "get_other_secret".into(),
        arity: Some(0),
        ..Default::default()
    };
    global.insert(
        key2,
        FuncSummary {
            name: "get_other_secret".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    // One source sanitised, one not.
    let src = br#"
        use std::process::Command;
        fn main() {
            let a = get_secret();
            let b = get_other_secret();
            let clean_a = shell_escape::unix::escape(&a);
            Command::new("sh").arg(clean_a).status().unwrap();
            Command::new("sh").arg(b).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings.len(),
        1,
        "only the unsanitised cross-file source should produce a finding"
    );
}

// ─────────────────────────────────────────────────────────────────────────────
//  Multi-language helpers and tests
// ─────────────────────────────────────────────────────────────────────────────

/// Parse source bytes for any supported language → FileCfg
fn parse_lang(src: &[u8], slug: &str, ts_lang: tree_sitter::Language) -> FileCfg {
    use crate::cfg::build_cfg;
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&ts_lang).unwrap();
    let tree = parser.parse(src, None).unwrap();
    let ext = match slug {
        "rust" => "test.rs",
        "javascript" => "test.js",
        "typescript" => "test.ts",
        "python" => "test.py",
        "go" => "test.go",
        "java" => "test.java",
        "c" => "test.c",
        "cpp" => "test.cpp",
        "php" => "test.php",
        "ruby" => "test.rb",
        _ => "test.txt",
    };
    build_cfg(&tree, src, slug, ext, None)
}

#[test]
fn js_source_to_sink() {
    let src = b"function main() {\n  let x = document.location();\n  eval(x);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "JS: source->sink should produce 1 finding"
    );
}

#[test]
fn ts_source_to_sink() {
    let src = b"function main() {\n  let x = document.location();\n  eval(x);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let file_cfg = parse_lang(src, "typescript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "TS: source->sink should produce 1 finding"
    );
}

#[test]
fn python_source_to_sink() {
    let src = b"def main():\n    x = os.getenv(\"SECRET\")\n    os.system(x)\n";
    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let file_cfg = parse_lang(src, "python", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "Python: source->sink should produce 1 finding"
    );
}

#[test]
fn go_source_to_sink() {
    let src =
        b"package main\n\nfunc main() {\n\tx := os.Getenv(\"SECRET\")\n\texec.Command(x)\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(src, "go", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Go, "test.go", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "Go: source->sink should produce 1 finding"
    );
}

#[test]
fn java_source_to_sink() {
    let src = b"class Main {\n  void main() {\n    String x = System.getenv(\"SECRET\");\n    Runtime.exec(x);\n  }\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let file_cfg = parse_lang(src, "java", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::Java,
        "test.java",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "Java: source->sink should produce 1 finding"
    );
}

#[test]
fn c_source_to_sink() {
    let src = b"void main() {\n  char* x = getenv(\"SECRET\");\n  system(x);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::C, "test.c", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "C: source->sink should produce 1 finding"
    );
}

#[test]
fn cpp_source_to_sink() {
    let src = b"void main() {\n  char* x = getenv(\"SECRET\");\n  system(x);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "C++: source->sink should produce 1 finding"
    );
}

/// `c_str()` is a const accessor on `std::string`
/// that returns a pointer to the same buffer.  It must propagate taint from
/// the receiver to the result so the downstream sink fires.
#[test]
fn cpp_c_str_propagates_taint() {
    let src = b"#include <cstdlib>\n#include <string>\nint main() {\n  char* input = std::getenv(\"X\");\n  std::string s = input;\n  std::system(s.c_str());\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
        "C++: tainted s.c_str() into system() must fire",
    );
}

/// `std::move(x)` returns its argument unchanged in terms of
/// data flow, the rvalue cast is a representation move, not a sanitiser.
/// Default propagation collects argument taint into the result.
#[test]
fn cpp_std_move_propagates_taint() {
    let src = b"#include <cstdlib>\n#include <string>\n#include <utility>\nint main() {\n  char* input = std::getenv(\"X\");\n  std::string s = input;\n  std::string moved = std::move(s);\n  std::system(moved.c_str());\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
        "C++: taint must flow through std::move() into the sink",
    );
}

/// `static_cast<T>(x)` is parsed as a call expression by
/// tree-sitter-cpp; default propagation transports taint from the casted
/// argument to the result.
#[test]
fn cpp_static_cast_propagates_taint() {
    let src = b"#include <cstdlib>\nint main() {\n  char* input = std::getenv(\"X\");\n  const char* casted = static_cast<const char*>(input);\n  std::system(casted);\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
        "C++: taint must flow through static_cast<T>() into the sink",
    );
}

/// a fluent builder chain whose host
/// argument is tainted should fire on the terminal `.connect()`
/// SSRF sink.  The chained `.host(...)` / `.port(...)` calls return
/// the receiver, and default Call-arg propagation puts the tainted
/// argument on the chain so it reaches the terminal sink.
#[test]
fn cpp_builder_chain_user_host_fires() {
    let src = b"#include <cstdlib>\n#include <string>\nclass Socket {\npublic:\n  static Socket builder() { return Socket(); }\n  Socket& host(const std::string& h) { host_ = h; return *this; }\n  Socket& port(int p) { port_ = p; return *this; }\n  void connect() {}\nprivate:\n  std::string host_;\n  int port_ = 0;\n};\nint main() {\n  char* h = std::getenv(\"X\");\n  Socket::builder().host(h).port(80).connect();\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
        "C++: tainted host through fluent builder chain must reach terminal connect()",
    );
}

/// a fluent builder chain with a hardcoded host literal
/// must NOT fire on the terminal connect() sink, the chain carries
/// no taint.
#[test]
fn cpp_builder_chain_const_host_silent() {
    let src = b"#include <string>\nclass Socket {\npublic:\n  static Socket builder() { return Socket(); }\n  Socket& host(const std::string& h) { host_ = h; return *this; }\n  Socket& port(int p) { port_ = p; return *this; }\n  void connect() {}\nprivate:\n  std::string host_;\n  int port_ = 0;\n};\nint main() {\n  Socket::builder().host(\"api.example.com\").port(80).connect();\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        findings.is_empty(),
        "C++: builder chain with literal host must NOT fire (Negative)",
    );
}

/// inline member-function bodies inside a
/// `class_specifier` must be extracted as separate functions and
/// intra-file calls must resolve to their bodies. Pre-Phase-4, the
/// `class_specifier` AST kind was unmapped in cpp KINDS, so the CFG
/// walker treated the entire class as a leaf `Seq` node and never
/// descended into inline methods.
#[test]
fn cpp_inline_class_method_resolves() {
    let src = b"#include <cstdlib>\nclass Inner {\npublic:\n  void run(const char* arg) { std::system(arg); }\n};\nint main() {\n  char* input = std::getenv(\"X\");\n  Inner inner;\n  inner.run(input);\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
        "C++: tainted arg through inline class method must reach system()",
    );
}

/// a tainted argument passed through an
/// identity-style lambda (`auto echo = [](const char* s) { return s; }`)
/// must reach the downstream sink. This is handled by the same default
/// Call-arg propagation as `std::move`/`static_cast`; pinning the
/// behaviour here so future engine work doesn't silently regress
/// identity lambdas.
#[test]
fn cpp_identity_lambda_propagates_taint() {
    let src = b"#include <cstdlib>\nint main() {\n  char* input = std::getenv(\"X\");\n  auto echo = [](const char* s) { return s; };\n  std::system(echo(input));\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
        "C++: taint must flow through identity lambda echo() into system()",
    );
}

/// `std::vector<char>::data()` is a Load-style container op that
/// returns a pointer to the underlying buffer; `system(v.data())` should
/// fire when `v` is tainted.
#[test]
fn cpp_vector_data_propagates_taint() {
    let src = b"#include <cstdlib>\n#include <vector>\nint main() {\n  char* input = std::getenv(\"X\");\n  std::vector<char> v(input, input + 8);\n  std::system(v.data());\n  return 0;\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
        "C++: taint must flow through v.data() into the sink",
    );
}

#[test]
fn php_source_to_sink() {
    let src =
        b"<?php\nfunction main() {\n  $x = file_get_contents(\"secret\");\n  system($x);\n}\n?>";
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let file_cfg = parse_lang(src, "php", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Php, "test.php", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "PHP: source->sink should produce 1 finding"
    );
}

#[test]
fn php_echo_xss() {
    // PHP `echo` is a language construct (echo_statement), not a function call.
    // Tainted data flowing through echo should be detected as an XSS sink.
    let src = b"<?php\n$name = $_GET['name'];\necho \"<h1>Hello \" . $name . \"</h1>\";\n";
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let file_cfg = parse_lang(src, "php", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Php, "test.php", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "PHP echo with tainted var should produce 1 XSS finding"
    );
}

#[test]
fn php_echo_simple_var() {
    // Simple `echo $var;` with a tainted variable.
    let src = b"<?php\n$x = $_POST['data'];\necho $x;\n";
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let file_cfg = parse_lang(src, "php", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Php, "test.php", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "PHP echo with simple tainted var should produce 1 finding"
    );
}

#[test]
fn php_echo_safe_literal() {
    // `echo "hello";` with no tainted data should produce no finding.
    let src = b"<?php\necho \"hello world\";\n";
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let file_cfg = parse_lang(src, "php", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Php, "test.php", &[], None);
    assert_eq!(
        findings.len(),
        0,
        "PHP echo with literal string should produce 0 findings"
    );
}

#[test]
fn ruby_source_to_sink() {
    let src = b"def main\n  x = gets()\n  system(x)\nend\n";
    let lang = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    let file_cfg = parse_lang(src, "ruby", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Ruby, "test.rb", &[], None);
    assert_eq!(
        findings.len(),
        1,
        "Ruby: source->sink should produce 1 finding"
    );
}

// ─────────────────────────────────────────────────────────────────────────────
//  Cross-language multi-file tests
// ─────────────────────────────────────────────────────────────────────────────
//
// Cross-language resolution now requires explicit InteropEdge declarations.
// Without an edge, functions from different languages are never resolved ,
// this prevents false positives from name collisions across languages.

/// Extract cross-file summaries from any language's source bytes.
fn extract_lang_summaries(
    src: &[u8],
    slug: &str,
    ts_lang: tree_sitter::Language,
    path: &str,
) -> Vec<crate::summary::FuncSummary> {
    use crate::cfg::export_summaries;
    let file_cfg = parse_lang(src, slug, ts_lang);
    let local = &file_cfg.summaries;
    export_summaries(local, path, slug)
}

// ── Scenario 1: Python source function → JavaScript sink via interop ─────
#[test]
fn cross_lang_python_source_to_js_sink_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let py_src = b"def get_input():\n    x = os.getenv(\"SECRET\")\n    return x\n";
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_summaries = extract_lang_summaries(py_src, "python", py_lang, "lib.py");
    let global = merge_summaries(py_summaries, None);

    // JavaScript file calls get_input() and passes to eval()
    let js_src = b"function main() {\n  let x = get_input();\n  eval(x);\n}\n";
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(js_src, "javascript", js_lang);
    let local = &file_cfg.summaries;

    // Without interop: no cross-lang resolution
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &[],
        None,
    );
    assert!(findings.is_empty(), "No cross-lang without interop edge");

    // With interop edge
    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::JavaScript,
            caller_namespace: "main.js".into(),
            caller_func: "main".into(),
            callee_symbol: "get_input".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Python,
            namespace: "lib.py".into(),
            name: "get_input".into(),
            arity: Some(0),
            ..Default::default()
        },
    }];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "Python source → JS sink via interop edge"
    );
}

// ── Scenario 2: Go source function → Python sink via interop ─────────────
#[test]
fn cross_lang_go_source_to_python_sink_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let go_src =
        b"package main\n\nfunc fetch_env() string {\n\tx := os.Getenv(\"SECRET\")\n\treturn x\n}\n";
    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let go_summaries = extract_lang_summaries(go_src, "go", go_lang, "lib.go");
    let global = merge_summaries(go_summaries, None);

    let py_src = b"def main():\n    x = fetch_env()\n    os.system(x)\n";
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let file_cfg = parse_lang(py_src, "python", py_lang);
    let local = &file_cfg.summaries;

    // Without interop: no findings
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Python,
        "main.py",
        &[],
        None,
    );
    assert!(findings.is_empty(), "No cross-lang without interop");

    // With interop
    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Python,
            caller_namespace: "main.py".into(),
            caller_func: "main".into(),
            callee_symbol: "fetch_env".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Go,
            namespace: "lib.go".into(),
            name: "fetch_env".into(),
            arity: Some(0),
            ..Default::default()
        },
    }];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Python,
        "main.py",
        &edges,
        None,
    );
    assert_eq!(findings.len(), 1, "Go source → Python sink via interop");
}

// ── Scenario 3: Rust sanitizer applied in JavaScript context via interop ──
#[test]
fn cross_lang_rust_sanitizer_in_js_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let rs_src = br#"
        fn clean_shell(s: &str) -> String {
            shell_escape::unix::escape(s).to_string()
        }
    "#;
    let rs_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let rs_summaries = extract_lang_summaries(rs_src, "rust", rs_lang, "lib.rs");
    let global = merge_summaries(rs_summaries, None);

    // JS: source → Rust sanitizer → shell sink
    let js_src = b"function main() {\n  let x = document.location();\n  let y = clean_shell(x);\n  eval(y);\n}\n";
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(js_src, "javascript", js_lang);
    let local = &file_cfg.summaries;

    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::JavaScript,
            caller_namespace: "main.js".into(),
            caller_func: "main".into(),
            callee_symbol: "clean_shell".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "clean_shell".into(),
            arity: Some(1),
            ..Default::default()
        },
    }];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    // eval uses Cap::all(), so a SHELL_ESCAPE sanitizer alone does NOT
    // neutralise taint, shell-escape is semantically wrong for code injection.
    // The finding should still be reported.
    assert!(
        !findings.is_empty(),
        "SHELL_ESCAPE sanitizer should NOT neutralise eval (code injection) taint"
    );
}

// ── Scenario 4: C sink function called from Java via interop ─────────────
#[test]
fn cross_lang_c_sink_called_from_java_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let c_src = b"void run_cmd(char* cmd) {\n  system(cmd);\n}\n";
    let c_lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let c_summaries = extract_lang_summaries(c_src, "c", c_lang, "native.c");
    let global = merge_summaries(c_summaries, None);

    let java_src = b"class Main {\n  void main() {\n    String x = System.getenv(\"INPUT\");\n    run_cmd(x);\n  }\n}\n";
    let java_lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let file_cfg = parse_lang(java_src, "java", java_lang);
    let local = &file_cfg.summaries;

    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Java,
            caller_namespace: "Main.java".into(),
            caller_func: "main".into(),
            callee_symbol: "run_cmd".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::C,
            namespace: "native.c".into(),
            name: "run_cmd".into(),
            arity: Some(1),
            ..Default::default()
        },
    }];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Java,
        "Main.java",
        &edges,
        None,
    );
    assert_eq!(findings.len(), 1, "Java source → C sink via interop");
}

// ── Scenario 5: Multi-language summary merge with interop ────────────────
#[test]
fn cross_lang_three_languages_merged_summaries_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    // Python: source function
    let py_src = b"def get_secret():\n    x = os.getenv(\"SECRET\")\n    return x\n";
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "source.py");

    // C: sink function
    let c_src = b"void run_dangerous(char* cmd) {\n  system(cmd);\n}\n";
    let c_lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let c_sums = extract_lang_summaries(c_src, "c", c_lang, "native.c");

    // Rust: sanitizer function
    let rs_src = br#"
        fn make_safe(s: &str) -> String {
            shell_escape::unix::escape(s).to_string()
        }
    "#;
    let rs_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let rs_sums = extract_lang_summaries(rs_src, "rust", rs_lang, "lib.rs");

    let all_sums: Vec<_> = py_sums.into_iter().chain(c_sums).chain(rs_sums).collect();
    let global = merge_summaries(all_sums, None);

    // Go caller: source → sanitizer → sink (all cross-language)
    let go_src = b"package main\n\nfunc main() {\n\tx := get_secret()\n\ty := make_safe(x)\n\trun_dangerous(y)\n}\n";
    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(go_src, "go", go_lang);
    let local = &file_cfg.summaries;

    let edges = vec![
        InteropEdge {
            from: CallSiteKey {
                caller_lang: Lang::Go,
                caller_namespace: "main.go".into(),
                caller_func: "main".into(),
                callee_symbol: "get_secret".into(),
                ordinal: 0,
            },
            to: FuncKey {
                lang: Lang::Python,
                namespace: "source.py".into(),
                name: "get_secret".into(),
                arity: Some(0),
                ..Default::default()
            },
        },
        InteropEdge {
            from: CallSiteKey {
                caller_lang: Lang::Go,
                caller_namespace: "main.go".into(),
                caller_func: "main".into(),
                callee_symbol: "make_safe".into(),
                ordinal: 0,
            },
            to: FuncKey {
                lang: Lang::Rust,
                namespace: "lib.rs".into(),
                name: "make_safe".into(),
                arity: Some(1),
                ..Default::default()
            },
        },
        InteropEdge {
            from: CallSiteKey {
                caller_lang: Lang::Go,
                caller_namespace: "main.go".into(),
                caller_func: "main".into(),
                callee_symbol: "run_dangerous".into(),
                ordinal: 0,
            },
            to: FuncKey {
                lang: Lang::C,
                namespace: "native.c".into(),
                name: "run_dangerous".into(),
                arity: Some(1),
                ..Default::default()
            },
        },
    ];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert!(
        findings.is_empty(),
        "source(Py) → sanitizer(Rs) → sink(C) via interop should be safe; got {} findings",
        findings.len()
    );
}

// ── Scenario 6: Same flow without sanitizer should flag via interop ──────
#[test]
fn cross_lang_three_languages_unsanitised_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    let py_src = b"def get_secret():\n    x = os.getenv(\"SECRET\")\n    return x\n";
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "source.py");

    let c_src = b"void run_dangerous(char* cmd) {\n  system(cmd);\n}\n";
    let c_lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let c_sums = extract_lang_summaries(c_src, "c", c_lang, "native.c");

    let all_sums: Vec<_> = py_sums.into_iter().chain(c_sums).collect();
    let global = merge_summaries(all_sums, None);

    // Go caller: source → sink directly (no sanitizer)
    let go_src = b"package main\n\nfunc main() {\n\tx := get_secret()\n\trun_dangerous(x)\n}\n";
    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(go_src, "go", go_lang);
    let local = &file_cfg.summaries;

    let edges = vec![
        InteropEdge {
            from: CallSiteKey {
                caller_lang: Lang::Go,
                caller_namespace: "main.go".into(),
                caller_func: "main".into(),
                callee_symbol: "get_secret".into(),
                ordinal: 0,
            },
            to: FuncKey {
                lang: Lang::Python,
                namespace: "source.py".into(),
                name: "get_secret".into(),
                arity: Some(0),
                ..Default::default()
            },
        },
        InteropEdge {
            from: CallSiteKey {
                caller_lang: Lang::Go,
                caller_namespace: "main.go".into(),
                caller_func: "main".into(),
                callee_symbol: "run_dangerous".into(),
                ordinal: 0,
            },
            to: FuncKey {
                lang: Lang::C,
                namespace: "native.c".into(),
                name: "run_dangerous".into(),
                arity: Some(1),
                ..Default::default()
            },
        },
    ];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "source(Py) → sink(C) without sanitizer via interop"
    );
}

// ── Scenario 7: Name collision across languages stays separate ───────────
#[test]
fn cross_lang_name_collision_stays_separate() {
    use crate::summary::merge_summaries;

    // Python version: source
    let py_src = b"def process_data():\n    x = os.getenv(\"DATA\")\n    return x\n";
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "handler.py");

    // C version: benign passthrough (constructed manually)
    let c_summary = crate::summary::FuncSummary {
        name: "process_data".into(),
        file_path: "handler.c".into(),
        lang: "c".into(),
        param_count: 1,
        param_names: vec!["s".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };

    let all_sums: Vec<_> = py_sums
        .into_iter()
        .chain(std::iter::once(c_summary))
        .collect();
    let global = merge_summaries(all_sums, None);

    // Verify they are stored under different FuncKeys
    let py_matches = global.lookup_same_lang(Lang::Python, "process_data");
    let c_matches = global.lookup_same_lang(Lang::C, "process_data");
    assert_eq!(py_matches.len(), 1, "Python version stored separately");
    assert_eq!(c_matches.len(), 1, "C version stored separately");

    // Python's source_caps should NOT bleed into C
    assert!(py_matches[0].1.source_caps != 0, "Python has source caps");
    assert_eq!(
        c_matches[0].1.source_caps, 0,
        "C should NOT get Python's source caps"
    );
}

// ── Scenario 8: Ruby passthrough in JS via interop ───────────────────────
#[test]
fn cross_lang_ruby_passthrough_in_js_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::FuncSummary;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Ruby,
        namespace: "helper.rb".into(),
        name: "transform".into(),
        arity: Some(1),
        ..Default::default()
    };
    global.insert(
        key.clone(),
        FuncSummary {
            name: "transform".into(),
            file_path: "helper.rb".into(),
            lang: "ruby".into(),
            param_count: 1,
            param_names: vec!["data".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![0],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let js_src = b"function main() {\n  let x = document.location();\n  let y = transform(x);\n  eval(y);\n}\n";
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(js_src, "javascript", js_lang);
    let local = &file_cfg.summaries;

    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::JavaScript,
            caller_namespace: "main.js".into(),
            caller_func: "main".into(),
            callee_symbol: "transform".into(),
            ordinal: 0,
        },
        to: key,
    }];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "taint should propagate through cross-lang passthrough via interop"
    );
}

// ── Scenario 9: PHP source → Go sink via interop ─────────────────────────
#[test]
fn cross_lang_php_source_to_go_sink_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::{FuncSummary, merge_summaries};

    let php_summary = FuncSummary {
        name: "read_input".into(),
        file_path: "input.php".into(),
        lang: "php".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: Cap::all().bits(),
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec!["file_get_contents".into()],
        ..Default::default()
    };

    let global = merge_summaries(vec![php_summary], None);

    let go_src = b"package main\n\nfunc main() {\n\tx := read_input()\n\texec.Command(x)\n}\n";
    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(go_src, "go", go_lang);
    let local = &file_cfg.summaries;

    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Go,
            caller_namespace: "main.go".into(),
            caller_func: "main".into(),
            callee_symbol: "read_input".into(),
            ordinal: 0,
        },
        to: FuncKey {
            lang: Lang::Php,
            namespace: "input.php".into(),
            name: "read_input".into(),
            arity: Some(0),
            ..Default::default()
        },
    }];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert_eq!(findings.len(), 1, "PHP source → Go sink via interop");
}

// ── Scenario 10: Wrong sanitizer caps still wrong across languages ───────
#[test]
fn cross_lang_wrong_sanitizer_still_flags_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::FuncSummary;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Python,
        namespace: "sanitizers.py".into(),
        name: "html_clean".into(),
        arity: Some(1),
        ..Default::default()
    };
    global.insert(
        key.clone(),
        FuncSummary {
            name: "html_clean".into(),
            file_path: "sanitizers.py".into(),
            lang: "python".into(),
            param_count: 1,
            param_names: vec!["text".into()],
            source_caps: 0,
            sanitizer_caps: Cap::HTML_ESCAPE.bits(),
            sink_caps: 0,
            propagating_params: vec![0],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    // JS: source → Python HTML sanitizer → shell sink
    let js_src = b"function main() {\n  let x = document.location();\n  let y = html_clean(x);\n  eval(y);\n}\n";
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(js_src, "javascript", js_lang);
    let local = &file_cfg.summaries;

    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::JavaScript,
            caller_namespace: "main.js".into(),
            caller_func: "main".into(),
            callee_symbol: "html_clean".into(),
            ordinal: 0,
        },
        to: key,
    }];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "wrong cross-language sanitizer should NOT neutralise"
    );
}

// ── Scenario 11: Summary lang field preserved (different FuncKeys) ───────
#[test]
fn cross_lang_summary_preserves_lang_metadata() {
    use crate::summary::merge_summaries;

    let py_summary = crate::summary::FuncSummary {
        name: "helper".into(),
        file_path: "lib.py".into(),
        lang: "python".into(),
        param_count: 0,
        param_names: vec![],
        source_caps: Cap::all().bits(),
        sanitizer_caps: 0,
        sink_caps: 0,
        propagating_params: vec![],
        propagates_taint: false,
        tainted_sink_params: vec![],
        callees: vec![],
        ..Default::default()
    };

    let js_summary = crate::summary::FuncSummary {
        name: "helper".into(),
        file_path: "lib.js".into(),
        lang: "javascript".into(),
        param_count: 1,
        param_names: vec!["x".into()],
        source_caps: 0,
        sanitizer_caps: 0,
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        propagating_params: vec![0],
        propagates_taint: false,
        tainted_sink_params: vec![0],
        callees: vec![],
        ..Default::default()
    };

    let global = merge_summaries(vec![py_summary, js_summary], None);

    // They are now separate entries, not merged
    let py_matches = global.lookup_same_lang(Lang::Python, "helper");
    let js_matches = global.lookup_same_lang(Lang::JavaScript, "helper");

    assert_eq!(py_matches.len(), 1, "Python helper stored separately");
    assert_eq!(js_matches.len(), 1, "JS helper stored separately");
    assert!(
        py_matches[0].1.source_caps != 0,
        "Python source caps preserved"
    );
    assert!(js_matches[0].1.sink_caps != 0, "JS sink caps preserved");
    assert!(
        js_matches[0].1.propagates_any(),
        "JS propagates_any preserved"
    );
}

// ── Scenario 12: Full pipeline Python lib + JS caller via interop ────────
#[test]
fn cross_lang_full_pipeline_python_lib_js_caller_via_interop() {
    use crate::interop::CallSiteKey;
    use crate::summary::merge_summaries;

    // Python library: defines dangerous_query() that reads from os.getenv
    let py_src = b"def dangerous_query():\n    x = os.getenv(\"SQL\")\n    return x\n";
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let py_sums = extract_lang_summaries(py_src, "python", py_lang, "db.py");

    // JavaScript library: defines run_query() that calls eval (a sink)
    let js_lib_src = b"function run_query(q) {\n  eval(q);\n}\n";
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let js_sums = extract_lang_summaries(js_lib_src, "javascript", js_lang, "db.js");

    let all_sums: Vec<_> = py_sums.into_iter().chain(js_sums).collect();
    let global = merge_summaries(all_sums, None);

    // Go caller: dangerous_query() → run_query()
    let go_src = b"package main\n\nfunc main() {\n\tq := dangerous_query()\n\trun_query(q)\n}\n";
    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(go_src, "go", go_lang);
    let local = &file_cfg.summaries;

    let edges = vec![
        InteropEdge {
            from: CallSiteKey {
                caller_lang: Lang::Go,
                caller_namespace: "main.go".into(),
                caller_func: "main".into(),
                callee_symbol: "dangerous_query".into(),
                ordinal: 0,
            },
            to: FuncKey {
                lang: Lang::Python,
                namespace: "db.py".into(),
                name: "dangerous_query".into(),
                arity: Some(0),
                ..Default::default()
            },
        },
        InteropEdge {
            from: CallSiteKey {
                caller_lang: Lang::Go,
                caller_namespace: "main.go".into(),
                caller_func: "main".into(),
                callee_symbol: "run_query".into(),
                ordinal: 0,
            },
            to: FuncKey {
                lang: Lang::JavaScript,
                namespace: "db.js".into(),
                name: "run_query".into(),
                arity: Some(1),
                ..Default::default()
            },
        },
    ];
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Go,
        "main.go",
        &edges,
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "Python source → JS sink via Go caller via interop"
    );
}

// ── New tests: ambiguous resolution, interop edge specificity ────────────

#[test]
fn ambiguous_resolution_returns_none() {
    use crate::summary::FuncSummary;

    // Two same-lang functions, same name + arity, different namespaces
    let mut global = GlobalSummaries::new();
    for ns in &["a.rs", "b.rs"] {
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: (*ns).to_string(),
            name: "helper".into(),
            arity: Some(0),
            ..Default::default()
        };
        global.insert(
            key,
            FuncSummary {
                name: "helper".into(),
                file_path: (*ns).to_string(),
                lang: "rust".into(),
                param_count: 0,
                param_names: vec![],
                source_caps: Cap::all().bits(),
                sanitizer_caps: 0,
                sink_caps: 0,
                propagating_params: vec![],
                propagates_taint: false,
                tainted_sink_params: vec![],
                callees: vec![],
                ..Default::default()
            },
        );
    }

    // Caller from c.rs calls helper(), ambiguous (two matches, neither is caller's namespace)
    let src = br#"
        use std::process::Command;
        fn main() {
            let x = helper();
            Command::new("sh").arg(x).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "c.rs",
        &[],
        None,
    );

    // Ambiguous resolution returns None → no source → no finding
    assert!(
        findings.is_empty(),
        "ambiguous resolution (two namespaces) should return None → no finding"
    );
}

#[test]
fn exact_namespace_match_wins() {
    use crate::summary::FuncSummary;

    // Same name in two namespaces, but one matches caller's namespace
    let mut global = GlobalSummaries::new();
    // test.rs version: source
    let key_local = FuncKey {
        lang: Lang::Rust,
        namespace: "test.rs".into(),
        name: "helper".into(),
        arity: Some(0),
        ..Default::default()
    };
    global.insert(
        key_local,
        FuncSummary {
            name: "helper".into(),
            file_path: "test.rs".into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );
    // other.rs version: no caps
    let key_other = FuncKey {
        lang: Lang::Rust,
        namespace: "other.rs".into(),
        name: "helper".into(),
        arity: Some(0),
        ..Default::default()
    };
    global.insert(
        key_other,
        FuncSummary {
            name: "helper".into(),
            file_path: "other.rs".into(),
            lang: "rust".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let src = br#"
        use std::process::Command;
        fn main() {
            let x = helper();
            Command::new("sh").arg(x).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    // caller_namespace = "test.rs" matches the source version
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );

    assert_eq!(
        findings.len(),
        1,
        "exact namespace match should resolve to the source version"
    );
}

#[test]
fn interop_edge_wrong_caller_lang_no_match() {
    use crate::interop::CallSiteKey;
    use crate::summary::FuncSummary;

    let mut global = GlobalSummaries::new();
    let key = FuncKey {
        lang: Lang::Python,
        namespace: "lib.py".into(),
        name: "get_data".into(),
        arity: Some(0),
        ..Default::default()
    };
    global.insert(
        key.clone(),
        FuncSummary {
            name: "get_data".into(),
            file_path: "lib.py".into(),
            lang: "python".into(),
            param_count: 0,
            param_names: vec![],
            source_caps: Cap::all().bits(),
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    // Edge specifies Python caller, but we're calling from JavaScript
    let edges = vec![InteropEdge {
        from: CallSiteKey {
            caller_lang: Lang::Python, // wrong!
            caller_namespace: "main.js".into(),
            caller_func: "main".into(),
            callee_symbol: "get_data".into(),
            ordinal: 0,
        },
        to: key,
    }];

    let js_src = b"function main() {\n  let x = get_data();\n  eval(x);\n}\n";
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(js_src, "javascript", js_lang);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::JavaScript,
        "main.js",
        &edges,
        None,
    );

    assert!(
        findings.is_empty(),
        "Edge for wrong caller_lang should not match"
    );
}

#[test]
fn return_call_recognized_as_source() {
    use crate::cfg::{build_cfg, export_summaries};
    use tree_sitter::Language;

    // fn foo() -> String { env::var("X").unwrap() }
    // The return statement contains a call to env::var which should be
    // recognized as a source after the return-call fix.
    let src = br#"
        use std::env;
        fn foo() -> String {
            env::var("X").unwrap()
        }
    "#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let exported = export_summaries(summaries, "test.rs", "rust");

    let foo = exported
        .iter()
        .find(|s| s.name == "foo")
        .expect("foo should exist");
    assert!(
        foo.source_caps != 0,
        "foo() should have source_caps set because env::var is called inside return"
    );
}

// ─── Path-sensitive analysis tests ───────────────────────────────────────────

#[test]
fn validate_and_early_return() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Validate before use: if validation fails, early return.
    // The sink after the guard is on the "validated" path.
    //
    // The CFG creates a synthetic pass-through node for the false path
    // with an explicit False edge from the If node.  BFS reaches the
    // sink via: cond → (False) → pass-through → (Seq) → sink.
    // The predicate on the False edge records that `!validate(&x)` was
    // false (i.e. validation passed), so the sink is path-guarded.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if !validate(&x) { return; }
            Command::new("sh").arg(x).status().unwrap();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Validated findings are now suppressed, validate() guard means the
    // sink is on the safe path, so no finding should be emitted.
    assert_eq!(findings.len(), 0, "validated finding should be suppressed");
}

#[test]
fn validate_in_if_else_path_validated() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // If/else where the True branch (validation passed) contains the sink.
    // This IS detectable because the If node has genuine True/False branches.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if validate(&x) {
                Command::new("sh").arg(&x).status().unwrap();
            } else {
                println!("invalid input");
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Validated findings are now suppressed, sink is in the validated
    // branch, so no finding should be emitted.
    assert_eq!(findings.len(), 0, "validated finding should be suppressed");
}

#[test]
fn sink_on_failed_validation_branch() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Sink is in the failed-validation branch (negated condition, false edge).
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if !validate(&x) {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert_eq!(findings.len(), 1, "should detect taint flow to sink");
    assert!(
        !findings[0].path_validated,
        "finding should NOT be path_validated (sink is in failed-validation branch)"
    );
}

#[test]
fn contradictory_null_check_pruned() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Inner branch is infeasible: if x.is_none() then x cannot also be is_none().
    // After early return on is_none(), the fall-through path has polarity=false
    // for NullCheck. The inner `if x.is_none()` True branch has polarity=true ,
    // contradiction.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").ok();
            if x.is_none() { return; }
            if x.is_none() {
                Command::new("sh").arg("dangerous").status().unwrap();
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // The inner branch is infeasible, and the arg "dangerous" is a string
    // literal (not tainted), so there should be no findings.
    assert!(
        findings.is_empty(),
        "inner branch is infeasible — should produce no findings (got {})",
        findings.len()
    );
}

#[test]
fn sanitize_one_branch_no_regression() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Same as existing taint_through_if_else: sanitized in one branch, not in the other.
    // Verify the finding count stays at 1 (no regression from path sensitivity).
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let safe = html_escape::encode_safe(&x);

            if x.len() > 5 {
                Command::new("sh").arg(&x).status().unwrap();   // UNSAFE
            } else {
                Command::new("sh").arg(&safe).status().unwrap(); // SAFE
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Both branches produce findings: the true branch uses unsanitized `x`,
    // the else branch uses `safe` (HTML_ESCAPE sanitizer vs SHELL_ESCAPE sink).
    // Previously only 1 finding because else_clause was silently dropped from CFG.
    assert_eq!(
        findings.len(),
        2,
        "two findings expected (both branches reach sink with wrong/no sanitizer)"
    );
}

#[test]
fn path_state_budget_graceful() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Deeply nested ifs with a sink at the innermost level.
    // PathState should truncate gracefully after MAX_PATH_PREDICATES.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if x.len() > 1 {
            if x.len() > 2 {
            if x.len() > 3 {
            if x.len() > 4 {
            if x.len() > 5 {
            if x.len() > 6 {
            if x.len() > 7 {
            if x.len() > 8 {
            if x.len() > 9 {
                Command::new("sh").arg(&x).status().unwrap();
            }
            }
            }
            }
            }
            }
            }
            }
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Should still detect the flow, truncation shouldn't cause false negatives.
    assert_eq!(
        findings.len(),
        1,
        "should detect taint flow even with truncated PathState"
    );
}

#[test]
fn unknown_predicate_not_pruned() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Comparison predicates are NOT in the contradiction whitelist, so even
    // seemingly contradictory comparisons should not be pruned.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if x.len() > 5 { return; }
            if x.len() > 5 {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Comparison is not in the whitelist, the path should NOT be pruned.
    assert_eq!(
        findings.len(),
        1,
        "Comparison predicate should not cause contradiction pruning"
    );
}

#[test]
fn duplicate_null_guard_prunes_unreachable_sink() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // After `if y.is_none() { return; }`, the false arm proves
    // `y.is_none() == false` on the only surviving path.  A second
    // `if y.is_none() { sink }` then adds `y.is_none() == true` on the
    // body's True arm, a per-symbol PredicateSummary contradiction
    // (known_true & known_false on bit NullCheck).  The body is
    // structurally unreachable; the sink must not fire.
    //
    // Regression guard: this expected behaviour only emerges once the
    // OR-chain / direct-return rejection arm correctly terminates its
    // SSA block (see
    // `src/ssa/lower.rs::tests::or_chain_rejection_block_terminates_with_return`).
    // Pre-fix the rejection arm Goto'd into the merged tail and its
    // contradicting predicate joined with the false-arm to empty,
    // letting flow through.  Pruning here is the precise outcome.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            let y = env::var("OTHER").ok();
            if y.is_none() { return; }
            if y.is_none() {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert!(
        findings.is_empty(),
        "duplicate null-guard with intervening early-return must prune \
         the second if's body as unreachable; got findings = {:?}",
        findings
    );
}

#[test]
fn c_curl_handle_ssrf() {
    let src = b"#include <stdlib.h>\n#include <curl/curl.h>\n\
        void fetch() {\n  char *url = getenv(\"TARGET\");\n  \
        CURL *curl = curl_easy_init();\n  \
        curl_easy_setopt(curl, CURLOPT_URL, url);\n  \
        curl_easy_perform(curl);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::C, "test.c", &[], None);
    assert!(
        !findings.is_empty(),
        "C: getenv -> curl_easy_setopt -> curl_easy_perform should produce SSRF finding"
    );
}

#[test]
fn c_curl_handle_no_taint() {
    let src = b"#include <curl/curl.h>\n\
        void fetch() {\n  CURL *curl = curl_easy_init();\n  \
        curl_easy_setopt(curl, CURLOPT_URL, \"https://example.com\");\n  \
        curl_easy_perform(curl);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::C, "test.c", &[], None);
    assert!(
        findings.is_empty(),
        "C: hardcoded URL in curl_easy_setopt should not produce finding"
    );
}

// ── Per-argument propagation tests ───────────────────────────────────────

#[test]
fn per_arg_propagation_tainted_param_propagates() {
    use crate::summary::FuncSummary;

    // transform(a, b) only propagates param 0. Tainted value at param 0 → finding.
    let mut global = GlobalSummaries::new();
    global.insert(
        FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "transform".into(),
            arity: Some(2),
            ..Default::default()
        },
        FuncSummary {
            name: "transform".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 2,
            param_names: vec!["a".into(), "b".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![0],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let tainted = env::var("X").unwrap();
            let safe = String::from("ok");
            let y = transform(&tainted, &safe);
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "tainted arg at propagating position should produce finding"
    );
}

#[test]
fn per_arg_propagation_safe_at_propagating_position() {
    use crate::summary::FuncSummary;

    // transform(a, b) only propagates param 0. Tainted value at param 1 (non-propagating) → no finding.
    let mut global = GlobalSummaries::new();
    global.insert(
        FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "transform".into(),
            arity: Some(2),
            ..Default::default()
        },
        FuncSummary {
            name: "transform".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 2,
            param_names: vec!["a".into(), "b".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![0],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let safe = String::from("ok");
            let tainted = env::var("X").unwrap();
            let y = transform(&safe, &tainted);
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        0,
        "tainted arg at non-propagating position should not produce finding"
    );
}

#[test]
fn per_arg_propagation_legacy_backward_compat() {
    use crate::summary::FuncSummary;

    // legacy_pass has propagates_taint=true but empty propagating_params (legacy).
    // Should fall back to all-uses propagation.
    let mut global = GlobalSummaries::new();
    global.insert(
        FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "legacy_pass".into(),
            arity: Some(2),
            ..Default::default()
        },
        FuncSummary {
            name: "legacy_pass".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 2,
            param_names: vec!["a".into(), "b".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![],
            propagates_taint: true,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let safe = String::from("ok");
            let tainted = env::var("X").unwrap();
            let y = legacy_pass(&safe, &tainted);
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "legacy propagates_taint=true with empty propagating_params should propagate all args"
    );
}

#[test]
fn per_arg_propagation_both_params_propagate() {
    use crate::summary::FuncSummary;

    // concat(a, b) propagates both params 0 and 1. Tainted at param 1 → finding.
    let mut global = GlobalSummaries::new();
    global.insert(
        FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "concat".into(),
            arity: Some(2),
            ..Default::default()
        },
        FuncSummary {
            name: "concat".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 2,
            param_names: vec!["a".into(), "b".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![0, 1],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let safe = String::from("ok");
            let tainted = env::var("X").unwrap();
            let y = concat(&safe, &tainted);
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "both params propagate — tainted arg at position 1 should produce finding"
    );
}

#[test]
fn per_arg_propagation_literal_first_arg() {
    use crate::summary::FuncSummary;

    // transform("literal", tainted) with only param 1 propagating → finding.
    // The literal arg at position 0 has no identifiers, but positional mapping is still correct.
    let mut global = GlobalSummaries::new();
    global.insert(
        FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "transform".into(),
            arity: Some(2),
            ..Default::default()
        },
        FuncSummary {
            name: "transform".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 2,
            param_names: vec!["a".into(), "b".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![1],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let tainted = env::var("X").unwrap();
            let y = transform("prefix", &tainted);
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "literal first arg should not shift positional mapping — tainted at param 1 propagates"
    );
}

#[test]
fn per_arg_propagation_nested_expr_arg() {
    use crate::summary::FuncSummary;

    // transform(inner(x), tainted) with only param 1 propagating → finding.
    // Nested call in arg 0 doesn't affect arg 1 position.
    let mut global = GlobalSummaries::new();
    global.insert(
        FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: "transform".into(),
            arity: Some(2),
            ..Default::default()
        },
        FuncSummary {
            name: "transform".into(),
            file_path: "lib.rs".into(),
            lang: "rust".into(),
            param_count: 2,
            param_names: vec!["a".into(), "b".into()],
            source_caps: 0,
            sanitizer_caps: 0,
            sink_caps: 0,
            propagating_params: vec![1],
            propagates_taint: false,
            tainted_sink_params: vec![],
            callees: vec![],
            ..Default::default()
        },
    );

    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = String::from("safe");
            let tainted = env::var("X").unwrap();
            let y = transform(inner(&x), &tainted);
            Command::new("sh").arg(y).status().unwrap();
        }
    "#;

    let file_cfg = parse_rust(src);
    let local = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        local,
        Some(&global),
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert_eq!(
        findings.len(),
        1,
        "nested call in arg 0 should not affect arg 1 positional mapping"
    );
}

#[test]
fn js_cross_function_global_taint() {
    let src = b"let x = \"safe\";\nfunction leak() { x = document.location(); }\nfunction use_it() { eval(x); }\nleak();\nuse_it();\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "cross-function global taint (leak -> use_it) should be detected"
    );
}

#[test]
fn js_two_level_converges_no_mutation() {
    let src = b"let x = document.location();\nfunction f() { eval(x); }\nf();\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "top-level source to function sink should be detected"
    );
}

// ── Catch-parameter provenance tests ──────────────────────────────────────

#[test]
fn catch_param_to_sink_has_caught_exception_source_kind() {
    // Catch param flows to a sink, the finding source_kind must be
    // CaughtException, not Unknown.
    let src = b"
        const { exec } = require('child_process');
        try {
            doSomething();
        } catch (err) {
            exec(err.command);
        }
    ";

    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    assert!(
        !findings.is_empty(),
        "catch param to sink should produce a finding"
    );
    for f in &findings {
        assert_eq!(
            f.source_kind,
            crate::labels::SourceKind::CaughtException,
            "catch-param origin should have CaughtException source kind, not {:?}",
            f.source_kind
        );
    }
}

#[test]
fn catch_param_source_node_has_callee() {
    // The source CFG node for a catch-param finding must have a non-None callee
    // so the report renders a meaningful descriptor instead of "(unknown)".
    let src = b"
        try {
            riskyOperation();
        } catch (e) {
            fetch(e.message);
        }
    ";

    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let the_cfg = &file_cfg.first_body().graph;
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    assert!(
        !findings.is_empty(),
        "catch param to fetch should produce a finding"
    );
    for f in &findings {
        let source_info = &the_cfg[f.source];
        assert!(
            source_info.call.callee.is_some(),
            "catch-param source node must have a callee for reporting, got None"
        );
        let callee = source_info.call.callee.as_deref().unwrap();
        assert!(
            callee.contains("catch"),
            "catch-param callee should contain 'catch', got {:?}",
            callee
        );
    }
}

#[test]
fn taint_origin_preserved_through_assignment() {
    // Source origin should be preserved when taint flows through variable
    // assignments, not replaced or lost.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("CMD").unwrap();
            let y = x;
            let z = y;
            Command::new("sh").arg(z).status().unwrap();
        }"#;

    let file_cfg = parse_rust(src);
    let the_cfg = &file_cfg.first_body().graph;
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert_eq!(findings.len(), 1);
    let f = &findings[0];
    // The source should point to the env::var call, not the intermediate assignments
    let source_info = &the_cfg[f.source];
    assert!(
        source_info.call.callee.is_some(),
        "source node should have callee after propagation through assignments"
    );
    let callee = source_info.call.callee.as_deref().unwrap();
    assert!(
        callee.contains("env") || callee.contains("var"),
        "source callee should reference env::var, got {:?}",
        callee
    );
}

#[test]
fn taint_origin_preserved_through_branch_merge() {
    // When taint flows through both branches of an if-else and merges,
    // the origin should still point to the original source.
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("CMD").unwrap();
            let y;
            if true {
                y = x;
            } else {
                y = x;
            }
            Command::new("sh").arg(y).status().unwrap();
        }"#;

    let file_cfg = parse_rust(src);
    let the_cfg = &file_cfg.first_body().graph;
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert!(!findings.is_empty());
    for f in &findings {
        let source_info = &the_cfg[f.source];
        assert!(
            source_info.call.callee.is_some(),
            "source callee must not be None after branch merge"
        );
    }
}

// ── SSA / Legacy Output-Equivalence Tests ─────────────────────────────────

/// Run both legacy and SSA taint analysis on the same Rust source and assert
/// that they produce the same findings (by source/sink/source_kind triple).
/// Assert that `analyse_file` (high-level) matches direct SSA pipeline invocation.
fn assert_ssa_integration(src: &[u8]) {
    use crate::cfg::build_cfg;
    use crate::state::symbol::SymbolInterner;
    use std::collections::HashSet;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src, None).unwrap();
    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;

    // High-level path (per-body analysis)
    let high_level = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Direct SSA path, use the first function body (fn main), not top-level
    let body = if file_cfg.bodies.len() > 1 {
        &file_cfg.bodies[1]
    } else {
        file_cfg.first_body()
    };
    let the_cfg = &body.graph;
    let entry = body.entry;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let ssa =
        crate::ssa::lower_to_ssa(the_cfg, entry, None, true).expect("SSA lowering should succeed");
    let ssa_xfer = ssa_transfer::SsaTaintTransfer {
        lang: Lang::Rust,
        namespace: "test.rs",
        interner: &interner,
        local_summaries: summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };
    let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
    let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
    ssa_findings.sort_by_key(|f| (f.sink.index(), f.source.index(), !f.path_validated));
    ssa_findings.dedup_by_key(|f| (f.sink, f.source));

    // Compare by (source, sink)
    let high_set: HashSet<_> = high_level
        .iter()
        .map(|f| (f.source.index(), f.sink.index()))
        .collect();
    let ssa_set: HashSet<_> = ssa_findings
        .iter()
        .map(|f| (f.source.index(), f.sink.index()))
        .collect();

    assert_eq!(
        high_set, ssa_set,
        "analyse_file vs direct SSA mismatch.\nHigh-level: {high_set:?}\nDirect SSA: {ssa_set:?}"
    );
}

#[test]
fn equiv_env_to_arg() {
    assert_ssa_integration(
        br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS_ARG").unwrap();
            Command::new("sh").arg(x).status().unwrap();
        }"#,
    );
}

#[test]
fn equiv_taint_through_if_else() {
    assert_ssa_integration(
        br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let safe = html_escape::encode_safe(&x);
            if x.len() > 5 {
                Command::new("sh").arg(&x).status().unwrap();
            } else {
                Command::new("sh").arg(&safe).status().unwrap();
            }
        }"#,
    );
}

#[test]
fn equiv_taint_through_while_loop() {
    assert_ssa_integration(
        br#"
        use std::{env, process::Command};
        fn main() {
            let mut x = env::var("DANGEROUS").unwrap();
            while x.len() < 100 {
                x.push_str("a");
            }
            Command::new("sh").arg(x).status().unwrap();
        }"#,
    );
}

#[test]
fn equiv_killed_by_matching_sanitizer() {
    assert_ssa_integration(
        br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let clean = shell_escape::unix::escape(&x);
            Command::new("sh").arg(clean).status().unwrap();
        }"#,
    );
}

#[test]
fn equiv_wrong_sanitizer_preserves_taint() {
    assert_ssa_integration(
        br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            let escaped = html_escape::encode_safe(&x);
            Command::new("sh").arg(escaped).status().unwrap();
        }"#,
    );
}

#[test]
fn integ_php_echo_simple_var() {
    use crate::state::symbol::SymbolInterner;
    let src = b"<?php\n$x = $_POST['data'];\necho $x;\n";
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let file_cfg = parse_lang(src, "php", lang);
    let the_cfg = &file_cfg.first_body().graph;
    let entry = file_cfg.first_body().entry;
    let summaries = &file_cfg.summaries;

    let high_level = analyse_file(&file_cfg, summaries, None, Lang::Php, "test.php", &[], None);

    let interner = SymbolInterner::from_cfg(the_cfg);
    let ssa = crate::ssa::lower_to_ssa(the_cfg, entry, None, true).expect("SSA lowering");
    let ssa_xfer = ssa_transfer::SsaTaintTransfer {
        lang: Lang::Php,
        namespace: "test.php",
        interner: &interner,
        local_summaries: summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };
    let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
    let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
    ssa_findings.sort_by_key(|f| (f.sink.index(), f.source.index(), !f.path_validated));
    ssa_findings.dedup_by_key(|f| (f.sink, f.source));

    let high_set: std::collections::HashSet<_> = high_level
        .iter()
        .map(|f| (f.source.index(), f.sink.index()))
        .collect();
    let ssa_set: std::collections::HashSet<_> = ssa_findings
        .iter()
        .map(|f| (f.source.index(), f.sink.index()))
        .collect();
    assert_eq!(
        high_set, ssa_set,
        "PHP echo analyse_file vs direct SSA mismatch"
    );
}

#[test]
fn integ_c_curl_handle_ssrf() {
    use crate::state::symbol::SymbolInterner;
    let src = b"#include <stdlib.h>\n#include <curl/curl.h>\n\
        void fetch() {\n  char *url = getenv(\"TARGET\");\n  \
        CURL *curl = curl_easy_init();\n  \
        curl_easy_setopt(curl, CURLOPT_URL, url);\n  \
        curl_easy_perform(curl);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", lang);
    let the_cfg = &file_cfg.first_body().graph;
    let entry = file_cfg.first_body().entry;
    let summaries = &file_cfg.summaries;

    let high_level = analyse_file(&file_cfg, summaries, None, Lang::C, "test.c", &[], None);

    let interner = SymbolInterner::from_cfg(the_cfg);
    let ssa = crate::ssa::lower_to_ssa(the_cfg, entry, None, true).expect("SSA lowering");
    let ssa_xfer = ssa_transfer::SsaTaintTransfer {
        lang: Lang::C,
        namespace: "test.c",
        interner: &interner,
        local_summaries: summaries,
        global_summaries: None,
        interop_edges: &[],
        owner_body_id: crate::cfg::BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: None,
        receiver_seed: None,
        const_values: None,
        type_facts: None,
        ssa_summaries: None,
        extra_labels: None,
        base_aliases: None,
        callee_bodies: None,
        inline_cache: None,
        context_depth: 0,
        callback_bindings: None,
        points_to: None,
        dynamic_pts: None,
        import_bindings: None,
        promisify_aliases: None,
        module_aliases: None,
        static_map: None,
        auto_seed_handler_params: false,
        cross_file_bodies: None,
        pointer_facts: None,
    };
    let events = ssa_transfer::run_ssa_taint(&ssa, the_cfg, &ssa_xfer);
    let mut ssa_findings = ssa_transfer::ssa_events_to_findings(&events, &ssa, the_cfg);
    ssa_findings.sort_by_key(|f| (f.sink.index(), f.source.index(), !f.path_validated));
    ssa_findings.dedup_by_key(|f| (f.sink, f.source));

    let high_set: std::collections::HashSet<_> = high_level
        .iter()
        .map(|f| (f.source.index(), f.sink.index()))
        .collect();
    let ssa_set: std::collections::HashSet<_> = ssa_findings
        .iter()
        .map(|f| (f.source.index(), f.sink.index()))
        .collect();
    assert_eq!(
        high_set, ssa_set,
        "curl analyse_file vs direct SSA mismatch"
    );
}

#[test]
fn equiv_validate_and_early_return() {
    assert_ssa_integration(
        br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if !validate(&x) { return; }
            Command::new("sh").arg(x).status().unwrap();
        }"#,
    );
}

// ── JS/TS SSA Two-Level Solve Tests ─────────────────────────────────────

#[test]
fn ssa_js_two_level_global_to_function() {
    // Top-level source → function sink via global seed
    let src = b"let x = document.location();\nfunction f() { eval(x); }\nf();\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;

    // SSA is now the default path for JS/TS
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "SSA JS two-level: top-level source should flow to function sink"
    );
}

#[test]
fn ssa_js_two_level_function_isolation() {
    // Variable x in func_a should not leak to func_b
    let src =
        b"function a() { let x = document.location(); }\nfunction b() { eval(x); }\na();\nb();\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;

    // SSA is now the default path for JS/TS
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    // x is local to a(), so it shouldn't flow to b()'s eval
    // Note: this depends on x being properly scoped; if the CFG treats x as global, it may still flow.
    // The test verifies that the SSA path doesn't crash and produces reasonable results.
    let _ = findings; // Assert no panic
}

#[test]
fn ssa_js_two_level_convergence() {
    // Function writes back to global, 2nd round picks it up
    let src = b"let x = 'safe';\nfunction leak() { x = document.location(); }\nfunction use_it() { eval(x); }\nleak();\nuse_it();\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;

    // SSA is now the default path for JS/TS
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "SSA JS two-level: function mutation of global should converge and detect taint"
    );
}

/// Verify SSA JS two-level correctly detects taint through chained method calls
/// (e.g. fetch(url).then(fn).then(fn) in Express callbacks).
#[test]
fn ssa_js_chained_call_taint() {
    let src = b"var express = require('express');\nvar app = express();\n\napp.get('/proxy', function(req, res) {\n    var url = req.query.url;\n    fetch(url).then(function(response) {\n        return response.text();\n    }).then(function(body) {\n        res.send(body);\n    });\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;

    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "SSA should detect taint through fetch(url).then().then() chain"
    );
}

// ── Field access taint tracking tests ────────────────────────────────────

#[test]
fn ssa_field_write_to_sink() {
    // obj.data = source; sink(obj.data) → finding
    let src = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n    var obj = {};\n    obj.data = req.query.input;\n    res.send(obj.data);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "SSA: field write from source should propagate taint to field read at sink"
    );
}

#[test]
fn ssa_field_overwrite_kills_taint() {
    // obj.data = source; obj.data = "safe"; sink(obj.data) → no finding
    let src = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n    var obj = {};\n    obj.data = req.query.input;\n    obj.data = \"safe\";\n    res.send(obj.data);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "SSA: constant overwrite of field should kill taint"
    );
}

#[test]
fn ssa_field_different_bases_no_alias() {
    // a.tainted = source; sink(b.safe) → no finding (different base objects, different fields)
    let src = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n    var a = {};\n    var b = {};\n    a.tainted = req.query.input;\n    res.send(b.safe);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "SSA: different base objects should not alias — a.tainted taint must not reach b.safe"
    );
}

#[test]
fn ssa_python_attribute_taint() {
    // config.cmd = os.getenv("CMD"); os.system(config.cmd) → finding
    let src = b"import os\n\nclass Config:\n    pass\n\nconfig = Config()\nconfig.cmd = os.getenv(\"CMD\")\nos.system(config.cmd)\n";
    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let file_cfg = parse_lang(src, "python", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "SSA: Python attribute write from source should propagate taint to attribute read at sink"
    );
}

// ── Field-aware taint suppression tests ──────────────────────────────────

#[test]
fn ssa_field_safe_overwrite_no_fp() {
    // obj = tainted source; obj.safe = "constant"; sink(obj.safe) → NO finding
    let src = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n    var obj = req.query;\n    obj.safe = \"constant\";\n    res.send(obj.safe);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "field-aware suppression: reading safe field of tainted base should not produce a finding, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_field_tainted_field_still_fires() {
    // obj.data = source; sink(obj.data) → finding (dotted path IS tainted, no suppression)
    let src = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n    var obj = {};\n    obj.data = req.query.input;\n    res.send(obj.data);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "field-aware suppression: tainted dotted-path field read should still produce a finding"
    );
}

#[test]
fn ssa_field_base_sink_no_suppression() {
    // obj.data = source; sink(obj) → finding (no dotted path at sink, no suppression)
    let src = b"var express = require('express');\nvar app = express();\napp.get('/f', function(req, res) {\n    var obj = {};\n    obj.data = req.query.input;\n    res.send(obj);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "field-aware suppression: tainted base passed directly to sink should still fire"
    );
}

// ── SSA Function Summary tests ───────────────────────────────────────────

#[test]
fn ssa_summary_identity_propagation() {
    // Function that returns its param unchanged → Identity transform
    use crate::state::symbol::SymbolInterner;
    use crate::summary::ssa_summary::TaintTransform;

    let src = br#"
        fn passthrough(x: String) -> String {
            x
        }"#;
    let file_cfg = parse_lang(
        src,
        "rust",
        tree_sitter::Language::from(tree_sitter_rust::LANGUAGE),
    );
    let the_cfg = &file_cfg.first_body().graph;
    let _entry = file_cfg.first_body().entry;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);
    assert!(
        !func_entries.is_empty(),
        "should find at least one function entry"
    );

    for (func_name, func_entry) in &func_entries {
        let func_ssa = crate::ssa::lower_to_ssa(the_cfg, *func_entry, Some(func_name), false);
        if let Ok(ssa) = func_ssa {
            let param_count = ssa
                .blocks
                .iter()
                .flat_map(|b| b.phis.iter().chain(b.body.iter()))
                .filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
                .count();
            if param_count == 0 {
                continue;
            }

            let summary = ssa_transfer::extract_ssa_func_summary(
                &ssa,
                the_cfg,
                summaries,
                None,
                Lang::Rust,
                "test.rs",
                &interner,
                param_count,
                None,
                None,
                None,
                None,
            );
            assert!(
                !summary.param_to_return.is_empty(),
                "passthrough function should have param_to_return entries"
            );
            // Check the transform is Identity (all caps survive)
            for (_, transform) in &summary.param_to_return {
                assert!(
                    matches!(transform, TaintTransform::Identity),
                    "passthrough should produce Identity transform, got {:?}",
                    transform
                );
            }
        }
    }
}

#[test]
fn ssa_summary_sanitizer_strips_bits() {
    // Function with internal sanitizer → StripBits transform
    use crate::state::symbol::SymbolInterner;
    use crate::summary::ssa_summary::TaintTransform;

    let src = br#"
        fn sanitize_input(x: String) -> String {
            html_escape::encode_safe(&x)
        }"#;
    let file_cfg = parse_lang(
        src,
        "rust",
        tree_sitter::Language::from(tree_sitter_rust::LANGUAGE),
    );
    let the_cfg = &file_cfg.first_body().graph;
    let _entry = file_cfg.first_body().entry;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);

    for (func_name, func_entry) in &func_entries {
        let func_ssa = crate::ssa::lower_to_ssa(the_cfg, *func_entry, Some(func_name), false);
        if let Ok(ssa) = func_ssa {
            let param_count = ssa
                .blocks
                .iter()
                .flat_map(|b| b.phis.iter().chain(b.body.iter()))
                .filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
                .count();
            if param_count == 0 {
                continue;
            }

            let summary = ssa_transfer::extract_ssa_func_summary(
                &ssa,
                the_cfg,
                summaries,
                None,
                Lang::Rust,
                "test.rs",
                &interner,
                param_count,
                None,
                None,
                None,
                None,
            );
            // Sanitizer should strip some bits
            for (_, transform) in &summary.param_to_return {
                assert!(
                    matches!(transform, TaintTransform::StripBits(_)),
                    "sanitizer wrapper should produce StripBits transform, got {:?}",
                    transform
                );
            }
        }
    }
}

#[test]
fn ssa_summary_source_adds_bits() {
    // Function that reads env → source_caps should be non-empty
    use crate::state::symbol::SymbolInterner;

    let src = br#"
        use std::env;
        fn read_config() -> String {
            env::var("CONFIG").unwrap()
        }"#;
    let file_cfg = parse_lang(
        src,
        "rust",
        tree_sitter::Language::from(tree_sitter_rust::LANGUAGE),
    );
    let the_cfg = &file_cfg.first_body().graph;
    let _entry = file_cfg.first_body().entry;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);

    for (func_name, func_entry) in &func_entries {
        let func_ssa = crate::ssa::lower_to_ssa(the_cfg, *func_entry, Some(func_name), false);
        if let Ok(ssa) = func_ssa {
            let param_count = ssa
                .blocks
                .iter()
                .flat_map(|b| b.phis.iter().chain(b.body.iter()))
                .filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
                .count();

            let summary = ssa_transfer::extract_ssa_func_summary(
                &ssa,
                the_cfg,
                summaries,
                None,
                Lang::Rust,
                "test.rs",
                &interner,
                param_count,
                None,
                None,
                None,
                None,
            );
            assert!(
                !summary.source_caps.is_empty(),
                "env-reading function should have non-empty source_caps, got {:?}",
                summary.source_caps
            );
        }
    }
}

#[test]
fn ssa_summary_param_to_sink() {
    // Function that passes param to a dangerous call → param_to_sink
    use crate::state::symbol::SymbolInterner;

    let src = br#"
        use std::process::Command;
        fn run_cmd(cmd: String) {
            Command::new("sh").arg(cmd).status().unwrap();
        }"#;
    let file_cfg = parse_lang(
        src,
        "rust",
        tree_sitter::Language::from(tree_sitter_rust::LANGUAGE),
    );
    let the_cfg = &file_cfg.first_body().graph;
    let _entry = file_cfg.first_body().entry;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let func_entries = super::find_function_entries(the_cfg);

    for (func_name, func_entry) in &func_entries {
        let func_ssa = crate::ssa::lower_to_ssa(the_cfg, *func_entry, Some(func_name), false);
        if let Ok(ssa) = func_ssa {
            let param_count = ssa
                .blocks
                .iter()
                .flat_map(|b| b.phis.iter().chain(b.body.iter()))
                .filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
                .count();
            if param_count == 0 {
                continue;
            }

            let summary = ssa_transfer::extract_ssa_func_summary(
                &ssa,
                the_cfg,
                summaries,
                None,
                Lang::Rust,
                "test.rs",
                &interner,
                param_count,
                None,
                None,
                None,
                None,
            );
            assert!(
                !summary.param_to_sink.is_empty(),
                "function passing param to Command sink should have param_to_sink entries"
            );
        }
    }
}

#[test]
fn ssa_cross_function_taint_with_sanitizer_wrapper() {
    // Cross-function: caller passes tainted data through sanitizer wrapper
    // The SSA summary should capture the sanitizer's StripBits, reducing taint at call site
    let src = b"var express = require('express');\nvar app = express();\n\nfunction cleanHtml(input) {\n    return DOMPurify.sanitize(input);\n}\n\napp.get('/safe', function(req, res) {\n    var name = req.query.name;\n    var safe = cleanHtml(name);\n    res.send(safe);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let the_cfg = &file_cfg.first_body().graph;
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    // With SSA summary, cleanHtml should be recognized as stripping HTML_ESCAPE bits,
    // so res.send(safe) should not fire for XSS (HTML_ESCAPE stripped).
    // The finding may still exist for other cap bits, but the XSS-specific ones should be gone.
    // This test validates that the SSA summary integration is working.
    // Note: whether this fully suppresses depends on the specific cap bit overlap.
    // At minimum, the summary extraction should produce a non-trivial result.
    drop(findings);

    // Verify that summary extraction works for this code
    use crate::state::symbol::SymbolInterner;
    let interner = SymbolInterner::from_cfg(the_cfg);
    let ssa_summaries = super::extract_intra_file_ssa_summaries(
        the_cfg,
        &interner,
        Lang::JavaScript,
        "test.js",
        summaries,
        None,
    );
    // cleanHtml should have an SSA summary
    let clean_summary = ssa_summaries
        .iter()
        .find(|(k, _)| k.name == "cleanHtml")
        .map(|(_, v)| v)
        .unwrap_or_else(|| {
            panic!(
                "cleanHtml should have an SSA summary, got keys: {:?}",
                ssa_summaries.keys().map(|k| &k.name).collect::<Vec<_>>()
            )
        });
    assert!(
        !clean_summary.param_to_return.is_empty(),
        "cleanHtml should propagate param to return"
    );
}

// ── Inter-procedural container store tests ────────────────────────────────

#[test]
fn ssa_interproc_container_store_summary() {
    // Verify that extract_container_flow_summary produces correct indices
    // for storeInto(value, arr) { arr.push(value); } after param reordering.
    use crate::state::symbol::SymbolInterner;

    let src = b"var express = require('express');\nvar app = express();\n\nfunction storeInto(value, arr) {\n    arr.push(value);\n}\n\napp.get('/store', function(req, res) {\n    var items = [];\n    storeInto(req.query.input, items);\n    res.send(items.join(''));\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let the_cfg = &file_cfg.first_body().graph;
    let summaries = &file_cfg.summaries;
    let interner = SymbolInterner::from_cfg(the_cfg);

    // Extract SSA summaries (uses lower_to_ssa_with_params)
    let ssa_summaries = super::extract_intra_file_ssa_summaries(
        the_cfg,
        &interner,
        Lang::JavaScript,
        "test.js",
        summaries,
        None,
    );

    let store_summary = ssa_summaries
        .iter()
        .find(|(k, _)| k.name == "storeInto")
        .map(|(_, v)| v)
        .expect("storeInto should have an SSA summary");
    assert!(
        !store_summary.param_to_container_store.is_empty(),
        "storeInto should have param_to_container_store (value stored into arr)"
    );
    // With correct param ordering: value=0, arr=1
    assert_eq!(
        store_summary.param_to_container_store,
        vec![(0, 1)],
        "param_to_container_store should map value(0) → arr(1)"
    );

    // Verify the full analysis produces a finding
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "inter-procedural container store should produce a finding"
    );
}

// ── Loop Induction Variable Optimization ─────────────────────────────────

#[test]
fn ssa_induction_var_no_taint() {
    // Counter in loop with tainted source elsewhere: counter should not gain taint.
    // The loop counter `i` is a simple induction variable (i = i + 1).
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let data = env::var("INPUT").unwrap();
            let mut i = 0;
            while i < 10 {
                i = i + 1;
            }
            Command::new("sh").arg(data).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    // Should still find the data→sink flow but `i` should not gain taint
    assert_eq!(
        findings.len(),
        1,
        "induction var optimization: tainted source should still produce 1 finding"
    );
}

#[test]
fn ssa_loop_tainted_var_not_induction() {
    // `x` is tainted and transformed in a loop, NOT an induction variable
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let mut x = env::var("DANGEROUS").unwrap();
            while x.len() < 100 {
                x.push_str("a");
            }
            Command::new("sh").arg(x).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert_eq!(
        findings.len(),
        1,
        "tainted var in loop (not induction) should still propagate"
    );
}

#[test]
fn ssa_taint_through_loop_still_works() {
    // Existing test ported: taint through a loop body should work
    let src = br#"
        use std::{env, process::Command};
        fn main() {
            let x = env::var("DANGEROUS").unwrap();
            for _i in 0..10 {
                let _unused = 1;
            }
            Command::new("sh").arg(x).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert_eq!(
        findings.len(),
        1,
        "taint through loop should still produce 1 finding"
    );
}

// ── Enhanced Condition Predicate Classification ──────────────────────────

#[test]
fn ssa_validation_targets_specific_var() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // `validate(x, config)` should only validate `x`, not `config`
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            let config = env::var("CONFIG").unwrap();
            if validate(x, config) {
                Command::new("sh").arg(config).status().unwrap();
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // config flows to a sink; only x was validated, so config should NOT be validated
    assert!(!findings.is_empty(), "should detect taint flow for config");
    // The finding for config should NOT be path_validated since validate() targets x, not config
    let config_finding = findings.iter().find(|f| !f.path_validated);
    assert!(
        config_finding.is_some(),
        "config should NOT be marked as path_validated (only x is validated)"
    );
}

#[test]
fn ssa_method_validation_target() {
    use crate::taint::path_state::classify_condition_with_target;
    // Method call: `x.isValid()` should target `x`
    let (kind, target) = classify_condition_with_target("x.isValid()");
    assert_eq!(kind, PredicateKind::ValidationCall);
    assert_eq!(target.as_deref(), Some("x"));
}

// ── Path Sensitivity via Phi Structure ───────────────────────────────────

#[test]
fn ssa_phi_path_sensitive_both_branches_validated() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Variable validated on both branches → phi result should be fully validated
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if validate(&x) {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // Validated findings are now suppressed, sink is in the validated
    // branch, so no finding should be emitted.
    assert_eq!(findings.len(), 0, "validated finding should be suppressed");
}

#[test]
fn ssa_phi_path_sensitive_one_branch_not_validated() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Sink is in the unvalidated branch → should NOT be path_validated
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if !validate(&x) {
                Command::new("sh").arg(&x).status().unwrap();
            }
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    assert_eq!(findings.len(), 1, "should detect taint flow");
    assert!(
        !findings[0].path_validated,
        "finding should NOT be path_validated (sink in failed-validation branch)"
    );
}

// ── Cross-language reassignment kill verification ───────────────────────

#[test]
fn ssa_reassignment_kills_taint_js() {
    let src = b"var express = require('express');\nvar app = express();\napp.get('/r', function(req, res) {\n    var name = req.query.input;\n    name = \"Guest\";\n    eval(name);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "JS: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_ts() {
    let src =
        b"function main() {\n  let x = document.location();\n  x = \"safe\";\n  eval(x);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let file_cfg = parse_lang(src, "typescript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "TS: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_python() {
    let src = b"import os\ndef main():\n    cmd = os.getenv(\"CMD\")\n    cmd = \"safe\"\n    os.system(cmd)\n";
    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let file_cfg = parse_lang(src, "python", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "Python: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_go() {
    let src = b"package main\n\nimport \"os\"\nimport \"os/exec\"\n\nfunc main() {\n\tcmd := os.Getenv(\"CMD\")\n\tcmd = \"safe\"\n\texec.Command(cmd)\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(src, "go", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Go, "test.go", &[], None);
    assert!(
        findings.is_empty(),
        "Go: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_java() {
    let src = b"class Main {\n  void main() {\n    String cmd = System.getenv(\"CMD\");\n    cmd = \"safe\";\n    Runtime.exec(cmd);\n  }\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let file_cfg = parse_lang(src, "java", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::Java,
        "test.java",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "Java: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_php() {
    let src = b"<?php\n$cmd = $_GET['cmd'];\n$cmd = \"safe\";\neval($cmd);\n";
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let file_cfg = parse_lang(src, "php", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Php, "test.php", &[], None);
    assert!(
        findings.is_empty(),
        "PHP: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_ruby() {
    let src = b"def main\n  cmd = gets()\n  cmd = \"safe\"\n  system(cmd)\nend\n";
    let lang = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    let file_cfg = parse_lang(src, "ruby", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Ruby, "test.rb", &[], None);
    assert!(
        findings.is_empty(),
        "Ruby: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_c() {
    let src = b"#include <stdlib.h>\nvoid main() {\n  char* cmd = getenv(\"CMD\");\n  cmd = \"safe\";\n  system(cmd);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::C, "test.c", &[], None);
    assert!(
        findings.is_empty(),
        "C: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

#[test]
fn ssa_reassignment_kills_taint_cpp() {
    let src = b"#include <cstdlib>\nvoid main() {\n  char* cmd = std::getenv(\"CMD\");\n  cmd = \"safe\";\n  system(cmd);\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let file_cfg = parse_lang(src, "cpp", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        findings.is_empty(),
        "C++: reassignment to constant should kill taint, got {} findings",
        findings.len()
    );
}

// ── Compound assignment preserves taint ─────────────────────────────────

#[test]
fn ssa_compound_preserves_taint_js() {
    let src = b"var express = require('express');\nvar app = express();\napp.get('/r', function(req, res) {\n    var name = req.query.input;\n    name = name + \" suffix\";\n    eval(name);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "JS: compound assignment should preserve taint"
    );
}

#[test]
fn ssa_compound_preserves_taint_python() {
    let src = b"import os\ndef main():\n    cmd = os.getenv(\"CMD\")\n    cmd = cmd + \" safe\"\n    os.system(cmd)\n";
    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let file_cfg = parse_lang(src, "python", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::Python,
        "test.py",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "Python: compound assignment should preserve taint"
    );
}

#[test]
fn ssa_compound_preserves_taint_go() {
    let src = b"package main\n\nimport \"os\"\nimport \"os/exec\"\n\nfunc main() {\n\tcmd := os.Getenv(\"CMD\")\n\tcmd = cmd + \" suffix\"\n\texec.Command(cmd)\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let file_cfg = parse_lang(src, "go", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Go, "test.go", &[], None);
    assert!(
        !findings.is_empty(),
        "Go: compound assignment should preserve taint"
    );
}

#[test]
fn ssa_compound_preserves_taint_java() {
    let src = b"class Main {\n  void main() {\n    String cmd = System.getenv(\"CMD\");\n    cmd = cmd + \" safe\";\n    Runtime.exec(cmd);\n  }\n}\n";
    let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let file_cfg = parse_lang(src, "java", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::Java,
        "test.java",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "Java: compound assignment should preserve taint"
    );
}

// ── PHI merge preserves taint on non-reassigned path ────────────────────

#[test]
fn ssa_phi_preserves_taint_on_non_reassigned_path_js() {
    let src = b"var express = require('express');\nvar app = express();\napp.get('/r', function(req, res) {\n    var name = req.query.input;\n    if (name.length > 10) {\n        name = \"fallback\";\n    }\n    eval(name);\n});\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "JS: PHI merge should preserve taint from non-reassigned path"
    );
}

#[test]
fn ssa_phi_preserves_taint_on_non_reassigned_path_rust() {
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let mut x = env::var("DANGEROUS").unwrap();
            if x.len() > 5 {
                x = "safe".to_string();
            }
            Command::new("sh").arg(&x).status().unwrap();
        }"#;
    let findings = ssa_analyse_rust(src);
    assert!(
        !findings.is_empty(),
        "Rust: PHI merge should preserve taint from non-reassigned path"
    );
}

/// Smoke test: linear SSRF prefix suppression (no phi, no branches).
///
/// The prefix must be in a named variable so the CFG captures it as a
/// separate SSA Const value. Inline string literals in binary expressions
/// are not currently tracked as SSA operands.
#[test]
fn abstract_ssrf_prefix_linear_suppression() {
    let src = b"var userId = document.location();\nvar prefix = 'https://api.example.com/users/';\nvar url = prefix + userId;\nfetch(url);\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "Linear SSRF prefix: 'https://api.example.com/users/' + userId should be \
         suppressed by abstract string domain. Got {} findings.",
        findings.len()
    );
}

/// Regression test for abstract phi replay in collect_block_events.
///
/// Two predecessor blocks produce string concat values with different safe
/// prefixes ("https://api.example.com/users/" and "https://api.example.com/admins/").
/// A phi merges them. The LCP of the prefixes is "https://api.example.com/" which
/// still has scheme://host/, so SSRF suppression should fire.
///
/// Before the phi replay fix, collect_block_events did NOT replay abstract phis,
/// leaving the phi result's abstract value as Top (stale). The SSRF suppression
/// would fail because there was no known prefix.
///
/// Note: prefix must be in a named variable so the CFG captures it as an SSA Const.
#[test]
fn abstract_phi_replay_ssrf_suppression() {
    let src = b"var userId = document.location();\nvar prefix1 = 'https://api.example.com/users/';\nvar prefix2 = 'https://api.example.com/admins/';\nvar url;\nif (userId.length > 5) {\n  url = prefix1 + userId;\n} else {\n  url = prefix2 + userId;\n}\nfetch(url);\n";
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "Abstract phi replay: both branches produce safe SSRF prefixes, \
         phi merge should preserve the common prefix 'https://api.example.com/' \
         and suppress the SSRF finding. Got {} findings.",
        findings.len()
    );
}

#[test]
fn ruby_type_check_guard_suppresses_taint() {
    // Ruby `unless user_id.is_a?(Integer)` guard should validate user_id
    // so that the subsequent SQL sink does not produce a finding.
    let src = b"def run_query(params)\n  user_id = params[:id]\n  unless user_id.is_a?(Integer)\n    return \"bad input\"\n  end\n  connection.execute(\"SELECT * FROM users WHERE id = \" + user_id.to_s)\nend\n";
    let lang = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    let file_cfg = parse_lang(src, "ruby", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Ruby, "test.rb", &[], None);
    assert!(
        findings.is_empty(),
        "Ruby: is_a?(Integer) type guard should suppress taint finding, got {} findings",
        findings.len()
    );
}

// ── Rust struct expression taint propagation ────────────────────────────

#[test]
fn rust_struct_literal_with_source_produces_source_caps() {
    let src = br#"
        use std::env;
        struct Cfg { val: String }
        fn make_cfg() -> Cfg {
            Cfg { val: env::var("X").unwrap() }
        }
    "#;
    let summaries = extract_summaries_from_bytes(src, "test.rs");
    let make = summaries
        .iter()
        .find(|s| s.name == "make_cfg")
        .expect("make_cfg should have a summary");
    assert!(
        make.source_caps != 0,
        "make_cfg should have source_caps from env::var inside struct literal, got 0"
    );
}

#[test]
fn rust_struct_constructor_source_flows_through_format_to_sink() {
    let src = br#"
        use std::env;
        use std::process::Command;
        use std::fs;

        struct AppConfig {
            db_url: String,
            upload_dir: String,
        }

        fn load_config() -> AppConfig {
            AppConfig {
                db_url: env::var("DATABASE_URL").unwrap(),
                upload_dir: env::var("UPLOAD_DIR").unwrap(),
            }
        }

        fn handle_export() {
            let config = load_config();
            let dump_cmd = format!("pg_dump {}", config.db_url);
            Command::new("sh").arg("-c").arg(&dump_cmd).output().unwrap();
            let dump_path = format!("{}/export.sql", config.upload_dir);
            fs::write(&dump_path, "data").unwrap();
        }
    "#;
    let file_cfg = parse_rust(src);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::Rust,
        "test.rs",
        &[],
        None,
    );
    assert!(
        findings.len() >= 2,
        "Expected >= 2 taint findings (Command::new + fs::write), got {}",
        findings.len()
    );
}

#[test]
fn ssa_format_macro_propagates_taint() {
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            let cmd = format!("echo {}", x);
            Command::new("sh").arg("-c").arg(&cmd).output().unwrap();
        }
    "#;
    let findings = ssa_analyse_rust(src);
    assert_eq!(
        findings.len(),
        1,
        "format! should propagate taint from env::var to Command::new sink"
    );
}

// ── B-2 regression: phi validated_must must use must-analysis, not may ───

#[test]
fn phi_validated_must_requires_all_paths() {
    use crate::cfg::build_cfg;
    use tree_sitter::Language;

    // Path A validates x, path B does NOT validate x.
    // The phi for x after the merge must NOT get validated_must, only
    // validated_may (since at least one path validated). The sink after
    // the merge must still fire because the must-analysis says "not
    // definitely validated on all paths".
    let src = br#"
        use std::env; use std::process::Command;
        fn main() {
            let x = env::var("INPUT").unwrap();
            if some_condition() {
                validate(&x);
            }
            Command::new("sh").arg(&x).status().unwrap();
        }"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    let file_cfg = build_cfg(&tree, src, "rust", "test.rs", None);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

    // x is validated on only one branch, so the phi merge must NOT promote
    // to validated_must. The sink should still fire.
    assert!(
        !findings.is_empty(),
        "B-2 regression: phi must NOT promote to validated_must when only \
         one branch validates — sink should still fire"
    );
}

// ── C-1 regression: inline return taint precision ───────────────────────

#[test]
fn inline_return_constant_with_internal_source_produces_no_finding() {
    use tree_sitter::Language;

    // Callee has an internal source (document.location) but returns a constant.
    // The caller feeds tainted input as an argument. Since the return value is
    // a constant (never tainted), the caller's call result should be untainted.
    let src = b"var child_process = require('child_process');\n\
        var express = require('express');\n\
        var app = express();\n\
        \n\
        function transform(input) {\n\
            var internal = document.location();\n\
            return 'constant_value';\n\
        }\n\
        \n\
        app.get('/safe', function(req, res) {\n\
            var result = transform(req.query.data);\n\
            child_process.exec(result);\n\
        });\n";

    let lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    // transform() returns a constant, no taint should leak to caller
    assert_eq!(
        findings.len(),
        0,
        "C-1: transform() returns constant — internal source must not leak, got {} findings: {:?}",
        findings.len(),
        findings
            .iter()
            .map(|f| format!("{}→{}", f.source.index(), f.sink.index()))
            .collect::<Vec<_>>()
    );
}

#[test]
fn inline_return_taint_prefers_explicit_return_value() {
    use tree_sitter::Language;

    // When a callee has an explicit Return(Some(rv)) and rv IS tainted,
    // extract_inline_return_taint should collect ONLY that value's taint,
    // not all live tainted variables.
    let src = b"var child_process = require('child_process');\n\
        var express = require('express');\n\
        var app = express();\n\
        \n\
        function passthrough(cmd) {\n\
            return cmd;\n\
        }\n\
        \n\
        app.get('/a', function(req, res) {\n\
            var w = passthrough(req.query.cmd);\n\
            child_process.exec(w);\n\
        });\n";

    let lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    // passthrough(tainted) returns tainted → exactly 1 finding
    assert_eq!(
        findings.len(),
        1,
        "C-1 regression: passthrough(tainted) should produce exactly 1 finding, got {}",
        findings.len()
    );
}

#[test]
fn inline_return_taint_internal_source_does_not_widen_caps() {
    use tree_sitter::Language;

    // Callee has an internal source (document.location) alongside a tainted
    // param. The explicit return value is the param. Without the C-1 fix,
    // extract_inline_return_taint would union ALL live tainted values' caps
    //, the internal source's derived-caps would override the param-caps
    // (derived takes priority in the extraction logic). With the fix, only
    // the return value's taint is collected, so param taint is returned
    // correctly.
    //
    // Both old and new produce a finding, but the fix ensures the return
    // taint comes from the param flow, not from the internal source.
    let src = b"var child_process = require('child_process');\n\
        var express = require('express');\n\
        var app = express();\n\
        \n\
        function withSideEffect(cmd) {\n\
            var leaked = document.location();\n\
            return cmd;\n\
        }\n\
        \n\
        app.get('/a', function(req, res) {\n\
            var r = withSideEffect(req.query.cmd);\n\
            child_process.exec(r);\n\
        });\n";

    let lang = Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    // The callee returns cmd (tainted param), 1 finding expected.
    // The internal document.location() should NOT widen the return taint.
    assert_eq!(
        findings.len(),
        1,
        "C-1 regression: withSideEffect should produce exactly 1 finding (param flow), got {}",
        findings.len()
    );
}

/// Regression guard for the FuncKey-based re-keying of local SSA summaries
/// and cached callee bodies.
///
/// Two class methods share the leaf name `process` in the same file.  If the
/// summary map were keyed by bare name (or raw file-path namespace), the
/// second lowering would overwrite the first, both methods would end up
/// pointing at whichever summary was extracted last.
///
/// With canonical `FuncKey` identity (`container` discriminates them) both
/// methods must appear as distinct entries with matching containers.
#[test]
fn same_name_methods_distinct_func_keys() {
    let src = br#"
class Sanitizer {
    process(x) {
        return escape(x);
    }
}

class Worker {
    process(x) {
        eval(x);
    }
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);

    let (summaries, bodies) = super::extract_ssa_artifacts_from_file_cfg(
        &file_cfg,
        Lang::JavaScript,
        "test.js",
        &file_cfg.summaries,
        None,
        None,
    );

    // Collect containers of every key named "process".
    let mut containers: Vec<String> = summaries
        .keys()
        .filter(|k| k.name == "process")
        .map(|k| k.container.clone())
        .collect();
    containers.sort();

    assert_eq!(
        containers,
        vec!["Sanitizer".to_string(), "Worker".to_string()],
        "FuncKey-based keying must produce one `process` summary per container; \
         got {containers:?} from {:?}",
        summaries.keys().collect::<Vec<_>>(),
    );

    // Same invariant on the cached-bodies map, inline analysis depends on
    // being able to fetch the correct body by full FuncKey.
    let mut body_containers: Vec<String> = bodies
        .iter()
        .filter(|(k, _)| k.name == "process")
        .map(|(k, _)| k.container.clone())
        .collect();
    body_containers.sort();
    assert_eq!(
        body_containers,
        vec!["Sanitizer".to_string(), "Worker".to_string()],
        "callee-body cache must keep both same-name methods distinct; got {body_containers:?}",
    );

    // Cross-map agreement: every summary key must also be a body key.
    // (Pass 2 looks up bodies and summaries with the same key.)
    for key in summaries.keys() {
        assert!(
            bodies.iter().any(|(bk, _)| bk == key),
            "summary key {key:?} missing from callee-body map"
        );
    }
}

/// Same-name *free function* overloads (not methods): two `helper` functions
/// with identical names and arities at the same scope collide on
/// `(name, arity)` but are disambiguated by `FuncKey.disambig` (body start
/// byte).  Regression guard that neither overwrites the other in the SSA
/// summary / callee-body maps.
#[test]
fn same_name_same_arity_functions_distinct_func_keys() {
    // Two top-level `helper(x)` declarations in one file.  JS allows the
    // later one to shadow the first at runtime, but our static summary
    // extraction must retain *both* so cross-file callers of either body
    // span still find their intended definition.
    let src = br#"
function helper(x) {
    return escape(x);
}

function helper(x) {
    eval(x);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);

    let (summaries, bodies) = super::extract_ssa_artifacts_from_file_cfg(
        &file_cfg,
        Lang::JavaScript,
        "test.js",
        &file_cfg.summaries,
        None,
        None,
    );

    let helper_keys: Vec<_> = summaries.keys().filter(|k| k.name == "helper").collect();
    assert_eq!(
        helper_keys.len(),
        2,
        "two same-name same-arity definitions must produce two distinct summary entries; \
         got {} keys: {:?}",
        helper_keys.len(),
        helper_keys,
    );

    // Disambiguator must actually differ (body start bytes).
    let disambigs: std::collections::HashSet<_> = helper_keys.iter().map(|k| k.disambig).collect();
    assert_eq!(
        disambigs.len(),
        2,
        "FuncKey.disambig should differ for colliding same-name same-arity defs",
    );

    // And the body cache agrees.
    let body_count = bodies.iter().filter(|(k, _)| k.name == "helper").count();
    assert_eq!(body_count, 2, "callee-body cache must also keep both defs");
}

// ── alternative-path dedup and linking ─────────────────────────────────

/// Build a bare Finding suitable for feeding into `link_alternative_paths`.
/// Only the fields consulted by that pass are populated; the rest use the
/// cheapest default so the test stays focused on the dedup contract.
fn make_finding_for_link_test(
    body_id: u32,
    source_idx: usize,
    sink_idx: usize,
    path_hash: u64,
    path_validated: bool,
) -> Finding {
    Finding {
        body_id: crate::cfg::BodyId(body_id),
        sink: petgraph::graph::NodeIndex::new(sink_idx),
        source: petgraph::graph::NodeIndex::new(source_idx),
        path: Vec::new(),
        source_kind: crate::labels::SourceKind::EnvironmentConfig,
        path_validated,
        guard_kind: None,
        hop_count: 0,
        cap_specificity: 0,
        uses_summary: false,
        flow_steps: Vec::new(),
        symbolic: None,
        source_span: None,
        primary_location: None,
        engine_notes: smallvec::SmallVec::new(),
        path_hash,
        finding_id: String::new(),
        alternative_finding_ids: smallvec::SmallVec::new(),
        effective_sink_caps: crate::labels::Cap::empty(),
    }
}

/// `make_finding_id` must produce stable, distinct IDs for findings
/// that differ on any dedup-key axis, and carry the `v`/`u`
/// validation-status suffix so a human can tell siblings apart.
#[test]
fn finding_id_encodes_validation_and_path_hash() {
    let v = make_finding_for_link_test(1, 3, 7, 0xabcd_1234_0000_0001, true);
    let mut v = v;
    v.finding_id = super::make_finding_id(&v);
    assert!(
        v.finding_id.ends_with("-v"),
        "validated ID must end -v: {}",
        v.finding_id
    );
    assert!(
        v.finding_id.contains("abcd12340000"),
        "hash component missing: {}",
        v.finding_id
    );

    let mut u = make_finding_for_link_test(1, 3, 7, 0xabcd_1234_0000_0001, false);
    u.finding_id = super::make_finding_id(&u);
    assert!(
        u.finding_id.ends_with("-u"),
        "unvalidated ID must end -u: {}",
        u.finding_id
    );
    assert_ne!(
        v.finding_id, u.finding_id,
        "validation status must disambiguate IDs"
    );

    // Differing path_hash produces a different ID even with the same
    // (body, source, sink, validated), the whole point of the path
    // component in the dedup key.
    let mut u2 = make_finding_for_link_test(1, 3, 7, 0xdead_beef_0000_0002, false);
    u2.finding_id = super::make_finding_id(&u2);
    assert_ne!(
        u.finding_id, u2.finding_id,
        "path_hash must disambiguate IDs"
    );
}

/// `link_alternative_paths` must cross-link findings that share
/// `(body_id, sink, source)`, so a validated flow and an unvalidated
/// flow on the same source/sink pair each list the other's ID.
#[test]
fn link_alternative_paths_cross_references_same_body_sink_source() {
    let mut findings = vec![
        make_finding_for_link_test(1, 3, 7, 0x1111, true),
        make_finding_for_link_test(1, 3, 7, 0x2222, false),
    ];
    for f in &mut findings {
        f.finding_id = super::make_finding_id(f);
    }

    let v_id = findings[0].finding_id.clone();
    let u_id = findings[1].finding_id.clone();
    super::link_alternative_paths(&mut findings);

    assert_eq!(
        findings[0].alternative_finding_ids.as_slice(),
        std::slice::from_ref(&u_id),
        "validated finding must reference the unvalidated sibling",
    );
    assert_eq!(
        findings[1].alternative_finding_ids.as_slice(),
        std::slice::from_ref(&v_id),
        "unvalidated finding must reference the validated sibling",
    );
}

/// Findings that differ on `(body_id, sink, source)` are independent
/// vulnerabilities, they must **not** end up cross-linked as
/// alternatives, otherwise the "alternative path" framing becomes
/// noise.
#[test]
fn link_alternative_paths_does_not_link_distinct_sink_source() {
    let mut findings = vec![
        make_finding_for_link_test(1, 3, 7, 0x1111, false),
        // Different sink, independent finding, not an alternative.
        make_finding_for_link_test(1, 3, 8, 0x1111, false),
        // Different source, also independent.
        make_finding_for_link_test(1, 4, 7, 0x1111, false),
        // Different body, also independent.
        make_finding_for_link_test(2, 3, 7, 0x1111, false),
    ];
    for f in &mut findings {
        f.finding_id = super::make_finding_id(f);
    }
    super::link_alternative_paths(&mut findings);
    for (i, f) in findings.iter().enumerate() {
        assert!(
            f.alternative_finding_ids.is_empty(),
            "finding {i} should have no alternatives; got {:?}",
            f.alternative_finding_ids,
        );
    }
}

/// When the same `(body, sink, source)` has three sibling findings
/// (e.g. validated, unvalidated-path-A, unvalidated-path-B), each
/// finding must list the other two, the group is symmetric and
/// complete rather than a chain.
#[test]
fn link_alternative_paths_three_way_group() {
    let mut findings = vec![
        make_finding_for_link_test(1, 3, 7, 0x1111, true),
        make_finding_for_link_test(1, 3, 7, 0x2222, false),
        make_finding_for_link_test(1, 3, 7, 0x3333, false),
    ];
    for f in &mut findings {
        f.finding_id = super::make_finding_id(f);
    }
    let ids: Vec<String> = findings.iter().map(|f| f.finding_id.clone()).collect();
    super::link_alternative_paths(&mut findings);
    for (i, f) in findings.iter().enumerate() {
        let expected: std::collections::HashSet<&String> = ids
            .iter()
            .enumerate()
            .filter_map(|(j, id)| if i == j { None } else { Some(id) })
            .collect();
        let got: std::collections::HashSet<&String> = f.alternative_finding_ids.iter().collect();
        assert_eq!(
            got, expected,
            "finding {i} must list every other sibling ID",
        );
    }
}

// ─────────────────────────────────────────────────────────────────────────────
//  Typed call-graph devirtualisation (typed_call_receivers)
// ─────────────────────────────────────────────────────────────────────────────

/// when a method call's receiver was constructed from a known
/// constructor (`File::open` → `FileHandle`), the SSA-extraction
/// pipeline must record `(call_ordinal, "FileHandle")` on the
/// caller's [`crate::summary::ssa_summary::SsaFuncSummary::typed_call_receivers`]
/// so build_call_graph can devirtualise the cross-file edge.
///
/// Uses Java because `FileInputStream` / `FileOutputStream` are part
/// of the [`crate::ssa::type_facts::constructor_type`] table for Java
/// and yield [`crate::ssa::type_facts::TypeKind::FileHandle`] without
/// any framework annotation plumbing.
#[test]
fn typed_call_receivers_populated_for_constructor_typed_receiver() {
    let src = br#"
class Reader {
    void read() {
        FileInputStream f = new FileInputStream("/etc/passwd");
        f.close();
    }
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let file_cfg = parse_lang(src, "java", lang);

    let (summaries, _bodies) = super::extract_ssa_artifacts_from_file_cfg(
        &file_cfg,
        Lang::Java,
        "Reader.java",
        &file_cfg.summaries,
        None,
        None,
    );

    let read_sum = summaries
        .iter()
        .find(|(k, _)| k.name == "read")
        .map(|(_, s)| s)
        .expect("read() summary must be extracted");

    let containers: Vec<&str> = read_sum
        .typed_call_receivers
        .iter()
        .map(|(_, c)| c.as_str())
        .collect();
    assert!(
        containers.contains(&"FileHandle"),
        "FileInputStream-typed receiver must surface as `FileHandle` container; got {:?}",
        read_sum.typed_call_receivers,
    );
}

/// Negative control: free-function calls (no receiver) must
/// never appear in `typed_call_receivers`.  Even when the callee is a
/// known type-producing constructor, it sits in the body as a Call
/// with `receiver = None` and is not a candidate for devirtualisation.
#[test]
fn typed_call_receivers_skips_free_function_calls() {
    // `new FileInputStream(...)` is a constructor invocation with no
    // receiver, exactly the shape we want to ignore.
    let src = br#"
class Maker {
    void make() {
        new FileInputStream("/tmp/x");
    }
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let file_cfg = parse_lang(src, "java", lang);

    let (summaries, _) = super::extract_ssa_artifacts_from_file_cfg(
        &file_cfg,
        Lang::Java,
        "Maker.java",
        &file_cfg.summaries,
        None,
        None,
    );

    // make() has zero parameters and no fresh-allocation return, so the
    // generic insertion gate skips it.  The phase-2 patch only force-
    // inserts when `typed_call_receivers` is non-empty, which it
    // isn't here, since `new FileInputStream(...)` is a free-function-
    // shaped constructor call (no SSA receiver).  So either the
    // summary is absent, or, if some other side effect inserted it ,
    // its `typed_call_receivers` is empty.  Both forms prove no
    // spurious typed entry was recorded.
    let typed = summaries
        .iter()
        .find(|(k, _)| k.name == "make")
        .map(|(_, s)| s.typed_call_receivers.clone())
        .unwrap_or_default();
    assert!(
        typed.is_empty(),
        "constructor-invocation Call has no receiver and must not surface a typed entry; \
         got {typed:?}",
    );
}

/// Regression: nested arrow functions inside `return new Promise((res,rej)
/// => { ... })` must be lifted as separate bodies. Before the Kind::Return
/// arm in cfg/mod.rs called `collect_nested_function_nodes`, only the
/// outer function (`downloadFromUri`) was extracted, the executor and
/// its inner callbacks were silently swallowed, hiding the inner gated
/// http.get sink from classification. Motivated by CVE-2025-64430.
#[test]
fn cve_2025_64430_promise_executor_extracted_as_body() {
    let src = br#"
const downloadFromUri = (uri) => {
  return new Promise((res, rej) => {
    http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
  });
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let names: Vec<Option<String>> = file_cfg
        .bodies
        .iter()
        .map(|b| b.meta.name.clone())
        .collect();
    assert!(
        file_cfg.bodies.len() >= 3,
        "expected at least 3 bodies (top-level + downloadFromUri + Promise executor), \
         got {}: {:?}",
        file_cfg.bodies.len(),
        names
    );
}

/// End-to-end: cross-function flow through a Promise-wrapping helper.
/// Caller passes a labeled-source value (`req.body.uri`) to a wrapper
/// whose body is `return new Promise((res, rej) => http.get(uri))`.
/// The wrapper's SSA summary's `param_to_sink` must include SSRF (via
/// the closure-capture summary-augmentation pass in
/// `lower_all_functions_from_bodies`), so the caller's
/// `wrapper(req.body.uri)` call resolves to a SSRF sink.
/// Motivated by CVE-2025-64430.
#[test]
fn cve_2025_64430_promise_wrapper_via_summary_param_to_sink() {
    let src = br#"
const downloadFromUri = uri => {
  return new Promise((res, rej) => {
    http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
  });
};
const handler = (req) => {
  downloadFromUri(req.body.uri);
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF flow finding via Promise-wrapper summary; got 0",
    );
}

/// End-to-end smoke check: when a JS/TS handler param is recognised as
/// user-input-bearing (`is_js_ts_handler_param_name`), Promise-executor
/// closure capture via lexical containment must propagate the seeded
/// taint into the executor body so the inner gated http.get sink fires.
/// Without the Kind::Return fix the executor was never extracted as a
/// body and the sink was invisible to classification. Motivated by
/// CVE-2025-64430.
#[test]
fn cve_2025_64430_promise_executor_sink_via_lexical_containment() {
    let src = br#"
const f = (input) => {
  return new Promise((res, rej) => {
    http.get(input);
  });
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF Sink finding in Promise executor capturing `input`; got 0",
    );
}

/// Regression: `wrapper(req.body.uri)` where wrapper passes its first
/// param to a gated SSRF sink must fire. The CFG's first_member_label
/// rebinds info.call.callee to `"req.body.uri"` (so the source label
/// applies) and preserves the actual function name in `outer_callee`.
/// resolve_sink_info has to consult outer_callee when the inner callee
/// has no sink so the wrapper's `param_to_sink: [(0, SSRF)]` summary
/// fires. Motivated by CVE-2025-64430.
#[test]
fn cve_2025_64430_wrapper_with_member_source_arg_fires() {
    let src = br#"
const helper = (uri) => {
  http.get(uri);
};

const handler = (req) => {
  helper(req.body.uri);
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected at least one SSRF flow finding through wrapper; got 0",
    );
}

/// Two-hop transitive cross-function summary propagation. The chain is
/// `handler(req) -> helper(req.body) -> downloadFromUri(x.url) ->
///     Promise(http.get(uri))`.
///
/// The augment pass populates `downloadFromUri.summary.param_to_sink:
/// [(0, SSRF)]` (single-hop closure-capture lift). For the handler's
/// `helper(req.body)` call to fire, `helper.summary.param_to_sink` must
/// also contain `[(0, SSRF)]`, but that requires `helper`'s probe to
/// see `downloadFromUri`'s augmented summary at resolution time.
///
/// Because the probe currently runs with `ssa_summaries=None`,
/// `helper.summary.param_to_sink` stays empty and the handler call site
/// reports nothing. A second extraction pass that re-runs probes with
/// the augmented summaries map plumbed through closes the gap. Mirrors
/// the upstream Parse Server CVE chain (`addFileDataIfNeeded` →
/// `downloadFileFromURI` → executor). Motivated by CVE-2025-64430.
#[test]
fn cve_2025_64430_two_hop_transitive_summary_propagation() {
    let src = br#"
const downloadFromUri = uri => {
  return new Promise((res, rej) => {
    http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
  });
};
const helper = file => {
  downloadFromUri(file._source.uri);
};
const handler = (req) => {
  helper(req.body);
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF flow finding via two-hop transitive summary propagation; got 0",
    );
}

/// Regression for the multi-line method-chain form
/// `http\n  .get(uri, ...)\n  .on('error', ...)`. Tree-sitter parses
/// this with whitespace embedded in the inner member-expression's
/// source text (`"http\n      .get"`), so the chained-call inner-gate
/// rebinding's classification lookup missed the gated `http.get` sink.
/// `find_chained_inner_call` now strips whitespace from the inner
/// callee text before classification. Without this, the upstream
/// Parse Server fixture (CVE-2025-64430 vulnerable.js) does not fire
/// even after the transitive summary propagation fix.
#[test]
fn cve_2025_64430_multiline_chained_get_classifies_inner_sink() {
    let src = br#"
const downloadFromUri = uri => {
  return new Promise((res, rej) => {
    http
      .get(uri, response => { response.on('data', () => {}); })
      .on('error', e => rej(e));
  });
};
const helper = file => {
  downloadFromUri(file._source.uri);
};
const handler = (req) => {
  helper(req.body);
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected SSRF flow finding through multi-line chained http.get; got 0",
    );
}

/// Three-hop transitive propagation: handler -> middle -> helper ->
/// downloadFromUri (Promise wrapper) -> http.get. The second extraction
/// pass must lift `downloadFromUri.summary.param_to_sink` (single-hop
/// from augment) onto `helper.summary.param_to_sink`, then onto
/// `middle.summary.param_to_sink`, then handler's call site picks it up.
///
/// Today the second-pass runs only once (no fixed-point), so depth-3+
/// is expected to NOT fire, guards against accidental fixed-point
/// regression that would mask an over-eager rewrite.  Marked
/// `#[ignore]` so it documents the depth limit without breaking CI.
/// Motivated by CVE-2025-64430 corner case; remove the `#[ignore]` and
/// any guarding `assert!` polarity if a fixed-point is added later.
/// Indirect-validator branch narrowing: when an if-condition is a
/// bare result variable whose reaching SSA def is a Call to a
/// callee classified by `classify_input_validator_callee` (e.g.
/// `validateUrlSsrf`, `verifyToken`, `isValidUrl`), the validator's
/// argument is treated as validated on the success branch.
///
/// This pins the SSA-level
/// `apply_input_validator_branch_narrowing` regardless of whether
/// downstream consumers (sink-arg taint, cfg-unguarded-sink) honor
/// `validated_must`.  Test asserts the symbol-keyed validation flag
/// is set on the analysis exit state.
///
/// Direct-flow shape (no helper indirection); the helper-summary
/// case still has open architectural gaps (validated_must doesn't
/// propagate through `param_to_sink` summaries, same gap blocks
/// AllowlistCheck-in-helper, see CVE_DEFERRED.md GHSA-4x48-cgf9-q33f).
///
/// Motivated by Novu CVE GHSA-4x48-cgf9-q33f
/// (`const ssrfError = await validateUrlSsrf(child.webhookUrl); if (ssrfError) throw …;`).
#[test]
fn indirect_validator_narrowing_marks_arg_validated() {
    let src = br#"
async function handler(req) {
  const target = req.query.url;
  const ssrfError = await validateUrlSsrf(target);
  if (ssrfError) {
    throw new Error('blocked');
  }
  await axios.get(target);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    // Direct-flow: validator narrowing should clear axios.get's taint event.
    assert!(
        findings.is_empty(),
        "validator narrowing should suppress direct-flow SSRF; got {} finding(s)",
        findings.len()
    );
}

/// Regex-allowlist `<X>.test(value)` is recognised as a ValidationCall
/// targeting the call's first argument (not the regex receiver).
///
/// Shape:
///
/// ```js
/// const v = req.body.x;
/// if (!SAFE_REGEX.test(v)) { throw }
/// db.execute(v);  // direct flow: should be silent
/// ```
///
/// `classify_condition` returns ValidationCall for the `*regex*.test()`
/// receiver shape (see `target_regex_test_first_arg` in path_state) and
/// `extract_validation_target` overrides the default receiver-as-target
/// rule to extract the call's first argument.  Together with the
/// existing CFG-level negation handling in `compute_succ_states` the
/// false branch (continue) marks `v` as validated.
///
/// Motivated by Payload CVE-2026-25544
/// (`if (!SAFE_STRING_REGEX.test(value)) throw`).  Note: this test pins
/// the direct-flow case; transitive validation through SSA-derived
/// values (e.g. template-literal concat of `v` into `sql`) is a deeper
/// gap tracked separately and not closed here.
#[test]
fn regex_test_allowlist_narrowing_clears_direct_flow() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;

async function handler(req) {
    const userValue = req.body.filter;
    if (!SAFE_REGEX.test(userValue)) {
        throw new Error('bad');
    }
    return await db.execute(userValue);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "regex.test allowlist narrowing should suppress direct-flow finding; got {} finding(s): {findings:?}",
        findings.len()
    );
}

/// Regression: `extract_ssa_func_summary` must skip `all_validated`
/// events when populating `param_to_sink` / `param_to_sink_param`.
///
/// Helper bodies whose validator-call branch narrowing fired produce
/// per-param probe events flagged `all_validated=true`.  Without
/// summary-extract suppression, callers would still see the helper
/// in their summary's sink set and refire on `helper(taintedArg)`
/// even though the validator inside the helper proved the path
/// safe.  The caller can't see the validator (it's behind the
/// summary), so the gap manifests as a precision miss only when
/// helper + caller are in the same file.
///
/// Closes the helper-summary half of Novu CVE GHSA-4x48-cgf9-q33f.
#[test]
fn helper_with_validator_does_not_propagate_to_caller_via_summary() {
    let src = br#"
async function getWebhookResponse(child) {
    const ssrfError = await validateUrlSsrf(child.webhookUrl);
    if (ssrfError) {
        throw new Error('blocked');
    }
    return await axios.post(child.webhookUrl, {});
}

async function handler(req) {
    const child = req.body.filter;
    const r = await getWebhookResponse(child);
    return r;
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "helper-with-validator should not propagate sink via summary; got {} finding(s)",
        findings.len()
    );
}

/// Companion: same shape WITHOUT the validator inside the helper
/// must still fire so the precision gain is targeted.  Asserts
/// `all_validated` skip doesn't accidentally suppress unsafe helpers.
#[test]
fn helper_without_validator_still_propagates_to_caller_via_summary() {
    let src = br#"
async function getWebhookResponse(child) {
    return await axios.post(child.webhookUrl, {});
}

async function handler(req) {
    const child = req.body.filter;
    const r = await getWebhookResponse(child);
    return r;
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "helper-without-validator must still flag the cross-fn SSRF path",
    );
}

/// Regression for CVE-2026-25544 deep fix
/// (`validated_params_to_return` summary field): a helper that
/// validates its parameter via a regex `.test(...)` allowlist and
/// returns a string derived from the validated parameter must
/// suppress the caller's downstream sink even when:
///   * the caller binds the call result to a fresh variable
///     (`const sql = sanitize(userValue)`), and
///   * the helper's return is a *derived* template literal, not a
///     pass-through of the parameter itself.
///
/// Sound because the helper only returns normally on the validating
/// arm — control could not reach the post-call instruction unless
/// the regex accepted the argument.  Pinned by
/// `propagate_validated_params_to_return` marking both the arg and
/// the call result `validated_must` / `validated_may` so the sink's
/// `all_validated` check fires.
#[test]
fn validated_params_to_return_suppresses_one_hop_helper_validator() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
    if (!SAFE_REGEX.test(value)) throw new Error('bad');
    return `safe:${value}`;
};

async function handler(req) {
    const userValue = req.body.filter;
    const sql = sanitize(userValue);
    db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "regex.test allowlist inside helper must suppress caller sink; got {} finding(s)",
        findings.len()
    );
}

/// Two-hop variant of
/// `validated_params_to_return_suppresses_one_hop_helper_validator`:
/// when the validator helper is itself wrapped by another helper
/// that interpolates the validator's return into a template literal,
/// summary extraction must still surface
/// `validated_params_to_return` on the *outer* helper.  This pins
/// the second-pass re-extraction (via
/// `re_extract_summaries_with_augment_view`) plus the OR-merge of
/// `validated_params_to_return` in `merge_sink_fields`.
#[test]
fn validated_params_to_return_suppresses_two_hop_helper_validator() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
    if (!SAFE_REGEX.test(value)) throw new Error('bad');
    return value;
};

const buildQuery = (value) => {
    const s = sanitize(value);
    return s + '!';
};

async function handler(req) {
    const userValue = req.body.filter;
    const sql = buildQuery(userValue);
    db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "two-hop helper-validator must propagate validated_params_to_return through both helpers; got {} finding(s)",
        findings.len()
    );
}

/// Companion to
/// `validated_params_to_return_suppresses_one_hop_helper_validator`:
/// same shape WITHOUT the regex.test guard inside the helper must
/// still fire.  Asserts the validated-flow propagation does not
/// over-suppress when the helper does not actually validate.
#[test]
fn validated_params_to_return_does_not_suppress_unvalidated_helper() {
    let src = br#"
const sanitize = (value) => {
    return `safe:${value}`;
};

async function handler(req) {
    const userValue = req.body.filter;
    const sql = sanitize(userValue);
    db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "helper without regex guard must still flag the caller sink",
    );
}

/// Regression: per-parameter summary probe must seed every
/// destructured object-pattern sibling sharing a slot, not only the
/// primary name picked by `extract_param_meta`.  Without this, a
/// helper that destructures its single argument as
/// `({ value }) => …` cannot have `validated_params_to_return = [0]`
/// proven, because the validator inside the body operates on the
/// `value` binding while the probe only seeded the primary `value`
/// (or any earlier sibling) of the object pattern.  Closes the
/// residual blocker for CVE-2026-25544 (PayloadCMS Drizzle SQLi).
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
    if (!SAFE_REGEX.test(value)) throw new Error('bad');
    return value;
};

const buildQuery = ({ value }) => {
    const s = sanitize(value);
    return s + '!';
};

async function handler(req) {
    const userValue = req.body.filter;
    const sql = buildQuery({ value: userValue });
    db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "destructured object-pattern arg with regex.test allowlist inside the helper must suppress caller sink; got {} finding(s)",
        findings.len()
    );
}

/// Regression: same coverage for TypeScript object-pattern formals
/// (`required_parameter > pattern: object_pattern`).  TS exposes the
/// destructure under a wrapper required_parameter; JS exposes it as a
/// direct child of formal_parameters.  Both paths must surface
/// destructured siblings to the per-parameter probe.
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper_ts() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value: string): string => {
    if (!SAFE_REGEX.test(value)) throw new Error('bad');
    return value;
};

const buildQuery = ({ value }: { value: string }): string => {
    const s = sanitize(value);
    return s + '!';
};

async function handler(req: any) {
    const userValue = req.body.filter;
    const sql = buildQuery({ value: userValue });
    db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let file_cfg = parse_lang(src, "typescript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "TS destructured object-pattern arg with regex.test allowlist must suppress caller sink; got {} finding(s)",
        findings.len()
    );
}

/// Regression: a destructured object-pattern formal with multiple
/// fields must still propagate validated_params_to_return when the
/// validation lives behind a sibling that is NOT the primary name
/// returned by `extract_param_meta`.  In CVE-2026-25544 the primary
/// is `column` (first ident in `{ column, operator, pathSegments,
/// value }`) but the validator gates `value` — without sibling
/// seeding the probe never sees the validation.
#[test]
fn destructured_sibling_validation_propagates_through_summary() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;

const sanitize = (value) => {
    if (!SAFE_REGEX.test(value)) throw new Error('bad');
    return value;
};

const buildQuery = ({ column, operator, value }) => {
    return `${column} ${operator} ${sanitize(value)}`;
};

async function handler(req) {
    const userValue = req.body.filter;
    const sql = buildQuery({ column: 'col', operator: '=', value: userValue });
    db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "destructured-sibling validation (validator binds non-primary slot binding) must propagate through summary; got {} finding(s)",
        findings.len()
    );
}

/// Regression: `validate*`-named callees match
/// `InputValidatorPolarity::ErrorReturning`, bare `if (err) throw`
/// guards the success branch (false branch).  `is_valid*`/`is_safe*`
/// callees match `InputValidatorPolarity::BooleanTrueIsValid`, bare
/// `if (!ok) throw` guards the success branch (true branch via
/// `condition_negated`).
#[test]
fn classify_input_validator_callee_polarity_buckets() {
    use crate::ssa::type_facts::{InputValidatorPolarity, classify_input_validator_callee};

    // ErrorReturning bucket
    assert_eq!(
        classify_input_validator_callee("validateUrlSsrf"),
        Some(InputValidatorPolarity::ErrorReturning)
    );
    assert_eq!(
        classify_input_validator_callee("verifyToken"),
        Some(InputValidatorPolarity::ErrorReturning)
    );
    assert_eq!(
        classify_input_validator_callee("validate_url"),
        Some(InputValidatorPolarity::ErrorReturning)
    );

    // BooleanTrueIsValid bucket
    assert_eq!(
        classify_input_validator_callee("isValidUrl"),
        Some(InputValidatorPolarity::BooleanTrueIsValid)
    );
    assert_eq!(
        classify_input_validator_callee("is_valid_email"),
        Some(InputValidatorPolarity::BooleanTrueIsValid)
    );
    assert_eq!(
        classify_input_validator_callee("isSafe"),
        Some(InputValidatorPolarity::BooleanTrueIsValid)
    );

    // Negative, names that look like validators but are auth-flavored
    // (`checkPermissions`, `is_authorized`) are intentionally not
    // matched here; they have separate semantics in the auth pipeline.
    assert_eq!(classify_input_validator_callee("checkPermissions"), None);
    assert_eq!(classify_input_validator_callee("is_authorized"), None);
    assert_eq!(classify_input_validator_callee("randomThing"), None);

    // Path-prefix peeling: `obj.validateXxx` should classify the same
    // as the bare callee.
    assert_eq!(
        classify_input_validator_callee("validator.validateUrlSsrf"),
        Some(InputValidatorPolarity::ErrorReturning)
    );
}

#[test]
#[ignore]
fn cve_2025_64430_three_hop_transitive_documents_depth_limit() {
    let src = br#"
const downloadFromUri = uri => {
  return new Promise((res, rej) => {
    http.get(uri, response => { response.on('data', () => {}); }).on('error', e => rej(e));
  });
};
const helper = file => {
  downloadFromUri(file._source.uri);
};
const middle = data => {
  helper(data);
};
const handler = (req) => {
  middle(req.body);
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let _findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
}

/// JS arrow-function default parameters (`(a = {}, b = {}) => …`)
/// are wrapped by tree-sitter in `assignment_pattern` nodes whose
/// `left` field carries the actual identifier.  Without
/// `assignment_pattern` in `PARAM_CONFIG.param_node_kinds`, the
/// param walker skipped them, producing a parameter-less summary
/// for any function whose params have defaults.  That broke
/// cross-function `param_to_sink` propagation for shapes like
/// Strapi `sendTemplatedEmail`.  Motivated by CVE-2023-22621.
#[test]
fn cve_2023_22621_js_default_params_extracted() {
    use crate::cfg::extract_param_meta_for_test;
    let src = br#"
const sendTemplatedEmail = (emailOptions = {}, emailTemplate = {}, data = {}) => {
  return emailTemplate;
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&lang).unwrap();
    let tree = parser.parse(&src[..], None).unwrap();
    let root = tree.root_node();
    let mut arrow_node: Option<tree_sitter::Node> = None;
    fn find<'a>(n: tree_sitter::Node<'a>, out: &mut Option<tree_sitter::Node<'a>>) {
        if n.kind() == "arrow_function" {
            *out = Some(n);
            return;
        }
        let mut c = n.walk();
        for ch in n.named_children(&mut c) {
            find(ch, out);
            if out.is_some() {
                return;
            }
        }
    }
    find(root, &mut arrow_node);
    let arrow = arrow_node.expect("arrow function not found");
    let params = extract_param_meta_for_test(arrow, "javascript", src);
    let names: Vec<String> = params.iter().map(|(n, _)| n.clone()).collect();
    assert_eq!(
        names,
        vec![
            "emailOptions".to_string(),
            "emailTemplate".to_string(),
            "data".to_string()
        ],
        "expected all 3 default-valued arrow params extracted; got {:?}",
        names
    );
}

/// `_.template(tainted)` is a server-side template injection sink:
/// lodash compiles `<% ... %>` evaluate blocks into a JS Function,
/// so attacker-controlled input becomes RCE at render time.  Gate
/// activates conservatively when arg 1 is missing (default lodash
/// behavior is dangerous).  Motivated by CVE-2023-22621 (Strapi).
#[test]
fn cve_2023_22621_lodash_template_fires_on_tainted_input() {
    let src = br#"
const _ = require('lodash');
const handler = (req, res) => {
  _.template(req.body.tpl);
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected taint flow on _.template(req.body.tpl); got 0 findings",
    );
}

/// `_.template(tainted, { evaluate: false })` disables lodash's
/// `<% ... %>` evaluate block compilation, so the call is no
/// longer a code-execution sink.  The gate's `keyword_name =
/// "evaluate"` activation reads the literal value via the JS-side
/// closure that walks the call's arg-1 object literal (since JS
/// has no language-level keyword args).  Motivated by Strapi's
/// CVE-2023-22621 patch.
#[test]
fn cve_2023_22621_lodash_template_suppressed_by_evaluate_false() {
    let src = br#"
const _ = require('lodash');
const handler = (req, res) => {
  _.template(req.body.tpl, { evaluate: false });
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "expected no taint flow when evaluate:false is set; got {} findings",
        findings.len(),
    );
}

/// Double-call chained form `_.template(tainted)(data)` — the outer
/// call's `function` field is itself a call_expression rather than
/// the member-chain shape `find_chained_inner_call` was originally
/// written for.  The extension recognises the `f()()` pattern and
/// rebinds gate classification to the inner call so the gated
/// `_.template` fires even when wrapped in an immediate invocation
/// of the compiled function.  Motivated by CVE-2023-22621.
#[test]
fn cve_2023_22621_lodash_template_double_call_inner_rebinding() {
    let src = br#"
const _ = require('lodash');
const handler = (req, res) => {
  const tpl = req.body.tpl;
  _.template(tpl)({});
};
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected taint flow via double-call chain rebinding; got 0 findings",
    );
}