nyx/tests/ssa_equivalence_tests.rs

//! Semantic regression suite for the SSA lowering + optimisation + taint
//! pipeline.
//!
//! This file used to be a legacy/SSA equivalence test.  After legacy
//! was removed the file degenerated to "scan each fixture and assert
//! no panic", which proved almost nothing.  It has been restored as a
//! multi-tier correctness signal.  Each `#[test]` fn below verifies a
//! distinct property:
//!
//!   * `ssa_structural_invariants_corpus`, every body in every real-world
//!     fixture lowers to well-formed SSA.  Enforced via
//!     [`nyx_scanner::ssa::invariants::check_structural_invariants`]:
//!     single-assignment, pred/succ symmetry, terminator/succs agreement,
//!     phi arity and operand sources, value-def coverage, and reachability.
//!
//!   * `ssa_lowering_is_deterministic`, lowering the same CFG twice produces
//!     structurally identical SSA (equal fingerprint).  Catches any incoming
//!     non-determinism introduced by hashing or iteration order.
//!
//!   * `ssa_optimize_is_idempotent`, `optimize_ssa` reaches a fixpoint on
//!     the first run: re-running it must prune zero branches, eliminate
//!     zero copies, and remove zero dead defs, and must not change the body
//!     fingerprint.  Catches optimiser bugs where a second pass would find
//!     new work (indicating the first pass failed to converge).
//!
//!   * `summary_extraction_is_deterministic`, extracting summaries from the
//!     same bytes twice yields the same number of `FuncSummary` entries and
//!     the same `SsaFuncSummary` set (keys compared via `Debug`, values via
//!     JSON serialisation).  Catches non-determinism in summary construction;
//!     pure ordering differences between runs are tolerated.
//!
//!   * `scan_is_stable_across_runs`, a full two-pass scan produces the same
//!     diag list when invoked twice on the same input.  Runs on a curated
//!     per-language fixture subset to keep wall time bounded; the other
//!     tiers already cover full-corpus behaviour.
//!
//!   * `ssa_corpus_does_not_panic`, the original smoke check, kept to lock
//!     in termination on the full fixture matrix.
//!
//! Run with: `cargo test --test ssa_equivalence_tests`
//!
//! Set `NYX_SSA_VERBOSE=1` for per-fixture progress output.

mod common;

use common::test_config;
use nyx_scanner::ast::{build_cfg_for_file, extract_all_summaries_from_bytes};
use nyx_scanner::cfg::BodyCfg;
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::ssa::{
    invariants::{body_fingerprint, check_structural_invariants},
    lower_to_ssa, optimize_ssa,
};
use nyx_scanner::utils::config::{AnalysisMode, Config};
use std::path::{Path, PathBuf};

// ── Fixture discovery ─────────────────────────────────────────────────────

/// One discovered real-world fixture: a display name plus the source file to
/// feed into the pipeline.
struct Fixture {
    // Display name in `lang/category/stem` form, as built by `discover_fixtures`.
    name: String,
    // Path of the fixture's source file (the sibling of `<stem>.expect.json`).
    source_path: PathBuf,
}

/// Collects every fixture under `tests/fixtures/real_world/<lang>/<category>/`.
///
/// A fixture is any `<stem>.expect.json` file that has a sibling source file
/// `<stem>.<ext>` (resolved by `find_source_file`).  Directories that are
/// missing or unreadable are skipped silently, as are unreadable entries.
/// The result is sorted by fixture name so callers iterate in a stable order
/// regardless of directory-listing order.
fn discover_fixtures() -> Vec<Fixture> {
    let base = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/real_world");
    let mut fixtures = Vec::new();

    let langs = [
        "rust",
        "c",
        "cpp",
        "java",
        "go",
        "php",
        "python",
        "ruby",
        "typescript",
        "javascript",
    ];
    let categories = ["taint", "cfg", "state", "mixed"];

    for lang in &langs {
        for category in &categories {
            let dir = base.join(lang).join(category);
            // `read_dir` already fails for a missing or non-directory path, so
            // a separate `is_dir` probe is not needed.
            let Ok(entries) = std::fs::read_dir(&dir) else {
                continue;
            };
            for entry in entries.flatten() {
                let fname = entry.file_name().to_string_lossy().into_owned();
                // `strip_suffix` removes the marker exactly once.  The previous
                // `trim_end_matches` stripped it repeatedly, so a file such as
                // `x.expect.json.expect.json` was mapped to the wrong stem.
                let Some(stem) = fname.strip_suffix(".expect.json") else {
                    continue;
                };
                if let Some(source_path) = find_source_file(&dir, stem) {
                    fixtures.push(Fixture {
                        name: format!("{lang}/{category}/{stem}"),
                        source_path,
                    });
                }
            }
        }
    }
    fixtures.sort_by(|a, b| a.name.cmp(&b.name));
    fixtures
}

/// Looks in `dir` for `<stem>.<ext>` and returns the first path that exists.
///
/// Extensions are probed in the fixed order listed below, so when several
/// source files share a stem the earliest extension in the list wins.
/// Returns `None` when no candidate exists.
fn find_source_file(dir: &Path, stem: &str) -> Option<PathBuf> {
    const SOURCE_EXTENSIONS: [&str; 14] = [
        "rs", "c", "cpp", "cc", "cxx", "java", "go", "php", "py", "rb", "ts", "tsx", "js", "jsx",
    ];
    SOURCE_EXTENSIONS
        .iter()
        .map(|ext| dir.join(format!("{stem}.{ext}")))
        .find(|candidate| candidate.exists())
}

/// Reports whether per-fixture progress output was requested.
///
/// True only when the `NYX_SSA_VERBOSE` environment variable is set to
/// exactly `1`, `true`, or `yes`; unset, non-UTF-8, or any other value is
/// treated as "off".
fn verbose() -> bool {
    matches!(
        std::env::var("NYX_SSA_VERBOSE").as_deref(),
        Ok("1" | "true" | "yes")
    )
}

// ── Helpers for scanning a single-file fixture in isolation ──────────────

/// Scans one fixture in isolation and returns its diags in canonical order.
///
/// The source file is copied into a fresh temporary directory, which is then
/// scanned with `scan_no_index` under the `Full` analysis mode.  Each diag's
/// path is reduced to its file name so the temporary directory never shows up
/// in comparisons, and the list is sorted by `(id, line, col, path)`.
///
/// Panics if the temporary directory, the copy, or the scan fails.
fn scan_single_file(fixture: &Fixture) -> Vec<Diag> {
    let scratch = tempfile::TempDir::with_prefix("nyx_ssa_sem_").expect("tempdir");
    let file_name = fixture.source_path.file_name().unwrap();
    std::fs::copy(&fixture.source_path, scratch.path().join(file_name)).expect("copy fixture");

    let config = test_config(AnalysisMode::Full);
    let mut diags = nyx_scanner::scan_no_index(scratch.path(), &config)
        .expect("scan_no_index should succeed");

    // Keep only the file name of each path; paths without one stay untouched.
    for diag in diags.iter_mut() {
        let base_name = Path::new(&diag.path)
            .file_name()
            .map(|n| n.to_string_lossy().into_owned());
        if let Some(base_name) = base_name {
            diag.path = base_name;
        }
    }

    // Lexicographic tuple comparison: id first, then line, column, and path.
    diags.sort_by(|a, b| {
        (&a.id, &a.line, &a.col, &a.path).cmp(&(&b.id, &b.line, &b.col, &b.path))
    });
    diags
}

/// Renders a diag list as one canonical string for equality comparison.
///
/// Each diag contributes a single `|`-separated line holding its id, path,
/// line, column, severity (database string form), category (`Debug` form),
/// path-validated flag, guard kind (empty when absent), and suppressed flag.
/// Fields not listed here are left out on purpose; the earlier documentation
/// cites the floating-point `rank_score` as the non-deterministic one.
fn diag_fingerprint(diags: &[Diag]) -> String {
    diags.iter().fold(String::new(), |mut rendered, d| {
        let row = format!(
            "{id}|{path}|{line}|{col}|{sev}|{cat:?}|{pv}|{gk}|{sup}",
            id = d.id,
            path = d.path,
            line = d.line,
            col = d.col,
            sev = d.severity.as_db_str(),
            cat = d.category,
            pv = d.path_validated,
            gk = d.guard_kind.as_deref().unwrap_or(""),
            sup = d.suppressed,
        );
        rendered.push_str(&row);
        rendered.push('\n');
        rendered
    })
}

/// Returns an iterator over every `BodyCfg` in `bodies`, in slice order.
///
/// Thin wrapper around `slice::iter`, used by the tiers below as the single
/// definition of "every body of a file".  Presumably `bodies` holds the
/// top-level body plus one per function; confirm against `build_cfg_for_file`.
fn each_body<'a>(bodies: &'a [BodyCfg]) -> impl Iterator<Item = &'a BodyCfg> + 'a {
    bodies.iter()
}

// ── Tier 1: Structural invariants on every body of every fixture ─────────

/// Tier 1: every body that lowers successfully must satisfy
/// `check_structural_invariants`.
///
/// Fixtures that fail to build a CFG and bodies that fail to lower are
/// skipped.  All violations are collected and reported together, and a
/// minimum body count guards against the loop running over nothing.
#[test]
fn ssa_structural_invariants_corpus() {
    let fixtures = discover_fixtures();
    assert!(
        !fixtures.is_empty(),
        "no fixtures discovered — CARGO_MANIFEST_DIR wrong?"
    );

    let config = test_config(AnalysisMode::Full);
    let mut failures: Vec<String> = Vec::new();
    let mut bodies_checked = 0usize;

    for fixture in &fixtures {
        let file_cfg = match build_cfg_for_file(&fixture.source_path, &config) {
            Ok(Some((file_cfg, _lang))) => file_cfg,
            _ => continue,
        };

        for body in each_body(&file_cfg.bodies) {
            // A lowering error is not a failure in this tier; the smoke test
            // covers that the lowering call itself does not panic.
            let ssa = match lower_to_ssa(&body.graph, body.entry, None, true) {
                Ok(ssa) => ssa,
                Err(_) => continue,
            };
            bodies_checked += 1;

            let errs = check_structural_invariants(&ssa);
            if errs.is_empty() {
                continue;
            }
            failures.push(format!(
                "{} body={:?} ({} block(s)):\n  {}",
                fixture.name,
                body.meta.name.as_deref().unwrap_or("<toplevel>"),
                ssa.blocks.len(),
                errs.join("\n  ")
            ));
        }
    }

    assert!(
        bodies_checked > 100,
        "sanity: expected >100 bodies across the corpus, got {bodies_checked}"
    );
    if verbose() {
        eprintln!(
            "structural invariants: {} bodies checked across {} fixtures",
            bodies_checked,
            fixtures.len()
        );
    }
    assert!(
        failures.is_empty(),
        "SSA structural invariants violated in {} body/fixture combo(s):\n{}",
        failures.len(),
        failures.join("\n")
    );
}

// ── Tier 2: Lowering determinism ─────────────────────────────────────────

/// Tier 2: lowering the same CFG twice must give the same `body_fingerprint`.
///
/// Fixtures that fail to build a CFG, and bodies whose first lowering fails,
/// are skipped.  A body whose first lowering succeeds but whose second fails
/// is reported as a failure: the two runs disagreed, which is exactly the
/// non-determinism this tier exists to catch.
#[test]
fn ssa_lowering_is_deterministic() {
    let fixtures = discover_fixtures();
    let cfg = test_config(AnalysisMode::Full);
    let mut failures: Vec<String> = Vec::new();
    let mut bodies_checked: usize = 0;

    for fixture in &fixtures {
        let Ok(Some((file_cfg, _))) = build_cfg_for_file(&fixture.source_path, &cfg) else {
            continue;
        };
        for body in each_body(&file_cfg.bodies) {
            let Ok(a) = lower_to_ssa(&body.graph, body.entry, None, true) else {
                continue;
            };
            bodies_checked += 1;

            // Previously a failing second run was skipped silently, hiding an
            // Ok-then-Err divergence between two identical calls.
            let Ok(b) = lower_to_ssa(&body.graph, body.entry, None, true) else {
                failures.push(format!(
                    "{} body={:?}: lowering failed on the second run after succeeding on the first",
                    fixture.name,
                    body.meta.name.as_deref().unwrap_or("<toplevel>"),
                ));
                continue;
            };

            let fa = body_fingerprint(&a);
            let fb = body_fingerprint(&b);
            if fa != fb {
                failures.push(format!(
                    "{} body={:?}: non-deterministic SSA lowering",
                    fixture.name,
                    body.meta.name.as_deref().unwrap_or("<toplevel>"),
                ));
            }
        }
    }

    assert!(
        bodies_checked > 100,
        "sanity: expected >100 bodies, got {bodies_checked}"
    );
    assert!(
        failures.is_empty(),
        "SSA lowering is non-deterministic in {} body/fixture combo(s):\n{}",
        failures.len(),
        failures.join("\n")
    );
}

// ── Tier 2b: Strict 10× determinism on multi-phi bodies ──────────────────

/// Stronger determinism check than Tier 2, restricted to bodies that carry
/// at least two phis.
///
/// Each such body is lowered ten times in total (once up front, then nine
/// more times back-to-back), and every later fingerprint must equal the
/// first exactly.  A later lowering that fails after the first succeeded is
/// reported as a failure too.  Checking of a body stops at its first
/// mismatch.  `bodies_checked` counts successful (body × iteration)
/// comparisons, not bodies.
#[test]
fn ssa_lowering_is_deterministic_strict_10x() {
    let fixtures = discover_fixtures();
    let config = test_config(AnalysisMode::Full);
    let mut failures: Vec<String> = Vec::new();
    let mut bodies_checked = 0usize;
    let mut multi_phi_bodies = 0usize;

    for fixture in &fixtures {
        let file_cfg = match build_cfg_for_file(&fixture.source_path, &config) {
            Ok(Some((file_cfg, _))) => file_cfg,
            _ => continue,
        };
        for body in each_body(&file_cfg.bodies) {
            // One lowering up front decides whether the body is worth the
            // repeated runs and doubles as the reference fingerprint.
            let first = match lower_to_ssa(&body.graph, body.entry, None, true) {
                Ok(ssa) => ssa,
                Err(_) => continue,
            };
            let phi_count = first.blocks.iter().fold(0usize, |n, b| n + b.phis.len());
            if phi_count < 2 {
                continue;
            }
            multi_phi_bodies += 1;

            let body_name = body.meta.name.as_deref().unwrap_or("<toplevel>");
            let expected = body_fingerprint(&first);
            for i in 1..10 {
                match lower_to_ssa(&body.graph, body.entry, None, true) {
                    Err(_) => {
                        failures.push(format!(
                            "{} body={:?}: lowering failed on iteration {i} after succeeding earlier",
                            fixture.name, body_name,
                        ));
                        break;
                    }
                    Ok(again) => {
                        let fp = body_fingerprint(&again);
                        if fp != expected {
                            failures.push(format!(
                                "{} body={:?}: fingerprint diverged on iteration {i}\n  --- expected ---\n{expected}  --- got ---\n{fp}",
                                fixture.name, body_name,
                            ));
                            break;
                        }
                        bodies_checked += 1;
                    }
                }
            }
        }
    }

    assert!(
        multi_phi_bodies >= 10,
        "expected to cover >= 10 multi-phi bodies for a meaningful strict-determinism check, got {multi_phi_bodies}",
    );
    assert!(
        bodies_checked > 80,
        "sanity: expected >80 (body × iteration) samples, got {bodies_checked}"
    );
    assert!(
        failures.is_empty(),
        "SSA lowering is non-deterministic in {} body/fixture combo(s) under 10× strict comparison:\n{}",
        failures.len(),
        failures.join("\n")
    );
}

// ── Tier 3: Optimization idempotence ─────────────────────────────────────

/// Tier 3: a second `optimize_ssa` run over an already-optimised body must
/// be a no-op.
///
/// For every body that lowers, the optimiser runs twice.  The second run
/// must leave `body_fingerprint` unchanged and report zero pruned branches,
/// zero eliminated copies, and zero removed dead defs.  Both conditions are
/// checked independently, so one body can contribute two failure lines.
#[test]
fn ssa_optimize_is_idempotent() {
    let fixtures = discover_fixtures();
    let config = test_config(AnalysisMode::Full);
    let mut failures: Vec<String> = Vec::new();
    let mut bodies_checked = 0usize;

    for fixture in &fixtures {
        let (file_cfg, lang) = match build_cfg_for_file(&fixture.source_path, &config) {
            Ok(Some(pair)) => pair,
            _ => continue,
        };
        for body in each_body(&file_cfg.bodies) {
            let mut ssa = match lower_to_ssa(&body.graph, body.entry, None, true) {
                Ok(ssa) => ssa,
                Err(_) => continue,
            };
            let body_name = body.meta.name.as_deref().unwrap_or("<toplevel>");

            // Run 1 may transform the body; its statistics are not inspected.
            let _ = optimize_ssa(&mut ssa, &body.graph, Some(lang));
            let fp_after_first = body_fingerprint(&ssa);

            // Run 2 is the one under test.
            let second = optimize_ssa(&mut ssa, &body.graph, Some(lang));
            let fp_after_second = body_fingerprint(&ssa);
            bodies_checked += 1;

            if fp_after_first != fp_after_second {
                failures.push(format!(
                    "{} body={:?}: optimize_ssa changed body fingerprint on second pass",
                    fixture.name, body_name,
                ));
            }

            let reached_fixpoint = second.branches_pruned == 0
                && second.copies_eliminated == 0
                && second.dead_defs_removed == 0;
            if !reached_fixpoint {
                failures.push(format!(
                    "{} body={:?}: optimize_ssa did not reach fixpoint (branches={}, copies={}, dead_defs={})",
                    fixture.name,
                    body_name,
                    second.branches_pruned,
                    second.copies_eliminated,
                    second.dead_defs_removed,
                ));
            }
        }
    }

    assert!(
        bodies_checked > 100,
        "sanity: expected >100 bodies, got {bodies_checked}"
    );
    assert!(
        failures.is_empty(),
        "optimize_ssa is not idempotent in {} body/fixture combo(s):\n{}",
        failures.len(),
        failures.join("\n")
    );
}

// ── Tier 4: Summary-extraction determinism ───────────────────────────────

/// Tier 4: extracting summaries twice from the same bytes must agree.
///
/// What is compared:
///   * the number of function summaries (first tuple element), by count only;
///   * the SSA summaries (second tuple element), as a set: both runs are
///     sorted by the `Debug` form of their key, then keys are compared via
///     `Debug` and values via their JSON serialisation.
///
/// Files that cannot be read, or whose first extraction fails, are skipped.
/// A file whose first extraction succeeds but whose second fails is reported
/// as a failure, since the two runs disagreed.
///
/// NOTE(review): the contents of the function summaries are never compared,
/// only their count.  Extending this needs a stable rendering of that type.
#[test]
fn summary_extraction_is_deterministic() {
    let fixtures = discover_fixtures();
    let cfg = test_config(AnalysisMode::Full);
    let mut failures: Vec<String> = Vec::new();
    let mut files_checked: usize = 0;

    for fixture in &fixtures {
        let Ok(bytes) = std::fs::read(&fixture.source_path) else {
            continue;
        };
        let Ok((fn_a, ssa_a, _bodies_a, _auth_a, _cpi_a)) =
            extract_all_summaries_from_bytes(&bytes, &fixture.source_path, &cfg, None)
        else {
            continue;
        };
        files_checked += 1;

        // Previously a failing second run was skipped silently, hiding an
        // Ok-then-Err divergence between two identical calls.
        let Ok((fn_b, ssa_b, _bodies_b, _auth_b, _cpi_b)) =
            extract_all_summaries_from_bytes(&bytes, &fixture.source_path, &cfg, None)
        else {
            failures.push(format!(
                "{}: summary extraction failed on the second run after succeeding on the first",
                fixture.name,
            ));
            continue;
        };

        // Counts must match exactly.
        if fn_a.len() != fn_b.len() {
            failures.push(format!(
                "{}: FuncSummary count unstable ({} vs {})",
                fixture.name,
                fn_a.len(),
                fn_b.len()
            ));
            continue;
        }
        if ssa_a.len() != ssa_b.len() {
            failures.push(format!(
                "{}: SsaFuncSummary count unstable ({} vs {})",
                fixture.name,
                ssa_a.len(),
                ssa_b.len()
            ));
            continue;
        }

        // Sort both runs by key so that a pure ordering difference does not
        // fail the test; what matters is set identity.  The cached-key sort
        // renders each key once instead of once per comparison.
        let mut ssa_a_sorted = ssa_a;
        let mut ssa_b_sorted = ssa_b;
        ssa_a_sorted.sort_by_cached_key(|entry| format!("{:?}", entry.0));
        ssa_b_sorted.sort_by_cached_key(|entry| format!("{:?}", entry.0));

        for (i, ((k_a, s_a), (k_b, s_b))) in
            ssa_a_sorted.iter().zip(ssa_b_sorted.iter()).enumerate()
        {
            if format!("{k_a:?}") != format!("{k_b:?}") {
                failures.push(format!(
                    "{}: SsaFuncSummary key {i} differs: {:?} vs {:?}",
                    fixture.name, k_a, k_b,
                ));
                continue;
            }
            let ja = serde_json::to_string(s_a).expect("serialize SsaFuncSummary a");
            let jb = serde_json::to_string(s_b).expect("serialize SsaFuncSummary b");
            if ja != jb {
                failures.push(format!(
                    "{}: SsaFuncSummary for {k_a:?} not bitwise-stable:\n  a={}\n  b={}",
                    fixture.name, ja, jb,
                ));
            }
        }
    }

    assert!(
        files_checked > 50,
        "sanity: expected >50 files checked, got {files_checked}"
    );
    assert!(
        failures.is_empty(),
        "summary extraction is non-deterministic in {} case(s):\n{}",
        failures.len(),
        failures.join("\n")
    );
}

// ── Tier 5: Scan stability on a curated subset ───────────────────────────

/// Curated fixture subset used for cross-run diag stability.
///
/// Entries are fixture names in the `lang/category/stem` form produced by
/// `discover_fixtures`.  Every language in the corpus appears at least once
/// (Rust has two entries, for 11 in total), and all entries come from the
/// `taint` category.  Keeping the list short bounds the test at 11 fixtures
/// × 2 scans.
const SCAN_STABILITY_SUBSET: &[&str] = &[
    "rust/taint/env_to_command",
    "rust/taint/actix_xss",
    "c/taint/buffer_overflow",
    "cpp/taint/cmdi_execl",
    "java/taint/cast_to_string_still_tainted",
    "php/taint/closure_taint",
    "python/taint/attribute_taint",
    "ruby/taint/cmdi_backticks",
    "typescript/taint/async_await_taint",
    "javascript/taint/alias_no_sanitize_unsafe",
    "go/taint/cmdi_http",
];

/// Tier 5: scanning the same fixture twice must yield the same diag
/// fingerprint.
///
/// Runs over `SCAN_STABILITY_SUBSET` only.  A curated name that is no longer
/// present in the corpus is skipped (and logged in verbose mode) rather than
/// failing, but at least three fixtures must actually be scanned.
#[test]
fn scan_is_stable_across_runs() {
    let fixtures = discover_fixtures();
    let mut by_name: std::collections::HashMap<&str, &Fixture> =
        std::collections::HashMap::new();
    for f in &fixtures {
        by_name.insert(f.name.as_str(), f);
    }

    let mut failures: Vec<String> = Vec::new();
    let mut scanned = 0usize;

    for &name in SCAN_STABILITY_SUBSET {
        match by_name.get(name) {
            None => {
                // Curated names may drift as the corpus evolves; treat a
                // missing one as a skip so the tier keeps its value.
                if verbose() {
                    eprintln!("scan_is_stable_across_runs: missing fixture {name}");
                }
            }
            Some(&fixture) => {
                let first_run = scan_single_file(fixture);
                let second_run = scan_single_file(fixture);
                scanned += 1;

                let fa = diag_fingerprint(&first_run);
                let fb = diag_fingerprint(&second_run);
                if fa != fb {
                    failures.push(format!(
                        "{name}: diag set diverges across runs\n  --- run A ---\n{fa}  --- run B ---\n{fb}"
                    ));
                }
            }
        }
    }

    assert!(
        scanned >= 3,
        "scan_is_stable_across_runs: only {scanned} fixtures available — did the corpus paths move?"
    );
    assert!(
        failures.is_empty(),
        "scan is non-deterministic across runs:\n{}",
        failures.join("\n")
    );
}

// ── Tier 6: SSA lowering coverage sanity ─────────────────────────────────

/// Guards against a regression that would make `lower_to_ssa` return empty
/// or trivial bodies, which would let every other tier pass vacuously.
///
/// Aggregates simple size statistics over every body that lowers: body
/// count, multi-block body count, total blocks, total instructions (body
/// instructions plus phis), total phis, total `Call` instructions, and the
/// number of bodies containing at least one phi or at least one call.  Each
/// statistic must exceed a generous lower bound.
#[test]
fn ssa_lowering_produces_non_trivial_bodies() {
    let fixtures = discover_fixtures();
    let config = test_config(AnalysisMode::Full);

    let mut total_blocks = 0usize;
    let mut total_insts = 0usize;
    let mut total_phis = 0usize;
    let mut total_calls = 0usize;
    let mut bodies_with_phi = 0usize;
    let mut bodies_with_call = 0usize;
    let mut multi_block_bodies = 0usize;
    let mut bodies = 0usize;

    for fixture in &fixtures {
        let file_cfg = match build_cfg_for_file(&fixture.source_path, &config) {
            Ok(Some((file_cfg, _))) => file_cfg,
            _ => continue,
        };
        for body in each_body(&file_cfg.bodies) {
            let ssa = match lower_to_ssa(&body.graph, body.entry, None, true) {
                Ok(ssa) => ssa,
                Err(_) => continue,
            };

            // Per-body tallies, folded into the corpus totals below.
            let block_count = ssa.blocks.len();
            let phis_here: usize = ssa.blocks.iter().map(|b| b.phis.len()).sum();
            let body_insts_here: usize = ssa.blocks.iter().map(|b| b.body.len()).sum();
            let calls_here = ssa
                .blocks
                .iter()
                .flat_map(|b| b.body.iter())
                .filter(|inst| matches!(inst.op, nyx_scanner::ssa::SsaOp::Call { .. }))
                .count();

            bodies += 1;
            total_blocks += block_count;
            total_insts += body_insts_here + phis_here;
            total_phis += phis_here;
            total_calls += calls_here;
            if block_count > 1 {
                multi_block_bodies += 1;
            }
            if phis_here > 0 {
                bodies_with_phi += 1;
            }
            if calls_here > 0 {
                bodies_with_call += 1;
            }
        }
    }

    // The bounds only catch gross regressions (for example lowering that
    // silently yields single-block bodies without instructions).  Adjust
    // them if the corpus is shrunk on purpose.
    assert!(bodies > 200, "expected >200 bodies, got {bodies}");
    assert!(
        multi_block_bodies > 50,
        "expected >50 multi-block bodies (guard against collapse regression), got {multi_block_bodies}"
    );
    assert!(
        total_blocks > 500,
        "expected >500 blocks across corpus, got {total_blocks}"
    );
    assert!(
        total_insts > 1000,
        "expected >1000 SSA instructions across corpus, got {total_insts}"
    );
    assert!(
        total_phis > 0,
        "expected at least one phi somewhere in the corpus, got 0"
    );
    assert!(
        total_calls > 100,
        "expected >100 call instructions, got {total_calls}"
    );
    assert!(
        bodies_with_phi > 20,
        "expected >20 bodies with phis, got {bodies_with_phi}"
    );
    assert!(
        bodies_with_call > 100,
        "expected >100 bodies with calls, got {bodies_with_call}"
    );

    if verbose() {
        eprintln!(
            "ssa coverage: bodies={bodies} multi_block={multi_block_bodies} blocks={total_blocks} insts={total_insts} phis={total_phis} calls={total_calls} bodies_with_phi={bodies_with_phi} bodies_with_call={bodies_with_call}"
        );
    }
}

// ── Tier 7: Original panic-free smoke check (preserved) ─────────────────

/// Tier 7: building the CFG and lowering every body must not panic for any
/// fixture.
///
/// Each fixture runs inside `catch_unwind`, so one panic does not hide the
/// others; every panicking fixture is listed in the final assertion.
#[test]
fn ssa_corpus_does_not_panic() {
    let fixtures = discover_fixtures();
    assert!(!fixtures.is_empty(), "no fixtures found");
    let cfg = test_config(AnalysisMode::Full);

    let failures: Vec<String> = fixtures
        .iter()
        .filter(|fixture| {
            std::panic::catch_unwind(|| build_and_lower_all(&fixture.source_path, &cfg)).is_err()
        })
        .map(|fixture| format!("PANIC in {}", fixture.name))
        .collect();

    assert!(
        failures.is_empty(),
        "SSA corpus panics:\n{}",
        failures.join("\n")
    );
}

/// Builds the CFG for `path` and lowers every body to SSA.
///
/// Returns how many bodies lowered successfully, or 0 when no CFG could be
/// built for the file.
fn build_and_lower_all(path: &Path, cfg: &Config) -> usize {
    match build_cfg_for_file(path, cfg) {
        Ok(Some((file_cfg, _))) => file_cfg
            .bodies
            .iter()
            .filter(|body| lower_to_ssa(&body.graph, body.entry, None, true).is_ok())
            .count(),
        _ => 0,
    }
}

// ── Phase 12: Rust `await_expression` Assign-op count ───────────────────
//
// Each Rust `await_expression` node carries `is_await_forward = true`
// (set via the new `Kind::AwaitForward` mapping in `src/labels/rust.rs`).
// The SSA lowering must emit at most one `SsaOp::Assign` per such CFG
// node — duplicating the emission would inflate the body's value count
// and propagate the awaited taint twice.  Use the new
// `tests/fixtures/realistic/async_await/await_count.rs` fixture which
// places three `await_expression` nodes in distinct positions.
/// Every CFG node flagged `is_await_forward` must lower to at most one
/// `SsaOp::Assign` instruction.
///
/// For each body of the `await_count.rs` fixture, the flagged CFG nodes are
/// collected, the body is lowered, and the `Assign` instructions attributed
/// to each flagged node are counted.  Bodies that fail to lower are skipped.
///
/// The closing sanity guard counts only await nodes in bodies that actually
/// lowered.  Counting them before lowering would let the guard pass even if
/// every await-bearing body failed to lower, leaving the cap check vacuous.
#[test]
fn await_emits_at_most_one_assign_per_node() {
    use nyx_scanner::ssa::SsaOp;
    let fixture = Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests/fixtures/realistic/async_await/await_count.rs");
    let cfg = test_config(AnalysisMode::Full);
    let (file_cfg, _) = build_cfg_for_file(&fixture, &cfg)
        .expect("parse fixture")
        .expect("non-empty bodies");

    let mut checked_await_nodes = 0usize;
    for body in &file_cfg.bodies {
        let graph = &body.graph;

        // Collect CFG node indices whose NodeInfo has `is_await_forward`.
        let await_nodes: Vec<_> = graph
            .node_indices()
            .filter(|n| graph[*n].is_await_forward)
            .collect();

        let ssa = match lower_to_ssa(graph, body.entry, None, true) {
            Ok(s) => s,
            Err(_) => continue,
        };
        // Only nodes whose body lowered are really exercised by the cap below.
        checked_await_nodes += await_nodes.len();

        // Count Assign ops attributed to each await CFG node.
        for cfg_node in &await_nodes {
            let assign_count: usize = ssa
                .blocks
                .iter()
                .flat_map(|b| b.body.iter())
                .filter(|inst| inst.cfg_node == *cfg_node && matches!(inst.op, SsaOp::Assign(_)))
                .count();
            assert!(
                assign_count <= 1,
                "await_expression CFG node {:?} lowered to {} Assign ops (expected <= 1) in body {:?}",
                cfg_node,
                assign_count,
                body.meta.name.as_deref().unwrap_or("<toplevel>"),
            );
        }
    }
    // Sanity guard: if the await mapping regresses, or the await-bearing
    // bodies stop lowering, no node is checked and the cap above would pass
    // vacuously.  Pin a lower bound so that regression surfaces here.
    assert!(
        checked_await_nodes >= 1,
        "expected at least one await_expression CFG node across all successfully lowered bodies, got 0 — fixture, mapping, or lowering regressed"
    );
}

// ── Catch-block orphan invariant ────────────────────────────────────────
//
// Construct a synthetic SsaBody where a block carries `SsaOp::CatchParam`
// but is neither reachable from entry via normal flow nor listed as a
// target of any exception edge. The invariant must report the
// orphan, this is the CFG-construction-bug signal the invariant is
// designed to surface.
//
// The test stays on the pure-function `check_catch_block_reachability`
// path to avoid the debug-build panic inside `lower_to_ssa`; it
// exercises the release-build semantics (warn + error report) which
// is what production bodies go through when compiled without
// `debug_assertions`.

/// A block carrying `SsaOp::CatchParam` that has no predecessor and is not
/// listed in `exception_edges` must be rejected by
/// `check_catch_block_reachability` with a "catch-block orphan" message.
#[test]
fn orphan_catch_block_triggers_reachability_invariant() {
    use nyx_scanner::ssa::invariants::check_catch_block_reachability;
    use nyx_scanner::ssa::{
        BlockId, SsaBlock, SsaBody, SsaInst, SsaOp, SsaValue, Terminator, ValueDef,
    };
    use petgraph::graph::NodeIndex;
    use smallvec::smallvec;

    let dummy_cfg = NodeIndex::new(0);

    // Entry block: returns immediately and has no successors, so it does not
    // lead to block 1.
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Return(None),
        preds: smallvec![],
        succs: smallvec![],
    };

    // Orphan block: holds the CatchParam but has no predecessors.
    let orphan_block = SsaBlock {
        id: BlockId(1),
        phis: vec![],
        body: vec![SsaInst {
            value: SsaValue(0),
            op: SsaOp::CatchParam,
            cfg_node: dummy_cfg,
            var_name: Some("e".into()),
            span: (0, 0),
        }],
        terminator: Terminator::Return(None),
        preds: smallvec![],
        succs: smallvec![],
    };

    let catch_param_def = ValueDef {
        var_name: Some("e".into()),
        cfg_node: dummy_cfg,
        block: BlockId(1),
    };

    let body = SsaBody {
        blocks: vec![entry_block, orphan_block],
        entry: BlockId(0),
        value_defs: vec![catch_param_def],
        cfg_node_map: Default::default(),
        // Left empty on purpose: together with the missing predecessor this
        // is what makes block 1 an orphan.
        exception_edges: vec![],
        field_interner: nyx_scanner::ssa::ir::FieldInterner::default(),
        field_writes: std::collections::HashMap::new(),

        synthetic_externals: std::collections::HashSet::new(),
        slot_scoped_assigns: std::collections::HashSet::new(),
    };

    let err = check_catch_block_reachability(&body)
        .expect_err("orphan catch block must fail the reachability invariant");
    let mentions_orphan = err
        .messages
        .iter()
        .any(|m| m.contains("catch-block orphan"));
    assert!(
        mentions_orphan,
        "expected orphan-catch message, got: {:?}",
        err.messages,
    );
}

/// Regression guard: a `CatchParam` block reached from the entry block by an
/// ordinary `Goto` (no exception edge) satisfies
/// `check_catch_block_reachability`.
#[test]
fn normally_reachable_catch_block_passes_invariant() {
    use nyx_scanner::ssa::invariants::check_catch_block_reachability;
    use nyx_scanner::ssa::{
        BlockId, SsaBlock, SsaBody, SsaInst, SsaOp, SsaValue, Terminator, ValueDef,
    };
    use petgraph::graph::NodeIndex;
    use smallvec::smallvec;

    let dummy_cfg = NodeIndex::new(0);

    // Entry block: jumps straight to block 1.
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec![],
        succs: smallvec![BlockId(1)],
    };

    // Catch block: its only predecessor is the entry block.
    let catch_block = SsaBlock {
        id: BlockId(1),
        phis: vec![],
        body: vec![SsaInst {
            value: SsaValue(0),
            op: SsaOp::CatchParam,
            cfg_node: dummy_cfg,
            var_name: Some("e".into()),
            span: (0, 0),
        }],
        terminator: Terminator::Return(None),
        preds: smallvec![BlockId(0)],
        succs: smallvec![],
    };

    let catch_param_def = ValueDef {
        var_name: Some("e".into()),
        cfg_node: dummy_cfg,
        block: BlockId(1),
    };

    let body = SsaBody {
        blocks: vec![entry_block, catch_block],
        entry: BlockId(0),
        value_defs: vec![catch_param_def],
        cfg_node_map: Default::default(),
        exception_edges: vec![],
        field_interner: nyx_scanner::ssa::ir::FieldInterner::default(),
        field_writes: std::collections::HashMap::new(),

        synthetic_externals: std::collections::HashSet::new(),
        slot_scoped_assigns: std::collections::HashSet::new(),
    };

    let verdict = check_catch_block_reachability(&body);
    assert!(verdict.is_ok());
}

/// A `CatchParam` block with no normal-flow predecessor but listed as the
/// target of an exception edge (the usual try/catch shape) satisfies
/// `check_catch_block_reachability`.
#[test]
fn exception_edge_catch_block_passes_invariant() {
    use nyx_scanner::ssa::invariants::check_catch_block_reachability;
    use nyx_scanner::ssa::{
        BlockId, SsaBlock, SsaBody, SsaInst, SsaOp, SsaValue, Terminator, ValueDef,
    };
    use petgraph::graph::NodeIndex;
    use smallvec::smallvec;

    let dummy_cfg = NodeIndex::new(0);

    // Entry block: returns immediately, no normal-flow successors.
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Return(None),
        preds: smallvec![],
        succs: smallvec![],
    };

    // Catch block: no predecessors; linked only through `exception_edges`.
    let catch_block = SsaBlock {
        id: BlockId(1),
        phis: vec![],
        body: vec![SsaInst {
            value: SsaValue(0),
            op: SsaOp::CatchParam,
            cfg_node: dummy_cfg,
            var_name: Some("e".into()),
            span: (0, 0),
        }],
        terminator: Terminator::Return(None),
        preds: smallvec![],
        succs: smallvec![],
    };

    let catch_param_def = ValueDef {
        var_name: Some("e".into()),
        cfg_node: dummy_cfg,
        block: BlockId(1),
    };

    let body = SsaBody {
        blocks: vec![entry_block, catch_block],
        entry: BlockId(0),
        value_defs: vec![catch_param_def],
        cfg_node_map: Default::default(),
        // The single exception edge entry -> catch block is what makes the
        // catch block legitimate here.
        exception_edges: vec![(BlockId(0), BlockId(1))],
        field_interner: nyx_scanner::ssa::ir::FieldInterner::default(),
        field_writes: std::collections::HashMap::new(),

        synthetic_externals: std::collections::HashSet::new(),
        slot_scoped_assigns: std::collections::HashSet::new(),
    };

    let verdict = check_catch_block_reachability(&body);
    assert!(verdict.is_ok());
}