nyx/src/state/facts.rs

#![allow(clippy::unnecessary_map_or)]

use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
use super::engine::DataflowResult;
use super::symbol::SymbolInterner;
use super::transfer::{TransferEvent, TransferEventKind};
use crate::cfg::{Cfg, StmtKind};
use crate::labels::{Cap, DataLabel};
use crate::patterns::Severity;
use crate::symbol::Lang;
use petgraph::visit::IntoNodeReferences;

/// Normalize a callee description for display.
///
/// Thin wrapper: forwards `s` to `crate::fmt::normalize_snippet` and returns
/// its result unchanged.  Within this file it is used to render the callee
/// text that is embedded in the `state-unauthed-access` finding message.
fn sanitize_desc(s: &str) -> String {
    crate::fmt::normalize_snippet(s)
}

/// Decides whether `idx` is the terminal exit of a function body, i.e. the
/// node at which every execution path has converged before control leaves
/// the function.
///
/// **Invariant:** only a terminal exit holds the fully merged lifecycle state
/// that leak analysis needs.  In per-body graphs a Return node is
/// intermediate (it flows on into the synthetic Exit node); in legacy
/// supergraphs it is terminal, because its only successor is the file-level
/// Exit whose `enclosing_func` is `None`.
///
/// Two conditions must both hold:
///
/// 1. **Kind filter** — the node's `StmtKind` actually ends execution
///    (`Exit`, `Return` or `Throw`).  This rules out dangling Seq nodes that
///    act as placeholders for nested function literals
///    (e.g. `obj.fn = () => {...}`) in the parent graph; without the filter
///    they would look terminal and yield spurious resource-leak findings at
///    the function-literal span.
/// 2. **Topological check** — the node belongs to a function and none of its
///    outgoing neighbours belongs to that same function.
fn is_terminal_function_exit(
    idx: petgraph::graph::NodeIndex,
    info: &crate::cfg::NodeInfo,
    cfg: &Cfg,
) -> bool {
    let ends_execution = matches!(
        info.kind,
        StmtKind::Exit | StmtKind::Return | StmtKind::Throw
    );
    if !ends_execution || info.ast.enclosing_func.is_none() {
        return false;
    }

    // Terminal means: every successor (if any) lives in a different scope.
    cfg.neighbors_directed(idx, petgraph::Direction::Outgoing)
        .all(|succ| cfg[succ].ast.enclosing_func != info.ast.enclosing_func)
}

/// A finding produced by state analysis.
///
/// Instances are created by `extract_findings`; one value describes a single
/// rule violation attached to a single source span.
#[derive(Debug, Clone)]
pub struct StateFinding {
    /// Identifier of the rule that fired.  Values emitted in this file:
    /// `"state-use-after-close"`, `"state-double-close"`,
    /// `"state-resource-leak"`, `"state-resource-leak-possible"` and
    /// `"state-unauthed-access"`.
    pub rule_id: String,
    /// Severity assigned to the finding by the rule that produced it.
    pub severity: Severity,
    /// Source span the finding is reported at, copied from a CFG node's
    /// `ast.span` (or from a recorded proxy acquire span).
    /// NOTE(review): presumably `(start, end)` offsets — confirm the unit
    /// against `AstMeta::span`.
    pub span: (usize, usize),
    /// Human-readable description of the problem.
    pub message: String,
    /// State machine that produced this finding: `"resource"` or `"auth"`.
    pub machine: &'static str,
    /// Variable name involved, if available.
    pub subject: Option<String>,
    /// State before the event (e.g. `"closed"`, `"open"`, `"unauthed"`).
    pub from_state: &'static str,
    /// State after the event (e.g. `"used"`, `"closed"`, `"leaked"`,
    /// `"possibly_leaked"`, `"access"`).
    pub to_state: &'static str,
}

/// Build the low-severity `state-resource-leak-possible` finding for
/// `var_name`, anchored at `span`.
fn possible_leak_finding(span: (usize, usize), var_name: &str) -> StateFinding {
    StateFinding {
        rule_id: "state-resource-leak-possible".into(),
        severity: Severity::Low,
        span,
        message: format!("resource `{var_name}` may not be closed on all paths"),
        machine: "resource",
        subject: Some(var_name.to_string()),
        from_state: "open",
        to_state: "possibly_leaked",
    }
}

/// Extract findings from converged dataflow state + transfer events.
///
/// The function runs four independent passes and returns their combined,
/// de-duplicated output:
///
/// 1. use-after-close / double-close, taken directly from `result.events`;
/// 2. resource leaks, read from the merged lifecycle state at the file-level
///    Exit and at every terminal function exit;
/// 2b. a JS/TS-only heuristic for proxy-acquired resources whose release
///    could be bypassed by a throwing call;
/// 3. privileged sinks reached while unauthenticated (only when
///    `enable_auth` is set and the CFG contains a web entrypoint).
///
/// # Parameters
///
/// * `result` — converged per-node states plus the transfer events recorded
///   while the analysis ran.
/// * `cfg` — the control-flow graph the analysis ran over.
/// * `interner` — maps symbol ids to variable names and back.
/// * `lang` — selects the acquire/release pairs and gates the JS/TS-only
///   heuristic in pass 2b.
/// * `func_summaries` — consulted to confirm web-like handler parameters.
/// * `enable_auth` — when `false`, pass 3 is skipped entirely.
/// * `path_safe_suppressed_sink_spans` — CFG sink spans whose tainted
///   inputs were proved path-safe by the SSA taint engine; the privileged
///   `state-unauthed-access` finding is suppressed on those spans because
///   the user-controlled input has already been proved unable to escape
///   into a privileged location.
/// * `closure_released_var_names` — names of variables whose release call
///   lives in a nested closure outside this body's CFG; leaks on those
///   names are suppressed.
///
/// # Returns
///
/// Findings sorted by `(span, rule_id)` with at most one finding per
/// `(span, rule_id)` pair.
#[allow(clippy::too_many_arguments)]
pub fn extract_findings(
    result: &DataflowResult<ProductState, TransferEvent>,
    cfg: &Cfg,
    interner: &SymbolInterner,
    lang: Lang,
    func_summaries: &crate::cfg::FuncSummaries,
    enable_auth: bool,
    path_safe_suppressed_sink_spans: &std::collections::HashSet<(usize, usize)>,
    closure_released_var_names: Option<&std::collections::HashSet<String>>,
) -> Vec<StateFinding> {
    let mut findings = Vec::new();

    // ── 1. Use-after-close from transfer events ──────────────────────────
    for event in &result.events {
        let info = &cfg[event.node];
        let var_name = interner.resolve(event.var);
        match event.kind {
            TransferEventKind::UseAfterClose => {
                findings.push(StateFinding {
                    rule_id: "state-use-after-close".into(),
                    severity: Severity::High,
                    span: info.ast.span,
                    message: format!("variable `{var_name}` used after close"),
                    machine: "resource",
                    subject: Some(var_name.to_string()),
                    from_state: "closed",
                    to_state: "used",
                });
            }
            TransferEventKind::DoubleClose => {
                findings.push(StateFinding {
                    rule_id: "state-double-close".into(),
                    severity: Severity::Medium,
                    span: info.ast.span,
                    message: format!("variable `{var_name}` closed twice"),
                    machine: "resource",
                    subject: Some(var_name.to_string()),
                    from_state: "closed",
                    to_state: "closed",
                });
            }
        }
    }

    // ── 2. Resource leaks at Exit and function-Return nodes ──────────────

    // Shared release matcher for the two suppression sets below.  A callee
    // counts as a release when its ASCII-lowercased text ends with any
    // lowercased release name of the language's resource pairs.  A suffix
    // match also covers exact equality, so no separate `==` test is needed.
    let pairs = crate::cfg_analysis::rules::resource_pairs(lang);
    let is_release_callee = |callee: &str| -> bool {
        let callee_lower = callee.to_ascii_lowercase();
        pairs.iter().any(|p| {
            p.release
                .iter()
                .any(|r| callee_lower.ends_with(&r.to_ascii_lowercase()))
        })
    };

    // Collect variables with a deferred release call (Go `defer f.Close()`).
    // These remain OPEN at function exit because transfer skips deferred
    // releases, but the runtime guarantees cleanup.
    let deferred_close_vars: std::collections::HashSet<super::symbol::SymbolId> = cfg
        .node_references()
        .filter(|(_, ni)| {
            ni.in_defer
                && ni.kind == StmtKind::Call
                && ni
                    .call
                    .callee
                    .as_deref()
                    .is_some_and(|c| is_release_callee(c))
        })
        .flat_map(|(_, ni)| {
            // Borrow the scope name from the node; no per-node String clone.
            let scope = ni.ast.enclosing_func.as_deref();
            ni.taint
                .uses
                .iter()
                .filter_map(move |v| interner.get_scoped(scope, v))
        })
        .collect();

    // Collect variables released via inner-call-in-arg shape (Go testify
    // `require.NoError(t, f.Close())`, `errs = append(errs, f.Close())`,
    // JUnit `assertEquals(0, in.read())`).  The transfer flips the
    // lifecycle to CLOSED on the success branch, but the err-return
    // predecessor that ran after the bare acquire (`f, err := os.Open(...)`)
    // still merges OPEN at the function-exit join.  Mirror the
    // `deferred_close_vars` suppression so the OPEN|CLOSED join doesn't
    // emit a leak-possible for a resource that has a real release site.
    let inner_arg_close_vars: std::collections::HashSet<super::symbol::SymbolId> = {
        let mut set = std::collections::HashSet::new();
        for (_, ni) in cfg.node_references() {
            if ni.in_defer || ni.arg_callees.is_empty() {
                continue;
            }
            let scope = ni.ast.enclosing_func.as_deref();
            for arg_callee in &ni.arg_callees {
                let Some(arg_callee_text) = arg_callee.as_deref() else {
                    continue;
                };
                // Only `recv.method` shapes qualify: there must be a dot, and
                // the receiver itself must be a plain (undotted) name.
                let Some(dot_idx) = arg_callee_text.rfind('.') else {
                    continue;
                };
                let recv_text = &arg_callee_text[..dot_idx];
                if recv_text.contains('.') || !is_release_callee(arg_callee_text) {
                    continue;
                }
                if let Some(sym) = interner.get_scoped(scope, recv_text) {
                    set.insert(sym);
                }
            }
        }
        set
    };

    for (idx, info) in cfg.node_references() {
        // File-level Exit (program termination, no enclosing function).
        let is_file_exit = info.kind == StmtKind::Exit && info.ast.enclosing_func.is_none();
        // Terminal function exit, the convergence node where all paths join.
        // Return nodes are intermediate and carry only path-specific state;
        // only the terminal exit carries the complete merged lifecycle.
        let is_func_terminal = is_terminal_function_exit(idx, info, cfg);
        if !is_file_exit && !is_func_terminal {
            continue;
        }
        let Some(state) = result.states.get(&idx) else {
            continue;
        };

        // Symbol lookups are function-scoped at a terminal function exit and
        // unscoped at the file-level Exit.
        let scope = if is_func_terminal {
            info.ast.enclosing_func.as_deref()
        } else {
            None
        };

        for (&sym, &lifecycle) in &state.resource.vars {
            if !lifecycle.contains(ResourceLifecycle::OPEN) {
                continue;
            }
            let var_name = interner.resolve(sym);
            let acquire_node = find_acquire_node(cfg, sym, interner, scope);

            // At the file-level Exit, skip variables whose acquire site is
            // inside a function, those are already handled by the per-
            // function exit checks.  Without this, the file-level Exit
            // would duplicate leak findings with a misleading acquire span
            // (the first global match instead of the correct function-local one).
            if is_file_exit
                && acquire_node.is_some_and(|acq| cfg[acq].ast.enclosing_func.is_some())
            {
                continue;
            }

            // Suppress leaks for resources acquired inside managed scopes
            // (Python `with`, Java try-with-resources). The suppression is
            // tied to the specific acquire site, not the variable name.
            if acquire_node.is_some_and(|acq| cfg[acq].managed_resource) {
                continue;
            }

            // Suppress leaks for variables with a deferred close call
            // (Go `defer f.Close()`). The deferred call guarantees cleanup
            // at function exit even though transfer didn't mark it CLOSED.
            if deferred_close_vars.contains(&sym) {
                continue;
            }

            // Suppress leaks for variables released via inner-call-in-arg
            // shape.  Mirrors the deferred-close suppression so the
            // OPEN-on-err-return / CLOSED-on-success-branch merge at
            // function exit does not surface as leak-possible.
            if inner_arg_close_vars.contains(&sym) {
                continue;
            }

            // Suppress leaks for variables whose release call lives in a
            // nested closure (callback / event handler) outside this
            // body's CFG.  Common JS/TS shape:
            //   const ws = new WebSocket(url);
            //   socket.on("close", () => ws.close());
            // The per-body resource analysis cannot observe the close
            // inside the registered handler body; without this gate the
            // handle reads as a definite leak.  Match by variable name —
            // closure-captured handles share the binding name with the
            // handle in the outer scope.
            if closure_released_var_names.is_some_and(|names| names.contains(var_name)) {
                continue;
            }

            // Prefer direct acquire node span; fall back to proxy span
            // from ResourceMethodSummary (cross-body resource tracking),
            // and finally to the exit node's own span.
            let report_span = acquire_node
                .map(|n| cfg[n].ast.span)
                .or_else(|| state.proxy_acquire_spans.get(&sym).copied())
                .unwrap_or(info.ast.span);

            // Suppress/downgrade leaks for variables returned from the
            // function (factory pattern).  Only suppress when ALL
            // predecessors that have the variable OPEN also return it.
            // Mixed cases (some paths return, some leak) are downgraded
            // to state-resource-leak-possible.
            if is_func_terminal {
                let mut returned_open = 0u32;
                let mut non_returned_open = 0u32;
                for pred in cfg.neighbors_directed(idx, petgraph::Direction::Incoming) {
                    let Some(ps) = result.states.get(&pred) else {
                        continue;
                    };
                    let pred_has_open = ps
                        .resource
                        .vars
                        .get(&sym)
                        .is_some_and(|lc| lc.contains(ResourceLifecycle::OPEN));
                    if !pred_has_open {
                        continue;
                    }
                    // Only Return nodes can transfer resource ownership to the
                    // caller.  Non-Return predecessors (exception edges, implicit
                    // fallthrough) with OPEN resources represent genuine leaks.
                    let returns_var = cfg[pred].kind == StmtKind::Return
                        && cfg[pred]
                            .taint
                            .uses
                            .iter()
                            .any(|u| interner.get_scoped(scope, u) == Some(sym));
                    if returns_var {
                        returned_open += 1;
                    } else {
                        non_returned_open += 1;
                    }
                }
                if returned_open > 0 {
                    if non_returned_open > 0 {
                        // Mixed: some paths return resource, some leak it.
                        findings.push(possible_leak_finding(report_span, var_name));
                    }
                    // Otherwise all OPEN paths transfer ownership to caller.
                    continue;
                }
                // returned_open == 0: fall through to normal leak detection
            }

            let has_closed = lifecycle.contains(ResourceLifecycle::CLOSED);
            let has_moved = lifecycle.contains(ResourceLifecycle::MOVED);
            if !has_closed && !has_moved {
                // Definite leak: open on all paths, never closed
                findings.push(StateFinding {
                    rule_id: "state-resource-leak".into(),
                    severity: Severity::Medium,
                    span: report_span,
                    message: format!("resource `{var_name}` is never closed"),
                    machine: "resource",
                    subject: Some(var_name.to_string()),
                    from_state: "open",
                    to_state: "leaked",
                });
            } else if has_closed {
                // May-leak: open on some paths, closed on others
                findings.push(possible_leak_finding(report_span, var_name));
            }
            // OPEN|MOVED without CLOSED produces no finding.
        }
    }

    // ── 2b. Proxy-acquired possible leaks (exception-path heuristic) ────
    // In JS/TS, any call can throw. If a proxy-acquired resource is fully
    // CLOSED at function exit (no OPEN paths), check whether there are
    // intervening calls between the proxy acquire and release nodes that
    // could throw and bypass the release. If so, emit a possible leak.
    //
    // **Language gate**: this heuristic is JS/TS-specific.  Other
    // languages (Go, Java, C, C++, Python, Rust, Ruby, PHP) use
    // explicit error returns / try-catch with deterministic control
    // flow, an intervening call does NOT silently bypass a release.
    // Firing this on Go gave the gin/context.go FP where any method
    // calling another method (`c.Set`, `c.Get`) was flagged as a
    // possible leak on the receiver.  Skip the section but continue
    // to section 3 (auth-required sinks) which is independent of the
    // resource state machine.
    if matches!(lang, Lang::JavaScript | Lang::TypeScript) {
        for (idx, info) in cfg.node_references() {
            if !is_terminal_function_exit(idx, info, cfg) {
                continue;
            }
            let Some(state) = result.states.get(&idx) else {
                continue;
            };

            // The intervening-call scan depends only on this exit node and
            // its state, not on the symbol, so it is evaluated at most once
            // per exit node and only if some symbol actually needs it.
            let mut has_intervening_calls: Option<bool> = None;

            for (&sym, &lifecycle) in &state.resource.vars {
                // Only for proxy-acquired resources that are fully CLOSED at exit
                let Some(&acquire_span) = state.proxy_acquire_spans.get(&sym) else {
                    continue;
                };
                if lifecycle.contains(ResourceLifecycle::OPEN) {
                    continue; // Already handled by the normal leak detection above
                }
                if !lifecycle.contains(ResourceLifecycle::CLOSED) {
                    continue;
                }
                // Check if there are intervening Call nodes between acquire and
                // release in the CFG (these could throw and bypass the release).
                //
                // NOTE: a stricter variant (audit #59) tried to exclude the
                // resource's own lifecycle ops (the acquire/release proxy
                // calls) and require reachability from the acquire node, to
                // suppress spurious findings on correctly open/close-paired
                // proxies.  That over-suppressed a *tested* true positive: a
                // class-field resource (`this.fd = fs.openSync(...)` in `open()`
                // with `close()` in a separate method — see
                // `tests/fixtures/real_world/typescript/cfg/try_catch_typed.ts`)
                // has only its own acquire call in scope, so excluding it left
                // zero intervening calls and dropped the must-match leak
                // finding.  Distinguishing a clean same-scope open/close pair
                // from a cross-method field leak needs proper inter-method
                // lifecycle modelling (deep-fix queue), so we keep the original
                // span-based exclusion here.
                let intervening = *has_intervening_calls.get_or_insert_with(|| {
                    cfg.node_references().any(|(_, ni)| {
                        ni.kind == StmtKind::Call
                            && ni.ast.enclosing_func == info.ast.enclosing_func
                            && ni.call.callee.is_some()
                            // Not a recorded proxy acquire site itself
                            && !state.proxy_acquire_spans.values().any(|s| *s == ni.ast.span)
                    })
                });
                if intervening {
                    findings.push(possible_leak_finding(acquire_span, interner.resolve(sym)));
                }
            }
        }
    }

    // ── 3. Auth-required sinks ───────────────────────────────────────────
    // Only run auth analysis when explicitly enabled (higher FP rate).
    // Check if any function is a web entrypoint
    let has_web_entrypoint = enable_auth
        && cfg.node_references().any(|(_, info)| {
            info.ast.enclosing_func.as_deref().is_some_and(|func_name| {
                is_web_entrypoint_simple(func_name, lang, func_summaries, cfg)
            })
        });

    if has_web_entrypoint {
        for (idx, info) in cfg.node_references() {
            if !is_privileged_sink(info) {
                continue;
            }
            let Some(state) = result.states.get(&idx) else {
                continue;
            };
            if state.auth.auth_level != AuthLevel::Unauthed {
                continue;
            }
            // Suppress when the SSA taint engine has already proved
            // the tainted input flowing into this sink is path-safe
            // (PathFact `dotdot=No && absolute=No`).  A web handler
            // reading a sanitised user-controlled path is not the
            // same shape as a handler reading any user-controlled
            // path, the auth concern reduces once the data cannot
            // escape into a privileged location.  Note this is per
            // CFG-node span, so co-located unrelated sinks are
            // unaffected.
            if path_safe_suppressed_sink_spans.contains(&info.ast.span) {
                continue;
            }
            let callee_desc =
                sanitize_desc(info.call.callee.as_deref().unwrap_or("(sensitive op)"));
            findings.push(StateFinding {
                rule_id: "state-unauthed-access".into(),
                severity: Severity::High,
                span: info.ast.span,
                message: format!(
                    "sensitive operation `{callee_desc}` reached without authentication"
                ),
                machine: "auth",
                subject: None,
                from_state: "unauthed",
                to_state: "access",
            });
        }
    }

    // Dedup: the stable sort groups equal `(span, rule_id)` pairs next to each
    // other, and `dedup_by` then keeps the first finding of each group.
    findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
    findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);

    findings
}

/// Locate the CFG node that acquired `sym`: the first `Call` node whose
/// `taint.defines` equals the variable's name.
///
/// Lookup order:
///
/// 1. If `enclosing_func` is given, the first matching `Call` node that
///    belongs to that function.  This keeps multi-function files correct
///    when the same variable name is used in several functions.
/// 2. Otherwise, or when step 1 finds nothing, the first matching `Call`
///    node anywhere in the graph (file-level Exit / top-level code).
///
/// Returns `None` when no `Call` node defines the variable.
fn find_acquire_node(
    cfg: &Cfg,
    sym: super::symbol::SymbolId,
    interner: &SymbolInterner,
    enclosing_func: Option<&str>,
) -> Option<petgraph::graph::NodeIndex> {
    let var_name = interner.resolve(sym);

    // Function-scoped candidate, attempted only when a scope was supplied.
    let scoped_hit = enclosing_func.and_then(|func| {
        cfg.node_references()
            .find(|(_, info)| {
                info.kind == StmtKind::Call
                    && info.ast.enclosing_func.as_deref() == Some(func)
                    && info.taint.defines.as_deref() == Some(var_name)
            })
            .map(|(idx, _)| idx)
    });

    // Global fallback: first defining call regardless of scope.
    scoped_hit.or_else(|| {
        cfg.node_references()
            .find(|(_, info)| {
                info.kind == StmtKind::Call && info.taint.defines.as_deref() == Some(var_name)
            })
            .map(|(idx, _)| idx)
    })
}

/// Reports whether `info` is a privileged sink: at least one of its taint
/// labels is a `DataLabel::Sink` whose capabilities overlap with shell
/// execution (`Cap::SHELL_ESCAPE`) or file I/O (`Cap::FILE_IO`).
fn is_privileged_sink(info: &crate::cfg::NodeInfo) -> bool {
    info.taint.labels.iter().any(|label| {
        matches!(
            label,
            DataLabel::Sink(caps) if caps.intersects(Cap::SHELL_ESCAPE | Cap::FILE_IO)
        )
    })
}

/// Simplified web entrypoint check (avoids AnalysisContext dependency).
///
/// A function qualifies when its ASCII-lowercased name looks like a handler
/// and, unless the name is a strong signal, its own summary lists a
/// web-like parameter:
///
/// * `main` never qualifies (typically a CLI entry).
/// * `handle_*` and `route_*` qualify on the name alone.
/// * `api_*`, `serve_*`, `process_*` and the exact name `handler` qualify
///   only with web parameter evidence for `lang`.
///
/// `_cfg` is accepted for signature compatibility and is not read.
fn is_web_entrypoint_simple(
    func_name: &str,
    lang: Lang,
    func_summaries: &crate::cfg::FuncSummaries,
    _cfg: &Cfg,
) -> bool {
    // Prefixes strong enough to skip parameter confirmation.
    const STRONG_PREFIXES: [&str; 2] = ["handle_", "route_"];
    // Prefixes that additionally require web parameter evidence.
    const WEAK_PREFIXES: [&str; 3] = ["api_", "serve_", "process_"];

    let lowered = func_name.to_ascii_lowercase();

    // Skip bare "main", it's typically a CLI entry
    if lowered == "main" {
        return false;
    }

    let strong_name = STRONG_PREFIXES.iter().any(|p| lowered.starts_with(*p));
    let weak_name = lowered == "handler" || WEAK_PREFIXES.iter().any(|p| lowered.starts_with(*p));
    if !strong_name && !weak_name {
        return false;
    }

    // Parameter names that count as web evidence, per language.
    let web_params: &[&str] = match lang {
        Lang::Rust => &["request", "req", "json", "query", "form", "payload", "body"],
        Lang::JavaScript | Lang::TypeScript => &["req", "request", "ctx", "res", "response"],
        Lang::Go => &["w", "writer", "r", "req", "request"],
        // Python, Java and every remaining language share the same list.
        _ => &["request", "req"],
    };

    // Confirm web parameters against THIS candidate handler only, not any
    // function in the file.  Scanning every summary made an unrelated
    // function's `req`/`r`/`ctx` parameter promote every
    // `process_*`/`api_*`/`serve_*` function in the file to a web
    // entrypoint, firing High-severity state-unauthed-access on batch/CLI
    // code.  Filter the file-level summary map down to the named function
    // via `FuncKey.name` (matches `info.ast.enclosing_func`); summary
    // `entry` NodeIndexes are not valid in the per-body CFG, so the name
    // is the safe selector here.
    strong_name
        || func_summaries
            .iter()
            .filter(|(key, _)| key.name == func_name)
            .any(|(_, summary)| {
                summary
                    .param_names
                    .iter()
                    .any(|p| web_params.contains(&p.to_ascii_lowercase().as_str()))
            })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{AstMeta, CallMeta, EdgeKind, NodeInfo, TaintMeta};
    use crate::cfg_analysis::rules;
    use crate::state::domain::ProductState;
    use crate::state::engine;
    use crate::state::symbol::SymbolInterner;
    use crate::state::transfer::DefaultTransfer;
    use petgraph::Graph;
    use std::collections::HashMap;

    fn make_node(kind: StmtKind) -> NodeInfo {
        NodeInfo {
            kind,
            ..Default::default()
        }
    }

    /// A resource that is opened and never closed must yield exactly one
    /// `state-resource-leak` finding naming the variable.
    #[test]
    fn detects_resource_leak() {
        // Entry → fopen(f) → Exit (no close)
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        // Acquire site: a Call to `fopen` that defines `f`.
        let open_node = cfg.add_node(NodeInfo {
            kind: StmtKind::Call,
            ast: AstMeta {
                span: (10, 20),
                ..Default::default()
            },
            taint: TaintMeta {
                defines: Some("f".into()),
                ..Default::default()
            },
            call: CallMeta {
                callee: Some("fopen".into()),
                ..Default::default()
            },
            ..Default::default()
        });
        // NOTE(review): nodes use the default `AstMeta`, so presumably
        // `enclosing_func` is `None` and this Exit is treated as the
        // file-level Exit — confirm against `AstMeta::default()`.
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        cfg.add_edge(entry, open_node, EdgeKind::Seq);
        cfg.add_edge(open_node, exit, EdgeKind::Seq);

        // Run the forward analysis with the C resource pairs and no
        // cross-body summaries or pointer-proxy hints.
        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
            resource_method_summaries: &[],
            ptr_proxy_hints: None,
        };

        let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial());
        // Auth analysis disabled, no path-safe spans, no closure-released names.
        let findings = extract_findings(
            &result,
            &cfg,
            &interner,
            Lang::C,
            &HashMap::new(),
            false,
            &std::collections::HashSet::new(),
            None,
        );

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].rule_id, "state-resource-leak");
        // The message must mention the leaked variable.
        assert!(findings[0].message.contains("f"));
    }

    /// A resource that is opened and then closed on the only path must not
    /// produce any finding.
    #[test]
    fn clean_open_close_no_findings() {
        // Entry → fopen(f) → fclose(f) → Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        // Acquire site: a Call to `fopen` that defines `f`.
        let open_node = cfg.add_node(NodeInfo {
            kind: StmtKind::Call,
            taint: TaintMeta {
                defines: Some("f".into()),
                ..Default::default()
            },
            call: CallMeta {
                callee: Some("fopen".into()),
                ..Default::default()
            },
            ..Default::default()
        });
        // Release site: a Call to `fclose` that uses `f`.
        let close_node = cfg.add_node(NodeInfo {
            kind: StmtKind::Call,
            taint: TaintMeta {
                uses: vec!["f".into()],
                ..Default::default()
            },
            call: CallMeta {
                callee: Some("fclose".into()),
                ..Default::default()
            },
            ..Default::default()
        });
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        cfg.add_edge(entry, open_node, EdgeKind::Seq);
        cfg.add_edge(open_node, close_node, EdgeKind::Seq);
        cfg.add_edge(close_node, exit, EdgeKind::Seq);

        // Run the forward analysis with the C resource pairs and no
        // cross-body summaries or pointer-proxy hints.
        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
            resource_method_summaries: &[],
            ptr_proxy_hints: None,
        };

        let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial());
        // Auth analysis disabled, no path-safe spans, no closure-released names.
        let findings = extract_findings(
            &result,
            &cfg,
            &interner,
            Lang::C,
            &HashMap::new(),
            false,
            &std::collections::HashSet::new(),
            None,
        );

        assert!(findings.is_empty());
    }

    fn make_func_node(kind: StmtKind, func: &str) -> NodeInfo {
        NodeInfo {
            kind,
            ast: AstMeta {
                enclosing_func: Some(func.to_string()),
                ..Default::default()
            },
            ..Default::default()
        }
    }

    /// In a per-body graph only the synthetic Exit node is a terminal
    /// function exit; Entry, Call and Return nodes are not.
    #[test]
    fn terminal_exit_is_topological() {
        // Per-body graph: Entry → Call → Return → Exit (all enclosing_func=Some)
        // Only Exit should be terminal (no successors in same scope).
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_func_node(StmtKind::Entry, "f"));
        // Call node inside `f` that defines `x`.
        let call = cfg.add_node(NodeInfo {
            kind: StmtKind::Call,
            call: CallMeta {
                callee: Some("fopen".into()),
                ..Default::default()
            },
            taint: TaintMeta {
                defines: Some("x".into()),
                ..Default::default()
            },
            ast: AstMeta {
                enclosing_func: Some("f".into()),
                ..Default::default()
            },
            ..Default::default()
        });
        // Return node inside `f` that uses `x`; it has a same-scope
        // successor (the Exit below), so it must not count as terminal.
        let ret = cfg.add_node(NodeInfo {
            kind: StmtKind::Return,
            taint: TaintMeta {
                uses: vec!["x".into()],
                ..Default::default()
            },
            ast: AstMeta {
                enclosing_func: Some("f".into()),
                ..Default::default()
            },
            ..Default::default()
        });
        let exit = cfg.add_node(make_func_node(StmtKind::Exit, "f"));

        cfg.add_edge(entry, call, EdgeKind::Seq);
        cfg.add_edge(call, ret, EdgeKind::Seq);
        cfg.add_edge(ret, exit, EdgeKind::Seq);

        // Entry and Call are rejected by the kind filter.
        assert!(
            !is_terminal_function_exit(entry, &cfg[entry], &cfg),
            "Entry must not be terminal"
        );
        assert!(
            !is_terminal_function_exit(call, &cfg[call], &cfg),
            "Call must not be terminal"
        );
        // Return passes the kind filter but fails the topological check.
        assert!(
            !is_terminal_function_exit(ret, &cfg[ret], &cfg),
            "Return must not be terminal — it flows into Exit"
        );
        assert!(
            is_terminal_function_exit(exit, &cfg[exit], &cfg),
            "Exit must be terminal — no successors in same scope"
        );
    }

    /// A resource that is opened and then returned must not be reported.
    ///
    /// Builds the per-body graph Entry → `f = fopen(..)` → `return f` → Exit,
    /// with every node tagged `enclosing_func = Some("factory")`, runs the
    /// forward analysis with the C resource pairs, and expects zero findings.
    #[test]
    fn per_body_factory_returned_resource_no_finding() {
        let func = "factory";
        let mut cfg: Cfg = Graph::new();

        let start = cfg.add_node(make_func_node(StmtKind::Entry, func));
        // Acquisition site: `fopen` callee defining `f`, with an explicit span.
        let acquire = cfg.add_node(NodeInfo {
            kind: StmtKind::Call,
            call: CallMeta {
                callee: Some("fopen".into()),
                ..Default::default()
            },
            taint: TaintMeta {
                defines: Some("f".into()),
                ..Default::default()
            },
            ast: AstMeta {
                span: (10, 20),
                enclosing_func: Some(func.into()),
            },
            ..Default::default()
        });
        // The return statement uses `f`, i.e. the resource is handed to the caller.
        let give_back = cfg.add_node(NodeInfo {
            taint: TaintMeta {
                uses: vec!["f".into()],
                ..Default::default()
            },
            ..make_func_node(StmtKind::Return, func)
        });
        let end = cfg.add_node(make_func_node(StmtKind::Exit, func));

        for (from, to) in [(start, acquire), (acquire, give_back), (give_back, end)] {
            cfg.add_edge(from, to, EdgeKind::Seq);
        }

        let interner = SymbolInterner::from_cfg_scoped(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
            resource_method_summaries: &[],
            ptr_proxy_hints: None,
        };
        let result = engine::run_forward(&cfg, start, &transfer, ProductState::initial());

        // Empty collections for the two collection-typed arguments.
        let empty_map = HashMap::new();
        let empty_set = std::collections::HashSet::new();
        let findings = extract_findings(
            &result,
            &cfg,
            &interner,
            Lang::C,
            &empty_map,
            false,
            &empty_set,
            None,
        );

        let rule_ids: Vec<_> = findings.iter().map(|finding| &finding.rule_id).collect();
        assert!(
            findings.is_empty(),
            "Resource returned from factory must not produce leak finding.\n  Got: {:?}",
            rule_ids
        );
    }

    /// A resource that is opened but never returned must be reported once.
    ///
    /// Builds the per-body graph Entry → `f = fopen(..)` → bare `return` → Exit,
    /// with every node tagged `enclosing_func = Some("leaker")`.  The Return
    /// node carries no uses, so `f` is not handed back to the caller, and the
    /// test expects exactly one `state-resource-leak` finding.
    #[test]
    fn per_body_non_returned_resource_leaks() {
        let func = "leaker";
        let mut cfg: Cfg = Graph::new();

        let start = cfg.add_node(make_func_node(StmtKind::Entry, func));
        // Acquisition site: `fopen` callee defining `f`, with an explicit span.
        let acquire = cfg.add_node(NodeInfo {
            kind: StmtKind::Call,
            call: CallMeta {
                callee: Some("fopen".into()),
                ..Default::default()
            },
            taint: TaintMeta {
                defines: Some("f".into()),
                ..Default::default()
            },
            ast: AstMeta {
                span: (10, 20),
                enclosing_func: Some(func.into()),
            },
            ..Default::default()
        });
        // Plain return: no taint metadata, so `f` is not among its uses.
        let bare_return = cfg.add_node(make_func_node(StmtKind::Return, func));
        let end = cfg.add_node(make_func_node(StmtKind::Exit, func));

        for (from, to) in [(start, acquire), (acquire, bare_return), (bare_return, end)] {
            cfg.add_edge(from, to, EdgeKind::Seq);
        }

        let interner = SymbolInterner::from_cfg_scoped(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
            resource_method_summaries: &[],
            ptr_proxy_hints: None,
        };
        let result = engine::run_forward(&cfg, start, &transfer, ProductState::initial());

        // Empty collections for the two collection-typed arguments.
        let empty_map = HashMap::new();
        let empty_set = std::collections::HashSet::new();
        let findings = extract_findings(
            &result,
            &cfg,
            &interner,
            Lang::C,
            &empty_map,
            false,
            &empty_set,
            None,
        );

        let rule_ids: Vec<_> = findings.iter().map(|finding| &finding.rule_id).collect();
        assert_eq!(
            findings.len(),
            1,
            "Non-returned resource must produce exactly one finding.\n  Got: {:?}",
            rule_ids
        );
        assert_eq!(findings[0].rule_id, "state-resource-leak");
    }

    /// Regression test for finding #64: `is_web_entrypoint_simple` has to
    /// confirm web-style parameters on the *candidate* handler itself rather
    /// than on any function that happens to live in the same file.
    ///
    /// Prior to the fix, an unrelated `read_stream(req)` elsewhere in the file
    /// was enough to promote every `process_*` function to a web entrypoint,
    /// which fired High-severity `state-unauthed-access` on batch/CLI code.
    #[test]
    fn web_entrypoint_param_confirmation_is_per_function() {
        use crate::cfg::{FuncSummaries, LocalFuncSummary};
        use crate::symbol::{FuncKey, FuncKind};
        use petgraph::graph::NodeIndex;

        /// Builds a `(key, summary)` pair for a Python function in `f.py`
        /// named `name` with the given parameter names; all capability sets
        /// are empty and all list fields are empty.
        fn summary_for(name: &str, params: &[&str]) -> (FuncKey, LocalFuncSummary) {
            let arity = params.len();
            let key = FuncKey::new_function(Lang::Python, "f.py", name, Some(arity));
            let info = LocalFuncSummary {
                entry: NodeIndex::new(0),
                source_caps: Cap::empty(),
                sanitizer_caps: Cap::empty(),
                sink_caps: Cap::empty(),
                param_count: arity,
                param_names: params.iter().map(|&p| p.to_owned()).collect(),
                propagating_params: Vec::new(),
                tainted_sink_params: Vec::new(),
                callees: Vec::new(),
                container: String::new(),
                disambig: None,
                kind: FuncKind::Function,
            };
            (key, info)
        }

        // Two functions in one file: `process_data(data)` has no web-like
        // parameter, while the unrelated `read_stream(req)` does.
        let mut mixed_file: FuncSummaries = HashMap::new();
        mixed_file.extend([
            summary_for("process_data", &["data"]),
            summary_for("read_stream", &["req"]),
        ]);

        let cfg: Cfg = Graph::new();

        // `read_stream(req)` must not leak its parameter onto `process_data`.
        let process_promoted =
            is_web_entrypoint_simple("process_data", Lang::Python, &mixed_file, &cfg);
        assert!(
            !process_promoted,
            "process_data has no web params and must not be a web entrypoint just \
             because an unrelated function in the file does"
        );

        // The name gate is independent of the parameter check: `read_stream`
        // carries `req` but has no handler-prefixed name, so it is rejected too.
        let stream_promoted =
            is_web_entrypoint_simple("read_stream", Lang::Python, &mixed_file, &cfg);
        assert!(
            !stream_promoted,
            "read_stream lacks a handler-prefixed name and must not be an entrypoint"
        );

        // Positive control: once `process_data` owns a `req` parameter it
        // must be recognised as a web entrypoint.
        let mut handler_file: FuncSummaries = HashMap::new();
        handler_file.extend([summary_for("process_data", &["req"])]);
        assert!(
            is_web_entrypoint_simple("process_data", Lang::Python, &handler_file, &cfg),
            "process_data with its own web param must be a web entrypoint"
        );
    }
}