// This module calls `Option::map_or(false, ..)`; keep clippy from flagging it.
#![allow(clippy::unnecessary_map_or)]

use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
use super::engine::DataflowResult;
use super::symbol::SymbolInterner;
use super::transfer::{TransferEvent, TransferEventKind};
use crate::cfg::{Cfg, StmtKind};
use crate::labels::{Cap, DataLabel};
use crate::patterns::Severity;
use crate::symbol::Lang;
use petgraph::visit::IntoNodeReferences;

/// Prepare a callee description for display in a finding message.
///
/// Thin wrapper that delegates to `crate::fmt::normalize_snippet`.
fn sanitize_desc(s: &str) -> String {
    crate::fmt::normalize_snippet(s)
}

/// Returns true if `idx` is the terminal exit of a function body, the
/// convergence node where all execution paths join before leaving the function.
///
/// **Invariant:** Only terminal exits carry the complete merged lifecycle state
/// needed for leak analysis. Return nodes are intermediate in per-body graphs
/// (they flow into the synthetic Exit node) but become terminal in legacy
/// supergraphs (their successor is the file-level Exit with
/// `enclosing_func = None`).
///
/// Detection combines a kind filter with a topological check. Only nodes
/// whose `StmtKind` actually terminates execution (`Exit`, `Return`, `Throw`)
/// are considered, then we require that they have no successor in the same
/// function scope. Without the kind filter, dangling Seq nodes left behind
/// when nested function literals (e.g. `obj.fn = () => {...}`) get a
/// placeholder in the parent graph would be misclassified as terminal exits
/// and produce spurious resource-leak findings at the function-literal span.
///
/// * `idx` - index of the node being classified.
/// * `info` - node payload for `idx`.
/// * `cfg` - graph that owns `idx`; used to inspect outgoing neighbours.
fn is_terminal_function_exit(
    idx: petgraph::graph::NodeIndex,
    info: &crate::cfg::NodeInfo,
    cfg: &Cfg,
) -> bool {
    // Kind filter: only statements that end execution can be terminal exits.
    let ends_execution = matches!(
        info.kind,
        StmtKind::Exit | StmtKind::Return | StmtKind::Throw
    );
    if !ends_execution {
        return false;
    }

    // Nodes outside any function are never *function* exits.
    let scope = &info.ast.enclosing_func;
    if scope.is_none() {
        return false;
    }

    // Topological check: every successor must belong to a different scope.
    cfg.neighbors_directed(idx, petgraph::Direction::Outgoing)
        .all(|succ| cfg[succ].ast.enclosing_func != *scope)
}

/// A finding produced by state analysis.
#[derive(Debug, Clone)] pub struct StateFinding { pub rule_id: String, pub severity: Severity, pub span: (usize, usize), pub message: String, /// State machine that produced this finding: `"resource"` or `"auth"`. pub machine: &'static str, /// Variable name involved, if available. pub subject: Option, /// State before the event (e.g. `"closed"`, `"open"`, `"unauthed"`). pub from_state: &'static str, /// State after the event (e.g. `"used"`, `"closed"`, `"leaked"`, `"access"`). pub to_state: &'static str, } /// Extract findings from converged dataflow state + transfer events. /// /// `path_safe_suppressed_sink_spans` lists CFG sink spans whose tainted /// inputs were proved path-safe by the SSA taint engine; the privileged /// `state-unauthed-access` finding is suppressed on those spans because /// the user-controlled input has already been proved unable to escape /// into a privileged location. #[allow(clippy::too_many_arguments)] pub fn extract_findings( result: &DataflowResult, cfg: &Cfg, interner: &SymbolInterner, lang: Lang, func_summaries: &crate::cfg::FuncSummaries, enable_auth: bool, path_safe_suppressed_sink_spans: &std::collections::HashSet<(usize, usize)>, closure_released_var_names: Option<&std::collections::HashSet>, ) -> Vec { let mut findings = Vec::new(); // ── 1. 
Use-after-close from transfer events ────────────────────────── for event in &result.events { let info = &cfg[event.node]; let var_name = interner.resolve(event.var); match event.kind { TransferEventKind::UseAfterClose => { findings.push(StateFinding { rule_id: "state-use-after-close".into(), severity: Severity::High, span: info.ast.span, message: format!("variable `{var_name}` used after close"), machine: "resource", subject: Some(var_name.to_string()), from_state: "closed", to_state: "used", }); } TransferEventKind::DoubleClose => { findings.push(StateFinding { rule_id: "state-double-close".into(), severity: Severity::Medium, span: info.ast.span, message: format!("variable `{var_name}` closed twice"), machine: "resource", subject: Some(var_name.to_string()), from_state: "closed", to_state: "closed", }); } } } // ── 2. Resource leaks at Exit and function-Return nodes ────────────── // Collect variables with a deferred release call (Go `defer f.Close()`). // These remain OPEN at function exit because transfer skips deferred // releases, but the runtime guarantees cleanup. let deferred_close_vars: std::collections::HashSet = { let pairs = crate::cfg_analysis::rules::resource_pairs(lang); cfg.node_references() .filter(|(_, ni)| { ni.in_defer && ni.kind == StmtKind::Call && ni.call.callee.as_ref().is_some_and(|c| { let cl = c.to_ascii_lowercase(); pairs.iter().any(|p| { p.release.iter().any(|r| { let rl = r.to_ascii_lowercase(); if rl.starts_with('.') { cl.ends_with(&rl) } else { cl.ends_with(&rl) || cl == rl } }) }) }) }) .flat_map(|(_, ni)| { let scope = ni.ast.enclosing_func.clone(); ni.taint .uses .iter() .filter_map(move |v| interner.get_scoped(scope.as_deref(), v)) }) .collect() }; // Collect variables released via inner-call-in-arg shape (Go testify // `require.NoError(t, f.Close())`, `errs = append(errs, f.Close())`, // JUnit `assertEquals(0, in.read())`). 
The transfer flips the // lifecycle to CLOSED on the success branch, but the err-return // predecessor that ran after the bare acquire (`f, err := os.Open(...)`) // still merges OPEN at the function-exit join. Mirror the // `deferred_close_vars` suppression so the OPEN|CLOSED join doesn't // emit a leak-possible for a resource that has a real release site. let inner_arg_close_vars: std::collections::HashSet = { let pairs = crate::cfg_analysis::rules::resource_pairs(lang); let mut set = std::collections::HashSet::new(); for (_, ni) in cfg.node_references() { if ni.in_defer || ni.arg_callees.is_empty() { continue; } let scope = ni.ast.enclosing_func.as_deref(); for arg_callee in &ni.arg_callees { let Some(arg_callee_text) = arg_callee.as_deref() else { continue; }; let Some(dot_idx) = arg_callee_text.rfind('.') else { continue; }; let recv_text = &arg_callee_text[..dot_idx]; if recv_text.contains('.') { continue; } let arg_callee_lower = arg_callee_text.to_ascii_lowercase(); let matches_release = pairs.iter().any(|p| { p.release.iter().any(|r| { let rl = r.to_ascii_lowercase(); if rl.starts_with('.') { arg_callee_lower.ends_with(&rl) } else { arg_callee_lower.ends_with(&rl) || arg_callee_lower == rl } }) }); if !matches_release { continue; } if let Some(sym) = interner.get_scoped(scope, recv_text) { set.insert(sym); } } } set }; for (idx, info) in cfg.node_references() { // File-level Exit (program termination, no enclosing function). let is_file_exit = info.kind == StmtKind::Exit && info.ast.enclosing_func.is_none(); // Terminal function exit, the convergence node where all paths join. // Return nodes are intermediate and carry only path-specific state; // only the terminal exit carries the complete merged lifecycle. 
let is_func_terminal = is_terminal_function_exit(idx, info, cfg); if !is_file_exit && !is_func_terminal { continue; } let Some(state) = result.states.get(&idx) else { continue; }; for (&sym, &lifecycle) in &state.resource.vars { if !lifecycle.contains(ResourceLifecycle::OPEN) { continue; } let var_name = interner.resolve(sym); let scope = if is_func_terminal { info.ast.enclosing_func.as_deref() } else { None }; let acquire_node = find_acquire_node(cfg, sym, interner, scope); // At the file-level Exit, skip variables whose acquire site is // inside a function, those are already handled by the per- // function exit checks above. Without this, the file-level Exit // would duplicate leak findings with a misleading acquire span // (the first global match instead of the correct function-local one). if is_file_exit { if let Some(acq) = acquire_node { if cfg[acq].ast.enclosing_func.is_some() { continue; } } } // Suppress leaks for resources acquired inside managed scopes // (Python `with`, Java try-with-resources). The suppression is // tied to the specific acquire site, not the variable name. if let Some(acq) = acquire_node { if cfg[acq].managed_resource { continue; } } // Suppress leaks for variables with a deferred close call // (Go `defer f.Close()`). The deferred call guarantees cleanup // at function exit even though transfer didn't mark it CLOSED. if deferred_close_vars.contains(&sym) { continue; } // Suppress leaks for variables released via inner-call-in-arg // shape. Mirrors the deferred-close suppression so the // OPEN-on-err-return / CLOSED-on-success-branch merge at // function exit does not surface as leak-possible. if inner_arg_close_vars.contains(&sym) { continue; } // Suppress leaks for variables whose release call lives in a // nested closure (callback / event handler) outside this // body's CFG. 
Common JS/TS shape: // const ws = new WebSocket(url); // socket.on("close", () => ws.close()); // The per-body resource analysis cannot observe the close // inside the registered handler body; without this gate the // handle reads as a definite leak. Match by variable name — // closure-captured handles share the binding name with the // handle in the outer scope. if closure_released_var_names .map(|s| s.contains(var_name)) .unwrap_or(false) { continue; } // Prefer direct acquire node span; fall back to proxy span // from ResourceMethodSummary (cross-body resource tracking). let acquire_span = acquire_node .map(|n| cfg[n].ast.span) .or_else(|| state.proxy_acquire_spans.get(&sym).copied()); // Suppress/downgrade leaks for variables returned from the // function (factory pattern). Only suppress when ALL // predecessors that have the variable OPEN also return it. // Mixed cases (some paths return, some leak) are downgraded // to state-resource-leak-possible. if is_func_terminal { let scope = info.ast.enclosing_func.as_deref(); let mut returned_open = 0u32; let mut non_returned_open = 0u32; for pred in cfg.neighbors_directed(idx, petgraph::Direction::Incoming) { let Some(ps) = result.states.get(&pred) else { continue; }; let pred_has_open = ps .resource .vars .get(&sym) .map_or(false, |lc| lc.contains(ResourceLifecycle::OPEN)); if !pred_has_open { continue; } // Only Return nodes can transfer resource ownership to the // caller. Non-Return predecessors (exception edges, implicit // fallthrough) with OPEN resources represent genuine leaks. let returns_var = cfg[pred].kind == StmtKind::Return && cfg[pred] .taint .uses .iter() .any(|u| interner.get_scoped(scope, u) == Some(sym)); if returns_var { returned_open += 1; } else { non_returned_open += 1; } } if returned_open > 0 && non_returned_open == 0 { continue; // all OPEN paths transfer ownership to caller } if returned_open > 0 && non_returned_open > 0 { // Mixed: some paths return resource, some leak it. 
findings.push(StateFinding { rule_id: "state-resource-leak-possible".into(), severity: Severity::Low, span: acquire_span.unwrap_or(info.ast.span), message: format!("resource `{var_name}` may not be closed on all paths"), machine: "resource", subject: Some(var_name.to_string()), from_state: "open", to_state: "possibly_leaked", }); continue; } // returned_open == 0: fall through to normal leak detection } if !lifecycle.contains(ResourceLifecycle::CLOSED) && !lifecycle.contains(ResourceLifecycle::MOVED) { // Definite leak: open on all paths, never closed findings.push(StateFinding { rule_id: "state-resource-leak".into(), severity: Severity::Medium, span: acquire_span.unwrap_or(info.ast.span), message: format!("resource `{var_name}` is never closed"), machine: "resource", subject: Some(var_name.to_string()), from_state: "open", to_state: "leaked", }); } else if lifecycle.contains(ResourceLifecycle::CLOSED) { // May-leak: open on some paths, closed on others findings.push(StateFinding { rule_id: "state-resource-leak-possible".into(), severity: Severity::Low, span: acquire_span.unwrap_or(info.ast.span), message: format!("resource `{var_name}` may not be closed on all paths"), machine: "resource", subject: Some(var_name.to_string()), from_state: "open", to_state: "possibly_leaked", }); } } } // ── 2b. Proxy-acquired possible leaks (exception-path heuristic) ──── // In JS/TS, any call can throw. If a proxy-acquired resource is fully // CLOSED at function exit (no OPEN paths), check whether there are // intervening calls between the proxy acquire and release nodes that // could throw and bypass the release. If so, emit a possible leak. // // **Language gate**: this heuristic is JS/TS-specific. Other // languages (Go, Java, C, C++, Python, Rust, Ruby, PHP) use // explicit error returns / try-catch with deterministic control // flow, an intervening call does NOT silently bypass a release. 
// Firing this on Go gave the gin/context.go FP where any method // calling another method (`c.Set`, `c.Get`) was flagged as a // possible leak on the receiver. Skip the section but continue // to section 3 (auth-required sinks) which is independent of the // resource state machine. if matches!(lang, Lang::JavaScript | Lang::TypeScript) { for (idx, info) in cfg.node_references() { if !is_terminal_function_exit(idx, info, cfg) { continue; } let Some(state) = result.states.get(&idx) else { continue; }; for (&sym, &lifecycle) in &state.resource.vars { // Only for proxy-acquired resources that are fully CLOSED at exit if !state.proxy_acquire_spans.contains_key(&sym) { continue; } if lifecycle.contains(ResourceLifecycle::OPEN) { continue; // Already handled by the normal leak detection above } if !lifecycle.contains(ResourceLifecycle::CLOSED) { continue; } // Check if there are intervening Call nodes between acquire and // release in the CFG (these could throw and bypass the release). // // NOTE: a stricter variant (audit #59) tried to exclude the // resource's own lifecycle ops (the acquire/release proxy // calls) and require reachability from the acquire node, to // suppress spurious findings on correctly open/close-paired // proxies. That over-suppressed a *tested* true positive: a // class-field resource (`this.fd = fs.openSync(...)` in `open()` // with `close()` in a separate method — see // `tests/fixtures/real_world/typescript/cfg/try_catch_typed.ts`) // has only its own acquire call in scope, so excluding it left // zero intervening calls and dropped the must-match leak // finding. Distinguishing a clean same-scope open/close pair // from a cross-method field leak needs proper inter-method // lifecycle modelling (deep-fix queue), so we keep the original // span-based exclusion here. 
let has_intervening_calls = cfg.node_references().any(|(_, ni)| { ni.kind == StmtKind::Call && ni.ast.enclosing_func == info.ast.enclosing_func && ni.call.callee.is_some() // Not the acquire or release proxy itself && !state.proxy_acquire_spans.values().any(|s| *s == ni.ast.span) }); if has_intervening_calls { let var_name = interner.resolve(sym); let acquire_span = state.proxy_acquire_spans.get(&sym).copied(); findings.push(StateFinding { rule_id: "state-resource-leak-possible".into(), severity: Severity::Low, span: acquire_span.unwrap_or(info.ast.span), message: format!("resource `{var_name}` may not be closed on all paths"), machine: "resource", subject: Some(var_name.to_string()), from_state: "open", to_state: "possibly_leaked", }); } } } } // ── 3. Auth-required sinks ─────────────────────────────────────────── // Only run auth analysis when explicitly enabled (higher FP rate). // Check if any function is a web entrypoint let has_web_entrypoint = enable_auth && cfg.node_references().any(|(_, info)| { if let Some(ref func_name) = info.ast.enclosing_func { is_web_entrypoint_simple(func_name, lang, func_summaries, cfg) } else { false } }); if has_web_entrypoint { for (idx, info) in cfg.node_references() { if !is_privileged_sink(info) { continue; } let Some(state) = result.states.get(&idx) else { continue; }; if state.auth.auth_level == AuthLevel::Unauthed { // Suppress when the SSA taint engine has already proved // the tainted input flowing into this sink is path-safe // (PathFact `dotdot=No && absolute=No`). A web handler // reading a sanitised user-controlled path is not the // same shape as a handler reading any user-controlled // path, the auth concern reduces once the data cannot // escape into a privileged location. Note this is per // CFG-node span, so co-located unrelated sinks are // unaffected. 
if path_safe_suppressed_sink_spans.contains(&info.ast.span) { continue; } let callee_desc = sanitize_desc(info.call.callee.as_deref().unwrap_or("(sensitive op)")); findings.push(StateFinding { rule_id: "state-unauthed-access".into(), severity: Severity::High, span: info.ast.span, message: format!( "sensitive operation `{callee_desc}` reached without authentication" ), machine: "auth", subject: None, from_state: "unauthed", to_state: "access", }); } } } // Dedup findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id))); findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id); findings } /// Find the CFG node where a variable was acquired (defined via Call node). fn find_acquire_node( cfg: &Cfg, sym: super::symbol::SymbolId, interner: &SymbolInterner, enclosing_func: Option<&str>, ) -> Option { let var_name = interner.resolve(sym); // Try function-scoped match first (correct for multi-function files // where the same variable name appears in multiple functions). if let Some(func) = enclosing_func { for (idx, info) in cfg.node_references() { if info.kind == StmtKind::Call && info.ast.enclosing_func.as_deref() == Some(func) && info.taint.defines.as_deref() == Some(var_name) { return Some(idx); } } } // Fallback: first global match (for file-level Exit or top-level code). for (idx, info) in cfg.node_references() { if info.kind == StmtKind::Call && info.taint.defines.as_deref() == Some(var_name) { return Some(idx); } } None } /// Check if a node is a privileged sink (shell execution or file I/O). fn is_privileged_sink(info: &crate::cfg::NodeInfo) -> bool { info.taint.labels.iter().any(|l| { if let DataLabel::Sink(caps) = l { caps.intersects(Cap::SHELL_ESCAPE | Cap::FILE_IO) } else { false } }) } /// Simplified web entrypoint check (avoids AnalysisContext dependency). 
fn is_web_entrypoint_simple( func_name: &str, lang: Lang, func_summaries: &crate::cfg::FuncSummaries, _cfg: &Cfg, ) -> bool { let name_lower = func_name.to_ascii_lowercase(); // Skip bare "main", it's typically a CLI entry if name_lower == "main" { return false; } let is_handler_name = name_lower.starts_with("handle_") || name_lower.starts_with("route_") || name_lower.starts_with("api_") || name_lower.starts_with("serve_") || name_lower.starts_with("process_") || name_lower == "handler"; if !is_handler_name { return false; } // Check for web-like parameters let web_params: &[&str] = match lang { Lang::Rust => &["request", "req", "json", "query", "form", "payload", "body"], Lang::JavaScript | Lang::TypeScript => &["req", "request", "ctx", "res", "response"], Lang::Python => &["request", "req"], Lang::Go => &["w", "writer", "r", "req", "request"], Lang::Java => &["request", "req"], _ => &["request", "req"], }; // Confirm web parameters against THIS candidate handler only, not any // function in the file. Scanning every summary made an unrelated // function's `req`/`r`/`ctx` parameter promote every // `process_*`/`api_*`/`serve_*` function in the file to a web // entrypoint, firing High-severity state-unauthed-access on batch/CLI // code. Filter the file-level summary map down to the named function // via `FuncKey.name` (matches `info.ast.enclosing_func`); summary // `entry` NodeIndexes are not valid in the per-body CFG, so the name // is the safe selector here. let has_web_params = func_summaries .iter() .filter(|(key, _)| key.name == func_name) .any(|(_, s)| { s.param_names .iter() .any(|p| web_params.contains(&p.to_ascii_lowercase().as_str())) }); // Only handle_* and route_* are strong enough to skip param confirmation. // api_*, serve_*, process_* require web parameter evidence. 
let strong_name = name_lower.starts_with("handle_") || name_lower.starts_with("route_"); has_web_params || strong_name } #[cfg(test)] mod tests { use super::*; use crate::cfg::{AstMeta, CallMeta, EdgeKind, NodeInfo, TaintMeta}; use crate::cfg_analysis::rules; use crate::state::domain::ProductState; use crate::state::engine; use crate::state::symbol::SymbolInterner; use crate::state::transfer::DefaultTransfer; use petgraph::Graph; use std::collections::HashMap; fn make_node(kind: StmtKind) -> NodeInfo { NodeInfo { kind, ..Default::default() } } #[test] fn detects_resource_leak() { // Entry → fopen(f) → Exit (no close) let mut cfg: Cfg = Graph::new(); let entry = cfg.add_node(make_node(StmtKind::Entry)); let open_node = cfg.add_node(NodeInfo { kind: StmtKind::Call, ast: AstMeta { span: (10, 20), ..Default::default() }, taint: TaintMeta { defines: Some("f".into()), ..Default::default() }, call: CallMeta { callee: Some("fopen".into()), ..Default::default() }, ..Default::default() }); let exit = cfg.add_node(make_node(StmtKind::Exit)); cfg.add_edge(entry, open_node, EdgeKind::Seq); cfg.add_edge(open_node, exit, EdgeKind::Seq); let interner = SymbolInterner::from_cfg(&cfg); let transfer = DefaultTransfer { lang: Lang::C, resource_pairs: rules::resource_pairs(Lang::C), interner: &interner, resource_method_summaries: &[], ptr_proxy_hints: None, }; let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial()); let findings = extract_findings( &result, &cfg, &interner, Lang::C, &HashMap::new(), false, &std::collections::HashSet::new(), None, ); assert_eq!(findings.len(), 1); assert_eq!(findings[0].rule_id, "state-resource-leak"); assert!(findings[0].message.contains("f")); } #[test] fn clean_open_close_no_findings() { // Entry → fopen(f) → fclose(f) → Exit let mut cfg: Cfg = Graph::new(); let entry = cfg.add_node(make_node(StmtKind::Entry)); let open_node = cfg.add_node(NodeInfo { kind: StmtKind::Call, taint: TaintMeta { defines: Some("f".into()), 
..Default::default() }, call: CallMeta { callee: Some("fopen".into()), ..Default::default() }, ..Default::default() }); let close_node = cfg.add_node(NodeInfo { kind: StmtKind::Call, taint: TaintMeta { uses: vec!["f".into()], ..Default::default() }, call: CallMeta { callee: Some("fclose".into()), ..Default::default() }, ..Default::default() }); let exit = cfg.add_node(make_node(StmtKind::Exit)); cfg.add_edge(entry, open_node, EdgeKind::Seq); cfg.add_edge(open_node, close_node, EdgeKind::Seq); cfg.add_edge(close_node, exit, EdgeKind::Seq); let interner = SymbolInterner::from_cfg(&cfg); let transfer = DefaultTransfer { lang: Lang::C, resource_pairs: rules::resource_pairs(Lang::C), interner: &interner, resource_method_summaries: &[], ptr_proxy_hints: None, }; let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial()); let findings = extract_findings( &result, &cfg, &interner, Lang::C, &HashMap::new(), false, &std::collections::HashSet::new(), None, ); assert!(findings.is_empty()); } fn make_func_node(kind: StmtKind, func: &str) -> NodeInfo { NodeInfo { kind, ast: AstMeta { enclosing_func: Some(func.to_string()), ..Default::default() }, ..Default::default() } } #[test] fn terminal_exit_is_topological() { // Per-body graph: Entry → Call → Return → Exit (all enclosing_func=Some) // Only Exit should be terminal (no successors in same scope). 
let mut cfg: Cfg = Graph::new(); let entry = cfg.add_node(make_func_node(StmtKind::Entry, "f")); let call = cfg.add_node(NodeInfo { kind: StmtKind::Call, call: CallMeta { callee: Some("fopen".into()), ..Default::default() }, taint: TaintMeta { defines: Some("x".into()), ..Default::default() }, ast: AstMeta { enclosing_func: Some("f".into()), ..Default::default() }, ..Default::default() }); let ret = cfg.add_node(NodeInfo { kind: StmtKind::Return, taint: TaintMeta { uses: vec!["x".into()], ..Default::default() }, ast: AstMeta { enclosing_func: Some("f".into()), ..Default::default() }, ..Default::default() }); let exit = cfg.add_node(make_func_node(StmtKind::Exit, "f")); cfg.add_edge(entry, call, EdgeKind::Seq); cfg.add_edge(call, ret, EdgeKind::Seq); cfg.add_edge(ret, exit, EdgeKind::Seq); assert!( !is_terminal_function_exit(entry, &cfg[entry], &cfg), "Entry must not be terminal" ); assert!( !is_terminal_function_exit(call, &cfg[call], &cfg), "Call must not be terminal" ); assert!( !is_terminal_function_exit(ret, &cfg[ret], &cfg), "Return must not be terminal — it flows into Exit" ); assert!( is_terminal_function_exit(exit, &cfg[exit], &cfg), "Exit must be terminal — no successors in same scope" ); } #[test] fn per_body_factory_returned_resource_no_finding() { // Per-body graph: Entry → fopen(f) → return f → Exit // All nodes have enclosing_func=Some("factory"). // The resource is returned, no leak finding expected. 
let func = "factory"; let mut cfg: Cfg = Graph::new(); let entry = cfg.add_node(make_func_node(StmtKind::Entry, func)); let open_node = cfg.add_node(NodeInfo { kind: StmtKind::Call, ast: AstMeta { span: (10, 20), enclosing_func: Some(func.into()), }, taint: TaintMeta { defines: Some("f".into()), ..Default::default() }, call: CallMeta { callee: Some("fopen".into()), ..Default::default() }, ..Default::default() }); let ret = cfg.add_node(NodeInfo { kind: StmtKind::Return, taint: TaintMeta { uses: vec!["f".into()], ..Default::default() }, ast: AstMeta { enclosing_func: Some(func.into()), ..Default::default() }, ..Default::default() }); let exit = cfg.add_node(make_func_node(StmtKind::Exit, func)); cfg.add_edge(entry, open_node, EdgeKind::Seq); cfg.add_edge(open_node, ret, EdgeKind::Seq); cfg.add_edge(ret, exit, EdgeKind::Seq); let interner = SymbolInterner::from_cfg_scoped(&cfg); let transfer = DefaultTransfer { lang: Lang::C, resource_pairs: rules::resource_pairs(Lang::C), interner: &interner, resource_method_summaries: &[], ptr_proxy_hints: None, }; let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial()); let findings = extract_findings( &result, &cfg, &interner, Lang::C, &HashMap::new(), false, &std::collections::HashSet::new(), None, ); assert!( findings.is_empty(), "Resource returned from factory must not produce leak finding.\n Got: {:?}", findings.iter().map(|f| &f.rule_id).collect::>() ); } #[test] fn per_body_non_returned_resource_leaks() { // Per-body graph: Entry → fopen(f) → return (no uses) → Exit // All nodes have enclosing_func=Some("leaker"). // Resource is NOT returned, exactly one state-resource-leak expected. 
let func = "leaker"; let mut cfg: Cfg = Graph::new(); let entry = cfg.add_node(make_func_node(StmtKind::Entry, func)); let open_node = cfg.add_node(NodeInfo { kind: StmtKind::Call, ast: AstMeta { span: (10, 20), enclosing_func: Some(func.into()), }, taint: TaintMeta { defines: Some("f".into()), ..Default::default() }, call: CallMeta { callee: Some("fopen".into()), ..Default::default() }, ..Default::default() }); let ret = cfg.add_node(NodeInfo { kind: StmtKind::Return, ast: AstMeta { enclosing_func: Some(func.into()), ..Default::default() }, ..Default::default() }); let exit = cfg.add_node(make_func_node(StmtKind::Exit, func)); cfg.add_edge(entry, open_node, EdgeKind::Seq); cfg.add_edge(open_node, ret, EdgeKind::Seq); cfg.add_edge(ret, exit, EdgeKind::Seq); let interner = SymbolInterner::from_cfg_scoped(&cfg); let transfer = DefaultTransfer { lang: Lang::C, resource_pairs: rules::resource_pairs(Lang::C), interner: &interner, resource_method_summaries: &[], ptr_proxy_hints: None, }; let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial()); let findings = extract_findings( &result, &cfg, &interner, Lang::C, &HashMap::new(), false, &std::collections::HashSet::new(), None, ); assert_eq!( findings.len(), 1, "Non-returned resource must produce exactly one finding.\n Got: {:?}", findings.iter().map(|f| &f.rule_id).collect::>() ); assert_eq!(findings[0].rule_id, "state-resource-leak"); } /// Finding #64: `is_web_entrypoint_simple` must confirm web parameters /// against the *candidate* handler only, not any function in the file. /// Before the fix, an unrelated `read_stream(req)` in the same file /// promoted every `process_*` function to a web entrypoint, firing /// High-severity `state-unauthed-access` on batch/CLI code. 
#[test] fn web_entrypoint_param_confirmation_is_per_function() { use crate::cfg::LocalFuncSummary; use crate::symbol::FuncKey; use petgraph::graph::NodeIndex; fn summary(name: &str, params: &[&str]) -> (FuncKey, LocalFuncSummary) { let key = FuncKey::new_function(Lang::Python, "f.py", name, Some(params.len())); let s = LocalFuncSummary { entry: NodeIndex::new(0), source_caps: Cap::empty(), sanitizer_caps: Cap::empty(), sink_caps: Cap::empty(), param_count: params.len(), param_names: params.iter().map(|p| p.to_string()).collect(), propagating_params: Vec::new(), tainted_sink_params: Vec::new(), callees: Vec::new(), container: String::new(), disambig: None, kind: crate::symbol::FuncKind::Function, }; (key, s) } // `process_data` has NO web-like parameters; `read_stream` does. let mut summaries: crate::cfg::FuncSummaries = HashMap::new(); let (k1, s1) = summary("process_data", &["data"]); let (k2, s2) = summary("read_stream", &["req"]); summaries.insert(k1, s1); summaries.insert(k2, s2); let cfg: Cfg = Graph::new(); // The unrelated `read_stream(req)` must NOT promote `process_data`. assert!( !is_web_entrypoint_simple("process_data", Lang::Python, &summaries, &cfg), "process_data has no web params and must not be a web entrypoint just \ because an unrelated function in the file does" ); // `read_stream` is not a handler-prefixed name, so even though it // carries the `req` param it is NOT an entrypoint — confirms the // name gate still stands independently of the param check. assert!( !is_web_entrypoint_simple("read_stream", Lang::Python, &summaries, &cfg), "read_stream lacks a handler-prefixed name and must not be an entrypoint" ); // Positive control: give `process_data` its own `req` param. 
let mut summaries2: crate::cfg::FuncSummaries = HashMap::new(); let (k3, s3) = summary("process_data", &["req"]); summaries2.insert(k3, s3); assert!( is_web_entrypoint_simple("process_data", Lang::Python, &summaries2, &cfg), "process_data with its own web param must be a web entrypoint" ); } }