mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
1010 lines
40 KiB
Rust
1010 lines
40 KiB
Rust
#![allow(clippy::unnecessary_map_or)]
|
|
|
|
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
|
|
use super::engine::DataflowResult;
|
|
use super::symbol::SymbolInterner;
|
|
use super::transfer::{TransferEvent, TransferEventKind};
|
|
use crate::cfg::{Cfg, StmtKind};
|
|
use crate::labels::{Cap, DataLabel};
|
|
use crate::patterns::Severity;
|
|
use crate::symbol::Lang;
|
|
use petgraph::visit::IntoNodeReferences;
|
|
|
|
/// Normalize a callee description for display.
|
|
fn sanitize_desc(s: &str) -> String {
|
|
crate::fmt::normalize_snippet(s)
|
|
}
|
|
|
|
/// Returns true if `idx` is the terminal exit of a function body, the
|
|
/// convergence node where all execution paths join before leaving the function.
|
|
///
|
|
/// **Invariant:** Only terminal exits carry the complete merged lifecycle state
|
|
/// needed for leak analysis. Return nodes are intermediate in per-body graphs
|
|
/// (they flow into the synthetic Exit node) but become terminal in legacy
|
|
/// supergraphs (their successor is the file-level Exit with
|
|
/// `enclosing_func = None`).
|
|
///
|
|
/// Detection combines a kind filter with a topological check. Only nodes
|
|
/// whose `StmtKind` actually terminates execution (`Exit`, `Return`, `Throw`)
|
|
/// are considered, then we require that they have no successor in the same
|
|
/// function scope. Without the kind filter, dangling Seq nodes left behind
|
|
/// when nested function literals (e.g. `obj.fn = () => {...}`) get a
|
|
/// placeholder in the parent graph would be misclassified as terminal exits
|
|
/// and produce spurious resource-leak findings at the function-literal span.
|
|
fn is_terminal_function_exit(
|
|
idx: petgraph::graph::NodeIndex,
|
|
info: &crate::cfg::NodeInfo,
|
|
cfg: &Cfg,
|
|
) -> bool {
|
|
if !matches!(
|
|
info.kind,
|
|
StmtKind::Exit | StmtKind::Return | StmtKind::Throw
|
|
) {
|
|
return false;
|
|
}
|
|
info.ast.enclosing_func.is_some()
|
|
&& !cfg
|
|
.neighbors_directed(idx, petgraph::Direction::Outgoing)
|
|
.any(|succ| cfg[succ].ast.enclosing_func == info.ast.enclosing_func)
|
|
}
|
|
|
|
/// A finding produced by state analysis.
|
|
#[derive(Debug, Clone)]
|
|
pub struct StateFinding {
|
|
pub rule_id: String,
|
|
pub severity: Severity,
|
|
pub span: (usize, usize),
|
|
pub message: String,
|
|
/// State machine that produced this finding: `"resource"` or `"auth"`.
|
|
pub machine: &'static str,
|
|
/// Variable name involved, if available.
|
|
pub subject: Option<String>,
|
|
/// State before the event (e.g. `"closed"`, `"open"`, `"unauthed"`).
|
|
pub from_state: &'static str,
|
|
/// State after the event (e.g. `"used"`, `"closed"`, `"leaked"`, `"access"`).
|
|
pub to_state: &'static str,
|
|
}
|
|
|
|
/// Extract findings from converged dataflow state + transfer events.
|
|
///
|
|
/// `path_safe_suppressed_sink_spans` lists CFG sink spans whose tainted
|
|
/// inputs were proved path-safe by the SSA taint engine; the privileged
|
|
/// `state-unauthed-access` finding is suppressed on those spans because
|
|
/// the user-controlled input has already been proved unable to escape
|
|
/// into a privileged location.
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn extract_findings(
|
|
result: &DataflowResult<ProductState, TransferEvent>,
|
|
cfg: &Cfg,
|
|
interner: &SymbolInterner,
|
|
lang: Lang,
|
|
func_summaries: &crate::cfg::FuncSummaries,
|
|
enable_auth: bool,
|
|
path_safe_suppressed_sink_spans: &std::collections::HashSet<(usize, usize)>,
|
|
closure_released_var_names: Option<&std::collections::HashSet<String>>,
|
|
) -> Vec<StateFinding> {
|
|
let mut findings = Vec::new();
|
|
|
|
// ── 1. Use-after-close from transfer events ──────────────────────────
|
|
for event in &result.events {
|
|
let info = &cfg[event.node];
|
|
let var_name = interner.resolve(event.var);
|
|
match event.kind {
|
|
TransferEventKind::UseAfterClose => {
|
|
findings.push(StateFinding {
|
|
rule_id: "state-use-after-close".into(),
|
|
severity: Severity::High,
|
|
span: info.ast.span,
|
|
message: format!("variable `{var_name}` used after close"),
|
|
machine: "resource",
|
|
subject: Some(var_name.to_string()),
|
|
from_state: "closed",
|
|
to_state: "used",
|
|
});
|
|
}
|
|
TransferEventKind::DoubleClose => {
|
|
findings.push(StateFinding {
|
|
rule_id: "state-double-close".into(),
|
|
severity: Severity::Medium,
|
|
span: info.ast.span,
|
|
message: format!("variable `{var_name}` closed twice"),
|
|
machine: "resource",
|
|
subject: Some(var_name.to_string()),
|
|
from_state: "closed",
|
|
to_state: "closed",
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── 2. Resource leaks at Exit and function-Return nodes ──────────────
|
|
|
|
// Collect variables with a deferred release call (Go `defer f.Close()`).
|
|
// These remain OPEN at function exit because transfer skips deferred
|
|
// releases, but the runtime guarantees cleanup.
|
|
let deferred_close_vars: std::collections::HashSet<super::symbol::SymbolId> = {
|
|
let pairs = crate::cfg_analysis::rules::resource_pairs(lang);
|
|
cfg.node_references()
|
|
.filter(|(_, ni)| {
|
|
ni.in_defer
|
|
&& ni.kind == StmtKind::Call
|
|
&& ni.call.callee.as_ref().is_some_and(|c| {
|
|
let cl = c.to_ascii_lowercase();
|
|
pairs.iter().any(|p| {
|
|
p.release.iter().any(|r| {
|
|
let rl = r.to_ascii_lowercase();
|
|
if rl.starts_with('.') {
|
|
cl.ends_with(&rl)
|
|
} else {
|
|
cl.ends_with(&rl) || cl == rl
|
|
}
|
|
})
|
|
})
|
|
})
|
|
})
|
|
.flat_map(|(_, ni)| {
|
|
let scope = ni.ast.enclosing_func.clone();
|
|
ni.taint
|
|
.uses
|
|
.iter()
|
|
.filter_map(move |v| interner.get_scoped(scope.as_deref(), v))
|
|
})
|
|
.collect()
|
|
};
|
|
|
|
// Collect variables released via inner-call-in-arg shape (Go testify
|
|
// `require.NoError(t, f.Close())`, `errs = append(errs, f.Close())`,
|
|
// JUnit `assertEquals(0, in.read())`). The transfer flips the
|
|
// lifecycle to CLOSED on the success branch, but the err-return
|
|
// predecessor that ran after the bare acquire (`f, err := os.Open(...)`)
|
|
// still merges OPEN at the function-exit join. Mirror the
|
|
// `deferred_close_vars` suppression so the OPEN|CLOSED join doesn't
|
|
// emit a leak-possible for a resource that has a real release site.
|
|
let inner_arg_close_vars: std::collections::HashSet<super::symbol::SymbolId> = {
|
|
let pairs = crate::cfg_analysis::rules::resource_pairs(lang);
|
|
let mut set = std::collections::HashSet::new();
|
|
for (_, ni) in cfg.node_references() {
|
|
if ni.in_defer || ni.arg_callees.is_empty() {
|
|
continue;
|
|
}
|
|
let scope = ni.ast.enclosing_func.as_deref();
|
|
for arg_callee in &ni.arg_callees {
|
|
let Some(arg_callee_text) = arg_callee.as_deref() else {
|
|
continue;
|
|
};
|
|
let Some(dot_idx) = arg_callee_text.rfind('.') else {
|
|
continue;
|
|
};
|
|
let recv_text = &arg_callee_text[..dot_idx];
|
|
if recv_text.contains('.') {
|
|
continue;
|
|
}
|
|
let arg_callee_lower = arg_callee_text.to_ascii_lowercase();
|
|
let matches_release = pairs.iter().any(|p| {
|
|
p.release.iter().any(|r| {
|
|
let rl = r.to_ascii_lowercase();
|
|
if rl.starts_with('.') {
|
|
arg_callee_lower.ends_with(&rl)
|
|
} else {
|
|
arg_callee_lower.ends_with(&rl) || arg_callee_lower == rl
|
|
}
|
|
})
|
|
});
|
|
if !matches_release {
|
|
continue;
|
|
}
|
|
if let Some(sym) = interner.get_scoped(scope, recv_text) {
|
|
set.insert(sym);
|
|
}
|
|
}
|
|
}
|
|
set
|
|
};
|
|
|
|
for (idx, info) in cfg.node_references() {
|
|
// File-level Exit (program termination, no enclosing function).
|
|
let is_file_exit = info.kind == StmtKind::Exit && info.ast.enclosing_func.is_none();
|
|
// Terminal function exit, the convergence node where all paths join.
|
|
// Return nodes are intermediate and carry only path-specific state;
|
|
// only the terminal exit carries the complete merged lifecycle.
|
|
let is_func_terminal = is_terminal_function_exit(idx, info, cfg);
|
|
if !is_file_exit && !is_func_terminal {
|
|
continue;
|
|
}
|
|
let Some(state) = result.states.get(&idx) else {
|
|
continue;
|
|
};
|
|
|
|
for (&sym, &lifecycle) in &state.resource.vars {
|
|
if !lifecycle.contains(ResourceLifecycle::OPEN) {
|
|
continue;
|
|
}
|
|
let var_name = interner.resolve(sym);
|
|
let scope = if is_func_terminal {
|
|
info.ast.enclosing_func.as_deref()
|
|
} else {
|
|
None
|
|
};
|
|
let acquire_node = find_acquire_node(cfg, sym, interner, scope);
|
|
|
|
// At the file-level Exit, skip variables whose acquire site is
|
|
// inside a function, those are already handled by the per-
|
|
// function exit checks above. Without this, the file-level Exit
|
|
// would duplicate leak findings with a misleading acquire span
|
|
// (the first global match instead of the correct function-local one).
|
|
if is_file_exit {
|
|
if let Some(acq) = acquire_node {
|
|
if cfg[acq].ast.enclosing_func.is_some() {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Suppress leaks for resources acquired inside managed scopes
|
|
// (Python `with`, Java try-with-resources). The suppression is
|
|
// tied to the specific acquire site, not the variable name.
|
|
if let Some(acq) = acquire_node {
|
|
if cfg[acq].managed_resource {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Suppress leaks for variables with a deferred close call
|
|
// (Go `defer f.Close()`). The deferred call guarantees cleanup
|
|
// at function exit even though transfer didn't mark it CLOSED.
|
|
if deferred_close_vars.contains(&sym) {
|
|
continue;
|
|
}
|
|
|
|
// Suppress leaks for variables released via inner-call-in-arg
|
|
// shape. Mirrors the deferred-close suppression so the
|
|
// OPEN-on-err-return / CLOSED-on-success-branch merge at
|
|
// function exit does not surface as leak-possible.
|
|
if inner_arg_close_vars.contains(&sym) {
|
|
continue;
|
|
}
|
|
|
|
// Suppress leaks for variables whose release call lives in a
|
|
// nested closure (callback / event handler) outside this
|
|
// body's CFG. Common JS/TS shape:
|
|
// const ws = new WebSocket(url);
|
|
// socket.on("close", () => ws.close());
|
|
// The per-body resource analysis cannot observe the close
|
|
// inside the registered handler body; without this gate the
|
|
// handle reads as a definite leak. Match by variable name —
|
|
// closure-captured handles share the binding name with the
|
|
// handle in the outer scope.
|
|
if closure_released_var_names
|
|
.map(|s| s.contains(var_name))
|
|
.unwrap_or(false)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
// Prefer direct acquire node span; fall back to proxy span
|
|
// from ResourceMethodSummary (cross-body resource tracking).
|
|
let acquire_span = acquire_node
|
|
.map(|n| cfg[n].ast.span)
|
|
.or_else(|| state.proxy_acquire_spans.get(&sym).copied());
|
|
|
|
// Suppress/downgrade leaks for variables returned from the
|
|
// function (factory pattern). Only suppress when ALL
|
|
// predecessors that have the variable OPEN also return it.
|
|
// Mixed cases (some paths return, some leak) are downgraded
|
|
// to state-resource-leak-possible.
|
|
if is_func_terminal {
|
|
let scope = info.ast.enclosing_func.as_deref();
|
|
let mut returned_open = 0u32;
|
|
let mut non_returned_open = 0u32;
|
|
for pred in cfg.neighbors_directed(idx, petgraph::Direction::Incoming) {
|
|
let Some(ps) = result.states.get(&pred) else {
|
|
continue;
|
|
};
|
|
let pred_has_open = ps
|
|
.resource
|
|
.vars
|
|
.get(&sym)
|
|
.map_or(false, |lc| lc.contains(ResourceLifecycle::OPEN));
|
|
if !pred_has_open {
|
|
continue;
|
|
}
|
|
// Only Return nodes can transfer resource ownership to the
|
|
// caller. Non-Return predecessors (exception edges, implicit
|
|
// fallthrough) with OPEN resources represent genuine leaks.
|
|
let returns_var = cfg[pred].kind == StmtKind::Return
|
|
&& cfg[pred]
|
|
.taint
|
|
.uses
|
|
.iter()
|
|
.any(|u| interner.get_scoped(scope, u) == Some(sym));
|
|
if returns_var {
|
|
returned_open += 1;
|
|
} else {
|
|
non_returned_open += 1;
|
|
}
|
|
}
|
|
if returned_open > 0 && non_returned_open == 0 {
|
|
continue; // all OPEN paths transfer ownership to caller
|
|
}
|
|
if returned_open > 0 && non_returned_open > 0 {
|
|
// Mixed: some paths return resource, some leak it.
|
|
findings.push(StateFinding {
|
|
rule_id: "state-resource-leak-possible".into(),
|
|
severity: Severity::Low,
|
|
span: acquire_span.unwrap_or(info.ast.span),
|
|
message: format!("resource `{var_name}` may not be closed on all paths"),
|
|
machine: "resource",
|
|
subject: Some(var_name.to_string()),
|
|
from_state: "open",
|
|
to_state: "possibly_leaked",
|
|
});
|
|
continue;
|
|
}
|
|
// returned_open == 0: fall through to normal leak detection
|
|
}
|
|
|
|
if !lifecycle.contains(ResourceLifecycle::CLOSED)
|
|
&& !lifecycle.contains(ResourceLifecycle::MOVED)
|
|
{
|
|
// Definite leak: open on all paths, never closed
|
|
findings.push(StateFinding {
|
|
rule_id: "state-resource-leak".into(),
|
|
severity: Severity::Medium,
|
|
span: acquire_span.unwrap_or(info.ast.span),
|
|
message: format!("resource `{var_name}` is never closed"),
|
|
machine: "resource",
|
|
subject: Some(var_name.to_string()),
|
|
from_state: "open",
|
|
to_state: "leaked",
|
|
});
|
|
} else if lifecycle.contains(ResourceLifecycle::CLOSED) {
|
|
// May-leak: open on some paths, closed on others
|
|
findings.push(StateFinding {
|
|
rule_id: "state-resource-leak-possible".into(),
|
|
severity: Severity::Low,
|
|
span: acquire_span.unwrap_or(info.ast.span),
|
|
message: format!("resource `{var_name}` may not be closed on all paths"),
|
|
machine: "resource",
|
|
subject: Some(var_name.to_string()),
|
|
from_state: "open",
|
|
to_state: "possibly_leaked",
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── 2b. Proxy-acquired possible leaks (exception-path heuristic) ────
|
|
// In JS/TS, any call can throw. If a proxy-acquired resource is fully
|
|
// CLOSED at function exit (no OPEN paths), check whether there are
|
|
// intervening calls between the proxy acquire and release nodes that
|
|
// could throw and bypass the release. If so, emit a possible leak.
|
|
//
|
|
// **Language gate**: this heuristic is JS/TS-specific. Other
|
|
// languages (Go, Java, C, C++, Python, Rust, Ruby, PHP) use
|
|
// explicit error returns / try-catch with deterministic control
|
|
// flow, an intervening call does NOT silently bypass a release.
|
|
// Firing this on Go gave the gin/context.go FP where any method
|
|
// calling another method (`c.Set`, `c.Get`) was flagged as a
|
|
// possible leak on the receiver. Skip the section but continue
|
|
// to section 3 (auth-required sinks) which is independent of the
|
|
// resource state machine.
|
|
if matches!(lang, Lang::JavaScript | Lang::TypeScript) {
|
|
for (idx, info) in cfg.node_references() {
|
|
if !is_terminal_function_exit(idx, info, cfg) {
|
|
continue;
|
|
}
|
|
let Some(state) = result.states.get(&idx) else {
|
|
continue;
|
|
};
|
|
for (&sym, &lifecycle) in &state.resource.vars {
|
|
// Only for proxy-acquired resources that are fully CLOSED at exit
|
|
if !state.proxy_acquire_spans.contains_key(&sym) {
|
|
continue;
|
|
}
|
|
if lifecycle.contains(ResourceLifecycle::OPEN) {
|
|
continue; // Already handled by the normal leak detection above
|
|
}
|
|
if !lifecycle.contains(ResourceLifecycle::CLOSED) {
|
|
continue;
|
|
}
|
|
// Check if there are intervening Call nodes between acquire and
|
|
// release in the CFG (these could throw and bypass the release).
|
|
//
|
|
// NOTE: a stricter variant (audit #59) tried to exclude the
|
|
// resource's own lifecycle ops (the acquire/release proxy
|
|
// calls) and require reachability from the acquire node, to
|
|
// suppress spurious findings on correctly open/close-paired
|
|
// proxies. That over-suppressed a *tested* true positive: a
|
|
// class-field resource (`this.fd = fs.openSync(...)` in `open()`
|
|
// with `close()` in a separate method — see
|
|
// `tests/fixtures/real_world/typescript/cfg/try_catch_typed.ts`)
|
|
// has only its own acquire call in scope, so excluding it left
|
|
// zero intervening calls and dropped the must-match leak
|
|
// finding. Distinguishing a clean same-scope open/close pair
|
|
// from a cross-method field leak needs proper inter-method
|
|
// lifecycle modelling (deep-fix queue), so we keep the original
|
|
// span-based exclusion here.
|
|
let has_intervening_calls = cfg.node_references().any(|(_, ni)| {
|
|
ni.kind == StmtKind::Call
|
|
&& ni.ast.enclosing_func == info.ast.enclosing_func
|
|
&& ni.call.callee.is_some()
|
|
// Not the acquire or release proxy itself
|
|
&& !state.proxy_acquire_spans.values().any(|s| *s == ni.ast.span)
|
|
});
|
|
if has_intervening_calls {
|
|
let var_name = interner.resolve(sym);
|
|
let acquire_span = state.proxy_acquire_spans.get(&sym).copied();
|
|
findings.push(StateFinding {
|
|
rule_id: "state-resource-leak-possible".into(),
|
|
severity: Severity::Low,
|
|
span: acquire_span.unwrap_or(info.ast.span),
|
|
message: format!("resource `{var_name}` may not be closed on all paths"),
|
|
machine: "resource",
|
|
subject: Some(var_name.to_string()),
|
|
from_state: "open",
|
|
to_state: "possibly_leaked",
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── 3. Auth-required sinks ───────────────────────────────────────────
|
|
// Only run auth analysis when explicitly enabled (higher FP rate).
|
|
// Check if any function is a web entrypoint
|
|
let has_web_entrypoint = enable_auth
|
|
&& cfg.node_references().any(|(_, info)| {
|
|
if let Some(ref func_name) = info.ast.enclosing_func {
|
|
is_web_entrypoint_simple(func_name, lang, func_summaries, cfg)
|
|
} else {
|
|
false
|
|
}
|
|
});
|
|
|
|
if has_web_entrypoint {
|
|
for (idx, info) in cfg.node_references() {
|
|
if !is_privileged_sink(info) {
|
|
continue;
|
|
}
|
|
let Some(state) = result.states.get(&idx) else {
|
|
continue;
|
|
};
|
|
if state.auth.auth_level == AuthLevel::Unauthed {
|
|
// Suppress when the SSA taint engine has already proved
|
|
// the tainted input flowing into this sink is path-safe
|
|
// (PathFact `dotdot=No && absolute=No`). A web handler
|
|
// reading a sanitised user-controlled path is not the
|
|
// same shape as a handler reading any user-controlled
|
|
// path, the auth concern reduces once the data cannot
|
|
// escape into a privileged location. Note this is per
|
|
// CFG-node span, so co-located unrelated sinks are
|
|
// unaffected.
|
|
if path_safe_suppressed_sink_spans.contains(&info.ast.span) {
|
|
continue;
|
|
}
|
|
let callee_desc =
|
|
sanitize_desc(info.call.callee.as_deref().unwrap_or("(sensitive op)"));
|
|
findings.push(StateFinding {
|
|
rule_id: "state-unauthed-access".into(),
|
|
severity: Severity::High,
|
|
span: info.ast.span,
|
|
message: format!(
|
|
"sensitive operation `{callee_desc}` reached without authentication"
|
|
),
|
|
machine: "auth",
|
|
subject: None,
|
|
from_state: "unauthed",
|
|
to_state: "access",
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
// Dedup
|
|
findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
|
|
findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
|
|
|
|
findings
|
|
}
|
|
|
|
/// Find the CFG node where a variable was acquired (defined via Call node).
|
|
fn find_acquire_node(
|
|
cfg: &Cfg,
|
|
sym: super::symbol::SymbolId,
|
|
interner: &SymbolInterner,
|
|
enclosing_func: Option<&str>,
|
|
) -> Option<petgraph::graph::NodeIndex> {
|
|
let var_name = interner.resolve(sym);
|
|
// Try function-scoped match first (correct for multi-function files
|
|
// where the same variable name appears in multiple functions).
|
|
if let Some(func) = enclosing_func {
|
|
for (idx, info) in cfg.node_references() {
|
|
if info.kind == StmtKind::Call
|
|
&& info.ast.enclosing_func.as_deref() == Some(func)
|
|
&& info.taint.defines.as_deref() == Some(var_name)
|
|
{
|
|
return Some(idx);
|
|
}
|
|
}
|
|
}
|
|
// Fallback: first global match (for file-level Exit or top-level code).
|
|
for (idx, info) in cfg.node_references() {
|
|
if info.kind == StmtKind::Call && info.taint.defines.as_deref() == Some(var_name) {
|
|
return Some(idx);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Check if a node is a privileged sink (shell execution or file I/O).
|
|
fn is_privileged_sink(info: &crate::cfg::NodeInfo) -> bool {
|
|
info.taint.labels.iter().any(|l| {
|
|
if let DataLabel::Sink(caps) = l {
|
|
caps.intersects(Cap::SHELL_ESCAPE | Cap::FILE_IO)
|
|
} else {
|
|
false
|
|
}
|
|
})
|
|
}
|
|
|
|
/// Simplified web entrypoint check (avoids AnalysisContext dependency).
|
|
fn is_web_entrypoint_simple(
|
|
func_name: &str,
|
|
lang: Lang,
|
|
func_summaries: &crate::cfg::FuncSummaries,
|
|
_cfg: &Cfg,
|
|
) -> bool {
|
|
let name_lower = func_name.to_ascii_lowercase();
|
|
|
|
// Skip bare "main", it's typically a CLI entry
|
|
if name_lower == "main" {
|
|
return false;
|
|
}
|
|
|
|
let is_handler_name = name_lower.starts_with("handle_")
|
|
|| name_lower.starts_with("route_")
|
|
|| name_lower.starts_with("api_")
|
|
|| name_lower.starts_with("serve_")
|
|
|| name_lower.starts_with("process_")
|
|
|| name_lower == "handler";
|
|
|
|
if !is_handler_name {
|
|
return false;
|
|
}
|
|
|
|
// Check for web-like parameters
|
|
let web_params: &[&str] = match lang {
|
|
Lang::Rust => &["request", "req", "json", "query", "form", "payload", "body"],
|
|
Lang::JavaScript | Lang::TypeScript => &["req", "request", "ctx", "res", "response"],
|
|
Lang::Python => &["request", "req"],
|
|
Lang::Go => &["w", "writer", "r", "req", "request"],
|
|
Lang::Java => &["request", "req"],
|
|
_ => &["request", "req"],
|
|
};
|
|
|
|
// Confirm web parameters against THIS candidate handler only, not any
|
|
// function in the file. Scanning every summary made an unrelated
|
|
// function's `req`/`r`/`ctx` parameter promote every
|
|
// `process_*`/`api_*`/`serve_*` function in the file to a web
|
|
// entrypoint, firing High-severity state-unauthed-access on batch/CLI
|
|
// code. Filter the file-level summary map down to the named function
|
|
// via `FuncKey.name` (matches `info.ast.enclosing_func`); summary
|
|
// `entry` NodeIndexes are not valid in the per-body CFG, so the name
|
|
// is the safe selector here.
|
|
let has_web_params = func_summaries
|
|
.iter()
|
|
.filter(|(key, _)| key.name == func_name)
|
|
.any(|(_, s)| {
|
|
s.param_names
|
|
.iter()
|
|
.any(|p| web_params.contains(&p.to_ascii_lowercase().as_str()))
|
|
});
|
|
|
|
// Only handle_* and route_* are strong enough to skip param confirmation.
|
|
// api_*, serve_*, process_* require web parameter evidence.
|
|
let strong_name = name_lower.starts_with("handle_") || name_lower.starts_with("route_");
|
|
|
|
has_web_params || strong_name
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{AstMeta, CallMeta, EdgeKind, NodeInfo, TaintMeta};
    use crate::cfg_analysis::rules;
    use crate::state::domain::ProductState;
    use crate::state::engine;
    use crate::state::symbol::SymbolInterner;
    use crate::state::transfer::DefaultTransfer;
    use petgraph::graph::NodeIndex;
    use petgraph::Graph;
    use std::collections::{HashMap, HashSet};

    // ── Fixture builders ────────────────────────────────────────────────

    /// Node of the given kind with every other field defaulted.
    fn make_node(kind: StmtKind) -> NodeInfo {
        NodeInfo {
            kind,
            ..Default::default()
        }
    }

    /// `AstMeta` that only records the enclosing function.
    fn func_ast(func: &str) -> AstMeta {
        AstMeta {
            enclosing_func: Some(func.to_string()),
            ..Default::default()
        }
    }

    /// Node of the given kind that belongs to function `func`.
    fn make_func_node(kind: StmtKind, func: &str) -> NodeInfo {
        NodeInfo {
            kind,
            ast: func_ast(func),
            ..Default::default()
        }
    }

    /// `var = fopen(...)` call node carrying the supplied AST metadata.
    fn fopen_call(var: &str, ast: AstMeta) -> NodeInfo {
        NodeInfo {
            kind: StmtKind::Call,
            ast,
            taint: TaintMeta {
                defines: Some(var.into()),
                ..Default::default()
            },
            call: CallMeta {
                callee: Some("fopen".into()),
                ..Default::default()
            },
            ..Default::default()
        }
    }

    /// `return var` node inside function `func`.
    fn return_of(var: &str, func: &str) -> NodeInfo {
        NodeInfo {
            kind: StmtKind::Return,
            taint: TaintMeta {
                uses: vec![var.into()],
                ..Default::default()
            },
            ast: func_ast(func),
            ..Default::default()
        }
    }

    /// Connect consecutive nodes with `Seq` edges, in order.
    fn chain(cfg: &mut Cfg, nodes: &[NodeIndex]) {
        for pair in nodes.windows(2) {
            cfg.add_edge(pair[0], pair[1], EdgeKind::Seq);
        }
    }

    /// Run the forward analysis with the C resource pairs and extract
    /// findings with auth disabled and no suppressions.
    fn c_findings(cfg: &Cfg, entry: NodeIndex, interner: &SymbolInterner) -> Vec<StateFinding> {
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner,
            resource_method_summaries: &[],
            ptr_proxy_hints: None,
        };
        let result = engine::run_forward(cfg, entry, &transfer, ProductState::initial());
        extract_findings(
            &result,
            cfg,
            interner,
            Lang::C,
            &HashMap::new(),
            false,
            &HashSet::new(),
            None,
        )
    }

    /// Rule ids of `findings`, for assertion messages.
    fn rule_ids(findings: &[StateFinding]) -> Vec<&String> {
        findings.iter().map(|f| &f.rule_id).collect::<Vec<_>>()
    }

    // ── Tests ───────────────────────────────────────────────────────────

    #[test]
    fn detects_resource_leak() {
        // Entry → fopen(f) → Exit (no close)
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_call(
            "f",
            AstMeta {
                span: (10, 20),
                ..Default::default()
            },
        ));
        let exit = cfg.add_node(make_node(StmtKind::Exit));
        chain(&mut cfg, &[entry, open_node, exit]);

        let interner = SymbolInterner::from_cfg(&cfg);
        let findings = c_findings(&cfg, entry, &interner);

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].rule_id, "state-resource-leak");
        assert!(findings[0].message.contains("f"));
    }

    #[test]
    fn clean_open_close_no_findings() {
        // Entry → fopen(f) → fclose(f) → Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_call("f", AstMeta::default()));
        let close_node = cfg.add_node(NodeInfo {
            kind: StmtKind::Call,
            taint: TaintMeta {
                uses: vec!["f".into()],
                ..Default::default()
            },
            call: CallMeta {
                callee: Some("fclose".into()),
                ..Default::default()
            },
            ..Default::default()
        });
        let exit = cfg.add_node(make_node(StmtKind::Exit));
        chain(&mut cfg, &[entry, open_node, close_node, exit]);

        let interner = SymbolInterner::from_cfg(&cfg);
        let findings = c_findings(&cfg, entry, &interner);

        assert!(findings.is_empty());
    }

    #[test]
    fn terminal_exit_is_topological() {
        // Per-body graph: Entry → Call → Return → Exit (all enclosing_func=Some)
        // Only Exit should be terminal (no successors in same scope).
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_func_node(StmtKind::Entry, "f"));
        let call = cfg.add_node(fopen_call("x", func_ast("f")));
        let ret = cfg.add_node(return_of("x", "f"));
        let exit = cfg.add_node(make_func_node(StmtKind::Exit, "f"));
        chain(&mut cfg, &[entry, call, ret, exit]);

        let is_terminal = |node: NodeIndex| is_terminal_function_exit(node, &cfg[node], &cfg);

        assert!(!is_terminal(entry), "Entry must not be terminal");
        assert!(!is_terminal(call), "Call must not be terminal");
        assert!(
            !is_terminal(ret),
            "Return must not be terminal — it flows into Exit"
        );
        assert!(
            is_terminal(exit),
            "Exit must be terminal — no successors in same scope"
        );
    }

    #[test]
    fn per_body_factory_returned_resource_no_finding() {
        // Per-body graph: Entry → fopen(f) → return f → Exit
        // All nodes have enclosing_func=Some("factory").
        // The resource is returned, no leak finding expected.
        let func = "factory";
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_func_node(StmtKind::Entry, func));
        let open_node = cfg.add_node(fopen_call(
            "f",
            AstMeta {
                span: (10, 20),
                enclosing_func: Some(func.into()),
            },
        ));
        let ret = cfg.add_node(return_of("f", func));
        let exit = cfg.add_node(make_func_node(StmtKind::Exit, func));
        chain(&mut cfg, &[entry, open_node, ret, exit]);

        let interner = SymbolInterner::from_cfg_scoped(&cfg);
        let findings = c_findings(&cfg, entry, &interner);

        assert!(
            findings.is_empty(),
            "Resource returned from factory must not produce leak finding.\n Got: {:?}",
            rule_ids(&findings)
        );
    }

    #[test]
    fn per_body_non_returned_resource_leaks() {
        // Per-body graph: Entry → fopen(f) → return (no uses) → Exit
        // All nodes have enclosing_func=Some("leaker").
        // Resource is NOT returned, exactly one state-resource-leak expected.
        let func = "leaker";
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_func_node(StmtKind::Entry, func));
        let open_node = cfg.add_node(fopen_call(
            "f",
            AstMeta {
                span: (10, 20),
                enclosing_func: Some(func.into()),
            },
        ));
        // A bare `return` that does not mention the resource.
        let ret = cfg.add_node(make_func_node(StmtKind::Return, func));
        let exit = cfg.add_node(make_func_node(StmtKind::Exit, func));
        chain(&mut cfg, &[entry, open_node, ret, exit]);

        let interner = SymbolInterner::from_cfg_scoped(&cfg);
        let findings = c_findings(&cfg, entry, &interner);

        assert_eq!(
            findings.len(),
            1,
            "Non-returned resource must produce exactly one finding.\n Got: {:?}",
            rule_ids(&findings)
        );
        assert_eq!(findings[0].rule_id, "state-resource-leak");
    }

    /// Finding #64: `is_web_entrypoint_simple` must confirm web parameters
    /// against the *candidate* handler only, not any function in the file.
    /// Before the fix, an unrelated `read_stream(req)` in the same file
    /// promoted every `process_*` function to a web entrypoint, firing
    /// High-severity `state-unauthed-access` on batch/CLI code.
    #[test]
    fn web_entrypoint_param_confirmation_is_per_function() {
        use crate::cfg::LocalFuncSummary;
        use crate::symbol::FuncKey;

        /// Minimal Python function summary with the given parameter names.
        fn summary(name: &str, params: &[&str]) -> (FuncKey, LocalFuncSummary) {
            let key = FuncKey::new_function(Lang::Python, "f.py", name, Some(params.len()));
            let s = LocalFuncSummary {
                entry: NodeIndex::new(0),
                source_caps: Cap::empty(),
                sanitizer_caps: Cap::empty(),
                sink_caps: Cap::empty(),
                param_count: params.len(),
                param_names: params.iter().map(|p| p.to_string()).collect(),
                propagating_params: Vec::new(),
                tainted_sink_params: Vec::new(),
                callees: Vec::new(),
                container: String::new(),
                disambig: None,
                kind: crate::symbol::FuncKind::Function,
            };
            (key, s)
        }

        // `process_data` has NO web-like parameters; `read_stream` does.
        let summaries: crate::cfg::FuncSummaries = [
            summary("process_data", &["data"]),
            summary("read_stream", &["req"]),
        ]
        .into_iter()
        .collect();

        let cfg: Cfg = Graph::new();

        // The unrelated `read_stream(req)` must NOT promote `process_data`.
        assert!(
            !is_web_entrypoint_simple("process_data", Lang::Python, &summaries, &cfg),
            "process_data has no web params and must not be a web entrypoint just \
             because an unrelated function in the file does"
        );

        // `read_stream` is not a handler-prefixed name, so even though it
        // carries the `req` param it is NOT an entrypoint, which confirms the
        // name gate still stands independently of the param check.
        assert!(
            !is_web_entrypoint_simple("read_stream", Lang::Python, &summaries, &cfg),
            "read_stream lacks a handler-prefixed name and must not be an entrypoint"
        );

        // Positive control: give `process_data` its own `req` param.
        let summaries2: crate::cfg::FuncSummaries =
            [summary("process_data", &["req"])].into_iter().collect();
        assert!(
            is_web_entrypoint_simple("process_data", Lang::Python, &summaries2, &cfg),
            "process_data with its own web param must be a web entrypoint"
        );
    }
}
|