nyx/src/state/mod.rs

358 lines
15 KiB
Rust

//! State-model analysis: resource lifecycle and authentication state tracking.
//!
//! Runs a per-function state machine over the CFG to detect use-after-close,
//! double-close, resource leaks, and unauthenticated access to privileged
//! operations.
//!
//! Enabled by default. Disable via `scanner.enable_state_analysis = false`.
//! Runs in `--mode full` and `--mode taint`; skipped in AST-only mode.
//!
//! # Rule IDs
//!
//! | Rule ID | Severity | What it detects |
//! |---------|----------|-----------------|
//! | `state-use-after-close` | High | Operation on a resource after it was closed |
//! | `state-double-close` | Medium | Resource closed twice |
//! | `state-resource-leak` | Medium | Resource opened and never closed on any path |
//! | `state-resource-leak-possible` | Low | Resource closed on some paths but not others |
//! | `state-unauthed-access` | High | Web handler reaches privileged sink without an auth call |
//!
//! # Managed-resource suppression
//!
//! Language-specific cleanup patterns suppress leak findings automatically:
//!
//! | Pattern | Languages |
//! |---------|-----------|
//! | RAII / Drop | Rust (all leak findings suppressed except `alloc`/`dealloc`) |
//! | Smart pointers (`make_unique`, `make_shared`) | C++ |
//! | `defer f.Close()` | Go |
//! | `with open(f) as f:` | Python |
//! | try-with-resources | Java |
//!
//! # Tracked acquire/release pairs
//!
//! C/C++: `fopen`/`fclose`, `open`/`close`, `socket`/`close`,
//! `malloc`/`free`, `pthread_mutex_lock`/`pthread_mutex_unlock`,
//! `new`/`delete`.
//!
//! Rust: `File::open`/`close`, `TcpStream::connect`/`shutdown`,
//! mutex `lock`/`read`/`write`/`drop`.
//!
//! Java: stream/connection/socket constructors / `close`, `getConnection`/`close`.
//!
//! Go, Python, JavaScript, Ruby, PHP follow language-idiomatic equivalents.
//!
//! # Submodules
//!
//! - [`domain`]: state lattice (`ResourceState`, `AuthState`, `StateCell`)
//! - [`engine`]: generic forward transfer engine (`Transfer` trait, `run_forward`)
//! - [`facts`]: per-node state fact extraction
//! - [`lattice`]: lattice join/meet for state values
//! - [`symbol`]: resource symbol normalisation
//! - [`transfer`]: `DefaultTransfer` — the concrete resource-lifecycle transfer function
pub mod domain;
pub mod engine;
pub mod facts;
pub mod lattice;
pub mod symbol;
pub mod transfer;
use crate::cfg::{Cfg, FuncSummaries};
use crate::cfg_analysis::rules;
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
use domain::{AuthLevel, ProductState};
use engine::MAX_TRACKED_VARS;
use facts::StateFinding;
use petgraph::graph::NodeIndex;
use symbol::SymbolInterner;
use transfer::DefaultTransfer;
/// Derive a function's starting `AuthLevel` from the names of its
/// decorators, annotations, or attributes.
///
/// Every name is compared ASCII-case-insensitively:
///
/// - a name containing `admin`, `hasrole`, or `superuser` yields `Admin`
///   immediately;
/// - a name that equals, or ends with, any matcher of the language's auth
///   rules raises the result to `Authed`;
/// - when nothing matches (or `decorators` is empty) the result is
///   `Unauthed`.
pub fn classify_auth_decorators(lang: Lang, decorators: &[String]) -> AuthLevel {
    // Admin substrings. This is meant to be the same static list the
    // call-site transfer uses, so decorators and runtime checks agree on
    // privilege.
    const ADMIN_MARKERS: [&str; 3] = ["admin", "hasrole", "superuser"];

    if decorators.is_empty() {
        return AuthLevel::Unauthed;
    }

    // The matcher set depends only on `lang`, so lowercase it once up front
    // rather than once per decorator.
    let lowered_matchers: Vec<String> = rules::auth_rules(lang)
        .iter()
        .flat_map(|rule| rule.matchers.iter())
        .map(|matcher| matcher.to_ascii_lowercase())
        .collect();

    let mut level = AuthLevel::Unauthed;
    for name in decorators.iter().map(|raw| raw.to_ascii_lowercase()) {
        // Admin wins outright, regardless of what earlier names matched.
        if ADMIN_MARKERS.iter().any(|marker| name.contains(*marker)) {
            return AuthLevel::Admin;
        }

        let is_auth_marker = lowered_matchers
            .iter()
            .any(|matcher| name == *matcher || name.ends_with(matcher.as_str()));
        if is_auth_marker && level < AuthLevel::Authed {
            level = AuthLevel::Authed;
        }
    }
    level
}
/// Run the state-model dataflow over one function's CFG and collect the
/// resulting findings.
///
/// The findings cover use-after-close, double-close, resource leaks, and
/// unauthenticated access to sensitive sinks.
///
/// `path_safe_suppressed_sink_spans` holds the CFG sink spans whose tainted
/// inputs the SSA taint engine proved path-safe. A privileged sink at one of
/// those spans that is reached without authentication does not produce
/// `state-unauthed-access`: the taint engine has already shown that the
/// user-controlled input cannot escape into a privileged location, so the
/// auth concern is structurally reduced.
#[allow(clippy::too_many_arguments)]
pub fn run_state_analysis(
    cfg: &Cfg,
    entry: NodeIndex,
    lang: Lang,
    _source_bytes: &[u8],
    func_summaries: &FuncSummaries,
    _global_summaries: Option<&GlobalSummaries>,
    enable_auth: bool,
    resource_method_summaries: &[transfer::ResourceMethodSummary],
    auth_decorators: &[String],
    path_safe_suppressed_sink_spans: &std::collections::HashSet<(usize, usize)>,
    // Optional `var_name → PtrProxyHint` map built from the body's
    // PointsToFacts. When supplied, the proxy-acquire transfer skips
    // SymbolId attribution for field-aliased receivers (`m := c.mu;
    // m.Lock()`) and routes them through `chain_proxies` instead. `None`
    // disables this; the behaviour is strictly additive.
    ptr_proxy_hints: Option<&std::collections::HashMap<String, crate::pointer::PtrProxyHint>>,
    // Variable names whose `.close()`/release calls sit inside a nested
    // closure (event handler, deferred callback) that the per-body CFG
    // cannot observe directly. Leak findings are suppressed for handles
    // whose cleanup is registered as a callback (`ws.on("close", () =>
    // ws2.close())`). `None` is for languages or shapes that do not need
    // this.
    closure_released_var_names: Option<&std::collections::HashSet<String>>,
) -> Vec<StateFinding> {
    let _span = tracing::debug_span!("run_state_analysis").entered();

    let interner = SymbolInterner::from_cfg_scoped(cfg);
    let symbol_count = interner.len();
    if symbol_count > MAX_TRACKED_VARS {
        tracing::warn!(
            symbols = symbol_count,
            max = MAX_TRACKED_VARS,
            "state analysis: too many variables, capping tracking"
        );
    }

    // Seed the entry state from decorator-based authorization markers: a
    // function carrying an auth decorator/annotation/attribute starts as
    // `Authed` (or `Admin`) rather than `Unauthed`, so the privileged-sink
    // check in `extract_findings` does not report what framework-level auth
    // already enforces.
    let mut entry_state = ProductState::initial();
    entry_state.auth.auth_level = classify_auth_decorators(lang, auth_decorators);

    let transfer_fn = DefaultTransfer {
        lang,
        resource_pairs: rules::resource_pairs(lang),
        interner: &interner,
        resource_method_summaries,
        ptr_proxy_hints,
    };

    let dataflow = engine::run_forward(cfg, entry, &transfer_fn, entry_state);
    facts::extract_findings(
        &dataflow,
        cfg,
        &interner,
        lang,
        func_summaries,
        enable_auth,
        path_safe_suppressed_sink_spans,
        closure_released_var_names,
    )
}
/// Build a per-body map of variable names whose release calls
/// (`.close`, `.destroy`, `.end`, `.release`, …) appear inside a
/// **descendant** body (a closure / event handler nested inside the
/// body that opens the handle).
///
/// Returns `body_id → set of var names released somewhere inside that
/// body's nested-closure subtree`. Used by the structural ResourceMisuse
/// pass and the state-model leak pass to suppress findings whose cleanup
/// lives in a callback the per-body CFG can't follow
/// (`socket.on("close", () => ws.close())`).
///
/// Only descendants contribute. Sibling methods on the same class don't
/// share resource ownership, so a release in `queryAndClose` must NOT
/// silence a leak in sibling `queryAndLeak`; release sets flow strictly
/// along `parent_body_id` links.
///
/// A callee counts as a release when, compared ASCII-case-insensitively,
/// it ends with a dot-prefixed release pattern (`.close`), or — for a
/// pattern without a leading dot — equals the pattern or ends with
/// `.<pattern>`.
pub fn collect_closure_released_var_names(
    bodies: &[crate::cfg::BodyCfg],
    lang: Lang,
) -> std::collections::HashMap<crate::cfg::BodyId, std::collections::HashSet<String>> {
    use crate::cfg::{BodyId, StmtKind};
    use petgraph::visit::IntoNodeReferences;
    use std::collections::{HashMap, HashSet};

    // The release patterns depend only on `lang`, so normalise them once
    // instead of re-lowercasing and re-formatting them for every call node.
    // Each entry is `(exact, suffix)`:
    //   - dot-prefixed pattern `.close` → `(None, ".close")`
    //   - bare pattern `fclose`         → `(Some("fclose"), ".fclose")`
    let release_patterns: Vec<(Option<String>, String)> = rules::resource_pairs(lang)
        .iter()
        .flat_map(|pair| pair.release.iter())
        .map(|raw| {
            let lowered = raw.to_ascii_lowercase();
            if lowered.starts_with('.') {
                (None, lowered)
            } else {
                let suffix = format!(".{lowered}");
                (Some(lowered), suffix)
            }
        })
        .collect();
    let is_release_callee = |callee_lower: &str| {
        release_patterns.iter().any(|(exact, suffix)| {
            exact.as_deref() == Some(callee_lower) || callee_lower.ends_with(suffix.as_str())
        })
    };

    // Step 1: collect releases per body. Only nested (non-toplevel)
    // closures are eligible — top-level bodies' own releases are
    // already tracked by the dataflow.
    let mut per_body: HashMap<BodyId, HashSet<String>> = HashMap::new();
    for body in bodies {
        if body.meta.parent_body_id.is_none() {
            continue;
        }
        let mut released = HashSet::new();
        for (_idx, info) in body.graph.node_references() {
            if info.kind != StmtKind::Call {
                continue;
            }
            let Some(callee) = info.call.callee.as_deref() else {
                continue;
            };
            if !is_release_callee(&callee.to_ascii_lowercase()) {
                continue;
            }
            // Prefer the explicit receiver; otherwise fall back to the
            // text before the last `.` of the (original-case) callee.
            if let Some(rcv) = info.call.receiver.as_deref() {
                released.insert(rcv.to_string());
            } else if let Some((rcv, _)) = callee.rsplit_once('.')
                && !rcv.is_empty()
            {
                released.insert(rcv.to_string());
            }
        }
        if !released.is_empty() {
            per_body.insert(body.meta.id, released);
        }
    }

    // Step 2: roll up into ancestor bodies. Walk each non-top body's
    // parent chain and union its release set into every ancestor's
    // entry. Class methods at the same nesting level (siblings under a
    // class body) do not roll up into each other — they have distinct
    // BodyId entries and the chain only flows through `parent_body_id`.
    let by_id: HashMap<BodyId, &crate::cfg::BodyCfg> =
        bodies.iter().map(|b| (b.meta.id, b)).collect();
    let mut rollup: HashMap<BodyId, HashSet<String>> = HashMap::new();
    for body in bodies {
        let Some(released) = per_body.get(&body.meta.id) else {
            continue;
        };
        let mut cur = body.meta.parent_body_id;
        while let Some(pid) = cur {
            rollup
                .entry(pid)
                .or_default()
                .extend(released.iter().cloned());
            cur = by_id.get(&pid).and_then(|b| b.meta.parent_body_id);
        }
    }
    rollup
}
/// Build resource method summaries by pre-scanning all method bodies for known
/// resource acquire/release operations. Only creates summaries for methods whose
/// bodies actually contain matching operations, never infers from names alone.
pub fn build_resource_method_summaries(
bodies: &[crate::cfg::BodyCfg],
lang: Lang,
) -> Vec<transfer::ResourceMethodSummary> {
use petgraph::visit::IntoNodeReferences;
let resource_pairs = rules::resource_pairs(lang);
let mut summaries = Vec::new();
for body in bodies {
let method_name = match &body.meta.name {
Some(name) => name.clone(),
None => continue,
};
let class_group = match body.meta.parent_body_id {
Some(pid) => pid,
None => continue, // top-level functions are not class methods
};
for (_, info) in body.graph.node_references() {
// Check both Call and Seq (Assignment) nodes, resource operations
// can appear as RHS of assignments (e.g., `this.fd = fs.openSync(...)`).
if !matches!(
info.kind,
crate::cfg::StmtKind::Call | crate::cfg::StmtKind::Seq
) {
continue;
}
// Skip acquires whose lifetime is bounded by a managed cleanup
// scope (Python `with`, Java try-with-resources, Ruby
// File.open-with-block, Rust RAII). The acquired handle is
// released before the method returns, so propagating an
// Acquire effect onto the caller's receiver creates an FP
// class where callers of `def foo(self): with open(...): ...`
// are flagged as leaking the receiver.
if info.managed_resource {
continue;
}
let callee = match &info.call.callee {
Some(c) => c.to_ascii_lowercase(),
None => continue,
};
for pair in resource_pairs {
if pair
.acquire
.iter()
.any(|a| transfer::callee_matches_pub(&callee, a))
{
// The receiver-proxy mechanism (state/transfer.rs)
// matches a method-name summary against `recv.method()`
// call sites and marks the receiver as OPEN. This is
// only meaningful when the acquire actually binds a
// resource into receiver state (`self.fd = open(...)`,
// `this.fd = fs.openSync(...)`). Acquires with no
// binding (`return open(...)`) or with a local-only
// binding (`f = open(...); f.close()`) do not transfer
// ownership onto the caller's receiver. Gate the
// summary on a defines field so anonymous and local-
// only acquires no longer leak through this path.
if info.taint.defines.is_none() {
continue;
}
summaries.push(transfer::ResourceMethodSummary {
method_name: method_name.clone(),
effect: transfer::ResourceEffect::Acquire,
class_group,
original_span: info.ast.span,
});
}
if pair
.release
.iter()
.any(|r| transfer::callee_matches_pub(&callee, r))
{
summaries.push(transfer::ResourceMethodSummary {
method_name: method_name.clone(),
effect: transfer::ResourceEffect::Release,
class_group,
original_span: info.ast.span,
});
}
}
}
}
summaries
}