mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
358 lines
15 KiB
Rust
358 lines
15 KiB
Rust
//! State-model analysis: resource lifecycle and authentication state tracking.
|
|
//!
|
|
//! Runs a per-function state machine over the CFG to detect use-after-close,
|
|
//! double-close, resource leaks, and unauthenticated access to privileged
|
|
//! operations.
|
|
//!
|
|
//! Enabled by default. Disable via `scanner.enable_state_analysis = false`.
|
|
//! Runs in `--mode full` and `--mode taint`; skipped in AST-only mode.
|
|
//!
|
|
//! # Rule IDs
|
|
//!
|
|
//! | Rule ID | Severity | What it detects |
|
|
//! |---------|----------|-----------------|
|
|
//! | `state-use-after-close` | High | Operation on a resource after it was closed |
|
|
//! | `state-double-close` | Medium | Resource closed twice |
|
|
//! | `state-resource-leak` | Medium | Resource opened and never closed on any path |
|
|
//! | `state-resource-leak-possible` | Low | Resource closed on some paths but not others |
|
|
//! | `state-unauthed-access` | High | Web handler reaches privileged sink without an auth call |
|
|
//!
|
|
//! # Managed-resource suppression
|
|
//!
|
|
//! Language-specific cleanup patterns suppress leak findings automatically:
|
|
//!
|
|
//! | Pattern | Languages |
|
|
//! |---------|-----------|
|
|
//! | RAII / Drop | Rust (all leak findings suppressed except `alloc`/`dealloc`) |
|
|
//! | Smart pointers (`make_unique`, `make_shared`) | C++ |
|
|
//! | `defer f.Close()` | Go |
|
|
//! | `with open(f) as f:` | Python |
|
|
//! | try-with-resources | Java |
|
|
//!
|
|
//! # Tracked acquire/release pairs
|
|
//!
|
|
//! C/C++: `fopen`/`fclose`, `open`/`close`, `socket`/`close`,
|
|
//! `malloc`/`free`, `pthread_mutex_lock`/`pthread_mutex_unlock`,
|
|
//! `new`/`delete`.
|
|
//!
|
|
//! Rust: `File::open`/`close`, `TcpStream::connect`/`shutdown`,
|
|
//! mutex `lock`/`read`/`write`/`drop`.
|
|
//!
|
|
//! Java: stream/connection/socket constructors / `close`, `getConnection`/`close`.
|
|
//!
|
|
//! Go, Python, JavaScript, Ruby, PHP follow language-idiomatic equivalents.
|
|
//!
|
|
//! # Submodules
|
|
//!
|
|
//! - [`domain`]: state lattice (`ResourceState`, `AuthState`, `StateCell`)
|
|
//! - [`engine`]: generic forward transfer engine (`Transfer` trait, `run_forward`)
|
|
//! - [`facts`]: per-node state fact extraction
|
|
//! - [`lattice`]: lattice join/meet for state values
|
|
//! - [`symbol`]: resource symbol normalisation
|
|
//! - [`transfer`]: `DefaultTransfer` — the concrete resource-lifecycle transfer function
|
|
|
|
pub mod domain;
|
|
pub mod engine;
|
|
pub mod facts;
|
|
pub mod lattice;
|
|
pub mod symbol;
|
|
pub mod transfer;
|
|
|
|
use crate::cfg::{Cfg, FuncSummaries};
|
|
use crate::cfg_analysis::rules;
|
|
use crate::summary::GlobalSummaries;
|
|
use crate::symbol::Lang;
|
|
use domain::{AuthLevel, ProductState};
|
|
use engine::MAX_TRACKED_VARS;
|
|
use facts::StateFinding;
|
|
use petgraph::graph::NodeIndex;
|
|
use symbol::SymbolInterner;
|
|
use transfer::DefaultTransfer;
|
|
|
|
/// Classify decorator/annotation/attribute names against the language's auth
|
|
/// rules and return the resulting `AuthLevel`. Any admin-like match produces
|
|
/// `Admin`; any generic auth match produces `Authed`; otherwise `Unauthed`.
|
|
pub fn classify_auth_decorators(lang: Lang, decorators: &[String]) -> AuthLevel {
|
|
if decorators.is_empty() {
|
|
return AuthLevel::Unauthed;
|
|
}
|
|
let auth_rules = rules::auth_rules(lang);
|
|
let mut level = AuthLevel::Unauthed;
|
|
for dec in decorators {
|
|
let d = dec.to_ascii_lowercase();
|
|
// Admin patterns, match the same static list used by the call-site
|
|
// transfer so decorators and runtime checks agree on privilege.
|
|
if d.contains("admin") || d.contains("hasrole") || d.contains("superuser") {
|
|
return AuthLevel::Admin;
|
|
}
|
|
let matches = auth_rules.iter().any(|rule| {
|
|
rule.matchers.iter().any(|m| {
|
|
let ml = m.to_ascii_lowercase();
|
|
d == ml || d.ends_with(&ml)
|
|
})
|
|
});
|
|
if matches && level < AuthLevel::Authed {
|
|
level = AuthLevel::Authed;
|
|
}
|
|
}
|
|
level
|
|
}
|
|
|
|
/// Run state-model dataflow analysis on a single function's CFG.
|
|
///
|
|
/// Returns findings for use-after-close, double-close, resource leaks,
|
|
/// and unauthenticated access to sensitive sinks.
|
|
///
|
|
/// `path_safe_suppressed_sink_spans` lists CFG sink spans whose tainted
|
|
/// inputs were proved path-safe by the SSA taint engine. When a
|
|
/// privileged sink at one of those spans is reached without
|
|
/// authentication, `state-unauthed-access` is suppressed: the taint
|
|
/// engine has already proved the user-controlled input cannot escape
|
|
/// into a privileged location, so the auth concern is structurally
|
|
/// reduced.
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn run_state_analysis(
|
|
cfg: &Cfg,
|
|
entry: NodeIndex,
|
|
lang: Lang,
|
|
_source_bytes: &[u8],
|
|
func_summaries: &FuncSummaries,
|
|
_global_summaries: Option<&GlobalSummaries>,
|
|
enable_auth: bool,
|
|
resource_method_summaries: &[transfer::ResourceMethodSummary],
|
|
auth_decorators: &[String],
|
|
path_safe_suppressed_sink_spans: &std::collections::HashSet<(usize, usize)>,
|
|
// Optional `var_name → PtrProxyHint` map derived from the body's
|
|
// PointsToFacts. When present, the proxy-acquire transfer suppresses
|
|
// SymbolId attribution on field-aliased receivers (`m := c.mu;
|
|
// m.Lock()`) and routes them through `chain_proxies` instead. Pass
|
|
// `None` to disable, strict-additive.
|
|
ptr_proxy_hints: Option<&std::collections::HashMap<String, crate::pointer::PtrProxyHint>>,
|
|
// Names of variables whose `.close()`/release calls live in a nested
|
|
// closure (event handler, deferred callback) that the per-body CFG
|
|
// can't observe directly. Used to suppress resource-leak findings
|
|
// for handles whose cleanup is registered as a callback (`ws.on(
|
|
// "close", () => ws2.close())`). Pass `None` for languages or
|
|
// shapes that don't need this.
|
|
closure_released_var_names: Option<&std::collections::HashSet<String>>,
|
|
) -> Vec<StateFinding> {
|
|
let _span = tracing::debug_span!("run_state_analysis").entered();
|
|
|
|
let interner = SymbolInterner::from_cfg_scoped(cfg);
|
|
|
|
if interner.len() > MAX_TRACKED_VARS {
|
|
tracing::warn!(
|
|
symbols = interner.len(),
|
|
max = MAX_TRACKED_VARS,
|
|
"state analysis: too many variables, capping tracking"
|
|
);
|
|
}
|
|
|
|
let resource_pairs = rules::resource_pairs(lang);
|
|
let transfer = DefaultTransfer {
|
|
lang,
|
|
resource_pairs,
|
|
interner: &interner,
|
|
resource_method_summaries,
|
|
ptr_proxy_hints,
|
|
};
|
|
|
|
// Seed initial auth level from decorator-based authorization markers.
|
|
// Functions tagged with an auth decorator/annotation/attribute start in
|
|
// `Authed` (or `Admin`) instead of `Unauthed`, so the privileged-sink
|
|
// check in `extract_findings` suppresses findings framework-level auth
|
|
// already enforces.
|
|
let mut initial = ProductState::initial();
|
|
initial.auth.auth_level = classify_auth_decorators(lang, auth_decorators);
|
|
let result = engine::run_forward(cfg, entry, &transfer, initial);
|
|
|
|
facts::extract_findings(
|
|
&result,
|
|
cfg,
|
|
&interner,
|
|
lang,
|
|
func_summaries,
|
|
enable_auth,
|
|
path_safe_suppressed_sink_spans,
|
|
closure_released_var_names,
|
|
)
|
|
}
|
|
|
|
/// Build a per-body map of variable names whose release calls
|
|
/// (`.close`, `.destroy`, `.end`, `.release`, …) appear inside a
|
|
/// **descendant** body (a closure / event handler nested inside the
|
|
/// body that opens the handle).
|
|
///
|
|
/// Returned: `body_id → set of var names released somewhere inside
|
|
/// that body's nested-closure subtree`. Used by the structural
|
|
/// ResourceMisuse pass and the state-model leak pass to suppress
|
|
/// findings whose cleanup lives in a callback the per-body CFG can't
|
|
/// follow (`socket.on("close", () => ws.close())`).
|
|
///
|
|
/// Restricted to descendants — sibling methods on the same class
|
|
/// don't share resource ownership, so a release in `queryAndClose`
|
|
/// must NOT silence a leak in sibling `queryAndLeak`. Only true
|
|
/// nested-closure parent / child relationships participate.
|
|
pub fn collect_closure_released_var_names(
|
|
bodies: &[crate::cfg::BodyCfg],
|
|
lang: Lang,
|
|
) -> std::collections::HashMap<crate::cfg::BodyId, std::collections::HashSet<String>> {
|
|
use crate::cfg::{BodyId, StmtKind};
|
|
use petgraph::visit::IntoNodeReferences;
|
|
|
|
// Step 1: collect releases per body. Only nested (non-toplevel)
|
|
// closures are eligible — top-level bodies' own releases are
|
|
// already tracked by the dataflow.
|
|
let pairs = rules::resource_pairs(lang);
|
|
let mut per_body: std::collections::HashMap<BodyId, std::collections::HashSet<String>> =
|
|
std::collections::HashMap::new();
|
|
for body in bodies {
|
|
if body.meta.parent_body_id.is_none() {
|
|
continue;
|
|
}
|
|
let mut local = std::collections::HashSet::new();
|
|
for (_idx, info) in body.graph.node_references() {
|
|
if info.kind != StmtKind::Call {
|
|
continue;
|
|
}
|
|
let Some(callee) = info.call.callee.as_deref() else {
|
|
continue;
|
|
};
|
|
let cl = callee.to_ascii_lowercase();
|
|
let is_release = pairs.iter().any(|p| {
|
|
p.release.iter().any(|r| {
|
|
let rl = r.to_ascii_lowercase();
|
|
if let Some(method) = rl.strip_prefix('.') {
|
|
cl.ends_with(&format!(".{method}"))
|
|
} else {
|
|
cl == rl || cl.ends_with(&format!(".{rl}"))
|
|
}
|
|
})
|
|
});
|
|
if !is_release {
|
|
continue;
|
|
}
|
|
if let Some(rcv) = info.call.receiver.as_deref() {
|
|
local.insert(rcv.to_string());
|
|
} else if let Some((rcv, _)) = callee.rsplit_once('.')
|
|
&& !rcv.is_empty()
|
|
{
|
|
local.insert(rcv.to_string());
|
|
}
|
|
}
|
|
if !local.is_empty() {
|
|
per_body.insert(body.meta.id, local);
|
|
}
|
|
}
|
|
|
|
// Step 2: roll up into ancestor bodies. Walk each non-top body's
|
|
// parent chain and union its release set into every ancestor's
|
|
// entry. Class methods at the same nesting level (siblings under a
|
|
// class body) do not roll up into each other — they have distinct
|
|
// BodyId entries and the chain only flows through `parent_body_id`.
|
|
let mut rollup: std::collections::HashMap<BodyId, std::collections::HashSet<String>> =
|
|
std::collections::HashMap::new();
|
|
let by_id: std::collections::HashMap<BodyId, &crate::cfg::BodyCfg> =
|
|
bodies.iter().map(|b| (b.meta.id, b)).collect();
|
|
for body in bodies {
|
|
let Some(local) = per_body.get(&body.meta.id) else {
|
|
continue;
|
|
};
|
|
let mut cur = body.meta.parent_body_id;
|
|
while let Some(pid) = cur {
|
|
rollup.entry(pid).or_default().extend(local.iter().cloned());
|
|
cur = by_id.get(&pid).and_then(|b| b.meta.parent_body_id);
|
|
}
|
|
}
|
|
rollup
|
|
}
|
|
|
|
/// Build resource method summaries by pre-scanning all method bodies for known
|
|
/// resource acquire/release operations. Only creates summaries for methods whose
|
|
/// bodies actually contain matching operations, never infers from names alone.
|
|
pub fn build_resource_method_summaries(
|
|
bodies: &[crate::cfg::BodyCfg],
|
|
lang: Lang,
|
|
) -> Vec<transfer::ResourceMethodSummary> {
|
|
use petgraph::visit::IntoNodeReferences;
|
|
|
|
let resource_pairs = rules::resource_pairs(lang);
|
|
let mut summaries = Vec::new();
|
|
|
|
for body in bodies {
|
|
let method_name = match &body.meta.name {
|
|
Some(name) => name.clone(),
|
|
None => continue,
|
|
};
|
|
let class_group = match body.meta.parent_body_id {
|
|
Some(pid) => pid,
|
|
None => continue, // top-level functions are not class methods
|
|
};
|
|
|
|
for (_, info) in body.graph.node_references() {
|
|
// Check both Call and Seq (Assignment) nodes, resource operations
|
|
// can appear as RHS of assignments (e.g., `this.fd = fs.openSync(...)`).
|
|
if !matches!(
|
|
info.kind,
|
|
crate::cfg::StmtKind::Call | crate::cfg::StmtKind::Seq
|
|
) {
|
|
continue;
|
|
}
|
|
// Skip acquires whose lifetime is bounded by a managed cleanup
|
|
// scope (Python `with`, Java try-with-resources, Ruby
|
|
// File.open-with-block, Rust RAII). The acquired handle is
|
|
// released before the method returns, so propagating an
|
|
// Acquire effect onto the caller's receiver creates an FP
|
|
// class where callers of `def foo(self): with open(...): ...`
|
|
// are flagged as leaking the receiver.
|
|
if info.managed_resource {
|
|
continue;
|
|
}
|
|
let callee = match &info.call.callee {
|
|
Some(c) => c.to_ascii_lowercase(),
|
|
None => continue,
|
|
};
|
|
for pair in resource_pairs {
|
|
if pair
|
|
.acquire
|
|
.iter()
|
|
.any(|a| transfer::callee_matches_pub(&callee, a))
|
|
{
|
|
// The receiver-proxy mechanism (state/transfer.rs)
|
|
// matches a method-name summary against `recv.method()`
|
|
// call sites and marks the receiver as OPEN. This is
|
|
// only meaningful when the acquire actually binds a
|
|
// resource into receiver state (`self.fd = open(...)`,
|
|
// `this.fd = fs.openSync(...)`). Acquires with no
|
|
// binding (`return open(...)`) or with a local-only
|
|
// binding (`f = open(...); f.close()`) do not transfer
|
|
// ownership onto the caller's receiver. Gate the
|
|
// summary on a defines field so anonymous and local-
|
|
// only acquires no longer leak through this path.
|
|
if info.taint.defines.is_none() {
|
|
continue;
|
|
}
|
|
summaries.push(transfer::ResourceMethodSummary {
|
|
method_name: method_name.clone(),
|
|
effect: transfer::ResourceEffect::Acquire,
|
|
class_group,
|
|
original_span: info.ast.span,
|
|
});
|
|
}
|
|
if pair
|
|
.release
|
|
.iter()
|
|
.any(|r| transfer::callee_matches_pub(&callee, r))
|
|
{
|
|
summaries.push(transfer::ResourceMethodSummary {
|
|
method_name: method_name.clone(),
|
|
effect: transfer::ResourceEffect::Release,
|
|
class_group,
|
|
original_span: info.ast.span,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
summaries
|
|
}
|