* chore: Exclude CLAUDE.md from Cargo.toml

* feat: add callgraph module and integrate into main analysis flow

* feat: enhance CLI with new severity filtering and analysis modes

* feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling

* feat: implement state-model dataflow analysis for resource lifecycle and auth state

* feat: enhance diagnostic output formatting and add evidence structure

* feat: implement attack surface ranking for diagnostics with scoring and sorting

* feat: add comprehensive documentation for installation, usage, and rules reference

* feat: add multiple language support for command execution and evaluation endpoints

* feat: implement inline suppression for findings using `nyx:ignore` comments

* feat: add confidence levels to AST patterns and update output structure

* feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets

* feat: bump version to 0.4.0 and update changelog with new features and improvements

* feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
This commit is contained in:
Eli Peter 2026-02-25 21:16:36 -05:00 committed by GitHub
parent 19b578c5c4
commit 1bbe4b1cfb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
456 changed files with 25628 additions and 1228 deletions

313
src/state/domain.rs Normal file
View file

@ -0,0 +1,313 @@
use super::lattice::Lattice;
use super::symbol::SymbolId;
use bitflags::bitflags;
use std::collections::{HashMap, HashSet};
// ── ResourceLifecycle ────────────────────────────────────────────────────
bitflags! {
/// Bitset of possible lifecycle states for a single resource handle.
///
/// Join = bitwise OR (a variable may be in multiple states across paths).
/// A value with several bits set therefore means "one of these states,
/// depending on the path taken"; the empty set is the lattice bottom.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ResourceLifecycle: u8 {
// NOTE(review): presumably "declared but not yet acquired" — confirm
// against the transfer function, which is not fully visible here.
const UNINIT = 0b0001;
// Handle is open on at least one path. If this bit reaches an exit node
// without CLOSED or MOVED, `facts::extract_findings` reports a leak.
const OPEN = 0b0010;
// Handle has been closed on at least one path.
const CLOSED = 0b0100;
// Combined with OPEN (and no CLOSED) this suppresses leak findings in
// `facts::extract_findings`. NOTE(review): presumably set when
// ownership is handed off — confirm against the transfer function.
const MOVED = 0b1000;
}
}
/// Powerset lattice over the lifecycle flags, ordered by set inclusion.
impl Lattice for ResourceLifecycle {
    /// The empty flag set: no lifecycle state has been recorded.
    fn bot() -> Self {
        Self::empty()
    }

    /// Union of both flag sets.
    fn join(&self, other: &Self) -> Self {
        self.union(*other)
    }

    /// `self ⊑ other` exactly when every flag of `self` is also set in `other`.
    fn leq(&self, other: &Self) -> bool {
        other.contains(*self)
    }
}
// ── ResourceDomainState ──────────────────────────────────────────────────
/// Maps interned variable IDs to their lifecycle bitsets.
///
/// A variable with no entry is read back as the empty bitset by
/// [`ResourceDomainState::get`]. Note that the derived `PartialEq` compares
/// the maps structurally, so a map holding `sym -> empty` is *not* equal to
/// one without `sym`, even though `get` returns the same value for both.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ResourceDomainState {
// Per-variable lifecycle bitsets, keyed by interned symbol.
pub vars: HashMap<SymbolId, ResourceLifecycle>,
}
impl ResourceDomainState {
pub fn new() -> Self {
Self::default()
}
pub fn get(&self, sym: SymbolId) -> ResourceLifecycle {
self.vars
.get(&sym)
.copied()
.unwrap_or(ResourceLifecycle::empty())
}
pub fn set(&mut self, sym: SymbolId, state: ResourceLifecycle) {
self.vars.insert(sym, state);
}
}
/// Pointwise lifting of the [`ResourceLifecycle`] lattice to a map of variables.
impl Lattice for ResourceDomainState {
    /// The empty map: nothing tracked.
    fn bot() -> Self {
        Self::new()
    }

    /// Key-wise join: variables present on both sides have their bitsets
    /// joined; variables present on one side only are carried over as-is.
    fn join(&self, other: &Self) -> Self {
        let mut vars = self.vars.clone();
        for (&sym, &theirs) in &other.vars {
            vars.entry(sym)
                .and_modify(|ours| *ours = ours.join(&theirs))
                .or_insert(theirs);
        }
        Self { vars }
    }

    /// `self ⊑ other` when each variable tracked by `self` has a bitset that
    /// is below the one `other` reports for it (missing entries read as empty).
    fn leq(&self, other: &Self) -> bool {
        self.vars
            .iter()
            .all(|(&sym, lifecycle)| lifecycle.leq(&other.get(sym)))
    }
}
// ── AuthLevel ────────────────────────────────────────────────────────────
/// Simple ordered lattice for path authentication state.
///
/// Bot = `Unauthed`. Join = `min` (conservative: if any path is unauthed,
/// the joined state is unauthed).
///
/// The derived `Ord` follows declaration order, i.e.
/// `Unauthed < Authed < Admin`. Because join is `min` over that order, the
/// lattice order used by `leq` is the reverse of the derived one.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum AuthLevel {
// Lowest privilege; also the initial level of `AuthDomainState`.
Unauthed,
// Middle level in the derived order.
Authed,
// Highest level in the derived order.
Admin,
}
/// Lattice over [`AuthLevel`] whose join keeps the least privileged level.
impl Lattice for AuthLevel {
    /// Starting level: `Unauthed`.
    ///
    /// NOTE(review): with `join = min`, `Unauthed` absorbs rather than
    /// acts as identity (`Admin.join(&bot()) == Unauthed`); the identity of
    /// this join would be `Admin`. Confirm no caller relies on
    /// `join(x, bot()) == x` for this domain.
    fn bot() -> Self {
        Self::Unauthed
    }

    /// Least privileged of the two levels, so a single unauthenticated path
    /// makes the merged state unauthenticated.
    fn join(&self, other: &Self) -> Self {
        std::cmp::min(*self, *other)
    }

    /// `self ⊑ other` iff `join(self, other) == other`, i.e. `other` is the
    /// smaller (or equal) level in the derived order. In lattice terms this
    /// means `Admin ⊑ Authed ⊑ Unauthed`.
    fn leq(&self, other: &Self) -> bool {
        *other <= *self
    }
}
// ── AuthDomainState ──────────────────────────────────────────────────────
/// Path auth level + per-variable validation bit.
///
/// At a join only variables validated on *both* incoming states stay in
/// `validated`, and the lower of the two auth levels is kept.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AuthDomainState {
// Authentication level established on the current path.
pub auth_level: AuthLevel,
// Symbols currently considered validated.
pub validated: HashSet<SymbolId>,
}
impl Default for AuthDomainState {
    /// Unauthenticated, with no validated variables.
    fn default() -> Self {
        let auth_level = AuthLevel::Unauthed;
        let validated = HashSet::new();
        Self {
            auth_level,
            validated,
        }
    }
}
impl AuthDomainState {
    /// Same as [`Default::default`]: unauthenticated, nothing validated.
    pub fn new() -> Self {
        Default::default()
    }
}
/// Component-wise lattice: `min` on the auth level, intersection on the
/// validated set.
impl Lattice for AuthDomainState {
    /// The initial state (unauthenticated, nothing validated).
    fn bot() -> Self {
        Self::new()
    }

    /// Merge two path states: keep the lower auth level and only those
    /// variables that are validated on both sides.
    fn join(&self, other: &Self) -> Self {
        let auth_level = self.auth_level.join(&other.auth_level);
        let validated = self
            .validated
            .iter()
            .filter(|sym| other.validated.contains(*sym))
            .copied()
            .collect();
        Self {
            auth_level,
            validated,
        }
    }

    /// `self ⊑ other` when the auth levels are ordered and everything
    /// validated in `other` is also validated in `self` (consistent with an
    /// intersecting join).
    fn leq(&self, other: &Self) -> bool {
        self.auth_level.leq(&other.auth_level) && other.validated.is_subset(&self.validated)
    }
}
// ── ProductState ─────────────────────────────────────────────────────────
/// Composable product of resource and auth domains.
///
/// All lattice operations are applied to each component independently.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ProductState {
// Per-variable resource lifecycle tracking.
pub resource: ResourceDomainState,
// Path authentication level and validated variables.
pub auth: AuthDomainState,
}
impl ProductState {
    /// State used at the analysis entry: no tracked resources,
    /// unauthenticated, nothing validated.
    pub fn initial() -> Self {
        let resource = ResourceDomainState::new();
        let auth = AuthDomainState::new();
        Self { resource, auth }
    }
}
/// Product lattice: every operation delegates to both components.
impl Lattice for ProductState {
    /// Pair of the component bottoms.
    fn bot() -> Self {
        let resource = ResourceDomainState::bot();
        let auth = AuthDomainState::bot();
        Self { resource, auth }
    }

    /// Join each component with its counterpart.
    fn join(&self, other: &Self) -> Self {
        let resource = self.resource.join(&other.resource);
        let auth = self.auth.join(&other.auth);
        Self { resource, auth }
    }

    /// Ordered only when both components are ordered.
    fn leq(&self, other: &Self) -> bool {
        let resource_ok = self.resource.leq(&other.resource);
        resource_ok && self.auth.leq(&other.auth)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bitset for a handle that is open on some paths and closed on others.
    fn open_or_closed() -> ResourceLifecycle {
        ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED
    }

    #[test]
    fn resource_lifecycle_join_is_or() {
        let joined = ResourceLifecycle::OPEN.join(&ResourceLifecycle::CLOSED);
        assert_eq!(joined, open_or_closed());
    }

    #[test]
    fn resource_lifecycle_bot_identity() {
        let open = ResourceLifecycle::OPEN;
        let bottom = ResourceLifecycle::bot();
        assert_eq!(open.join(&bottom), open);
    }

    #[test]
    fn resource_lifecycle_leq() {
        let narrow = ResourceLifecycle::OPEN;
        let wide = open_or_closed();
        assert!(narrow.leq(&wide));
        assert!(!wide.leq(&narrow));
    }

    #[test]
    fn resource_domain_join_merges_keys() {
        let x = SymbolId(0);
        let y = SymbolId(1);

        let mut left = ResourceDomainState::new();
        left.set(x, ResourceLifecycle::OPEN);

        let mut right = ResourceDomainState::new();
        right.set(x, ResourceLifecycle::CLOSED);
        right.set(y, ResourceLifecycle::OPEN);

        let merged = left.join(&right);
        // Shared key: bitsets are OR-ed together.
        assert_eq!(merged.get(x), open_or_closed());
        // Key only on the right-hand side: carried over unchanged.
        assert_eq!(merged.get(y), ResourceLifecycle::OPEN);
    }

    #[test]
    fn auth_level_join_is_min() {
        let admin_vs_unauthed = AuthLevel::Admin.join(&AuthLevel::Unauthed);
        assert_eq!(admin_vs_unauthed, AuthLevel::Unauthed);

        let authed_vs_admin = AuthLevel::Authed.join(&AuthLevel::Admin);
        assert_eq!(authed_vs_admin, AuthLevel::Authed);

        let authed_vs_authed = AuthLevel::Authed.join(&AuthLevel::Authed);
        assert_eq!(authed_vs_authed, AuthLevel::Authed);
    }

    #[test]
    fn auth_domain_join_intersects_validated() {
        let (first, shared, last) = (SymbolId(0), SymbolId(1), SymbolId(2));

        let authed = AuthDomainState {
            auth_level: AuthLevel::Authed,
            validated: [first, shared].into_iter().collect(),
        };
        let admin = AuthDomainState {
            auth_level: AuthLevel::Admin,
            validated: [shared, last].into_iter().collect(),
        };

        let merged = authed.join(&admin);
        assert_eq!(merged.auth_level, AuthLevel::Authed);
        assert_eq!(merged.validated, [shared].into_iter().collect());
    }

    #[test]
    fn product_state_join() {
        let merged = ProductState::initial().join(&ProductState::initial());
        assert_eq!(merged, ProductState::initial());
    }

    #[test]
    fn may_must_leak_semantics() {
        // Must-leak: the handle is OPEN and nothing else.
        let must_leak = ResourceLifecycle::OPEN;
        assert!(must_leak.contains(ResourceLifecycle::OPEN));
        assert!(!must_leak.contains(ResourceLifecycle::CLOSED));
        assert!(!must_leak.contains(ResourceLifecycle::MOVED));

        // May-leak: some paths close the handle, some do not.
        let may_leak = open_or_closed();
        assert!(may_leak.contains(ResourceLifecycle::OPEN));
        assert!(may_leak.contains(ResourceLifecycle::CLOSED));

        // No leak: the handle is CLOSED on every path.
        let no_leak = ResourceLifecycle::CLOSED;
        assert!(!no_leak.contains(ResourceLifecycle::OPEN));
        assert!(no_leak.contains(ResourceLifecycle::CLOSED));
    }

    // The domain tests copy `SymbolId` values freely; make sure that keeps
    // compiling.
    #[test]
    fn symbol_id_is_copy() {
        let original = SymbolId(0);
        let copy = original;
        assert_eq!(original, copy);
    }
}

288
src/state/engine.rs Normal file
View file

@ -0,0 +1,288 @@
use super::lattice::Lattice;
use crate::cfg::{Cfg, EdgeKind, NodeInfo};
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use std::collections::{HashMap, VecDeque};
/// Maximum tracked variables per function (guarded degradation).
///
/// `run_state_analysis` compares the interner's symbol count against this
/// value and logs a warning when it is exceeded.
pub const MAX_TRACKED_VARS: usize = 64;
/// Default worklist iteration budget.
///
/// Returned by the default implementation of `Transfer::iteration_budget`.
pub const MAX_WORKLIST_ITERATIONS: usize = 100_000;
/// Generic transfer function trait for forward dataflow analysis.
///
/// Domains implement this to define how abstract state flows through
/// CFG nodes and what events (findings) are emitted.
pub trait Transfer<S: Lattice> {
/// Side-channel events emitted during transfer (e.g., findings, violations).
type Event: Clone;
/// Apply the transfer function to a node, returning the output state
/// and any events.
///
/// - `node` / `info`: the CFG node being processed and its payload.
/// - `edge`: `run_forward` passes `Some(kind)` once per outgoing edge of
///   the node, and `None` when the node has no outgoing edges.
/// - `state`: the abstract state at the entry of `node`.
fn apply(
&self,
node: NodeIndex,
info: &NodeInfo,
edge: Option<EdgeKind>,
state: S,
) -> (S, Vec<Self::Event>);
/// Per-domain iteration budget. Defaults to [`MAX_WORKLIST_ITERATIONS`].
fn iteration_budget(&self) -> usize {
MAX_WORKLIST_ITERATIONS
}
/// Called when the budget is exhausted. Returns true if the engine
/// should continue with the current (non-converged) state, false to bail.
///
/// The default implementation returns `false`.
fn on_budget_exceeded(&self) -> bool {
false
}
}
/// Result of running the forward dataflow engine.
///
/// `S` is the abstract state type and `E` the transfer function's event type.
pub struct DataflowResult<S, E> {
/// Converged state at the entry of each node.
///
/// Only nodes that received a state during iteration have an entry.
pub states: HashMap<NodeIndex, S>,
/// Events emitted during Phase 2 transfer over converged states.
pub events: Vec<E>,
/// Whether the analysis converged (false if budget was hit).
// NOTE(review): presumably not yet read outside tests, hence the lint
// allowance — confirm before removing it.
#[allow(dead_code)]
pub converged: bool,
}
/// Run a forward worklist dataflow analysis over the CFG.
///
/// Two-phase design:
/// - Phase 1: fixed-point iteration to compute per-node entry states
///   (events produced here are discarded).
/// - Phase 2: single pass over the resulting states to collect events.
///
/// # Parameters
/// - `cfg`: graph to analyse.
/// - `entry`: node seeded with `initial`; only nodes reachable from it get a
///   state.
/// - `transfer`: transfer function, iteration budget and budget policy.
/// - `initial`: abstract state at the entry of `entry`.
///
/// # Budget handling
/// When more than `transfer.iteration_budget()` worklist items have been
/// processed, the result is marked as not converged and
/// `transfer.on_budget_exceeded()` is consulted exactly once: `false` stops
/// Phase 1 immediately, `true` lets it keep iterating (termination then
/// relies on the lattice being finite). Phase 2 always runs over whatever
/// states Phase 1 produced.
///
/// # Returns
/// The per-node entry states, the events collected in Phase 2 (visited in
/// ascending node-index order, so the output is deterministic) and the
/// `converged` flag.
pub fn run_forward<S: Lattice, T: Transfer<S>>(
    cfg: &Cfg,
    entry: NodeIndex,
    transfer: &T,
    initial: S,
) -> DataflowResult<S, T::Event> {
    let budget = transfer.iteration_budget();

    let mut states: HashMap<NodeIndex, S> = HashMap::new();
    states.insert(entry, initial);

    // ── Phase 1: fixed-point iteration (compute converged states) ─────
    let mut worklist: VecDeque<NodeIndex> = VecDeque::new();
    // Mirrors the worklist's contents so the "already queued?" test is O(1)
    // instead of a linear scan of the deque for every changed edge.
    let mut queued: std::collections::HashSet<NodeIndex> = std::collections::HashSet::new();
    worklist.push_back(entry);
    queued.insert(entry);

    let mut iterations: usize = 0;
    let mut converged = true;

    while let Some(node) = worklist.pop_front() {
        queued.remove(&node);

        iterations += 1;
        // `converged` doubles as "budget not yet reported", so the policy
        // hook fires once, on the first iteration past the budget.
        if converged && iterations > budget {
            converged = false;
            if !transfer.on_budget_exceeded() {
                break;
            }
        }

        let Some(node_state) = states.get(&node).cloned() else {
            continue;
        };
        let info = &cfg[node];

        // Nodes without outgoing edges (exit / dead end) propagate nothing.
        for edge in cfg.edges(node) {
            let (edge_kind, target) = (*edge.weight(), edge.target());
            let (out_state, _events) =
                transfer.apply(node, info, Some(edge_kind), node_state.clone());

            // Join into the target's state; a target seen for the first time
            // simply adopts the incoming state.
            let target_state = states.get(&target);
            let new_target = match target_state {
                Some(existing) => existing.join(&out_state),
                None => out_state,
            };
            let changed = target_state.is_none_or(|existing| *existing != new_target);
            if changed {
                states.insert(target, new_target);
                if queued.insert(target) {
                    worklist.push_back(target);
                }
            }
        }
    }

    // ── Phase 2: single pass over the states to collect events ────────
    // Visit nodes in index order: iterating the HashMap directly would make
    // the order of `events` vary from run to run.
    let mut reached: Vec<NodeIndex> = states.keys().copied().collect();
    reached.sort_unstable();

    let mut events: Vec<T::Event> = Vec::new();
    let mut seen_edges: std::collections::HashSet<(NodeIndex, NodeIndex)> =
        std::collections::HashSet::new();

    for node in reached {
        let Some(node_state) = states.get(&node) else {
            continue;
        };
        let info = &cfg[node];
        let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();

        if edges.is_empty() {
            // Exit / dead end — apply transfer once for event collection.
            let (_out_state, new_events) = transfer.apply(node, info, None, node_state.clone());
            events.extend(new_events);
            continue;
        }

        for (edge_kind, target) in edges {
            // Parallel edges between the same pair of nodes are evaluated
            // only once so their events are not duplicated.
            if !seen_edges.insert((node, target)) {
                continue;
            }
            let (_out_state, new_events) =
                transfer.apply(node, info, Some(edge_kind), node_state.clone());
            events.extend(new_events);
        }
    }

    DataflowResult {
        states,
        events,
        converged,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{EdgeKind, NodeInfo, StmtKind};
    use crate::cfg_analysis::rules;
    use crate::state::domain::ResourceLifecycle;
    use crate::state::symbol::SymbolInterner;
    use crate::state::transfer::DefaultTransfer;
    use crate::symbol::Lang;
    use petgraph::Graph;

    /// Bare node of the given kind with every optional field left empty.
    fn make_node(kind: StmtKind) -> NodeInfo {
        NodeInfo {
            kind,
            span: (0, 0),
            label: None,
            defines: None,
            uses: vec![],
            callee: None,
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Call node modelling `var = fopen(...)`.
    fn fopen_into(var: &str) -> NodeInfo {
        NodeInfo {
            defines: Some(var.into()),
            callee: Some("fopen".into()),
            ..make_node(StmtKind::Call)
        }
    }

    /// Call node modelling `fclose(var)`.
    fn fclose_of(var: &str) -> NodeInfo {
        NodeInfo {
            uses: vec![var.into()],
            callee: Some("fclose".into()),
            ..make_node(StmtKind::Call)
        }
    }

    /// Transfer function configured for C, borrowing `interner`.
    fn c_transfer(interner: &SymbolInterner) -> DefaultTransfer<'_> {
        DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner,
        }
    }

    #[test]
    fn linear_cfg_converges() {
        use crate::state::domain::ProductState;

        // Entry → fopen(f) → fclose(f) → Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_into("f"));
        let close_node = cfg.add_node(fclose_of("f"));
        let exit = cfg.add_node(make_node(StmtKind::Exit));
        for (from, to) in [(entry, open_node), (open_node, close_node), (close_node, exit)] {
            cfg.add_edge(from, to, EdgeKind::Seq);
        }

        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = c_transfer(&interner);
        let result = run_forward(&cfg, entry, &transfer, ProductState::initial());

        // A clean open→close sequence produces no events and converges.
        assert!(result.events.is_empty());
        assert!(result.converged);

        // By the time Exit is reached, `f` is CLOSED.
        let sym_f = interner.get("f").unwrap();
        let at_exit = result.states.get(&exit).unwrap();
        assert_eq!(at_exit.resource.get(sym_f), ResourceLifecycle::CLOSED);
    }

    #[test]
    fn diamond_cfg_joins_states() {
        use crate::state::domain::ProductState;

        //        Entry
        //          |
        //       fopen(f)
        //          |
        //          If
        //        /    \
        //  fclose(f)  (no close)
        //        \    /
        //         Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_into("f"));
        let if_node = cfg.add_node(make_node(StmtKind::If));
        let close_node = cfg.add_node(fclose_of("f"));
        let no_close = cfg.add_node(make_node(StmtKind::Seq));
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        cfg.add_edge(entry, open_node, EdgeKind::Seq);
        cfg.add_edge(open_node, if_node, EdgeKind::Seq);
        cfg.add_edge(if_node, close_node, EdgeKind::True);
        cfg.add_edge(if_node, no_close, EdgeKind::False);
        cfg.add_edge(close_node, exit, EdgeKind::Seq);
        cfg.add_edge(no_close, exit, EdgeKind::Seq);

        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = c_transfer(&interner);
        let result = run_forward(&cfg, entry, &transfer, ProductState::initial());

        // The two branches merge at Exit, so `f` is OPEN | CLOSED (may-leak).
        let sym_f = interner.get("f").unwrap();
        let at_exit = result.states.get(&exit).unwrap();
        let expected = ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED;
        assert_eq!(at_exit.resource.get(sym_f), expected);
    }
}

355
src/state/facts.rs Normal file
View file

@ -0,0 +1,355 @@
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
use super::engine::DataflowResult;
use super::symbol::SymbolInterner;
use super::transfer::{TransferEvent, TransferEventKind};
use crate::cfg::{Cfg, StmtKind};
use crate::labels::{Cap, DataLabel};
use crate::patterns::Severity;
use crate::symbol::Lang;
use petgraph::visit::IntoNodeReferences;
/// Normalize a callee description for display.
///
/// Thin wrapper around `crate::fmt::normalize_snippet`; the exact
/// normalization rules live there and are not visible in this file.
fn sanitize_desc(s: &str) -> String {
crate::fmt::normalize_snippet(s)
}
/// A finding produced by state analysis.
///
/// Built by `extract_findings`, which de-duplicates findings on the
/// `(span, rule_id)` pair.
#[derive(Debug, Clone)]
pub struct StateFinding {
// Rule identifier, e.g. `"state-use-after-close"` or `"state-resource-leak"`.
pub rule_id: String,
// Severity assigned by the rule that produced the finding.
pub severity: Severity,
// Span copied from the CFG node the finding is attached to.
// NOTE(review): presumably byte offsets into the source — confirm.
pub span: (usize, usize),
// Human-readable description of the problem.
pub message: String,
/// State machine that produced this finding: `"resource"` or `"auth"`.
pub machine: &'static str,
/// Variable name involved, if available.
pub subject: Option<String>,
/// State before the event (e.g. `"closed"`, `"open"`, `"unauthed"`).
pub from_state: &'static str,
/// State after the event (e.g. `"used"`, `"closed"`, `"leaked"`, `"access"`).
pub to_state: &'static str,
}
/// Turn the dataflow result into user-facing findings.
///
/// Three sources are combined, in this order:
/// 1. transfer events (use-after-close, double-close),
/// 2. resource bitsets still containing `OPEN` at exit / function-return
///    nodes (definite and possible leaks),
/// 3. privileged sinks reached while unauthenticated — only when at least
///    one enclosing function looks like a web entrypoint.
///
/// The combined list is sorted by `(span, rule_id)` and de-duplicated on
/// that same pair before being returned.
pub fn extract_findings(
    result: &DataflowResult<ProductState, TransferEvent>,
    cfg: &Cfg,
    interner: &SymbolInterner,
    lang: Lang,
    func_summaries: &crate::cfg::FuncSummaries,
) -> Vec<StateFinding> {
    use petgraph::Direction;

    let mut findings = Vec::new();

    // ── 1. Illegal transitions reported by the transfer function ─────────
    for event in &result.events {
        let span = cfg[event.node].span;
        let var_name = interner.resolve(event.var);
        let (rule_id, severity, message, to_state) = match event.kind {
            TransferEventKind::UseAfterClose => (
                "state-use-after-close",
                Severity::High,
                format!("variable `{var_name}` used after close"),
                "used",
            ),
            TransferEventKind::DoubleClose => (
                "state-double-close",
                Severity::Medium,
                format!("variable `{var_name}` closed twice"),
                "closed",
            ),
        };
        findings.push(StateFinding {
            rule_id: rule_id.into(),
            severity,
            span,
            message,
            machine: "resource",
            subject: Some(var_name.to_string()),
            from_state: "closed",
            to_state,
        });
    }

    // ── 2. Resource leaks at Exit and function-Return nodes ──────────────
    for (idx, info) in cfg.node_references() {
        // Two kinds of node count as "the end": the file-level Exit node and
        // a Return node that belongs to a function.
        let at_file_exit = info.kind == StmtKind::Exit;
        let at_func_return = info.kind == StmtKind::Return && info.enclosing_func.is_some();
        if !(at_file_exit || at_func_return) {
            continue;
        }

        if at_func_return {
            // A Return that feeds another Return of the same function is an
            // early return; it only carries path-specific state, so the
            // check is left to the Return node it flows into.
            let feeds_later_return = cfg
                .neighbors_directed(idx, Direction::Outgoing)
                .any(|succ| {
                    let next = &cfg[succ];
                    next.kind == StmtKind::Return && next.enclosing_func == info.enclosing_func
                });
            if feeds_later_return {
                continue;
            }
        }

        let Some(state) = result.states.get(&idx) else {
            continue;
        };

        for (&sym, &lifecycle) in &state.resource.vars {
            if !lifecycle.contains(ResourceLifecycle::OPEN) {
                continue;
            }
            let var_name = interner.resolve(sym);
            let closed_somewhere = lifecycle.contains(ResourceLifecycle::CLOSED);
            let moved_somewhere = lifecycle.contains(ResourceLifecycle::MOVED);

            let (rule_id, severity, message, to_state) = if closed_somewhere {
                // May-leak: open on some paths, closed on others.
                (
                    "state-resource-leak-possible",
                    Severity::Low,
                    format!("resource `{var_name}` may not be closed on all paths"),
                    "possibly_leaked",
                )
            } else if moved_somewhere {
                // Open or moved, never closed: not reported.
                continue;
            } else {
                // Definite leak: open on all paths, never closed or moved.
                (
                    "state-resource-leak",
                    Severity::Medium,
                    format!("resource `{var_name}` is never closed"),
                    "leaked",
                )
            };

            // Prefer pointing at the acquiring call; fall back to the exit
            // node when no such call is found.
            let span = find_acquire_span(cfg, sym, interner).unwrap_or(info.span);
            findings.push(StateFinding {
                rule_id: rule_id.into(),
                severity,
                span,
                message,
                machine: "resource",
                subject: Some(var_name.to_string()),
                from_state: "open",
                to_state,
            });
        }
    }

    // ── 3. Auth-required sinks ───────────────────────────────────────────
    // Only relevant when some node sits inside a web-entrypoint-like function.
    let has_web_entrypoint = cfg.node_references().any(|(_, info)| {
        info.enclosing_func
            .as_deref()
            .is_some_and(|func_name| is_web_entrypoint_simple(func_name, lang, func_summaries, cfg))
    });

    if has_web_entrypoint {
        let privileged_sinks = cfg
            .node_references()
            .filter(|(_, info)| is_privileged_sink(info));
        for (idx, info) in privileged_sinks {
            let Some(state) = result.states.get(&idx) else {
                continue;
            };
            if state.auth.auth_level != AuthLevel::Unauthed {
                continue;
            }
            let callee_desc = sanitize_desc(info.callee.as_deref().unwrap_or("(sensitive op)"));
            findings.push(StateFinding {
                rule_id: "state-unauthed-access".into(),
                severity: Severity::High,
                span: info.span,
                message: format!(
                    "sensitive operation `{callee_desc}` reached without authentication"
                ),
                machine: "auth",
                subject: None,
                from_state: "unauthed",
                to_state: "access",
            });
        }
    }

    // Stable sort followed by dedup keeps the first finding per (span, rule).
    findings.sort_by(|a, b| (a.span, &a.rule_id).cmp(&(b.span, &b.rule_id)));
    findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
    findings
}
/// Locate the span of the first `Call` node (in node iteration order) whose
/// `defines` is the variable named by `sym`.
///
/// Returns `None` when no call node defines that variable.
fn find_acquire_span(
    cfg: &Cfg,
    sym: super::symbol::SymbolId,
    interner: &SymbolInterner,
) -> Option<(usize, usize)> {
    let wanted = interner.resolve(sym);
    cfg.node_references()
        .find(|(_, info)| {
            info.kind == StmtKind::Call && info.defines.as_deref() == Some(wanted)
        })
        .map(|(_, info)| info.span)
}
/// Whether `info` is labelled as a sink with the shell-escape or file-I/O
/// capability.
fn is_privileged_sink(info: &crate::cfg::NodeInfo) -> bool {
    if let Some(DataLabel::Sink(caps)) = info.label {
        caps.intersects(Cap::SHELL_ESCAPE | Cap::FILE_IO)
    } else {
        false
    }
}
/// Name-based heuristic for "this function is a web request handler"
/// (kept free of any `AnalysisContext` dependency).
///
/// - `main` (case-insensitive) is never an entrypoint.
/// - Names starting with `handle_`, `route_` or `api_` always qualify.
/// - Names starting with `serve_` or `process_`, or equal to `handler`,
///   qualify only if a web-like parameter name is found.
///
/// NOTE(review): the parameter check scans *every* entry of
/// `func_summaries`, not just the summary of `func_name` — confirm this is
/// intended.
fn is_web_entrypoint_simple(
    func_name: &str,
    lang: Lang,
    func_summaries: &crate::cfg::FuncSummaries,
    _cfg: &Cfg,
) -> bool {
    // Prefixes that are convincing on their own.
    const STRONG_PREFIXES: [&str; 3] = ["handle_", "route_", "api_"];
    // Prefixes that need corroborating web-like parameters.
    const WEAK_PREFIXES: [&str; 2] = ["serve_", "process_"];

    let name_lower = func_name.to_ascii_lowercase();

    // A bare "main" is typically a CLI entry, not a request handler.
    if name_lower == "main" {
        return false;
    }

    if STRONG_PREFIXES
        .iter()
        .any(|&prefix| name_lower.starts_with(prefix))
    {
        return true;
    }

    let weak_name = name_lower == "handler"
        || WEAK_PREFIXES
            .iter()
            .any(|&prefix| name_lower.starts_with(prefix));
    if !weak_name {
        return false;
    }

    // Parameter names that suggest a request/response style signature.
    let web_params: &[&str] = match lang {
        Lang::Rust => &["request", "req", "json", "query", "form", "payload", "body"],
        Lang::JavaScript | Lang::TypeScript => &["req", "request", "ctx", "res", "response"],
        Lang::Python => &["request", "req"],
        Lang::Go => &["w", "writer", "r", "req", "request"],
        Lang::Java => &["request", "req"],
        _ => &["request", "req"],
    };

    func_summaries.values().any(|summary| {
        summary
            .param_names
            .iter()
            .any(|param| web_params.contains(&param.to_ascii_lowercase().as_str()))
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{EdgeKind, NodeInfo};
    use crate::cfg_analysis::rules;
    use crate::state::domain::ProductState;
    use crate::state::engine;
    use crate::state::symbol::SymbolInterner;
    use crate::state::transfer::DefaultTransfer;
    use petgraph::Graph;
    use std::collections::HashMap;

    /// Bare node of the given kind with every optional field left empty.
    fn make_node(kind: StmtKind) -> NodeInfo {
        NodeInfo {
            kind,
            span: (0, 0),
            label: None,
            defines: None,
            uses: vec![],
            callee: None,
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Run the engine with the C transfer function and extract findings.
    fn analyze_as_c(cfg: &Cfg, entry: petgraph::graph::NodeIndex) -> Vec<StateFinding> {
        let interner = SymbolInterner::from_cfg(cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
        };
        let result = engine::run_forward(cfg, entry, &transfer, ProductState::initial());
        extract_findings(&result, cfg, &interner, Lang::C, &HashMap::new())
    }

    #[test]
    fn detects_resource_leak() {
        // Entry → fopen(f) → Exit (no close)
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(NodeInfo {
            span: (10, 20),
            defines: Some("f".into()),
            callee: Some("fopen".into()),
            ..make_node(StmtKind::Call)
        });
        let exit = cfg.add_node(make_node(StmtKind::Exit));
        cfg.add_edge(entry, open_node, EdgeKind::Seq);
        cfg.add_edge(open_node, exit, EdgeKind::Seq);

        let findings = analyze_as_c(&cfg, entry);

        assert_eq!(findings.len(), 1);
        let leak = &findings[0];
        assert_eq!(leak.rule_id, "state-resource-leak");
        assert!(leak.message.contains("f"));
    }

    #[test]
    fn clean_open_close_no_findings() {
        // Entry → fopen(f) → fclose(f) → Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(NodeInfo {
            defines: Some("f".into()),
            callee: Some("fopen".into()),
            ..make_node(StmtKind::Call)
        });
        let close_node = cfg.add_node(NodeInfo {
            uses: vec!["f".into()],
            callee: Some("fclose".into()),
            ..make_node(StmtKind::Call)
        });
        let exit = cfg.add_node(make_node(StmtKind::Exit));
        for (from, to) in [(entry, open_node), (open_node, close_node), (close_node, exit)] {
            cfg.add_edge(from, to, EdgeKind::Seq);
        }

        let findings = analyze_as_c(&cfg, entry);

        assert!(findings.is_empty());
    }
}

91
src/state/lattice.rs Normal file
View file

@ -0,0 +1,91 @@
/// A bounded semi-lattice with bottom element and monotone join.
///
/// Implementations must satisfy:
/// - `join` is commutative, associative, and idempotent
/// - `bot()` is the identity for `join`
/// - `leq(a, b)` iff `join(a, b) == b`
///
/// The `Clone + Eq` supertraits are what `engine::run_forward` uses to copy
/// states between nodes and to detect whether a join changed anything.
// NOTE(review): presumably allowed because not every method is called from
// non-test code yet — confirm before removing.
#[allow(dead_code)]
pub trait Lattice: Clone + Eq + Sized {
/// Bottom element (least information / unreachable).
fn bot() -> Self;
/// Least upper bound: merge two abstract values.
fn join(&self, other: &Self) -> Self;
/// Partial order: `self ⊑ other`.
fn leq(&self, other: &Self) -> bool;
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-point chain `0 ⊑ 1 ⊑ 2` used to exercise the trait contract.
    #[derive(Clone, Debug, PartialEq, Eq)]
    struct Three(u8); // 0=bot, 1, 2=top-ish

    impl Lattice for Three {
        fn bot() -> Self {
            Self(0)
        }

        fn join(&self, other: &Self) -> Self {
            Self(std::cmp::max(self.0, other.0))
        }

        fn leq(&self, other: &Self) -> bool {
            other.0 >= self.0
        }
    }

    #[test]
    fn bot_identity() {
        let mid = Three(1);
        let bottom = Three::bot();
        // Bottom is neutral on either side of the join.
        assert_eq!(mid.join(&bottom), mid);
        assert_eq!(bottom.join(&mid), mid);
    }

    #[test]
    fn join_commutative() {
        let (lo, hi) = (Three(1), Three(2));
        assert_eq!(lo.join(&hi), hi.join(&lo));
    }

    #[test]
    fn join_associative() {
        let (x, y, z) = (Three(0), Three(1), Three(2));
        let left_first = x.join(&y).join(&z);
        let right_first = x.join(&y.join(&z));
        assert_eq!(left_first, right_first);
    }

    #[test]
    fn join_idempotent() {
        let value = Three(1);
        assert_eq!(value.join(&value), value);
    }

    #[test]
    fn leq_reflexive() {
        let value = Three(1);
        assert!(value.leq(&value));
    }

    #[test]
    fn leq_transitive() {
        let (x, y, z) = (Three(0), Three(1), Three(2));
        assert!(x.leq(&y));
        assert!(y.leq(&z));
        assert!(x.leq(&z));
    }

    #[test]
    fn leq_consistent_with_join() {
        let (lo, hi) = (Three(1), Three(2));
        // lo ⊑ hi iff join(lo, hi) == hi
        assert!(lo.leq(&hi));
        assert_eq!(lo.join(&hi), hi);
    }
}

62
src/state/mod.rs Normal file
View file

@ -0,0 +1,62 @@
pub mod domain;
pub mod engine;
pub mod facts;
pub mod lattice;
pub mod symbol;
pub mod transfer;
use crate::cfg::{Cfg, FuncSummaries};
use crate::cfg_analysis::rules;
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
use domain::ProductState;
use engine::MAX_TRACKED_VARS;
use facts::StateFinding;
use petgraph::graph::NodeIndex;
use symbol::SymbolInterner;
use transfer::DefaultTransfer;
/// Run the state-model dataflow analysis over one function's CFG.
///
/// Pipeline: intern the CFG's variable names, build the default transfer
/// function for `lang`, run the forward engine from `entry`, then convert
/// the result into findings (use-after-close, double-close, resource leaks,
/// unauthenticated access to sensitive sinks).
///
/// `_source_bytes` and `_global_summaries` are accepted but not used here.
pub fn run_state_analysis(
    cfg: &Cfg,
    entry: NodeIndex,
    lang: Lang,
    _source_bytes: &[u8],
    func_summaries: &FuncSummaries,
    _global_summaries: Option<&GlobalSummaries>,
) -> Vec<StateFinding> {
    let _span = tracing::debug_span!("run_state_analysis").entered();

    // Every `defines`/`uses` name in the CFG gets a stable id.
    let interner = SymbolInterner::from_cfg(cfg);

    // Guarded degradation: only a warning is emitted here and the analysis
    // still runs with the full interner.
    // NOTE(review): nothing in this function enforces the cap; whether the
    // transfer function limits tracking is not visible from here — confirm.
    let symbol_count = interner.len();
    if symbol_count > MAX_TRACKED_VARS {
        tracing::warn!(
            symbols = symbol_count,
            max = MAX_TRACKED_VARS,
            "state analysis: too many variables, capping tracking"
        );
    }

    let transfer = DefaultTransfer {
        lang,
        resource_pairs: rules::resource_pairs(lang),
        interner: &interner,
    };

    let result = engine::run_forward(cfg, entry, &transfer, ProductState::initial());
    facts::extract_findings(&result, cfg, &interner, lang, func_summaries)
}

101
src/state/symbol.rs Normal file
View file

@ -0,0 +1,101 @@
use crate::cfg::Cfg;
use petgraph::visit::IntoNodeReferences;
use std::collections::HashMap;
/// Cheap `Copy` handle into a [`SymbolInterner`].
///
/// The wrapped value is the index of the name in the interner that issued
/// it; an id is only meaningful together with that interner.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SymbolId(pub(crate) u32);
/// Per-function interner: maps `String` ↔ [`SymbolId`].
///
/// Built once from CFG node `defines`/`uses`, reused throughout analysis.
#[derive(Default)]
pub struct SymbolInterner {
// Name → id lookup.
to_id: HashMap<String, SymbolId>,
// Id → name lookup; a symbol's id is its index in this vector.
to_str: Vec<String>,
}
impl SymbolInterner {
    /// Create an empty interner.
    pub fn new() -> Self {
        Self::default()
    }

    /// Intern a name, returning its stable [`SymbolId`].
    ///
    /// Interning the same name again returns the id handed out the first
    /// time.
    ///
    /// # Panics
    /// Panics if more than `u32::MAX` distinct names are interned. The
    /// previous `as u32` cast would have wrapped silently and made two
    /// different names share an id.
    pub fn intern(&mut self, name: &str) -> SymbolId {
        if let Some(&id) = self.to_id.get(name) {
            return id;
        }
        let index = u32::try_from(self.to_str.len())
            .expect("symbol interner overflow: more than u32::MAX distinct symbols");
        let id = SymbolId(index);
        self.to_str.push(name.to_owned());
        self.to_id.insert(name.to_owned(), id);
        id
    }

    /// Look up a name without interning it.
    pub fn get(&self, name: &str) -> Option<SymbolId> {
        self.to_id.get(name).copied()
    }

    /// Resolve an id back to its string.
    ///
    /// # Panics
    /// Panics if `id` was not issued by this interner (index out of range).
    pub fn resolve(&self, id: SymbolId) -> &str {
        &self.to_str[id.0 as usize]
    }

    /// Number of interned symbols.
    pub fn len(&self) -> usize {
        self.to_str.len()
    }

    /// Whether the interner is empty.
    // Currently unused outside tests; kept so `len` has its conventional
    // `is_empty` counterpart.
    #[allow(dead_code)]
    pub fn is_empty(&self) -> bool {
        self.to_str.is_empty()
    }

    /// Build from a CFG: walk all nodes, intern every `defines`/`uses` string.
    ///
    /// For each node the `defines` name (if any) is interned before the
    /// node's `uses`, so ids follow node iteration order.
    pub fn from_cfg(cfg: &Cfg) -> Self {
        let mut interner = Self::new();
        for (_idx, info) in cfg.node_references() {
            if let Some(ref d) = info.defines {
                interner.intern(d);
            }
            for u in &info.uses {
                interner.intern(u);
            }
        }
        interner
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn intern_resolve_roundtrip() {
        let mut interner = SymbolInterner::new();
        let foo = interner.intern("foo");
        let bar = interner.intern("bar");
        let foo_again = interner.intern("foo");

        // Same name → same id; different names → different ids.
        assert_eq!(foo, foo_again);
        assert_ne!(foo, bar);

        // Ids map back to the names they were created from.
        assert_eq!(interner.resolve(foo), "foo");
        assert_eq!(interner.resolve(bar), "bar");
    }

    #[test]
    fn get_returns_none_for_unknown() {
        let empty = SymbolInterner::new();
        assert!(empty.get("missing").is_none());
    }

    #[test]
    fn len_tracks_unique_symbols() {
        let mut interner = SymbolInterner::new();
        // "a" appears twice but must only be counted once.
        for name in ["a", "b", "a"] {
            interner.intern(name);
        }
        assert_eq!(interner.len(), 2);
    }
}

426
src/state/transfer.rs Normal file
View file

@ -0,0 +1,426 @@
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
use super::engine::Transfer;
use super::symbol::{SymbolId, SymbolInterner};
use crate::cfg::{EdgeKind, NodeInfo, StmtKind};
use crate::cfg_analysis::rules::{self, ResourcePair};
use crate::symbol::Lang;
use petgraph::graph::NodeIndex;
/// Events emitted during transfer for illegal state transitions.
/// These are NOT lattice values — they become findings in `facts.rs`.
#[derive(Debug, Clone)]
pub struct TransferEvent {
/// Which illegal transition was observed.
pub kind: TransferEventKind,
/// CFG node whose transfer produced the event.
pub node: NodeIndex,
/// Interned symbol of the variable the event is about.
pub var: SymbolId,
}
/// Kind of illegal resource-state transition reported by the transfer function.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferEventKind {
/// A resource-use call received a variable whose lifecycle is exactly `CLOSED`.
UseAfterClose,
/// A release call received a variable whose lifecycle is already exactly `CLOSED`.
DoubleClose,
}
/// Resource-use patterns: callees that read/write/operate on a resource handle
/// (triggering use-after-close if the handle is closed).
///
/// Each entry is tested against the lowercased callee name with
/// `callee_matches`, i.e. as a suffix of the callee rather than an exact name.
// NOTE(review): because matching is by suffix, short entries such as "read"
// also match unrelated callees that merely end in the same letters (for
// example a name ending in "thread") — confirm this over-approximation is
// intended.
static RESOURCE_USE_PATTERNS: &[&str] = &[
"read", "write", "send", "recv", "fread", "fwrite", "fgets", "fputs", "fprintf", "fscanf",
"fflush", "fseek", "ftell", "rewind", "feof", "ferror", "fgetc", "fputc", "getc", "putc",
"ungetc", "query", "execute", "fetch", "sendto", "recvfrom", "ioctl", "fcntl",
// Memory access functions (for malloc/free use-after-free detection)
"strcpy", "strncpy", "strcat", "strncat", "memcpy", "memmove", "memset", "memcmp", "strcmp",
"strncmp", "strlen", "sprintf", "snprintf",
];
/// Auth-call matchers for admin-level privilege.
///
/// Entries must stay lowercase: `apply_if` searches for them with a plain
/// substring test in the lowercased condition text, and `apply_call` compares
/// them against the lowercased callee name.
static ADMIN_PATTERNS: &[&str] = &[
"is_admin",
"hasrole",
"has_role",
"check_admin",
"require_admin",
];
/// Transfer function over [`ProductState`] driven by per-language rules.
pub struct DefaultTransfer<'a> {
/// Language whose auth rules (`rules::auth_rules`) are consulted.
pub lang: Lang,
/// Acquire/release callee patterns used to track resource handles.
pub resource_pairs: &'a [ResourcePair],
/// Symbol table used for lookups only; names that were never interned are ignored.
pub interner: &'a SymbolInterner,
}
impl Transfer<ProductState> for DefaultTransfer<'_> {
    type Event = TransferEvent;

    /// Run one CFG node through the transfer function.
    ///
    /// Dispatches on the node's statement kind: calls, `if` conditions and
    /// `Seq` nodes (handled as assignments) update `state`; every other kind
    /// passes the state through untouched. Only call nodes can emit events.
    ///
    /// Returns the updated state together with the events emitted at this node.
    fn apply(
        &self,
        node_idx: NodeIndex,
        info: &NodeInfo,
        edge: Option<EdgeKind>,
        mut state: ProductState,
    ) -> (ProductState, Vec<TransferEvent>) {
        let mut emitted: Vec<TransferEvent> = Vec::new();

        match info.kind {
            StmtKind::Call => self.apply_call(node_idx, info, &mut state, &mut emitted),
            StmtKind::If => self.apply_if(info, edge, &mut state),
            StmtKind::Seq => self.apply_assignment(node_idx, info, &mut state),
            _ => {}
        }

        (state, emitted)
    }
}
impl DefaultTransfer<'_> {
    /// Apply the effects of a call node to `state`, recording illegal
    /// transitions in `events`.
    ///
    /// Nodes without a callee are ignored. Otherwise the lowercased callee is
    /// checked, in this order, against:
    ///
    /// 1. acquire patterns — the defined variable becomes `OPEN`;
    /// 2. release patterns — each used variable goes `OPEN` → `CLOSED`, or a
    ///    `DoubleClose` event is emitted if it is already exactly `CLOSED`;
    /// 3. resource-use patterns — a `UseAfterClose` event is emitted for each
    ///    used variable that is exactly `CLOSED`;
    /// 4. auth rules — the auth level is raised (never lowered);
    /// 5. guard-like names — every used variable is marked validated.
    ///
    /// Variables unknown to the interner are skipped throughout.
    fn apply_call(
        &self,
        node_idx: NodeIndex,
        info: &NodeInfo,
        state: &mut ProductState,
        events: &mut Vec<TransferEvent>,
    ) {
        let callee = match &info.callee {
            Some(c) => c.to_ascii_lowercase(),
            None => return,
        };

        // ── Resource acquire ─────────────────────────────────────────────
        for pair in self.resource_pairs {
            let is_acquire = pair.acquire.iter().any(|a| callee_matches(&callee, a));
            let is_excluded = pair
                .exclude_acquire
                .iter()
                .any(|e| callee_matches(&callee, e));
            if is_acquire
                && !is_excluded
                && let Some(ref def) = info.defines
                && let Some(sym) = self.interner.get(def)
            {
                state.resource.set(sym, ResourceLifecycle::OPEN);
            }
        }

        // ── Resource release ─────────────────────────────────────────────
        // Track which variables have already been released to avoid double-
        // matching across multiple resource pair definitions (and across a
        // variable that is listed more than once in `uses`).
        let mut released: smallvec::SmallVec<[SymbolId; 4]> = smallvec::SmallVec::new();
        for pair in self.resource_pairs {
            let is_release = pair.release.iter().any(|r| callee_matches(&callee, r));
            if !is_release {
                continue;
            }
            for used in &info.uses {
                let Some(sym) = self.interner.get(used) else {
                    continue;
                };
                if released.contains(&sym) {
                    continue;
                }
                let current = state.resource.get(sym);
                if current == ResourceLifecycle::CLOSED {
                    // Closed on every path reaching this node: double close.
                    events.push(TransferEvent {
                        kind: TransferEventKind::DoubleClose,
                        node: node_idx,
                        var: sym,
                    });
                } else if current.contains(ResourceLifecycle::OPEN) {
                    state.resource.set(sym, ResourceLifecycle::CLOSED);
                }
                released.push(sym);
            }
        }

        // ── Resource use (read/write/etc.) ───────────────────────────────
        let is_use = RESOURCE_USE_PATTERNS
            .iter()
            .any(|p| callee_matches(&callee, p));
        if is_use {
            for used in &info.uses {
                if let Some(sym) = self.interner.get(used) {
                    let current = state.resource.get(sym);
                    // Only report when the handle is closed on every path.
                    if current == ResourceLifecycle::CLOSED {
                        events.push(TransferEvent {
                            kind: TransferEventKind::UseAfterClose,
                            node: node_idx,
                            var: sym,
                        });
                    }
                }
            }
        }

        // ── Auth call ────────────────────────────────────────────────────
        let auth_rules = rules::auth_rules(self.lang);
        let is_auth = auth_rules.iter().any(|rule| {
            rule.matchers
                .iter()
                .any(|m| callee_matches(&callee, &m.to_ascii_lowercase()))
        });
        if is_auth {
            let is_admin = ADMIN_PATTERNS.iter().any(|p| callee_matches(&callee, p));
            let new_level = if is_admin {
                AuthLevel::Admin
            } else {
                AuthLevel::Authed
            };
            // Auth level only ever goes up within a path.
            if new_level > state.auth.auth_level {
                state.auth.auth_level = new_level;
            }
        }

        // ── Validation call (guard) ──────────────────────────────────────
        if is_guard_like(&callee) {
            for used in &info.uses {
                if let Some(sym) = self.interner.get(used) {
                    state.auth.validated.insert(sym);
                }
            }
        }
    }

    /// Refine `state` along the `True` edge of an `if` node.
    ///
    /// Nothing happens on any other edge, when the node has no condition
    /// text, or when the condition is negated. Otherwise:
    ///
    /// * if the lowercased condition text contains an auth matcher, the auth
    ///   level is raised to `Authed`, or to `Admin` when the text also
    ///   contains an admin pattern;
    /// * if the lowercased condition text is guard-like, every condition
    ///   variable known to the interner is marked validated.
    fn apply_if(&self, info: &NodeInfo, edge: Option<EdgeKind>, state: &mut ProductState) {
        // On the True edge of an If node whose condition is an auth check,
        // refine auth level.
        let is_true_edge = matches!(edge, Some(EdgeKind::True));
        if !is_true_edge {
            return;
        }

        if let Some(ref cond) = info.condition_text {
            let cond_lower = cond.to_ascii_lowercase();

            // Auth-related condition
            let auth_rules = rules::auth_rules(self.lang);
            let is_auth_cond = auth_rules.iter().any(|rule| {
                rule.matchers
                    .iter()
                    .any(|m| cond_lower.contains(&m.to_ascii_lowercase()))
            });
            if is_auth_cond && !info.condition_negated {
                let is_admin = ADMIN_PATTERNS.iter().any(|p| cond_lower.contains(p));
                let new_level = if is_admin {
                    AuthLevel::Admin
                } else {
                    AuthLevel::Authed
                };
                if new_level > state.auth.auth_level {
                    state.auth.auth_level = new_level;
                }
            }

            // Validation-related condition
            if is_guard_like(&cond_lower) && !info.condition_negated {
                for var in &info.condition_vars {
                    if let Some(sym) = self.interner.get(var) {
                        state.auth.validated.insert(sym);
                    }
                }
            }
        }
    }

    /// Model ownership transfer through an assignment.
    ///
    /// If the node defines a known variable and one of its used variables is
    /// (possibly) `OPEN`, the first such variable's lifecycle is copied to the
    /// defined variable and the source is marked `MOVED`. At most one transfer
    /// happens per node.
    ///
    /// A use of the defined variable itself (`f = f`, `f = f + 1`) is not a
    /// move source: the handle stays with the same variable.
    fn apply_assignment(&self, _node_idx: NodeIndex, info: &NodeInfo, state: &mut ProductState) {
        // Ownership transfer: if `defines` reassigns a tracked resource
        // variable from a `uses` variable, transfer the lifecycle.
        if let Some(ref def) = info.defines
            && let Some(def_sym) = self.interner.get(def)
        {
            // If the RHS is a tracked resource, transfer its state
            for used in &info.uses {
                if let Some(use_sym) = self.interner.get(used) {
                    // Without this guard the second `set` below would
                    // overwrite the state just written for the same symbol
                    // and leave a still-open handle marked as `MOVED`.
                    if use_sym == def_sym {
                        continue;
                    }
                    let lc = state.resource.get(use_sym);
                    if lc.contains(ResourceLifecycle::OPEN) {
                        state.resource.set(def_sym, lc);
                        state.resource.set(use_sym, ResourceLifecycle::MOVED);
                        return;
                    }
                }
            }
        }
    }
}
/// Check if a callee matches a pattern.
///
/// A pattern matches when `callee` ends with it. This covers exact names
/// ("fopen" matches "fopen"), suffixes ("fclose" matches "my_fclose") and
/// dot-prefixed method patterns (".close" matches "file.close").
///
/// Only the pattern is ASCII-lowercased for the comparison; `callee` is
/// compared as given, so callers pass an already lowercased callee name.
/// The comparison is done byte-wise and does not allocate.
///
/// An empty pattern never matches: it would otherwise be a suffix of every
/// callee and turn a malformed rule into a match-all.
fn callee_matches(callee: &str, pattern: &str) -> bool {
    let callee = callee.as_bytes();
    let pattern = pattern.as_bytes();
    if pattern.is_empty() || pattern.len() > callee.len() {
        return false;
    }
    let tail = &callee[callee.len() - pattern.len()..];
    tail.iter()
        .zip(pattern)
        .all(|(&c, &p)| c == p.to_ascii_lowercase())
}
/// Report whether `callee` starts with one of the known guard/validation
/// prefixes (`validate`, `sanitize`, `check_`, `verify_`, `assert_`).
///
/// The comparison is case-sensitive and anchored at the start of the string.
fn is_guard_like(callee: &str) -> bool {
    const GUARD_PREFIXES: &[&str] = &["validate", "sanitize", "check_", "verify_", "assert_"];
    for prefix in GUARD_PREFIXES {
        if callee.starts_with(prefix) {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Transfer function wired up with the C resource rules.
    fn c_transfer(interner: &SymbolInterner) -> DefaultTransfer<'_> {
        DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner,
        }
    }

    /// Call node for `callee` with the given defined/used variable names.
    /// Tests override `span` via struct-update syntax where it differs.
    fn call_node(callee: &str, defines: Option<&str>, uses: &[&str]) -> NodeInfo {
        NodeInfo {
            kind: StmtKind::Call,
            span: (0, 10),
            label: None,
            defines: defines.map(Into::into),
            uses: uses.iter().map(|&name| name.into()).collect(),
            callee: Some(callee.into()),
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Initial state in which `sym` already has the given lifecycle.
    fn state_with(sym: SymbolId, lifecycle: ResourceLifecycle) -> ProductState {
        let mut state = ProductState::initial();
        state.resource.set(sym, lifecycle);
        state
    }

    #[test]
    fn callee_matches_exact() {
        assert!(callee_matches("fopen", "fopen"));
        assert!(!callee_matches("fopen", "fclose"));
    }

    #[test]
    fn callee_matches_suffix() {
        assert!(callee_matches("curlx_fclose", "fclose"));
    }

    #[test]
    fn callee_matches_dot_prefix() {
        assert!(callee_matches("file.close", ".close"));
        assert!(!callee_matches("file.close", ".open"));
    }

    #[test]
    fn acquire_sets_open() {
        let mut interner = SymbolInterner::new();
        let handle = interner.intern("f");
        let node = call_node("fopen", Some("f"), &[]);

        let (after, events) =
            c_transfer(&interner).apply(NodeIndex::new(0), &node, None, ProductState::initial());

        assert!(events.is_empty());
        assert_eq!(after.resource.get(handle), ResourceLifecycle::OPEN);
    }

    #[test]
    fn close_after_open_sets_closed() {
        let mut interner = SymbolInterner::new();
        let handle = interner.intern("f");
        let before = state_with(handle, ResourceLifecycle::OPEN);
        let node = NodeInfo {
            span: (10, 20),
            ..call_node("fclose", None, &["f"])
        };

        let (after, events) = c_transfer(&interner).apply(NodeIndex::new(1), &node, None, before);

        assert!(events.is_empty());
        assert_eq!(after.resource.get(handle), ResourceLifecycle::CLOSED);
    }

    #[test]
    fn double_close_emits_event() {
        let mut interner = SymbolInterner::new();
        let handle = interner.intern("f");
        let before = state_with(handle, ResourceLifecycle::CLOSED);
        let node = NodeInfo {
            span: (20, 30),
            ..call_node("fclose", None, &["f"])
        };

        let (_after, events) = c_transfer(&interner).apply(NodeIndex::new(2), &node, None, before);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].kind, TransferEventKind::DoubleClose);
        assert_eq!(events[0].var, handle);
    }

    #[test]
    fn use_after_close_emits_event() {
        let mut interner = SymbolInterner::new();
        let handle = interner.intern("f");
        let before = state_with(handle, ResourceLifecycle::CLOSED);
        let node = NodeInfo {
            span: (30, 40),
            ..call_node("fread", None, &["f"])
        };

        let (_after, events) = c_transfer(&interner).apply(NodeIndex::new(3), &node, None, before);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].kind, TransferEventKind::UseAfterClose);
    }

    #[test]
    fn is_guard_like_check() {
        for guard in ["validate_input", "sanitize_html", "check_permission"] {
            assert!(is_guard_like(guard));
        }
        assert!(!is_guard_like("open_file"));
    }
}