mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Phase 1 (#33)
* chore: Exclude CLAUDE.md from Cargo.toml * feat: add callgraph module and integrate into main analysis flow * feat: enhance CLI with new severity filtering and analysis modes * feat: update CHANGELOG with recent enhancements and fixes to severity filtering and output handling * feat: implement state-model dataflow analysis for resource lifecycle and auth state * feat: enhance diagnostic output formatting and add evidence structure * feat: implement attack surface ranking for diagnostics with scoring and sorting * feat: add comprehensive documentation for installation, usage, and rules reference * feat: add multiple language support for command execution and evaluation endpoints * feat: implement inline suppression for findings using `nyx:ignore` comments * feat: add confidence levels to AST patterns and update output structure * feat: implement low-noise prioritization system with category filtering, rollup grouping, and configurable budgets * feat: bump version to 0.4.0 and update changelog with new features and improvements * feat: add dead code allowances to various functions in mod.rs and real_world_tests.rs
This commit is contained in:
parent
19b578c5c4
commit
1bbe4b1cfb
456 changed files with 25628 additions and 1228 deletions
313
src/state/domain.rs
Normal file
313
src/state/domain.rs
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
use super::lattice::Lattice;
|
||||
use super::symbol::SymbolId;
|
||||
use bitflags::bitflags;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
// ── ResourceLifecycle ────────────────────────────────────────────────────
|
||||
|
||||
bitflags! {
    /// Set of lifecycle states a single resource handle may currently be in.
    ///
    /// Every flag is one state. A value with several flags set means the
    /// handle may be in any of those states, because states arriving from
    /// different paths are merged with bitwise OR.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
    pub struct ResourceLifecycle: u8 {
        const UNINIT = 1 << 0;
        const OPEN = 1 << 1;
        const CLOSED = 1 << 2;
        const MOVED = 1 << 3;
    }
}
|
||||
|
||||
impl Lattice for ResourceLifecycle {
    /// Bottom is the empty bitset: no lifecycle state recorded.
    fn bot() -> Self {
        Self::empty()
    }

    /// Join is set union: the result carries every flag set in either operand.
    fn join(&self, other: &Self) -> Self {
        self.union(*other)
    }

    /// `self ⊑ other` holds exactly when every flag of `self` is also set in `other`.
    fn leq(&self, other: &Self) -> bool {
        other.contains(*self)
    }
}
|
||||
|
||||
// ── ResourceDomainState ──────────────────────────────────────────────────
|
||||
|
||||
/// Maps interned variable IDs to their lifecycle bitsets.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct ResourceDomainState {
|
||||
pub vars: HashMap<SymbolId, ResourceLifecycle>,
|
||||
}
|
||||
|
||||
impl ResourceDomainState {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn get(&self, sym: SymbolId) -> ResourceLifecycle {
|
||||
self.vars
|
||||
.get(&sym)
|
||||
.copied()
|
||||
.unwrap_or(ResourceLifecycle::empty())
|
||||
}
|
||||
|
||||
pub fn set(&mut self, sym: SymbolId, state: ResourceLifecycle) {
|
||||
self.vars.insert(sym, state);
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for ResourceDomainState {
    /// Bottom is the state with no tracked variables.
    fn bot() -> Self {
        Self::new()
    }

    /// Pointwise join: the result holds every key of either operand, and a
    /// key present in both gets the join of the two bitsets.
    fn join(&self, other: &Self) -> Self {
        let mut vars = self.vars.clone();
        for (sym, incoming) in &other.vars {
            vars.entry(*sym)
                .and_modify(|current| *current = current.join(incoming))
                .or_insert(*incoming);
        }
        Self { vars }
    }

    /// Pointwise order: every bitset in `self` must be `⊑` the bitset `other`
    /// reports for the same symbol (empty when `other` has no entry).
    fn leq(&self, other: &Self) -> bool {
        self.vars
            .iter()
            .all(|(sym, lifecycle)| lifecycle.leq(&other.get(*sym)))
    }
}
|
||||
|
||||
// ── AuthLevel ────────────────────────────────────────────────────────────
|
||||
|
||||
/// Authentication level reached along a path.
///
/// Variants are declared in ascending privilege, so the derived `Ord` ranks
/// `Unauthed < Authed < Admin`. The `Lattice` implementation for this type
/// uses `Unauthed` as bottom and joins with `min`: if any incoming path is
/// less privileged, the merged state is the less privileged one.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum AuthLevel {
    Unauthed,
    Authed,
    Admin,
}
|
||||
|
||||
impl Lattice for AuthLevel {
    /// Returns `Unauthed`.
    ///
    /// NOTE(review): with `join = min`, `Unauthed` absorbs every other value
    /// (`Unauthed.join(x) == Unauthed`), so it is not the identity that the
    /// `Lattice` contract asks of `bot()`. Confirm no caller relies on
    /// `bot().join(x) == x` for this type.
    fn bot() -> Self {
        Self::Unauthed
    }

    /// Conservative merge: the less privileged of the two levels.
    fn join(&self, other: &Self) -> Self {
        std::cmp::min(*self, *other)
    }

    /// Order induced by `join = min`: `a ⊑ b` iff `min(a, b) == b`, i.e. `a`
    /// is at least as privileged as `b`. In lattice terms that reads
    /// `Admin ⊑ Authed ⊑ Unauthed`.
    fn leq(&self, other: &Self) -> bool {
        other <= self
    }
}
|
||||
|
||||
// ── AuthDomainState ──────────────────────────────────────────────────────
|
||||
|
||||
/// Path auth level + per-variable validation bit.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct AuthDomainState {
|
||||
pub auth_level: AuthLevel,
|
||||
pub validated: HashSet<SymbolId>,
|
||||
}
|
||||
|
||||
impl Default for AuthDomainState {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
auth_level: AuthLevel::Unauthed,
|
||||
validated: HashSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AuthDomainState {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for AuthDomainState {
    /// Same value as [`AuthDomainState::new`].
    fn bot() -> Self {
        Self::new()
    }

    /// Joins the auth levels and keeps only the variables validated in both
    /// operands: a variable counts as validated only if every merged path
    /// validated it.
    fn join(&self, other: &Self) -> Self {
        let auth_level = self.auth_level.join(&other.auth_level);
        let validated = &self.validated & &other.validated;
        Self {
            auth_level,
            validated,
        }
    }

    /// Componentwise order: the auth levels must satisfy `leq`, and every
    /// variable validated in `other` must also be validated in `self`.
    fn leq(&self, other: &Self) -> bool {
        if !self.auth_level.leq(&other.auth_level) {
            return false;
        }
        other.validated.is_subset(&self.validated)
    }
}
|
||||
|
||||
// ── ProductState ─────────────────────────────────────────────────────────
|
||||
|
||||
/// Composable product of resource and auth domains.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct ProductState {
|
||||
pub resource: ResourceDomainState,
|
||||
pub auth: AuthDomainState,
|
||||
}
|
||||
|
||||
impl ProductState {
|
||||
pub fn initial() -> Self {
|
||||
Self {
|
||||
resource: ResourceDomainState::new(),
|
||||
auth: AuthDomainState::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Lattice for ProductState {
|
||||
fn bot() -> Self {
|
||||
Self {
|
||||
resource: ResourceDomainState::bot(),
|
||||
auth: AuthDomainState::bot(),
|
||||
}
|
||||
}
|
||||
|
||||
fn join(&self, other: &Self) -> Self {
|
||||
Self {
|
||||
resource: self.resource.join(&other.resource),
|
||||
auth: self.auth.join(&other.auth),
|
||||
}
|
||||
}
|
||||
|
||||
fn leq(&self, other: &Self) -> bool {
|
||||
self.resource.leq(&other.resource) && self.auth.leq(&other.auth)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Bitset for "open on some paths, closed on others".
    fn open_or_closed() -> ResourceLifecycle {
        ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED
    }

    #[test]
    fn resource_lifecycle_join_is_or() {
        let joined = ResourceLifecycle::OPEN.join(&ResourceLifecycle::CLOSED);
        assert_eq!(joined, open_or_closed());
    }

    #[test]
    fn resource_lifecycle_bot_identity() {
        let open = ResourceLifecycle::OPEN;
        assert_eq!(open.join(&ResourceLifecycle::bot()), open);
    }

    #[test]
    fn resource_lifecycle_leq() {
        let narrow = ResourceLifecycle::OPEN;
        let wide = open_or_closed();
        assert!(narrow.leq(&wide));
        assert!(!wide.leq(&narrow));
    }

    #[test]
    fn resource_domain_join_merges_keys() {
        let sym_x = SymbolId(0);
        let sym_y = SymbolId(1);

        let mut left = ResourceDomainState::new();
        left.set(sym_x, ResourceLifecycle::OPEN);

        let mut right = ResourceDomainState::new();
        right.set(sym_x, ResourceLifecycle::CLOSED);
        right.set(sym_y, ResourceLifecycle::OPEN);

        let joined = left.join(&right);
        assert_eq!(joined.get(sym_x), open_or_closed());
        assert_eq!(joined.get(sym_y), ResourceLifecycle::OPEN);
    }

    #[test]
    fn auth_level_join_is_min() {
        assert_eq!(
            AuthLevel::Admin.join(&AuthLevel::Unauthed),
            AuthLevel::Unauthed
        );
        assert_eq!(AuthLevel::Authed.join(&AuthLevel::Admin), AuthLevel::Authed);
        assert_eq!(
            AuthLevel::Authed.join(&AuthLevel::Authed),
            AuthLevel::Authed
        );
    }

    #[test]
    fn auth_domain_join_intersects_validated() {
        let sym_a = SymbolId(0);
        let sym_b = SymbolId(1);
        let sym_c = SymbolId(2);

        let left = AuthDomainState {
            auth_level: AuthLevel::Authed,
            validated: HashSet::from([sym_a, sym_b]),
        };
        let right = AuthDomainState {
            auth_level: AuthLevel::Admin,
            validated: HashSet::from([sym_b, sym_c]),
        };

        let joined = left.join(&right);
        assert_eq!(joined.auth_level, AuthLevel::Authed);
        assert_eq!(joined.validated, HashSet::from([sym_b]));
    }

    #[test]
    fn product_state_join() {
        let joined = ProductState::initial().join(&ProductState::initial());
        assert_eq!(joined, ProductState::initial());
    }

    #[test]
    fn may_must_leak_semantics() {
        // Must-leak: OPEN is the only state.
        let must_leak = ResourceLifecycle::OPEN;
        assert!(must_leak.contains(ResourceLifecycle::OPEN));
        assert!(!must_leak.contains(ResourceLifecycle::CLOSED));
        assert!(!must_leak.contains(ResourceLifecycle::MOVED));

        // May-leak: some paths close the handle, some do not.
        let may_leak = open_or_closed();
        assert!(may_leak.contains(ResourceLifecycle::OPEN));
        assert!(may_leak.contains(ResourceLifecycle::CLOSED));

        // No leak: CLOSED is the only state.
        let no_leak = ResourceLifecycle::CLOSED;
        assert!(!no_leak.contains(ResourceLifecycle::OPEN));
        assert!(no_leak.contains(ResourceLifecycle::CLOSED));
    }

    // The domain tests copy `SymbolId` values freely; make sure that stays legal.
    #[test]
    fn symbol_id_is_copy() {
        let original = SymbolId(0);
        let copy = original;
        assert_eq!(original, copy);
    }
}
|
||||
288
src/state/engine.rs
Normal file
288
src/state/engine.rs
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
use super::lattice::Lattice;
|
||||
use crate::cfg::{Cfg, EdgeKind, NodeInfo};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use petgraph::visit::EdgeRef;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
|
||||
/// Upper bound on the number of variables tracked per function; beyond this
/// the analysis is expected to degrade rather than grow without limit.
pub const MAX_TRACKED_VARS: usize = 64;
|
||||
|
||||
/// Default number of worklist iterations the forward engine may spend before
/// its budget is considered exhausted.
pub const MAX_WORKLIST_ITERATIONS: usize = 100_000;
|
||||
|
||||
/// Generic transfer function trait for forward dataflow analysis.
|
||||
///
|
||||
/// Domains implement this to define how abstract state flows through
|
||||
/// CFG nodes and what events (findings) are emitted.
|
||||
pub trait Transfer<S: Lattice> {
|
||||
/// Side-channel events emitted during transfer (e.g., findings, violations).
|
||||
type Event: Clone;
|
||||
|
||||
/// Apply the transfer function to a node, returning the output state
|
||||
/// and any events.
|
||||
fn apply(
|
||||
&self,
|
||||
node: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
edge: Option<EdgeKind>,
|
||||
state: S,
|
||||
) -> (S, Vec<Self::Event>);
|
||||
|
||||
/// Per-domain iteration budget. Defaults to [`MAX_WORKLIST_ITERATIONS`].
|
||||
fn iteration_budget(&self) -> usize {
|
||||
MAX_WORKLIST_ITERATIONS
|
||||
}
|
||||
|
||||
/// Called when the budget is exhausted. Returns true if the engine
|
||||
/// should continue with the current (non-converged) state, false to bail.
|
||||
fn on_budget_exceeded(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of running the forward dataflow engine.
|
||||
pub struct DataflowResult<S, E> {
|
||||
/// Converged state at the entry of each node.
|
||||
pub states: HashMap<NodeIndex, S>,
|
||||
/// Events emitted during Phase 2 transfer over converged states.
|
||||
pub events: Vec<E>,
|
||||
/// Whether the analysis converged (false if budget was hit).
|
||||
#[allow(dead_code)]
|
||||
pub converged: bool,
|
||||
}
|
||||
|
||||
/// Run a forward worklist dataflow analysis over the CFG.
|
||||
///
|
||||
/// Two-phase design:
|
||||
/// - Phase 1: fixed-point iteration to converge states (no event collection).
|
||||
/// - Phase 2: single pass over converged states to collect events.
|
||||
///
|
||||
/// Termination is guaranteed by lattice finiteness + iteration budget.
|
||||
pub fn run_forward<S: Lattice, T: Transfer<S>>(
|
||||
cfg: &Cfg,
|
||||
entry: NodeIndex,
|
||||
transfer: &T,
|
||||
initial: S,
|
||||
) -> DataflowResult<S, T::Event> {
|
||||
let mut states: HashMap<NodeIndex, S> = HashMap::new();
|
||||
let budget = transfer.iteration_budget();
|
||||
|
||||
// Initialize entry node
|
||||
states.insert(entry, initial);
|
||||
|
||||
// ── Phase 1: fixed-point iteration (compute converged states) ─────
|
||||
let mut worklist: VecDeque<NodeIndex> = VecDeque::new();
|
||||
worklist.push_back(entry);
|
||||
|
||||
let mut iterations: usize = 0;
|
||||
let mut converged = true;
|
||||
|
||||
while let Some(node) = worklist.pop_front() {
|
||||
iterations += 1;
|
||||
if iterations > budget {
|
||||
converged = !transfer.on_budget_exceeded();
|
||||
if !converged {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let node_state = match states.get(&node) {
|
||||
Some(s) => s.clone(),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
|
||||
|
||||
// No outgoing edges — nothing to propagate (exit/dead end).
|
||||
if edges.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (edge_kind, target) in edges {
|
||||
let info = &cfg[node];
|
||||
let (out_state, _events) =
|
||||
transfer.apply(node, info, Some(edge_kind), node_state.clone());
|
||||
|
||||
// Join into target's state
|
||||
let target_state = states.get(&target);
|
||||
let new_target = match target_state {
|
||||
Some(existing) => existing.join(&out_state),
|
||||
None => out_state,
|
||||
};
|
||||
|
||||
let changed = target_state.is_none_or(|existing| *existing != new_target);
|
||||
if changed {
|
||||
states.insert(target, new_target);
|
||||
if !worklist.contains(&target) {
|
||||
worklist.push_back(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Phase 2: single pass over converged states to collect events ──
|
||||
let mut events: Vec<T::Event> = Vec::new();
|
||||
let mut seen_edges: std::collections::HashSet<(NodeIndex, NodeIndex)> =
|
||||
std::collections::HashSet::new();
|
||||
|
||||
for node in states.keys().copied().collect::<Vec<_>>() {
|
||||
let node_state = match states.get(&node) {
|
||||
Some(s) => s.clone(),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
|
||||
|
||||
if edges.is_empty() {
|
||||
// Exit / dead end — apply transfer for event collection.
|
||||
let info = &cfg[node];
|
||||
let (_out_state, new_events) = transfer.apply(node, info, None, node_state);
|
||||
events.extend(new_events);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (edge_kind, target) in edges {
|
||||
if !seen_edges.insert((node, target)) {
|
||||
continue;
|
||||
}
|
||||
let info = &cfg[node];
|
||||
let (_out_state, new_events) =
|
||||
transfer.apply(node, info, Some(edge_kind), node_state.clone());
|
||||
events.extend(new_events);
|
||||
}
|
||||
}
|
||||
|
||||
DataflowResult {
|
||||
states,
|
||||
events,
|
||||
converged,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{EdgeKind, NodeInfo, StmtKind};
    use crate::cfg_analysis::rules;
    use crate::state::domain::{ProductState, ResourceLifecycle};
    use crate::state::symbol::SymbolInterner;
    use crate::state::transfer::DefaultTransfer;
    use crate::symbol::Lang;
    use petgraph::Graph;

    /// Bare node of the given kind with every other field at its neutral value.
    fn make_node(kind: StmtKind) -> NodeInfo {
        NodeInfo {
            kind,
            span: (0, 0),
            label: None,
            defines: None,
            uses: vec![],
            callee: None,
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Call node modelling `f = fopen(...)`.
    fn fopen_into_f() -> NodeInfo {
        NodeInfo {
            defines: Some("f".into()),
            callee: Some("fopen".into()),
            ..make_node(StmtKind::Call)
        }
    }

    /// Call node modelling `fclose(f)`.
    fn fclose_of_f() -> NodeInfo {
        NodeInfo {
            uses: vec!["f".into()],
            callee: Some("fclose".into()),
            ..make_node(StmtKind::Call)
        }
    }

    #[test]
    fn linear_cfg_converges() {
        // Entry → fopen(f) → fclose(f) → Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_into_f());
        let close_node = cfg.add_node(fclose_of_f());
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        for (from, to) in [(entry, open_node), (open_node, close_node), (close_node, exit)] {
            cfg.add_edge(from, to, EdgeKind::Seq);
        }

        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
        };

        let result = run_forward(&cfg, entry, &transfer, ProductState::initial());

        // A clean open→close sequence raises no events.
        assert!(result.events.is_empty());
        assert!(result.converged);

        // On entry to Exit, `f` is CLOSED and nothing else.
        let sym_f = interner.get("f").unwrap();
        let exit_state = result.states.get(&exit).unwrap();
        assert_eq!(exit_state.resource.get(sym_f), ResourceLifecycle::CLOSED);
    }

    #[test]
    fn diamond_cfg_joins_states() {
        //        Entry
        //          |
        //       fopen(f)
        //          |
        //          If
        //        /    \
        //  fclose(f)  (no close)
        //        \    /
        //         Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_into_f());
        let if_node = cfg.add_node(make_node(StmtKind::If));
        let close_node = cfg.add_node(fclose_of_f());
        let no_close = cfg.add_node(make_node(StmtKind::Seq));
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        cfg.add_edge(entry, open_node, EdgeKind::Seq);
        cfg.add_edge(open_node, if_node, EdgeKind::Seq);
        cfg.add_edge(if_node, close_node, EdgeKind::True);
        cfg.add_edge(if_node, no_close, EdgeKind::False);
        cfg.add_edge(close_node, exit, EdgeKind::Seq);
        cfg.add_edge(no_close, exit, EdgeKind::Seq);

        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
        };

        let result = run_forward(&cfg, entry, &transfer, ProductState::initial());

        // The two branches merge at Exit, so `f` is OPEN | CLOSED (may-leak).
        let sym_f = interner.get("f").unwrap();
        let exit_state = result.states.get(&exit).unwrap();
        let expected = ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED;
        assert_eq!(exit_state.resource.get(sym_f), expected);
    }
}
|
||||
355
src/state/facts.rs
Normal file
355
src/state/facts.rs
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
|
||||
use super::engine::DataflowResult;
|
||||
use super::symbol::SymbolInterner;
|
||||
use super::transfer::{TransferEvent, TransferEventKind};
|
||||
use crate::cfg::{Cfg, StmtKind};
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::patterns::Severity;
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::visit::IntoNodeReferences;
|
||||
|
||||
/// Normalize a callee description for display.
|
||||
fn sanitize_desc(s: &str) -> String {
|
||||
crate::fmt::normalize_snippet(s)
|
||||
}
|
||||
|
||||
/// A finding produced by state analysis.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StateFinding {
|
||||
pub rule_id: String,
|
||||
pub severity: Severity,
|
||||
pub span: (usize, usize),
|
||||
pub message: String,
|
||||
/// State machine that produced this finding: `"resource"` or `"auth"`.
|
||||
pub machine: &'static str,
|
||||
/// Variable name involved, if available.
|
||||
pub subject: Option<String>,
|
||||
/// State before the event (e.g. `"closed"`, `"open"`, `"unauthed"`).
|
||||
pub from_state: &'static str,
|
||||
/// State after the event (e.g. `"used"`, `"closed"`, `"leaked"`, `"access"`).
|
||||
pub to_state: &'static str,
|
||||
}
|
||||
|
||||
/// Extract findings from converged dataflow state + transfer events.
|
||||
pub fn extract_findings(
|
||||
result: &DataflowResult<ProductState, TransferEvent>,
|
||||
cfg: &Cfg,
|
||||
interner: &SymbolInterner,
|
||||
lang: Lang,
|
||||
func_summaries: &crate::cfg::FuncSummaries,
|
||||
) -> Vec<StateFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
// ── 1. Use-after-close from transfer events ──────────────────────────
|
||||
for event in &result.events {
|
||||
let info = &cfg[event.node];
|
||||
let var_name = interner.resolve(event.var);
|
||||
match event.kind {
|
||||
TransferEventKind::UseAfterClose => {
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-use-after-close".into(),
|
||||
severity: Severity::High,
|
||||
span: info.span,
|
||||
message: format!("variable `{var_name}` used after close"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "closed",
|
||||
to_state: "used",
|
||||
});
|
||||
}
|
||||
TransferEventKind::DoubleClose => {
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-double-close".into(),
|
||||
severity: Severity::Medium,
|
||||
span: info.span,
|
||||
message: format!("variable `{var_name}` closed twice"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "closed",
|
||||
to_state: "closed",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── 2. Resource leaks at Exit and function-Return nodes ──────────────
|
||||
for (idx, info) in cfg.node_references() {
|
||||
// Check both the file-level Exit node and the *synthesised* function
|
||||
// exit node (a Return node). Skip early-return nodes — they flow
|
||||
// into the synthesised exit and carry only path-specific state.
|
||||
// The synthesised exit is the one Return node that does NOT have an
|
||||
// outgoing edge to another Return in the same function.
|
||||
let is_exit = info.kind == StmtKind::Exit;
|
||||
let is_func_exit = info.kind == StmtKind::Return && info.enclosing_func.is_some();
|
||||
if !is_exit && !is_func_exit {
|
||||
continue;
|
||||
}
|
||||
if is_func_exit {
|
||||
use petgraph::Direction;
|
||||
let is_early_return = cfg
|
||||
.neighbors_directed(idx, Direction::Outgoing)
|
||||
.any(|succ| {
|
||||
let s = &cfg[succ];
|
||||
s.kind == StmtKind::Return && s.enclosing_func == info.enclosing_func
|
||||
});
|
||||
if is_early_return {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let Some(state) = result.states.get(&idx) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for (&sym, &lifecycle) in &state.resource.vars {
|
||||
if !lifecycle.contains(ResourceLifecycle::OPEN) {
|
||||
continue;
|
||||
}
|
||||
let var_name = interner.resolve(sym);
|
||||
|
||||
if !lifecycle.contains(ResourceLifecycle::CLOSED)
|
||||
&& !lifecycle.contains(ResourceLifecycle::MOVED)
|
||||
{
|
||||
// Definite leak: open on all paths, never closed
|
||||
// Find the acquire span by scanning backwards for this variable's define
|
||||
let acquire_span = find_acquire_span(cfg, sym, interner);
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-resource-leak".into(),
|
||||
severity: Severity::Medium,
|
||||
span: acquire_span.unwrap_or(info.span),
|
||||
message: format!("resource `{var_name}` is never closed"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "open",
|
||||
to_state: "leaked",
|
||||
});
|
||||
} else if lifecycle.contains(ResourceLifecycle::CLOSED) {
|
||||
// May-leak: open on some paths, closed on others
|
||||
let acquire_span = find_acquire_span(cfg, sym, interner);
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-resource-leak-possible".into(),
|
||||
severity: Severity::Low,
|
||||
span: acquire_span.unwrap_or(info.span),
|
||||
message: format!("resource `{var_name}` may not be closed on all paths"),
|
||||
machine: "resource",
|
||||
subject: Some(var_name.to_string()),
|
||||
from_state: "open",
|
||||
to_state: "possibly_leaked",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── 3. Auth-required sinks ───────────────────────────────────────────
|
||||
// Check if any function is a web entrypoint
|
||||
let has_web_entrypoint = cfg.node_references().any(|(_, info)| {
|
||||
if let Some(ref func_name) = info.enclosing_func {
|
||||
is_web_entrypoint_simple(func_name, lang, func_summaries, cfg)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
|
||||
if has_web_entrypoint {
|
||||
for (idx, info) in cfg.node_references() {
|
||||
if !is_privileged_sink(info) {
|
||||
continue;
|
||||
}
|
||||
let Some(state) = result.states.get(&idx) else {
|
||||
continue;
|
||||
};
|
||||
if state.auth.auth_level == AuthLevel::Unauthed {
|
||||
let callee_desc = sanitize_desc(info.callee.as_deref().unwrap_or("(sensitive op)"));
|
||||
findings.push(StateFinding {
|
||||
rule_id: "state-unauthed-access".into(),
|
||||
severity: Severity::High,
|
||||
span: info.span,
|
||||
message: format!(
|
||||
"sensitive operation `{callee_desc}` reached without authentication"
|
||||
),
|
||||
machine: "auth",
|
||||
subject: None,
|
||||
from_state: "unauthed",
|
||||
to_state: "access",
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dedup
|
||||
findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
|
||||
findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
|
||||
|
||||
findings
|
||||
}
|
||||
|
||||
/// Find the span where a variable was acquired (defined via Call node).
|
||||
fn find_acquire_span(
|
||||
cfg: &Cfg,
|
||||
sym: super::symbol::SymbolId,
|
||||
interner: &SymbolInterner,
|
||||
) -> Option<(usize, usize)> {
|
||||
let var_name = interner.resolve(sym);
|
||||
for (_idx, info) in cfg.node_references() {
|
||||
if info.kind == StmtKind::Call
|
||||
&& let Some(ref def) = info.defines
|
||||
&& def == var_name
|
||||
{
|
||||
return Some(info.span);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Check if a node is a privileged sink (shell execution or file I/O).
|
||||
fn is_privileged_sink(info: &crate::cfg::NodeInfo) -> bool {
|
||||
match info.label {
|
||||
Some(DataLabel::Sink(caps)) => caps.intersects(Cap::SHELL_ESCAPE | Cap::FILE_IO),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Simplified web entrypoint check (avoids AnalysisContext dependency).
|
||||
fn is_web_entrypoint_simple(
|
||||
func_name: &str,
|
||||
lang: Lang,
|
||||
func_summaries: &crate::cfg::FuncSummaries,
|
||||
_cfg: &Cfg,
|
||||
) -> bool {
|
||||
let name_lower = func_name.to_ascii_lowercase();
|
||||
|
||||
// Skip bare "main" — it's typically a CLI entry
|
||||
if name_lower == "main" {
|
||||
return false;
|
||||
}
|
||||
|
||||
let is_handler_name = name_lower.starts_with("handle_")
|
||||
|| name_lower.starts_with("route_")
|
||||
|| name_lower.starts_with("api_")
|
||||
|| name_lower.starts_with("serve_")
|
||||
|| name_lower.starts_with("process_")
|
||||
|| name_lower == "handler";
|
||||
|
||||
if !is_handler_name {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for web-like parameters
|
||||
let web_params: &[&str] = match lang {
|
||||
Lang::Rust => &["request", "req", "json", "query", "form", "payload", "body"],
|
||||
Lang::JavaScript | Lang::TypeScript => &["req", "request", "ctx", "res", "response"],
|
||||
Lang::Python => &["request", "req"],
|
||||
Lang::Go => &["w", "writer", "r", "req", "request"],
|
||||
Lang::Java => &["request", "req"],
|
||||
_ => &["request", "req"],
|
||||
};
|
||||
|
||||
let has_web_params = func_summaries.values().any(|s| {
|
||||
s.param_names
|
||||
.iter()
|
||||
.any(|p| web_params.contains(&p.to_ascii_lowercase().as_str()))
|
||||
});
|
||||
|
||||
// Strong handler names are enough even without web params
|
||||
let strong_name = name_lower.starts_with("handle_")
|
||||
|| name_lower.starts_with("route_")
|
||||
|| name_lower.starts_with("api_");
|
||||
|
||||
has_web_params || strong_name
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{EdgeKind, NodeInfo};
    use crate::cfg_analysis::rules;
    use crate::state::domain::ProductState;
    use crate::state::engine;
    use crate::state::symbol::SymbolInterner;
    use crate::state::transfer::DefaultTransfer;
    use petgraph::Graph;
    use std::collections::HashMap;

    /// Bare node of the given kind with every other field at its neutral value.
    fn make_node(kind: StmtKind) -> NodeInfo {
        NodeInfo {
            kind,
            span: (0, 0),
            label: None,
            defines: None,
            uses: vec![],
            callee: None,
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Call node modelling `f = fopen(...)` located at `span`.
    fn fopen_into_f(span: (usize, usize)) -> NodeInfo {
        NodeInfo {
            span,
            defines: Some("f".into()),
            callee: Some("fopen".into()),
            ..make_node(StmtKind::Call)
        }
    }

    #[test]
    fn detects_resource_leak() {
        // Entry → fopen(f) → Exit (no close)
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_into_f((10, 20)));
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        cfg.add_edge(entry, open_node, EdgeKind::Seq);
        cfg.add_edge(open_node, exit, EdgeKind::Seq);

        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
        };

        let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial());
        let no_summaries = HashMap::new();
        let findings = extract_findings(&result, &cfg, &interner, Lang::C, &no_summaries);

        assert_eq!(findings.len(), 1);
        let leak = &findings[0];
        assert_eq!(leak.rule_id, "state-resource-leak");
        assert!(leak.message.contains("f"));
    }

    #[test]
    fn clean_open_close_no_findings() {
        // Entry → fopen(f) → fclose(f) → Exit
        let mut cfg: Cfg = Graph::new();
        let entry = cfg.add_node(make_node(StmtKind::Entry));
        let open_node = cfg.add_node(fopen_into_f((0, 0)));
        let close_node = cfg.add_node(NodeInfo {
            uses: vec!["f".into()],
            callee: Some("fclose".into()),
            ..make_node(StmtKind::Call)
        });
        let exit = cfg.add_node(make_node(StmtKind::Exit));

        for (from, to) in [(entry, open_node), (open_node, close_node), (close_node, exit)] {
            cfg.add_edge(from, to, EdgeKind::Seq);
        }

        let interner = SymbolInterner::from_cfg(&cfg);
        let transfer = DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner: &interner,
        };

        let result = engine::run_forward(&cfg, entry, &transfer, ProductState::initial());
        let no_summaries = HashMap::new();
        let findings = extract_findings(&result, &cfg, &interner, Lang::C, &no_summaries);

        assert!(findings.is_empty());
    }
}
|
||||
91
src/state/lattice.rs
Normal file
91
src/state/lattice.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
/// A bounded semi-lattice with bottom element and monotone join.
///
/// Implementations must satisfy:
/// - `join` is commutative, associative, and idempotent
/// - `bot()` is the identity for `join`
/// - `leq(a, b)` iff `join(a, b) == b`
///
/// These laws are a contract on implementors; the trait itself cannot
/// enforce them. The unit tests in this file exercise them on a small
/// example lattice.
// NOTE(review): the reason for `allow(dead_code)` is not visible here —
// presumably some methods are not yet called outside tests; confirm before
// removing.
#[allow(dead_code)]
pub trait Lattice: Clone + Eq + Sized {
    /// Bottom element (least information / unreachable).
    fn bot() -> Self;

    /// Least upper bound: merge two abstract values.
    ///
    /// Takes both operands by reference and returns a new value.
    fn join(&self, other: &Self) -> Self;

    /// Partial order: `self ⊑ other`.
    fn leq(&self, other: &Self) -> bool;
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal totally ordered lattice used to exercise the trait laws:
    /// `0` is bottom, and larger numbers carry more information.
    #[derive(Clone, Debug, PartialEq, Eq)]
    struct Three(u8); // 0=bot, 1, 2=top-ish

    impl Lattice for Three {
        fn bot() -> Self {
            Self(0)
        }

        fn join(&self, other: &Self) -> Self {
            // The least upper bound of a chain is the larger element.
            if other.0 > self.0 {
                other.clone()
            } else {
                self.clone()
            }
        }

        fn leq(&self, other: &Self) -> bool {
            other.0 >= self.0
        }
    }

    #[test]
    fn bot_identity() {
        let one = Three(1);
        let bottom = Three::bot();
        // Joining with bottom on either side leaves the value unchanged.
        assert_eq!(one.join(&bottom), one);
        assert_eq!(bottom.join(&one), one);
    }

    #[test]
    fn join_commutative() {
        let (one, two) = (Three(1), Three(2));
        assert_eq!(one.join(&two), two.join(&one));
    }

    #[test]
    fn join_associative() {
        let (zero, one, two) = (Three(0), Three(1), Three(2));
        let left_grouped = zero.join(&one).join(&two);
        let right_grouped = zero.join(&one.join(&two));
        assert_eq!(left_grouped, right_grouped);
    }

    #[test]
    fn join_idempotent() {
        let one = Three(1);
        assert_eq!(one.join(&one), one);
    }

    #[test]
    fn leq_reflexive() {
        let one = Three(1);
        assert!(one.leq(&one));
    }

    #[test]
    fn leq_transitive() {
        let (zero, one, two) = (Three(0), Three(1), Three(2));
        assert!(zero.leq(&one));
        assert!(one.leq(&two));
        assert!(zero.leq(&two));
    }

    #[test]
    fn leq_consistent_with_join() {
        let (one, two) = (Three(1), Three(2));
        // a ⊑ b iff join(a, b) == b
        assert!(one.leq(&two));
        assert_eq!(one.join(&two), two);
    }
}
|
||||
62
src/state/mod.rs
Normal file
62
src/state/mod.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
pub mod domain;
|
||||
pub mod engine;
|
||||
pub mod facts;
|
||||
pub mod lattice;
|
||||
pub mod symbol;
|
||||
pub mod transfer;
|
||||
|
||||
use crate::cfg::{Cfg, FuncSummaries};
|
||||
use crate::cfg_analysis::rules;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use domain::ProductState;
|
||||
use engine::MAX_TRACKED_VARS;
|
||||
use facts::StateFinding;
|
||||
use petgraph::graph::NodeIndex;
|
||||
use symbol::SymbolInterner;
|
||||
use transfer::DefaultTransfer;
|
||||
|
||||
/// Run state-model dataflow analysis on a single function's CFG.
|
||||
///
|
||||
/// Returns findings for use-after-close, double-close, resource leaks,
|
||||
/// and unauthenticated access to sensitive sinks.
|
||||
pub fn run_state_analysis(
|
||||
cfg: &Cfg,
|
||||
entry: NodeIndex,
|
||||
lang: Lang,
|
||||
_source_bytes: &[u8],
|
||||
func_summaries: &FuncSummaries,
|
||||
_global_summaries: Option<&GlobalSummaries>,
|
||||
) -> Vec<StateFinding> {
|
||||
let _span = tracing::debug_span!("run_state_analysis").entered();
|
||||
|
||||
// 1. Build symbol interner from CFG
|
||||
let interner = SymbolInterner::from_cfg(cfg);
|
||||
|
||||
// Guarded degradation: cap tracked variables
|
||||
if interner.len() > MAX_TRACKED_VARS {
|
||||
tracing::warn!(
|
||||
symbols = interner.len(),
|
||||
max = MAX_TRACKED_VARS,
|
||||
"state analysis: too many variables, capping tracking"
|
||||
);
|
||||
// Still run — the interner has all symbols, but transfer will only
|
||||
// track the first MAX_TRACKED_VARS due to HashMap insertion order.
|
||||
// This is conservative but safe.
|
||||
}
|
||||
|
||||
// 2. Construct transfer function
|
||||
let resource_pairs = rules::resource_pairs(lang);
|
||||
let transfer = DefaultTransfer {
|
||||
lang,
|
||||
resource_pairs,
|
||||
interner: &interner,
|
||||
};
|
||||
|
||||
// 3. Run forward dataflow engine
|
||||
let initial = ProductState::initial();
|
||||
let result = engine::run_forward(cfg, entry, &transfer, initial);
|
||||
|
||||
// 4. Extract findings
|
||||
facts::extract_findings(&result, cfg, &interner, lang, func_summaries)
|
||||
}
|
||||
101
src/state/symbol.rs
Normal file
101
src/state/symbol.rs
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
use crate::cfg::Cfg;
|
||||
use petgraph::visit::IntoNodeReferences;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Cheap `Copy` handle into a [`SymbolInterner`].
///
/// The wrapped `u32` is the symbol's position in the interner's string
/// table, so an id is only meaningful for the interner that produced it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SymbolId(pub(crate) u32);
|
||||
|
||||
/// Per-function interner: maps `String` ↔ [`SymbolId`].
///
/// Built once from CFG node `defines`/`uses`, reused throughout analysis.
#[derive(Default)]
pub struct SymbolInterner {
    /// Name → id lookup.
    to_id: HashMap<String, SymbolId>,
    /// Id → name table; a symbol's id is its index in this vector.
    to_str: Vec<String>,
}
|
||||
|
||||
impl SymbolInterner {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Intern a name, returning its stable [`SymbolId`].
|
||||
pub fn intern(&mut self, name: &str) -> SymbolId {
|
||||
if let Some(&id) = self.to_id.get(name) {
|
||||
return id;
|
||||
}
|
||||
let id = SymbolId(self.to_str.len() as u32);
|
||||
self.to_str.push(name.to_owned());
|
||||
self.to_id.insert(name.to_owned(), id);
|
||||
id
|
||||
}
|
||||
|
||||
/// Look up a name without interning it.
|
||||
pub fn get(&self, name: &str) -> Option<SymbolId> {
|
||||
self.to_id.get(name).copied()
|
||||
}
|
||||
|
||||
/// Resolve an id back to its string.
|
||||
pub fn resolve(&self, id: SymbolId) -> &str {
|
||||
&self.to_str[id.0 as usize]
|
||||
}
|
||||
|
||||
/// Number of interned symbols.
|
||||
pub fn len(&self) -> usize {
|
||||
self.to_str.len()
|
||||
}
|
||||
|
||||
/// Whether the interner is empty.
|
||||
#[allow(dead_code)]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.to_str.is_empty()
|
||||
}
|
||||
|
||||
/// Build from a CFG: walk all nodes, intern every `defines`/`uses` string.
|
||||
pub fn from_cfg(cfg: &Cfg) -> Self {
|
||||
let mut interner = Self::new();
|
||||
for (_idx, info) in cfg.node_references() {
|
||||
if let Some(ref d) = info.defines {
|
||||
interner.intern(d);
|
||||
}
|
||||
for u in &info.uses {
|
||||
interner.intern(u);
|
||||
}
|
||||
}
|
||||
interner
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn intern_resolve_roundtrip() {
        let mut symbols = SymbolInterner::new();
        let foo = symbols.intern("foo");
        let bar = symbols.intern("bar");
        let foo_again = symbols.intern("foo");

        // Re-interning yields the same id; distinct names get distinct ids.
        assert_eq!(foo, foo_again);
        assert_ne!(foo, bar);

        // Ids resolve back to the names they were created from.
        assert_eq!(symbols.resolve(foo), "foo");
        assert_eq!(symbols.resolve(bar), "bar");
    }

    #[test]
    fn get_returns_none_for_unknown() {
        let symbols = SymbolInterner::new();
        assert!(symbols.get("missing").is_none());
    }

    #[test]
    fn len_tracks_unique_symbols() {
        let mut symbols = SymbolInterner::new();
        // "a" appears twice but must only be counted once.
        for name in ["a", "b", "a"] {
            symbols.intern(name);
        }
        assert_eq!(symbols.len(), 2);
    }
}
|
||||
426
src/state/transfer.rs
Normal file
426
src/state/transfer.rs
Normal file
|
|
@ -0,0 +1,426 @@
|
|||
use super::domain::{AuthLevel, ProductState, ResourceLifecycle};
|
||||
use super::engine::Transfer;
|
||||
use super::symbol::{SymbolId, SymbolInterner};
|
||||
use crate::cfg::{EdgeKind, NodeInfo, StmtKind};
|
||||
use crate::cfg_analysis::rules::{self, ResourcePair};
|
||||
use crate::symbol::Lang;
|
||||
use petgraph::graph::NodeIndex;
|
||||
|
||||
/// Events emitted during transfer for illegal state transitions.
/// These are NOT lattice values — they become findings in `facts.rs`.
#[derive(Debug, Clone)]
pub struct TransferEvent {
    /// Which illegal transition was observed.
    pub kind: TransferEventKind,
    /// CFG node whose transfer produced the event.
    pub node: NodeIndex,
    /// Interned symbol of the variable involved in the transition.
    pub var: SymbolId,
}
|
||||
|
||||
/// Kind of illegal resource transition reported by the transfer function.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferEventKind {
    /// A resource-use call touched a variable whose lifecycle is exactly
    /// `CLOSED`.
    UseAfterClose,
    /// A release call was applied to a variable whose lifecycle is exactly
    /// `CLOSED`.
    DoubleClose,
}
|
||||
|
||||
/// Resource-use patterns: callees that read/write/operate on a resource handle
/// (triggering use-after-close if the handle is closed).
///
/// Entries are matched against the lowercased callee with `callee_matches`,
/// i.e. as suffixes, so keep them lowercase.
// NOTE(review): because matching is by suffix, short entries such as "read"
// also match unrelated callees that merely end in them — confirm this is
// acceptable.
static RESOURCE_USE_PATTERNS: &[&str] = &[
    "read", "write", "send", "recv", "fread", "fwrite", "fgets", "fputs", "fprintf", "fscanf",
    "fflush", "fseek", "ftell", "rewind", "feof", "ferror", "fgetc", "fputc", "getc", "putc",
    "ungetc", "query", "execute", "fetch", "sendto", "recvfrom", "ioctl", "fcntl",
    // Memory access functions (for malloc/free use-after-free detection)
    "strcpy", "strncpy", "strcat", "strncat", "memcpy", "memmove", "memset", "memcmp", "strcmp",
    "strncmp", "strlen", "sprintf", "snprintf",
];
|
||||
|
||||
/// Auth-call matchers for admin-level privilege.
///
/// Consulted only after a callee or condition has already matched one of the
/// language's auth rules: a hit upgrades the granted level to
/// `AuthLevel::Admin` instead of `AuthLevel::Authed`. Entries are lowercase
/// because they are compared against lowercased callee/condition text.
static ADMIN_PATTERNS: &[&str] = &[
    "is_admin",
    "hasrole",
    "has_role",
    "check_admin",
    "require_admin",
];
|
||||
|
||||
/// Transfer function over [`ProductState`] that tracks resource lifecycles
/// and auth/validation state across CFG nodes.
pub struct DefaultTransfer<'a> {
    /// Source language; selects which auth rules are consulted.
    pub lang: Lang,
    /// Acquire/release callee patterns used to open and close resources.
    pub resource_pairs: &'a [ResourcePair],
    /// Interner used to map variable names on CFG nodes to [`SymbolId`]s.
    /// Names it does not know are ignored by the transfer function.
    pub interner: &'a SymbolInterner,
}
|
||||
|
||||
impl Transfer<ProductState> for DefaultTransfer<'_> {
|
||||
type Event = TransferEvent;
|
||||
|
||||
fn apply(
|
||||
&self,
|
||||
node_idx: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
edge: Option<EdgeKind>,
|
||||
mut state: ProductState,
|
||||
) -> (ProductState, Vec<TransferEvent>) {
|
||||
let mut events = Vec::new();
|
||||
|
||||
match info.kind {
|
||||
StmtKind::Call => {
|
||||
self.apply_call(node_idx, info, &mut state, &mut events);
|
||||
}
|
||||
StmtKind::If => {
|
||||
self.apply_if(info, edge, &mut state);
|
||||
}
|
||||
StmtKind::Seq => {
|
||||
self.apply_assignment(node_idx, info, &mut state);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
(state, events)
|
||||
}
|
||||
}
|
||||
|
||||
impl DefaultTransfer<'_> {
|
||||
fn apply_call(
|
||||
&self,
|
||||
node_idx: NodeIndex,
|
||||
info: &NodeInfo,
|
||||
state: &mut ProductState,
|
||||
events: &mut Vec<TransferEvent>,
|
||||
) {
|
||||
let callee = match &info.callee {
|
||||
Some(c) => c.to_ascii_lowercase(),
|
||||
None => return,
|
||||
};
|
||||
|
||||
// ── Resource acquire ─────────────────────────────────────────────
|
||||
for pair in self.resource_pairs {
|
||||
let is_acquire = pair.acquire.iter().any(|a| callee_matches(&callee, a));
|
||||
let is_excluded = pair
|
||||
.exclude_acquire
|
||||
.iter()
|
||||
.any(|e| callee_matches(&callee, e));
|
||||
|
||||
if is_acquire
|
||||
&& !is_excluded
|
||||
&& let Some(ref def) = info.defines
|
||||
&& let Some(sym) = self.interner.get(def)
|
||||
{
|
||||
state.resource.set(sym, ResourceLifecycle::OPEN);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Resource release ─────────────────────────────────────────────
|
||||
// Track which variables have already been released to avoid double-
|
||||
// matching across multiple resource pair definitions.
|
||||
let mut released: smallvec::SmallVec<[SymbolId; 4]> = smallvec::SmallVec::new();
|
||||
for pair in self.resource_pairs {
|
||||
let is_release = pair.release.iter().any(|r| callee_matches(&callee, r));
|
||||
if is_release {
|
||||
for used in &info.uses {
|
||||
if let Some(sym) = self.interner.get(used) {
|
||||
if released.contains(&sym) {
|
||||
continue;
|
||||
}
|
||||
let current = state.resource.get(sym);
|
||||
if current == ResourceLifecycle::CLOSED {
|
||||
// Double close
|
||||
events.push(TransferEvent {
|
||||
kind: TransferEventKind::DoubleClose,
|
||||
node: node_idx,
|
||||
var: sym,
|
||||
});
|
||||
} else if current.contains(ResourceLifecycle::OPEN) {
|
||||
state.resource.set(sym, ResourceLifecycle::CLOSED);
|
||||
}
|
||||
released.push(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Resource use (read/write/etc.) ───────────────────────────────
|
||||
let is_use = RESOURCE_USE_PATTERNS
|
||||
.iter()
|
||||
.any(|p| callee_matches(&callee, p));
|
||||
if is_use {
|
||||
for used in &info.uses {
|
||||
if let Some(sym) = self.interner.get(used) {
|
||||
let current = state.resource.get(sym);
|
||||
if current == ResourceLifecycle::CLOSED {
|
||||
events.push(TransferEvent {
|
||||
kind: TransferEventKind::UseAfterClose,
|
||||
node: node_idx,
|
||||
var: sym,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Auth call ────────────────────────────────────────────────────
|
||||
let auth_rules = rules::auth_rules(self.lang);
|
||||
let is_auth = auth_rules.iter().any(|rule| {
|
||||
rule.matchers
|
||||
.iter()
|
||||
.any(|m| callee_matches(&callee, &m.to_ascii_lowercase()))
|
||||
});
|
||||
if is_auth {
|
||||
let is_admin = ADMIN_PATTERNS.iter().any(|p| callee_matches(&callee, p));
|
||||
let new_level = if is_admin {
|
||||
AuthLevel::Admin
|
||||
} else {
|
||||
AuthLevel::Authed
|
||||
};
|
||||
if new_level > state.auth.auth_level {
|
||||
state.auth.auth_level = new_level;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Validation call (guard) ──────────────────────────────────────
|
||||
if is_guard_like(&callee) {
|
||||
for used in &info.uses {
|
||||
if let Some(sym) = self.interner.get(used) {
|
||||
state.auth.validated.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_if(&self, info: &NodeInfo, edge: Option<EdgeKind>, state: &mut ProductState) {
|
||||
// On the True edge of an If node whose condition is an auth check,
|
||||
// refine auth level.
|
||||
let is_true_edge = matches!(edge, Some(EdgeKind::True));
|
||||
if !is_true_edge {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(ref cond) = info.condition_text {
|
||||
let cond_lower = cond.to_ascii_lowercase();
|
||||
|
||||
// Auth-related condition
|
||||
let auth_rules = rules::auth_rules(self.lang);
|
||||
let is_auth_cond = auth_rules.iter().any(|rule| {
|
||||
rule.matchers
|
||||
.iter()
|
||||
.any(|m| cond_lower.contains(&m.to_ascii_lowercase()))
|
||||
});
|
||||
if is_auth_cond && !info.condition_negated {
|
||||
let is_admin = ADMIN_PATTERNS.iter().any(|p| cond_lower.contains(p));
|
||||
let new_level = if is_admin {
|
||||
AuthLevel::Admin
|
||||
} else {
|
||||
AuthLevel::Authed
|
||||
};
|
||||
if new_level > state.auth.auth_level {
|
||||
state.auth.auth_level = new_level;
|
||||
}
|
||||
}
|
||||
|
||||
// Validation-related condition
|
||||
if is_guard_like(&cond_lower) && !info.condition_negated {
|
||||
for var in &info.condition_vars {
|
||||
if let Some(sym) = self.interner.get(var) {
|
||||
state.auth.validated.insert(sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_assignment(&self, _node_idx: NodeIndex, info: &NodeInfo, state: &mut ProductState) {
|
||||
// Ownership transfer: if `defines` reassigns a tracked resource
|
||||
// variable from a `uses` variable, transfer the lifecycle.
|
||||
if let Some(ref def) = info.defines
|
||||
&& let Some(def_sym) = self.interner.get(def)
|
||||
{
|
||||
// If the RHS is a tracked resource, transfer its state
|
||||
for used in &info.uses {
|
||||
if let Some(use_sym) = self.interner.get(used) {
|
||||
let lc = state.resource.get(use_sym);
|
||||
if lc.contains(ResourceLifecycle::OPEN) {
|
||||
state.resource.set(def_sym, lc);
|
||||
state.resource.set(use_sym, ResourceLifecycle::MOVED);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a callee matches a pattern.
///
/// The match is a suffix match against the ASCII-lowercased pattern:
/// - "fclose" matches the callees "fclose" and "my_fclose"
/// - ".close" matches "file.close"
///
/// Dot-prefixed (method) patterns and plain patterns follow the same rule;
/// the leading dot is simply part of the suffix. An exact match is the
/// special case where the suffix is the whole callee.
///
/// `callee` is compared as given, so callers must pass it already
/// lowercased. Bytes are compared in place, so no temporary `String` is
/// allocated per call.
fn callee_matches(callee: &str, pattern: &str) -> bool {
    let callee = callee.as_bytes();
    let pattern = pattern.as_bytes();
    // A pattern longer than the callee can never be its suffix.
    let Some(start) = callee.len().checked_sub(pattern.len()) else {
        return false;
    };
    callee[start..]
        .iter()
        .zip(pattern)
        .all(|(c, p)| *c == p.to_ascii_lowercase())
}
|
||||
|
||||
/// Check if a callee looks like a guard/validation function.
///
/// This is a case-sensitive prefix test: the text must start with one of the
/// known guard prefixes. Callers pass already-lowercased text.
fn is_guard_like(callee: &str) -> bool {
    const GUARD_PREFIXES: [&str; 5] = ["validate", "sanitize", "check_", "verify_", "assert_"];
    for prefix in GUARD_PREFIXES {
        if callee.starts_with(prefix) {
            return true;
        }
    }
    false
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Interner that knows the single variable `f`, plus that variable's id.
    fn interner_with_f() -> (SymbolInterner, SymbolId) {
        let mut interner = SymbolInterner::new();
        let sym_f = interner.intern("f");
        (interner, sym_f)
    }

    /// Transfer function configured with the C resource pairs.
    fn c_transfer(interner: &SymbolInterner) -> DefaultTransfer<'_> {
        DefaultTransfer {
            lang: Lang::C,
            resource_pairs: rules::resource_pairs(Lang::C),
            interner,
        }
    }

    /// Bare call node to `callee`: defines nothing, uses nothing. Tests
    /// override individual fields with struct-update syntax.
    fn call_node(callee: &str) -> NodeInfo {
        NodeInfo {
            kind: StmtKind::Call,
            span: (0, 10),
            label: None,
            defines: None,
            uses: vec![],
            callee: Some(callee.into()),
            enclosing_func: None,
            call_ordinal: 0,
            condition_text: None,
            condition_vars: vec![],
            condition_negated: false,
        }
    }

    /// Initial product state with `f` forced into the given lifecycle.
    fn state_with_f(sym_f: SymbolId, lifecycle: ResourceLifecycle) -> ProductState {
        let mut state = ProductState::initial();
        state.resource.set(sym_f, lifecycle);
        state
    }

    #[test]
    fn callee_matches_exact() {
        assert!(callee_matches("fopen", "fopen"));
        assert!(!callee_matches("fopen", "fclose"));
    }

    #[test]
    fn callee_matches_suffix() {
        assert!(callee_matches("curlx_fclose", "fclose"));
    }

    #[test]
    fn callee_matches_dot_prefix() {
        assert!(callee_matches("file.close", ".close"));
        assert!(!callee_matches("file.close", ".open"));
    }

    #[test]
    fn acquire_sets_open() {
        let (interner, sym_f) = interner_with_f();
        let transfer = c_transfer(&interner);

        // `f = fopen(..)`
        let info = NodeInfo {
            defines: Some("f".into()),
            ..call_node("fopen")
        };

        let (state, events) =
            transfer.apply(NodeIndex::new(0), &info, None, ProductState::initial());
        assert!(events.is_empty());
        assert_eq!(state.resource.get(sym_f), ResourceLifecycle::OPEN);
    }

    #[test]
    fn close_after_open_sets_closed() {
        let (interner, sym_f) = interner_with_f();
        let transfer = c_transfer(&interner);
        let before = state_with_f(sym_f, ResourceLifecycle::OPEN);

        // `fclose(f)` on an open handle
        let info = NodeInfo {
            span: (10, 20),
            uses: vec!["f".into()],
            ..call_node("fclose")
        };

        let (after, events) = transfer.apply(NodeIndex::new(1), &info, None, before);
        assert!(events.is_empty());
        assert_eq!(after.resource.get(sym_f), ResourceLifecycle::CLOSED);
    }

    #[test]
    fn double_close_emits_event() {
        let (interner, sym_f) = interner_with_f();
        let transfer = c_transfer(&interner);
        let before = state_with_f(sym_f, ResourceLifecycle::CLOSED);

        // `fclose(f)` on an already closed handle
        let info = NodeInfo {
            span: (20, 30),
            uses: vec!["f".into()],
            ..call_node("fclose")
        };

        let (_after, events) = transfer.apply(NodeIndex::new(2), &info, None, before);
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].kind, TransferEventKind::DoubleClose);
        assert_eq!(events[0].var, sym_f);
    }

    #[test]
    fn use_after_close_emits_event() {
        let (interner, sym_f) = interner_with_f();
        let transfer = c_transfer(&interner);
        let before = state_with_f(sym_f, ResourceLifecycle::CLOSED);

        // `fread(.., f)` on a closed handle
        let info = NodeInfo {
            span: (30, 40),
            uses: vec!["f".into()],
            ..call_node("fread")
        };

        let (_after, events) = transfer.apply(NodeIndex::new(3), &info, None, before);
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].kind, TransferEventKind::UseAfterClose);
    }

    #[test]
    fn is_guard_like_check() {
        assert!(is_guard_like("validate_input"));
        assert!(is_guard_like("sanitize_html"));
        assert!(is_guard_like("check_permission"));
        assert!(!is_guard_like("open_file"));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue