#![doc = include_str!(concat!(env!("OUT_DIR"), "/cfg_analysis.md"))]

pub mod auth;
pub mod dominators;
pub mod error_handling;
pub mod guards;
pub mod resources;
pub mod rules;
pub mod scoring;
#[cfg(test)]
mod tests;
pub mod unreachable;

use crate::cfg::{FuncSummaries, NodeInfo, StmtKind};
use crate::labels::{DataLabel, LangAnalysisRules};
use crate::patterns::Severity;
use crate::ssa::const_prop::ConstLattice;
use crate::ssa::type_facts::TypeFactResult;
use crate::ssa::{SsaBody, SsaValue};
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
use crate::taint;
use petgraph::graph::NodeIndex;
use std::collections::{HashMap, HashSet};

/// Per-body SSA facts used by structural analyses for finer-grained
/// constancy checks.
///
/// Produced once per body in `run_cfg_analyses` and passed to the analyses
/// via `AnalysisContext::body_const_facts`.
pub struct BodyConstFacts {
    /// The body lowered to SSA form (after the optimisation pass has run on it).
    pub ssa: SsaBody,
    /// Constant values reported by the SSA optimisation pass.
    pub const_values: HashMap,
    /// Type facts reported by the same SSA optimisation pass.
    pub type_facts: TypeFactResult,
    /// Field-sensitive Steensgaard points-to facts.
    ///
    /// Computed only when [`crate::pointer::is_enabled()`] returns `true`.
    /// `state::transfer.rs` consumes this to suppress proxy-acquire
    /// mis-attribution on field-aliased locals like `m := c.mu`. When
    /// `None`, consumers fall back to pointer-unaware behaviour.
    pub pointer_facts: Option,
}

/// Lower a body to SSA and run constant propagation.
///
/// Returns `None` when lowering fails (empty CFG, invalid entry); callers
/// treat absence as "no SSA facts available" and fall back to the syntactic
/// path.
pub fn build_body_const_facts(body: &crate::cfg::BodyCfg, lang: Lang) -> Option {
    // Lowering is fallible; `.ok()?` turns a lowering error into a `None` return.
    let mut ssa = crate::ssa::lower_to_ssa_with_params(
        &body.graph,
        body.entry,
        body.meta.name.as_deref(),
        body.meta.parent_body_id.is_none(),
        &body.meta.params,
    )
    .ok()?;
    // The optimisation pass mutates `ssa` in place and yields the constant
    // values and type facts stored in the result below.
    let opt = crate::ssa::optimize_ssa_with_param_types(
        &mut ssa,
        &body.graph,
        Some(lang),
        &body.meta.param_types,
    );
    // Points-to analysis is optional: it only runs when the pointer module
    // reports itself as enabled.
    let pointer_facts = if crate::pointer::is_enabled() {
        Some(crate::pointer::analyse_body(&ssa, body.meta.id))
    } else {
        None
    };
    Some(BodyConstFacts {
        ssa,
        const_values: opt.const_values,
        type_facts: opt.type_facts,
        pointer_facts,
    })
}

/// Confidence level attached to a [`CfgFinding`].
///
/// Variants are declared in ascending order, so the derived `Ord` gives
/// `Low < Medium < High`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Confidence {
    Low,
    Medium,
    High,
}

/// A single finding produced by a structural CFG analysis.
#[derive(Debug, Clone)]
pub struct CfgFinding {
    /// Rule identifier, e.g. `cfg-unguarded-sink` or `cfg-unreachable-sink`;
    /// `run_all` keys its de-duplication on this value.
    pub rule_id: String,
    #[allow(dead_code)]
    pub title: String,
    pub severity: Severity,
    pub confidence: Confidence,
    /// Source span of the finding. `run_all` compares it against the AST span
    /// of taint sinks and against the spans of other findings.
    pub span: (usize, usize),
    pub message: String,
    pub evidence: Vec,
    /// Ranking score; `run_all` sorts findings by this value in descending
    /// order after calling `scoring::score_findings`.
    pub score: Option,
}

/// Shared, read-only inputs handed to every [`CfgAnalysis`] run.
pub struct AnalysisContext<'a> {
    /// Control-flow graph under analysis; indexable by node (see `run_all`).
    pub cfg: &'a crate::cfg::Cfg,
    /// Entry node of `cfg`.
    pub entry: NodeIndex,
    /// Source language; selects the guard / auth / entry-point rule sets.
    pub lang: Lang,
    #[allow(dead_code)]
    pub file_path: &'a str,
    #[allow(dead_code)]
    pub source_bytes: &'a [u8],
    pub func_summaries: &'a FuncSummaries,
    #[allow(dead_code)]
    pub global_summaries: Option<&'a GlobalSummaries>,
    /// Findings already produced by the taint engine. `run_all` uses their
    /// sink spans to drop duplicate `cfg-unguarded-sink` findings.
    pub taint_findings: &'a [taint::Finding],
    /// Optional config-supplied rules; `is_guard_call` consults the
    /// sanitizer-labelled entries in `extra_labels`.
    pub analysis_rules: Option<&'a LangAnalysisRules>,
    /// Whether full taint analysis was active for this file (global summaries
    /// existed and taint engine ran). When false, structural findings without
    /// taint confirmation should be treated with lower confidence.
    pub taint_active: bool,
    /// Optional per-body SSA + constant-propagation facts. When present,
    /// structural analyses can use SSA const-prop to prove that all argument
    /// flows into a sink resolve to literal constants, suppressing false
    /// positives that the one-hop CFG trace alone cannot.
    pub body_const_facts: Option<&'a BodyConstFacts>,
    /// Optional per-body type-fact result produced by `optimize_ssa`.
    /// Structural analyses use it to suppress findings when a sink's argument
    /// SSA values are proven to carry non-injectable types (e.g. integers
    /// parsed from a raw source can't form SHELL/SQL/path payloads). Sourced
    /// from `body_const_facts` when present; keep both pointers coherent.
    pub type_facts: Option<&'a TypeFactResult>,
    /// Decorators / annotations / attributes attached to the body's
    /// declaration (e.g. Python `@login_required`, Java `@PreAuthorize`,
    /// Symfony `#[IsGranted(...)]`). Consumed by the AuthGap analysis to
    /// suppress `cfg-auth-gap` when the framework already enforces auth at
    /// the function-declaration level; the gap only matters when the
    /// auth call has to live inside the body.
    pub auth_decorators: &'a [String],
    /// Names of variables whose `.close()` / release calls live in a
    /// nested closure body somewhere else in the file (e.g.
    /// `socket.on("close", () => ws.close())`). ResourceMisuse uses this
    /// to suppress `cfg-resource-leak` for handles whose cleanup happens
    /// in a callback the per-body CFG can't observe. When `None`, no
    /// closure-based suppression is applied.
    pub closure_released_var_names: Option<&'a std::collections::HashSet>,
}

/// A structural analysis that inspects an [`AnalysisContext`] and reports
/// findings. `run_all` invokes every registered implementation.
pub trait CfgAnalysis {
    /// Static name of the analysis.
    #[allow(dead_code)]
    fn name(&self) -> &'static str;
    /// Execute the analysis against `ctx` and return its findings.
    fn run(&self, ctx: &AnalysisContext) -> Vec;
}

/// Run all registered analyses and return merged findings.
///
/// After collecting the findings of every analysis, `cfg-unguarded-sink`
/// findings are dropped when a taint finding or a `cfg-unreachable-sink`
/// finding covers the same span. The remainder is scored and returned sorted
/// by descending score.
pub fn run_all(ctx: &AnalysisContext) -> Vec {
    let analyses: Vec> = vec![
        Box::new(unreachable::UnreachableCode),
        Box::new(guards::UnguardedSink),
        Box::new(auth::AuthGap),
        Box::new(error_handling::IncompleteErrorHandling),
        Box::new(resources::ResourceMisuse),
    ];
    let mut findings: Vec = analyses.iter().flat_map(|a| a.run(ctx)).collect();

    // ── Dedup: suppress cfg-unguarded-sink when taint already covers the span ──
    // Collect the spans at which taint findings exist (AST span of each sink node).
    let taint_spans: HashSet<(usize, usize)> = ctx
        .taint_findings
        .iter()
        .map(|f| ctx.cfg[f.sink].ast.span)
        .collect();
    findings.retain(|f| {
        // If both taint and cfg-unguarded-sink fire on the same span,
        // suppress the structural CFG finding (taint is the primary signal).
        if f.rule_id == "cfg-unguarded-sink" && taint_spans.contains(&f.span) {
            return false;
        }
        true
    });

    // ── Dedup: suppress cfg-unguarded-sink when cfg-unreachable-sink covers the span ──
    let unreachable_spans: HashSet<(usize, usize)> = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unreachable-sink")
        .map(|f| f.span)
        .collect();
    findings.retain(|f| {
        if f.rule_id == "cfg-unguarded-sink" && unreachable_spans.contains(&f.span) {
            return false;
        }
        true
    });

    scoring::score_findings(&mut findings, ctx);
    // Descending by score (`b` is compared against `a`); scores that cannot be
    // ordered relative to each other are treated as equal.
    findings.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    findings
}

/// Helper: check whether a node is a guard call (validate, sanitize, check, etc.).
///
/// Only `StmtKind::Call` nodes with a known callee can match. The callee is
/// compared ASCII-case-insensitively against two matcher sources: the
/// sanitizer-labelled entries of `analysis_rules` (when provided), then the
/// built-in guard rules for `lang`. A matcher ending in `_` is a prefix
/// pattern; any other matcher must be a suffix of the callee.
pub(crate) fn is_guard_call(
    info: &NodeInfo,
    lang: Lang,
    analysis_rules: Option<&LangAnalysisRules>,
) -> bool {
    if info.kind != StmtKind::Call {
        return false;
    }
    if let Some(callee) = &info.call.callee {
        // Check config sanitizer rules
        if let Some(extras) = analysis_rules {
            let callee_lower = callee.to_ascii_lowercase();
            for rule in &extras.extra_labels {
                // Only sanitizer labels count as guards; skip every other label kind.
                if !matches!(rule.label, DataLabel::Sanitizer(_)) {
                    continue;
                }
                for m in &rule.matchers {
                    let ml = m.to_ascii_lowercase();
                    // Trailing `_` marks a prefix matcher; otherwise match as a suffix.
                    if ml.ends_with('_') {
                        if callee_lower.starts_with(&ml) {
                            return true;
                        }
                    } else if callee_lower.ends_with(&ml) {
                        return true;
                    }
                }
            }
        }
        // Check built-in guard rules
        let guard_rules = rules::guard_rules(lang);
        let callee_lower = callee.to_ascii_lowercase();
        for rule in guard_rules {
            for &m in rule.matchers {
                let ml = m.to_ascii_lowercase();
                // Same prefix/suffix convention as the config rules above.
                if ml.ends_with('_') {
                    if callee_lower.starts_with(&ml) {
                        return true;
                    }
                } else if callee_lower.ends_with(&ml) {
                    return true;
                }
            }
        }
    }
    false
}

/// Helper: check whether a node is an auth check call.
pub(crate) fn is_auth_call(info: &NodeInfo, lang: Lang) -> bool { if info.kind != StmtKind::Call { return false; } if let Some(callee) = &info.call.callee { let auth_rules = rules::auth_rules(lang); let callee_lower = callee.to_ascii_lowercase(); for rule in auth_rules { for &m in rule.matchers { let ml = m.to_ascii_lowercase(); if ml.ends_with('_') { if callee_lower.starts_with(&ml) { return true; } } else if callee_lower.ends_with(&ml) { return true; } } } } false } /// Helper: check if a function name looks like an entry point (HTTP handler, main, etc.). pub(crate) fn is_entry_point_func(func_name: &str, lang: Lang) -> bool { let ep_rules = rules::entry_point_rules(lang); let name_lower = func_name.to_ascii_lowercase(); for rule in ep_rules { for &m in rule.matchers { let ml = m.to_ascii_lowercase(); if ml.ends_with('*') { let prefix = &ml[..ml.len() - 1]; if name_lower.starts_with(prefix) { return true; } } else if name_lower == ml { return true; } } } false } /// Helper: check if a node is a sink. pub(crate) fn is_sink(info: &NodeInfo) -> bool { info.taint .labels .iter() .any(|l| matches!(l, DataLabel::Sink(_))) }