nyx/src/cfg_analysis/dominators.rs

155 lines
4.6 KiB
Rust
Raw Normal View History

Feat/full cfg (#30) * feat: Enhance control flow analysis with function summaries and taint analysis * feat: Update taint analysis to utilize function summaries for enhanced tracking * Refactor `walk.rs` batch processing and override handling: - Renamed `Batcher` to `BatchSender` for clarity. - Added `BatchSender::new` constructor for cleaner initialization. - Simplified batch size management in `BatchSender`. - Extracted `build_overrides` function for reusable override construction. - Improved error handling and validation in override building. - Enhanced performance with directory and file type filtering in `walk`. * Improve logging and streamline directory walk process: - Added detailed `tracing` logs for debugging batch flushes, override construction, and walk initialization/completion. - Optimized and simplified `filter_entry` logic for directory and file type filters. - Improved metadata checks and max file size enforcement during the scan. * Refactor and optimize taint tracking, label rules, and directory walk process: - Replaced `DefaultHasher` with `blake3::Hasher` for improved taint hashing. - Enhanced sorting and hashing logic in `taint.rs` for consistency and efficiency. - Removed unused `set_hash` function and redundant imports across files. - Improved batch sender logic in `walk.rs`, renaming key components for clarity. - Unified `spawn_senders` and `spawn_file_walker` with thread handling and channel tuple return. - Expanded label rules with additional matchers for sources, sanitizers, and sinks. - Deprecated `dump_cfg` and specific logging utilities in `cfg.rs` for code cleanup. 
* fix: fixed let chains error in walk.rs * fix: updated dependencies * fix: updated dependencies * chore: Remove standard error in scan.rs * feat: Introduce function summaries for enhanced taint and control flow analysis * feat: Enhance taint analysis with interop support and function summaries * feat: Add configuration analysis module and enhance matcher rules * feat: Add arity column to function_summaries and handle schema migration * fix: fixed clippy &PathBuf warnings * chore: Update dependencies and versioning in Cargo files * docs: Update README to enhance clarity and detail on features and analysis modes * chore: Update CHANGELOG for version 0.2.0 with new features, changes, and fixes * docs: Update SECURITY.md to clarify version support status --------- Co-authored-by: elipeter <eli.peter@es.fcm.travel>
2026-02-24 23:44:07 -05:00
use crate::cfg::{Cfg, EdgeKind, NodeInfo, StmtKind};
use crate::labels::DataLabel;
use petgraph::algo::dominators::{Dominators, simple_fast};
use petgraph::graph::NodeIndex;
use petgraph::prelude::*;
use petgraph::visit::Bfs;
use std::collections::HashSet;
/// Builds the forward dominator tree of `cfg`, rooted at `entry`.
///
/// This is a thin wrapper that hands the graph and the root straight to
/// petgraph's `simple_fast` and returns its result unchanged.
pub fn compute_dominators(cfg: &Cfg, entry: NodeIndex) -> Dominators<NodeIndex> {
    simple_fast(cfg, entry)
}
/// Builds the post-dominator tree of `cfg`.
///
/// The exit node is located with [`find_exit_node`]; an edge-reversed copy of
/// the graph is then created and the dominator algorithm is run on that copy,
/// rooted at the exit node.
///
/// The exit index found in `cfg` is used as-is as the root in the reversed
/// copy, so this depends on the copy keeping node indices unchanged.
///
/// Returns `None` when `cfg` contains no exit node; in that case the reversed
/// copy is never built.
pub fn compute_post_dominators(cfg: &Cfg) -> Option<Dominators<NodeIndex>> {
    find_exit_node(cfg).map(|exit_node| {
        let flipped = build_reversed_graph(cfg);
        simple_fast(&flipped, exit_node)
    })
}
/// Collects every node a breadth-first traversal visits when started at `entry`.
///
/// The returned set holds each node yielded by the traversal, which includes
/// `entry` itself.
pub fn reachable_set(cfg: &Cfg, entry: NodeIndex) -> HashSet<NodeIndex> {
    let mut walker = Bfs::new(cfg, entry);
    // Drain the traversal until it reports no further nodes.
    std::iter::from_fn(|| walker.next(cfg)).collect()
}
/// Returns the first node, in index order, whose kind is `StmtKind::Exit`.
///
/// Returns `None` when no node of that kind is present.
pub fn find_exit_node(cfg: &Cfg) -> Option<NodeIndex> {
    for candidate in cfg.node_indices() {
        if cfg[candidate].kind == StmtKind::Exit {
            return Some(candidate);
        }
    }
    None
}
/// Lists every node whose label is `Some(DataLabel::Sink(_))`.
///
/// Nodes are returned in the order `cfg.node_indices()` yields them; the
/// result is empty when no node carries a sink label.
pub fn find_sink_nodes(cfg: &Cfg) -> Vec<NodeIndex> {
    let mut sinks = Vec::new();
    for candidate in cfg.node_indices() {
        let is_sink = matches!(cfg[candidate].label, Some(DataLabel::Sink(_)));
        if is_sink {
            sinks.push(candidate);
        }
    }
    sinks
}
/// Reports whether `dominator` dominates `target` according to `doms`.
///
/// A node is always considered to dominate itself, so equal arguments yield
/// `true` without consulting the tree. Otherwise the chain of immediate
/// dominators is followed upward from `target`; the answer is `true` as soon
/// as `dominator` appears on that chain and `false` once the chain ends.
pub fn dominates(doms: &Dominators<NodeIndex>, dominator: NodeIndex, target: NodeIndex) -> bool {
    let mut cursor = target;
    loop {
        if cursor == dominator {
            return true;
        }
        match doms.immediate_dominator(cursor) {
            // Keep climbing while the tree offers a distinct parent.
            Some(parent) if parent != cursor => cursor = parent,
            // No parent, or a node listed as its own parent: the walk is over.
            _ => return false,
        }
    }
}
/// Build a reversed copy of the graph (swap edge directions).
fn build_reversed_graph(cfg: &Cfg) -> Graph<NodeInfo, EdgeKind> {
let mut rev = Graph::<NodeInfo, EdgeKind>::with_capacity(cfg.node_count(), cfg.edge_count());
// Clone nodes (preserving indices)
let mut index_map = Vec::with_capacity(cfg.node_count());
for idx in cfg.node_indices() {
let new_idx = rev.add_node(cfg[idx].clone());
index_map.push((idx, new_idx));
}
// Add edges in reverse direction
for edge in cfg.edge_references() {
let src = edge.source();
let tgt = edge.target();
// Find the new indices
let new_src = index_map
.iter()
.find(|(old, _)| *old == tgt)
.map(|(_, new)| *new)
.unwrap();
let new_tgt = index_map
.iter()
.find(|(old, _)| *old == src)
.map(|(_, new)| *new)
.unwrap();
rev.add_edge(new_src, new_tgt, *edge.weight());
}
rev
}
/// Finds all `StmtKind::Call` nodes whose callee name matches any of `matchers`.
///
/// Matching is ASCII case-insensitive and depends on the matcher's shape:
///
/// * a matcher ending in `_` is treated as a prefix: the callee must start with it;
/// * any other matcher is treated as a suffix: the callee must end with it.
///
/// Nodes that are not calls, or that have no callee recorded, never match.
/// Results are returned in `cfg.node_indices()` order.
#[allow(dead_code)]
pub fn find_call_nodes_matching(cfg: &Cfg, matchers: &[&str]) -> Vec<NodeIndex> {
    // Lowercase the matchers once up front; doing it inside the filter would
    // allocate a fresh string per matcher for every call node.
    let lowered: Vec<String> = matchers.iter().map(|m| m.to_ascii_lowercase()).collect();

    cfg.node_indices()
        .filter(|&idx| {
            let node = &cfg[idx];
            if node.kind != StmtKind::Call {
                return false;
            }
            if let Some(callee) = &node.callee {
                let callee_lower = callee.to_ascii_lowercase();
                lowered.iter().any(|ml| {
                    if ml.ends_with('_') {
                        callee_lower.starts_with(ml.as_str())
                    } else {
                        callee_lower.ends_with(ml.as_str())
                    }
                })
            } else {
                false
            }
        })
        .collect()
}
/// Check if there exists any path from `from` to `to` in the CFG.
#[allow(dead_code)]
pub fn has_path(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> bool {
let reachable = reachable_set(cfg, from);
reachable.contains(&to)
}
/// Returns the minimum number of edges on a path from `from` to `to`.
///
/// The search is breadth-first over `cfg.neighbors(..)`, expanding one whole
/// distance level at a time. Yields `Some(0)` when both arguments are the
/// same node and `None` when `to` is never encountered.
pub fn shortest_distance(cfg: &Cfg, from: NodeIndex, to: NodeIndex) -> Option<usize> {
    if from == to {
        return Some(0);
    }

    let mut seen = HashSet::new();
    seen.insert(from);

    // `frontier` holds every node at distance `hops` from `from`.
    let mut frontier = vec![from];
    let mut hops = 0usize;

    while !frontier.is_empty() {
        hops += 1;
        let mut next_level = Vec::new();
        for node in frontier {
            for neighbor in cfg.neighbors(node) {
                if neighbor == to {
                    return Some(hops);
                }
                // Only enqueue nodes the search has not met before.
                if seen.insert(neighbor) {
                    next_level.push(neighbor);
                }
            }
        }
        frontier = next_level;
    }

    None
}