Feat/configurable sanitizers and js precision (#32)

* chore: Exclude CLAUDE.md from Cargo.toml

* feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators

* feat: Enhance resource management and analysis efficiency

- Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance.
- Introduced `GlobalSummaries::merge()` for efficient merging of summaries.
- Optimized file reading and hashing to eliminate redundant I/O operations.
- Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing.
- Enhanced taint analysis with in-place mutations to reduce memory allocations.
- Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions.

* feat: Implement severity downgrade for findings in non-production paths and add source kind inference

* feat: Update versioning information in SECURITY.md for new stable line

* feat: Update categories in Cargo.toml to include parser-implementations and text-processing

* feat: Update dependencies in Cargo.lock for improved compatibility and performance

* feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
Eli Peter 2026-02-25 04:02:11 -05:00 committed by GitHub
parent f96a89e7c1
commit 19b578c5c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 3775 additions and 432 deletions

View file

@ -2,15 +2,75 @@ use super::dominators::{self, dominates};
use super::rules;
use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_entry_point_func};
use crate::cfg::StmtKind;
use crate::labels::{Cap, DataLabel};
use crate::labels::{Cap, DataLabel, RuntimeLabelRule};
use crate::patterns::Severity;
use petgraph::graph::NodeIndex;
/// Unit type that carries the `CfgAnalysis` implementation for the
/// unguarded-sink pass (see `impl CfgAnalysis for UnguardedSink`).
// NOTE(review): findings of this pass appear to use the `cfg-unguarded-sink`
// rule id — confirm against the `run` implementation.
pub struct UnguardedSink;
/// Check whether **all** arguments to the sink are constants (no taint-capable
/// variable flows).
///
/// Each name in the sink node's `uses` is accepted as constant when either:
///
/// - it is one of the `.`/`:`-separated segments of the callee text (the name
///   is part of the call target itself, not an argument), or
/// - a one-hop trace through the CFG finds at least one node in the sink's
///   enclosing function that defines it, and **every** such defining node has
///   empty `uses` and no `Source` label
///   (e.g. `let cmd = "git"; Command::new(cmd)`).
///
/// The trace is flow-insensitive: it cannot tell which definition reaches the
/// sink. Requiring every definition to be a constant binding keeps a variable
/// that is first bound to a literal and later reassigned from a parameter or
/// another variable from being misclassified as constant.
///
/// Returns `true` when the sink has no `uses` at all. Names with no defining
/// node in the function (such as parameters) make the result `false`.
fn is_all_args_constant(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
    let sink_info = &ctx.cfg[sink];
    let callee_desc = sink_info.callee.as_deref().unwrap_or("");
    let callee_parts: Vec<&str> = callee_desc.split(['.', ':']).collect();
    let sink_func = sink_info.enclosing_func.as_deref();

    sink_info.uses.iter().all(|u| {
        // Part of the callee name itself → constant
        if callee_parts.contains(&u.as_str()) {
            return true;
        }

        // One-hop trace: every node in the same function that defines `u`.
        let mut definitions = ctx
            .cfg
            .node_indices()
            .map(|idx| &ctx.cfg[idx])
            .filter(|info| {
                info.enclosing_func.as_deref() == sink_func
                    && info.defines.as_deref() == Some(u.as_str())
            })
            .peekable();

        // No definition at all (e.g. a parameter) → not provably constant.
        // Otherwise every definition must be a pure constant binding: no
        // inputs of its own and not labelled as a Source.
        definitions.peek().is_some()
            && definitions.all(|info| {
                info.uses.is_empty() && !matches!(info.label, Some(DataLabel::Source(_)))
            })
    })
}
/// Look up `callee` among the configured runtime label rules and return the
/// capability of the first sanitizer rule that matches it.
///
/// Rules whose label is not `DataLabel::Sanitizer` are ignored. Matching is
/// ASCII case-insensitive: a matcher ending in `_` is treated as a prefix of
/// the callee, any other matcher as a suffix. Returns `None` when no
/// sanitizer rule matches.
fn match_config_sanitizer(callee: &str, extra: &[RuntimeLabelRule]) -> Option<Cap> {
    let callee_lower = callee.to_ascii_lowercase();

    extra.iter().find_map(|rule| {
        let DataLabel::Sanitizer(cap) = rule.label else {
            return None;
        };

        let matched = rule.matchers.iter().any(|matcher| {
            let pattern = matcher.to_ascii_lowercase();
            if pattern.ends_with('_') {
                // Trailing underscore marks a prefix pattern.
                callee_lower.starts_with(&pattern)
            } else {
                callee_lower.ends_with(&pattern)
            }
        });

        matched.then_some(cap)
    })
}
/// Find all nodes in the CFG that are calls to guard functions.
fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
let guard_rules = rules::guard_rules(ctx.lang);
let config_rules = ctx
.analysis_rules
.map(|r| r.extra_labels.as_slice())
.unwrap_or(&[]);
let mut result = Vec::new();
for idx in ctx.cfg.node_indices() {
@ -19,6 +79,13 @@ fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
continue;
}
if let Some(callee) = &info.callee {
// Check config sanitizer rules first
if let Some(cap) = match_config_sanitizer(callee, config_rules) {
result.push((idx, cap));
continue;
}
// Then check built-in guard rules
let callee_lower = callee.to_ascii_lowercase();
for rule in guard_rules {
let matched = rule.matchers.iter().any(|m| {
@ -174,6 +241,13 @@ impl CfgAnalysis for UnguardedSink {
let has_taint = taint_confirms_sink(ctx, *sink);
let source_derived = sink_arg_is_source_derived(ctx, *sink);
// If sink args are all constants (including one-hop constant bindings)
// and taint didn't confirm, this is a false positive — skip it.
if is_all_args_constant(ctx, *sink) && !has_taint && !source_derived {
continue;
}
let param_only = sink_arg_is_parameter_only(ctx, *sink);
let in_entrypoint = sink_in_entrypoint(ctx, *sink);
@ -183,6 +257,9 @@ impl CfgAnalysis for UnguardedSink {
} else if param_only && !in_entrypoint {
// Wrapper function consuming only parameters → LOW
(Severity::Low, Confidence::Low)
} else if !ctx.taint_active && !source_derived {
// CFG-only mode without taint confirmation → LOW
(Severity::Low, Confidence::Low)
} else if in_entrypoint && !param_only {
// Entrypoint with non-parameter args but no taint confirmation → MEDIUM
(Severity::Medium, Confidence::Medium)

View file

@ -10,7 +10,7 @@ mod tests;
pub mod unreachable;
use crate::cfg::{FuncSummaries, NodeInfo, StmtKind};
use crate::labels::DataLabel;
use crate::labels::{DataLabel, LangAnalysisRules};
use crate::patterns::Severity;
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
@ -51,6 +51,11 @@ pub struct AnalysisContext<'a> {
#[allow(dead_code)]
pub global_summaries: Option<&'a GlobalSummaries>,
pub taint_findings: &'a [taint::Finding],
pub analysis_rules: Option<&'a LangAnalysisRules>,
/// Whether full taint analysis was active for this file (global summaries
/// existed and taint engine ran). When false, structural findings without
/// taint confirmation should be treated with lower confidence.
pub taint_active: bool,
}
pub trait CfgAnalysis {
@ -87,6 +92,20 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
true
});
// ── Dedup: suppress cfg-unguarded-sink when cfg-unreachable-sink covers the span ──
let unreachable_spans: HashSet<(usize, usize)> = findings
.iter()
.filter(|f| f.rule_id == "cfg-unreachable-sink")
.map(|f| f.span)
.collect();
findings.retain(|f| {
if f.rule_id == "cfg-unguarded-sink" && unreachable_spans.contains(&f.span) {
return false;
}
true
});
scoring::score_findings(&mut findings, ctx);
findings.sort_by(|a, b| {
b.score
@ -97,11 +116,36 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
}
/// Helper: check whether a node is a guard call (validate, sanitize, check, etc.).
pub(crate) fn is_guard_call(info: &NodeInfo, lang: Lang) -> bool {
pub(crate) fn is_guard_call(
info: &NodeInfo,
lang: Lang,
analysis_rules: Option<&LangAnalysisRules>,
) -> bool {
if info.kind != StmtKind::Call {
return false;
}
if let Some(callee) = &info.callee {
// Check config sanitizer rules
if let Some(extras) = analysis_rules {
let callee_lower = callee.to_ascii_lowercase();
for rule in &extras.extra_labels {
if !matches!(rule.label, DataLabel::Sanitizer(_)) {
continue;
}
for m in &rule.matchers {
let ml = m.to_ascii_lowercase();
if ml.ends_with('_') {
if callee_lower.starts_with(&ml) {
return true;
}
} else if callee_lower.ends_with(&ml) {
return true;
}
}
}
}
// Check built-in guard rules
let guard_rules = rules::guard_rules(lang);
let callee_lower = callee.to_ascii_lowercase();
for rule in guard_rules {

View file

@ -8,8 +8,13 @@ use std::collections::HashSet;
/// Unit type that carries the `CfgAnalysis` implementation registered under
/// the name `"resource-misuse"` (see `impl CfgAnalysis for ResourceMisuse`).
pub struct ResourceMisuse;
/// Find nodes matching acquire patterns for a given resource pair.
fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<NodeIndex> {
/// Find nodes matching acquire patterns for a given resource pair,
/// excluding any that match `exclude_patterns`.
fn find_acquire_nodes(
ctx: &AnalysisContext,
acquire_patterns: &[&str],
exclude_patterns: &[&str],
) -> Vec<NodeIndex> {
ctx.cfg
.node_indices()
.filter(|&idx| {
@ -19,6 +24,16 @@ fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<N
}
if let Some(callee) = &info.callee {
let callee_lower = callee.to_ascii_lowercase();
// Check exclusions first — if the callee matches an exclude
// pattern, it is NOT an acquire even if it also matches an
// acquire pattern (e.g. `freopen` ends with `fopen`).
let excluded = exclude_patterns.iter().any(|p| {
let pl = p.to_ascii_lowercase();
callee_lower.ends_with(&pl) || callee_lower == pl
});
if excluded {
return false;
}
acquire_patterns.iter().any(|p| {
let pl = p.to_ascii_lowercase();
callee_lower.ends_with(&pl) || callee_lower == pl
@ -113,6 +128,204 @@ fn all_paths_pass_through(
true
}
/// Check whether the acquired variable is stored into a struct field (ownership
/// transfer) downstream of the acquire node. Patterns recognised:
/// - `ptr->field = var`  (C arrow operator)
/// - `obj.field = var`   (C dot / generic field store)
/// - `list->next = ...`  (linked-list insertion)
///
/// If the variable is transferred, there is no leak — the receiving struct is
/// responsible for the lifetime.
///
/// The search is a breadth-first walk over CFG successors of `acquire`. A node
/// counts as a transfer when it references the variable (in `uses` or
/// `defines`) and its source span contains `->` anywhere, or a `.field =`
/// assignment as recognised by `has_dot_field_assignment`. This is a textual
/// heuristic on the span, not a syntactic check.
///
/// Returns `false` when the acquire node defines no variable, or when no such
/// node is reachable.
fn is_ownership_transferred(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
    use std::collections::VecDeque;

    let Some(acquired_var) = ctx.cfg[acquire].defines.as_ref() else {
        return false;
    };

    let mut visited = HashSet::new();
    let mut queue = VecDeque::new();
    for succ in ctx.cfg.neighbors(acquire) {
        if visited.insert(succ) {
            queue.push_back(succ);
        }
    }

    while let Some(node) = queue.pop_front() {
        let info = &ctx.cfg[node];
        let (start, end) = info.span;

        let redefines_var = info.defines.as_ref() == Some(acquired_var);
        let references_var = redefines_var || info.uses.iter().any(|u| u == acquired_var);

        // Inspect the source text only for nodes that touch the variable and
        // whose span is a valid, non-empty range into the source buffer.
        if references_var && start < end && end <= ctx.source_bytes.len() {
            let span_text = &ctx.source_bytes[start..end];
            // `->` anywhere in span means pointer-to-member store;
            // otherwise look for `.field = var` (but not `==`).
            if span_text.windows(2).any(|w| w == b"->") || has_dot_field_assignment(span_text) {
                return true;
            }
        }

        // A node that defines the variable and was not recognised as a field
        // write above is a genuine redefinition: the original value does not
        // flow past it, so this path is not followed any further.
        if redefines_var {
            continue;
        }

        for succ in ctx.cfg.neighbors(node) {
            if visited.insert(succ) {
                queue.push_back(succ);
            }
        }
    }

    false
}
/// Report whether `span_text` contains a dot-field assignment such as
/// `obj.field = var`.
///
/// For every `.` in the text, the bytes that follow are scanned: first any
/// run of ASCII alphanumerics or `_` (the field name, possibly empty), then
/// any run of ASCII whitespace. The text matches when the next byte is `=`
/// and the byte after that is not another `=`, so `a.b == c` and
/// `obj.method(...)` do not match.
fn has_dot_field_assignment(span_text: &[u8]) -> bool {
    span_text.iter().enumerate().any(|(dot_pos, &byte)| {
        if byte != b'.' {
            return false;
        }

        // Everything after the dot, minus the field name and the whitespace
        // that separates it from a potential `=`.
        let mut tail = span_text[dot_pos + 1..]
            .iter()
            .copied()
            .skip_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
            .skip_while(u8::is_ascii_whitespace);

        // A single `=` (end of input after it also counts), never `==`.
        tail.next() == Some(b'=') && tail.next() != Some(b'=')
    })
}
/// Decide whether the variable bound by `acquire` is handed to a function
/// that takes ownership of it (e.g. `FileResponse(f)`, `send_file(f)`)
/// somewhere downstream, in which case there is no leak to report.
///
/// CFG successors of `acquire` are walked breadth-first. A node that uses the
/// variable counts as consuming it when either
/// - it is a `Call` whose callee ends (ASCII case-insensitively) with one of
///   the consuming names, or
/// - its source span contains one of the consuming names (case-insensitive),
///   which covers calls embedded in another statement such as
///   `return FileResponse(f)`.
///
/// Returns `false` when the acquire node defines no variable.
fn is_consumed_by_owner(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
    use std::collections::VecDeque;

    static CONSUMING_SINKS: &[&str] = &[
        "fileresponse",
        "streaminghttpresponse",
        "send_file",
        "make_response",
    ];

    let Some(handle) = ctx.cfg[acquire].defines.as_ref() else {
        return false;
    };

    let mut seen = HashSet::new();
    let mut pending = VecDeque::new();
    for next in ctx.cfg.neighbors(acquire) {
        if seen.insert(next) {
            pending.push_back(next);
        }
    }

    while let Some(current) = pending.pop_front() {
        let info = &ctx.cfg[current];

        // Both checks below only apply to nodes that read the handle.
        if info.uses.iter().any(|u| u == handle) {
            // Direct call to a consuming function.
            let consuming_call = info.kind == StmtKind::Call
                && info.callee.as_ref().is_some_and(|callee| {
                    let callee_lower = callee.to_ascii_lowercase();
                    CONSUMING_SINKS.iter().any(|s| callee_lower.ends_with(s))
                });
            if consuming_call {
                return true;
            }

            // Consuming call mentioned anywhere in the node's source text.
            // The names above are all lowercase, so a case-insensitive
            // window comparison equals matching against a lowercased span.
            let (start, end) = info.span;
            if start < end && end <= ctx.source_bytes.len() {
                let span_text = &ctx.source_bytes[start..end];
                let mentioned = CONSUMING_SINKS.iter().any(|s| {
                    span_text
                        .windows(s.len())
                        .any(|w| w.eq_ignore_ascii_case(s.as_bytes()))
                });
                if mentioned {
                    return true;
                }
            }
        }

        for next in ctx.cfg.neighbors(current) {
            if seen.insert(next) {
                pending.push_back(next);
            }
        }
    }

    false
}
/// For mutex pairs, check that an explicit lock operation exists for the
/// resource acquired at `acquire`. If only a constructor
/// (e.g. `threading.Lock()`) is observed without a lock call, the caller
/// skips the finding.
///
/// A lock operation is a call whose lowercased callee ends with `.acquire`
/// or `.lock`, or equals `pthread_mutex_lock`. The function returns `true`
/// when either:
///
/// 1. the acquire node itself is such a lock operation. This is the case for
///    acquire patterns like `pthread_mutex_lock`, `.Lock` and `.lock`, where
///    the matched call *is* the lock. Such a call usually binds no variable
///    (or only a return code), so searching for a second lock call on its
///    result would never succeed and the finding would always be dropped; or
/// 2. some `Call` node in the CFG is a lock operation and uses the variable
///    defined by the acquire node (constructor-style acquires).
///
/// `threading.Lock` lowercases to a name ending in `.lock` but is a
/// constructor, so it is excluded from rule 1 and must satisfy rule 2.
fn has_explicit_lock_acquire(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
    /// Callee-name test for a lock operation; expects a lowercased callee.
    fn is_lock_call(callee_lower: &str) -> bool {
        callee_lower.ends_with(".acquire")
            || callee_lower.ends_with(".lock")
            || callee_lower == "pthread_mutex_lock"
    }

    let acquire_info = &ctx.cfg[acquire];

    // Rule 1: the acquire call is itself the lock operation.
    if let Some(callee) = &acquire_info.callee {
        let callee_lower = callee.to_ascii_lowercase();
        let is_constructor = callee_lower.ends_with("threading.lock");
        if is_lock_call(&callee_lower) && !is_constructor {
            return true;
        }
    }

    // Rule 2: a separate lock call operates on the variable bound here.
    let Some(acquired_var) = acquire_info.defines.as_ref() else {
        return false;
    };

    ctx.cfg.node_indices().any(|idx| {
        let info = &ctx.cfg[idx];
        info.kind == StmtKind::Call
            && info
                .callee
                .as_ref()
                .is_some_and(|callee| is_lock_call(&callee.to_ascii_lowercase()))
            && info.uses.iter().any(|u| u == acquired_var)
    })
}
impl CfgAnalysis for ResourceMisuse {
fn name(&self) -> &'static str {
"resource-misuse"
@ -128,11 +341,18 @@ impl CfgAnalysis for ResourceMisuse {
let mut findings = Vec::new();
for pair in pairs {
let acquire_nodes = find_acquire_nodes(ctx, pair.acquire);
let acquire_nodes = find_acquire_nodes(ctx, pair.acquire, pair.exclude_acquire);
let release_nodes = find_release_nodes(ctx, pair.release);
for &acquire in &acquire_nodes {
if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit) {
if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit)
&& !is_ownership_transferred(ctx, acquire)
&& !is_consumed_by_owner(ctx, acquire)
{
// For mutex pairs, require an explicit .acquire()/.lock() call
if pair.resource_name == "mutex" && !has_explicit_lock_acquire(ctx, acquire) {
continue;
}
let info = &ctx.cfg[acquire];
let callee_desc = info.callee.as_deref().unwrap_or("(acquire)");

View file

@ -21,6 +21,9 @@ pub struct EntryPointRule {
/// One acquire/release rule for the resource-leak analysis: the call names
/// that obtain a resource and the call names that give it back.
pub struct ResourcePair {
/// Callee-name patterns that mark a call as acquiring the resource.
/// `find_acquire_nodes` lowercases both sides and accepts a callee that
/// ends with (or equals) one of these patterns.
pub acquire: &'static [&'static str],
/// Callee-name patterns that mark a call as releasing the resource.
// NOTE(review): presumably matched by `find_release_nodes` the same way as
// `acquire` — that function is not visible here; confirm.
pub release: &'static [&'static str],
/// Patterns that look like acquire calls (e.g. `freopen` ends with `open`)
/// but should NOT be treated as acquisitions. Checked before `acquire`,
/// with the same case-insensitive suffix/equality comparison.
pub exclude_acquire: &'static [&'static str],
/// Human-readable name of the resource kind. The value `"mutex"` is also
/// used by `ResourceMisuse` to require an explicit lock call before a
/// finding is reported.
pub resource_name: &'static str,
}
@ -47,6 +50,16 @@ static COMMON_GUARDS: &[GuardRule] = &[
matchers: &["url_encode", "encode_uri", "urlencode"],
applies_to_sink_caps: Cap::URL_ENCODE,
},
GuardRule {
matchers: &[
"which",
"resolve_binary",
"find_program",
"lookup_path",
"shutil.which",
],
applies_to_sink_caps: Cap::SHELL_ESCAPE,
},
];
pub fn guard_rules(_lang: Lang) -> &'static [GuardRule] {
@ -168,21 +181,25 @@ static C_RESOURCES: &[ResourcePair] = &[
ResourcePair {
acquire: &["malloc", "calloc", "realloc"],
release: &["free"],
exclude_acquire: &[],
resource_name: "memory",
},
ResourcePair {
acquire: &["fopen"],
release: &["fclose"],
acquire: &["fopen", "fdopen", "curlx_fopen", "curlx_fdopen"],
release: &["fclose", "curlx_fclose"],
exclude_acquire: &["freopen", "curlx_freopen"],
resource_name: "file handle",
},
ResourcePair {
acquire: &["open"],
release: &["close"],
exclude_acquire: &["freopen", "curlx_freopen"],
resource_name: "file descriptor",
},
ResourcePair {
acquire: &["pthread_mutex_lock"],
release: &["pthread_mutex_unlock"],
exclude_acquire: &[],
resource_name: "mutex",
},
];
@ -191,11 +208,13 @@ static GO_RESOURCES: &[ResourcePair] = &[
ResourcePair {
acquire: &["os.Open", "os.Create", "os.OpenFile"],
release: &[".Close"],
exclude_acquire: &[],
resource_name: "file handle",
},
ResourcePair {
acquire: &[".Lock"],
release: &[".Unlock"],
exclude_acquire: &[],
resource_name: "mutex",
},
];
@ -205,6 +224,7 @@ static RUST_RESOURCES: &[ResourcePair] = &[
ResourcePair {
acquire: &["alloc"],
release: &["dealloc"],
exclude_acquire: &[],
resource_name: "raw memory",
},
];
@ -217,10 +237,93 @@ static JAVA_RESOURCES: &[ResourcePair] = &[ResourcePair {
"openConnection",
],
release: &[".close"],
exclude_acquire: &[],
resource_name: "stream/connection",
}];
static EMPTY_RESOURCES: &[ResourcePair] = &[];
/// Acquire/release pairs for Python; returned by `resource_pairs` for
/// `Lang::Python`. Acquire and exclude patterns are compared
/// case-insensitively against the end of the callee text by
/// `find_acquire_nodes`.
static PYTHON_RESOURCES: &[ResourcePair] = &[
// NOTE(review): as a suffix, `open` also matches callees such as `os.popen`
// or `urlopen`; confirm that is intended.
ResourcePair {
acquire: &["open"],
release: &[".close"],
exclude_acquire: &[],
resource_name: "file handle",
},
// `socket.socket` is already covered by the bare `socket` suffix; it is
// listed for readability only.
ResourcePair {
acquire: &["socket.socket", "socket"],
release: &[".close"],
exclude_acquire: &[],
resource_name: "socket",
},
// The exclusions keep signal/event style `connect` calls from being
// reported as database connections.
// NOTE(review): `.register` cannot coincide with a callee ending in
// `connect` or `cursor`, so that entry never changes the outcome.
ResourcePair {
acquire: &["connect", "cursor"],
release: &[".close"],
exclude_acquire: &["signal.connect", "event.connect", ".register"],
resource_name: "db connection",
},
// Acquire patterns here are lock *constructors*. Because the resource name
// is "mutex", `ResourceMisuse` only reports a finding when an explicit
// lock call is also found.
ResourcePair {
acquire: &["threading.Lock", "threading.RLock"],
release: &[".release"],
exclude_acquire: &[],
resource_name: "mutex",
},
];
/// Acquire/release pairs for Ruby; returned by `resource_pairs` for
/// `Lang::Ruby`. Acquire patterns are compared case-insensitively against the
/// end of the callee text by `find_acquire_nodes`.
static RUBY_RESOURCES: &[ResourcePair] = &[
// `File.open` is already covered by the bare `open` suffix.
ResourcePair {
acquire: &["File.open", "open"],
release: &[".close"],
exclude_acquire: &[],
resource_name: "file handle",
},
ResourcePair {
acquire: &["TCPSocket.new", "UDPSocket.new"],
release: &[".close"],
exclude_acquire: &[],
resource_name: "socket",
},
// Here the acquire pattern is the lock operation itself (`m.lock`), not a
// constructor.
ResourcePair {
acquire: &[".lock"],
release: &[".unlock"],
exclude_acquire: &[],
resource_name: "mutex",
},
];
/// Acquire/release pairs for PHP; returned by `resource_pairs` for
/// `Lang::Php`. Acquire and exclude patterns are compared case-insensitively
/// against the end of the callee text by `find_acquire_nodes`.
static PHP_RESOURCES: &[ResourcePair] = &[
// NOTE(review): a callee ending in `freopen` does not end in `fopen`
// (its last five bytes are `eopen`), so this exclusion never changes the
// result for this pair.
ResourcePair {
acquire: &["fopen"],
release: &["fclose"],
exclude_acquire: &["freopen"],
resource_name: "file handle",
},
ResourcePair {
acquire: &["mysqli_connect"],
release: &["mysqli_close"],
exclude_acquire: &[],
resource_name: "db connection",
},
ResourcePair {
acquire: &["curl_init"],
release: &["curl_close"],
exclude_acquire: &[],
resource_name: "curl handle",
},
];
/// Acquire/release pairs shared by JavaScript and TypeScript; returned by
/// `resource_pairs` for `Lang::JavaScript | Lang::TypeScript`. Acquire
/// patterns are compared case-insensitively against the end of the callee
/// text by `find_acquire_nodes`.
static JS_RESOURCES: &[ResourcePair] = &[
// Descriptor-based `fs` API: both the callback and the `Sync` variants.
ResourcePair {
acquire: &["fs.open", "fs.openSync"],
release: &["fs.close", "fs.closeSync"],
exclude_acquire: &[],
resource_name: "file descriptor",
},
// Stream constructors; either `.close` or `.destroy` counts as a release.
ResourcePair {
acquire: &["createReadStream", "createWriteStream"],
release: &[".close", ".destroy"],
exclude_acquire: &[],
resource_name: "stream",
},
];
pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
match lang {
@ -229,6 +332,9 @@ pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
Lang::Go => GO_RESOURCES,
Lang::Rust => RUST_RESOURCES,
Lang::Java => JAVA_RESOURCES,
_ => EMPTY_RESOURCES,
Lang::Python => PYTHON_RESOURCES,
Lang::Ruby => RUBY_RESOURCES,
Lang::Php => PHP_RESOURCES,
Lang::JavaScript | Lang::TypeScript => JS_RESOURCES,
}
}

View file

@ -14,7 +14,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
let mut parser = tree_sitter::Parser::new();
parser.set_language(&ts_lang).unwrap();
let tree = parser.parse(src, None).unwrap();
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
let lang = Lang::from_slug(lang_str).unwrap();
let ctx = AnalysisContext {
cfg: &cfg,
@ -25,6 +25,8 @@ fn parse_and_analyse<A: CfgAnalysis>(
func_summaries: &summaries,
global_summaries: None,
taint_findings: &[],
analysis_rules: None,
taint_active: true,
};
analysis.run(&ctx)
}
@ -34,7 +36,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
let mut parser = tree_sitter::Parser::new();
parser.set_language(&ts_lang).unwrap();
let tree = parser.parse(src, None).unwrap();
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
let lang = Lang::from_slug(lang_str).unwrap();
let ctx = AnalysisContext {
cfg: &cfg,
@ -45,6 +47,8 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
func_summaries: &summaries,
global_summaries: None,
taint_findings: &[],
analysis_rules: None,
taint_active: true,
};
run_all(&ctx)
}
@ -59,7 +63,7 @@ fn parse_and_run_all_with_taint(
let mut parser = tree_sitter::Parser::new();
parser.set_language(&ts_lang).unwrap();
let tree = parser.parse(src, None).unwrap();
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
let lang = Lang::from_slug(lang_str).unwrap();
let ctx = AnalysisContext {
cfg: &cfg,
@ -70,6 +74,8 @@ fn parse_and_run_all_with_taint(
func_summaries: &summaries,
global_summaries: None,
taint_findings,
analysis_rules: None,
taint_active: true,
};
run_all(&ctx)
}
@ -144,7 +150,7 @@ fn unreachable_detects_orphaned_nodes() {
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
.unwrap();
let tree = parser.parse(src as &[u8], None).unwrap();
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
// All nodes in linear code should be reachable
let reachable = dominators::reachable_set(&cfg, entry);
@ -469,7 +475,7 @@ fn reachable_set_contains_all_connected_nodes() {
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
.unwrap();
let tree = parser.parse(src as &[u8], None).unwrap();
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
let reachable = dominators::reachable_set(&cfg, entry);
@ -493,7 +499,7 @@ fn find_exit_node_exists() {
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
.unwrap();
let tree = parser.parse(src as &[u8], None).unwrap();
let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs");
let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs", None);
let exit = dominators::find_exit_node(&cfg);
assert!(exit.is_some(), "Should find an exit node");
@ -512,7 +518,7 @@ fn shortest_distance_basic() {
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
.unwrap();
let tree = parser.parse(src as &[u8], None).unwrap();
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
let exit = dominators::find_exit_node(&cfg).unwrap();
let dist = dominators::shortest_distance(&cfg, entry, exit);
@ -656,7 +662,7 @@ fn taint_and_unguarded_sink_deduped() {
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
.unwrap();
let tree = parser.parse(src as &[u8], None).unwrap();
let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs");
let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
let _lang = Lang::from_slug("rust").unwrap();
// Find a sink node to create a synthetic taint finding
@ -674,6 +680,7 @@ fn taint_and_unguarded_sink_deduped() {
sink: sink_node,
source: entry,
path: vec![entry, sink_node],
source_kind: crate::labels::SourceKind::UserInput,
}];
let findings = parse_and_run_all_with_taint(
@ -719,3 +726,831 @@ fn process_star_without_web_params_no_auth_gap() {
auth_findings
);
}
// ─── Resource leak tests (additional languages) ────────────────────
/// Python: a file opened with `open()` and never closed must yield a
/// `cfg-resource-leak` finding.
#[test]
fn resource_leak_python_open_without_close() {
    let python_src = br#"
def process():
f = open("data.txt")
data = f.read()
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let reported = parse_and_analyse(&resources::ResourceMisuse, python_src, "python", grammar);

    let leak_reported = reported
        .iter()
        .any(|finding| finding.rule_id == "cfg-resource-leak");
    assert!(
        leak_reported,
        "Should detect open() without close() in Python"
    );
}
/// PHP: `fopen()` with no matching `fclose()` must yield a
/// `cfg-resource-leak` finding.
#[test]
fn resource_leak_php_fopen_without_fclose() {
    let php_src = br#"<?php
function read_file() {
$fp = fopen("data.txt", "r");
$data = fread($fp, 1024);
}
"#;
    let grammar = Language::from(tree_sitter_php::LANGUAGE_PHP);
    let reported = parse_and_analyse(&resources::ResourceMisuse, php_src, "php", grammar);

    let leak_reported = reported
        .iter()
        .any(|finding| finding.rule_id == "cfg-resource-leak");
    assert!(
        leak_reported,
        "Should detect fopen() without fclose() in PHP"
    );
}
/// JavaScript: `fs.openSync()` with no matching `fs.closeSync()` must yield a
/// `cfg-resource-leak` finding.
#[test]
fn resource_leak_js_open_without_close() {
    let js_src = br#"
function readFile() {
var fd = fs.openSync("data.txt", "r");
var data = fs.readSync(fd, buf, 0, 100, 0);
}
"#;
    let grammar = Language::from(tree_sitter_javascript::LANGUAGE);
    let reported = parse_and_analyse(&resources::ResourceMisuse, js_src, "javascript", grammar);

    let leak_reported = reported
        .iter()
        .any(|finding| finding.rule_id == "cfg-resource-leak");
    assert!(
        leak_reported,
        "Should detect fs.openSync() without fs.closeSync() in JS"
    );
}
// ─── JS CFG precision tests ────────────────────────────────────────
/// `throw` acts as a terminator: it becomes a Return-kind CFG node, and any
/// statement that follows it in the same block is unreachable.
#[test]
fn js_throw_terminates_block() {
    let src = br#"
function fail() {
throw new Error("fatal");
eval("dead code");
}
"#;
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", None);

    // Count Return-kind nodes whose source text begins with `throw`.
    let throw_node_count = cfg
        .node_indices()
        .filter(|&idx| {
            let node = &cfg[idx];
            node.kind == crate::cfg::StmtKind::Return
                && node.span.0 > 0
                && src[node.span.0..].starts_with(b"throw")
        })
        .count();
    assert!(
        throw_node_count > 0,
        "throw statement should create a Return-kind node"
    );

    // The `eval` call may be absent from the CFG altogether; when it is
    // present, none of its nodes may be reachable from the entry.
    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);
    let eval_is_dead = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].callee.as_deref() == Some("eval"))
        .all(|idx| !reachable.contains(&idx));
    assert!(eval_is_dead, "eval after throw should be unreachable");
}
/// A call configured as a terminator (`process.exit`) ends control flow, so
/// the `eval` that follows it must not be reachable.
#[test]
fn configured_terminator_stops_flow() {
    let src = br#"
function handler() {
process.exit(1);
eval("dangerous");
}
"#;
    let rules = crate::labels::LangAnalysisRules {
        extra_labels: vec![],
        terminators: vec!["process.exit".into()],
        event_handlers: vec![],
    };

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", Some(&rules));

    // An empty set of `eval` nodes is also acceptable: the CFG builder may
    // not emit anything after a terminator.
    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);
    let eval_is_dead = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].callee.as_deref() == Some("eval"))
        .all(|idx| !reachable.contains(&idx));
    assert!(
        eval_is_dead,
        "eval should be unreachable after process.exit terminator"
    );
}
// ─── Href classification tests ─────────────────────────────────────
/// Assigning to `location.href` must label some CFG node as a `Sink`.
#[test]
fn location_href_assignment_is_sink() {
    let src = br#"
function redirect(url) {
location.href = url;
}
"#;
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);

    let sink_present = cfg.node_indices().any(|idx| {
        let label = &cfg[idx].label;
        matches!(label, Some(crate::labels::DataLabel::Sink(_)))
    });
    assert!(sink_present, "location.href = url should produce a Sink node");
}
/// Assigning a constant to an element's `href` must not label any CFG node
/// as a `Sink`.
#[test]
fn a_href_assignment_is_not_sink() {
    let src = br#"
function setLink(el) {
el.href = "/about";
}
"#;
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);

    let sink_present = cfg.node_indices().any(|idx| {
        let label = &cfg[idx].label;
        matches!(label, Some(crate::labels::DataLabel::Sink(_)))
    });
    assert!(
        !sink_present,
        "el.href = '/about' should NOT produce a Sink node"
    );
}
// ─── Config sanitizer tests ────────────────────────────────────────
/// With `escapeHtml` configured as an HTML-escape sanitizer, calling it
/// before the `innerHTML` assignment must leave no `cfg-unguarded-sink`
/// finding.
#[test]
fn config_sanitizer_suppresses_unguarded_sink() {
    let src = br#"
function render(input) {
var safe = escapeHtml(input);
document.body.innerHTML = safe;
}
"#;
    let lang_str = "javascript";

    // Analysis rules carrying a single configured sanitizer.
    let escape_html_rule = crate::labels::RuntimeLabelRule {
        matchers: vec!["escapeHtml".into()],
        label: crate::labels::DataLabel::Sanitizer(crate::labels::Cap::HTML_ESCAPE),
    };
    let rules = crate::labels::LangAnalysisRules {
        extra_labels: vec![escape_html_rule],
        terminators: vec![],
        event_handlers: vec![],
    };

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();

    // The same rules go to both CFG construction and the analysis context.
    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", Some(&rules));
    let ctx = AnalysisContext {
        cfg: &cfg,
        entry,
        lang: Lang::from_slug(lang_str).unwrap(),
        file_path: "test.rs",
        source_bytes: src,
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
        analysis_rules: Some(&rules),
        taint_active: true,
    };

    let all_findings = run_all(&ctx);
    let unguarded: Vec<_> = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect();
    assert!(
        unguarded.is_empty(),
        "escapeHtml config sanitizer should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
// ─── Python precision tests ────────────────────────────────────────
/// `subprocess.run` called with a constant argument list must not be
/// reported as an unguarded sink.
#[test]
fn python_constant_subprocess_no_finding() {
    let src = br#"
import subprocess
def build():
subprocess.run(["make", "clean"])
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(src, "python", grammar);

    let unguarded = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "subprocess.run with constant list args should not be flagged; got {:?}",
        unguarded
    );
}
/// `subprocess.run(["git", "status"])` uses only constants and must not be
/// reported as an unguarded sink.
#[test]
fn python_constant_git_status_no_finding() {
    let src = br#"
import subprocess
def check():
subprocess.run(["git", "status"])
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(src, "python", grammar);

    let unguarded = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "subprocess.run with constant git args should not be flagged; got {:?}",
        unguarded
    );
}
/// A value read from `sys.argv` and passed to `os.system` must produce a
/// HIGH-severity `cfg-unguarded-sink` finding.
#[test]
fn python_tainted_os_system_produces_finding() {
    let src = br#"
import sys
import os
def run():
cmd = sys.argv[1]
os.system(cmd)
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(src, "python", grammar);

    let high_sink_reported = all_findings.iter().any(|finding| {
        finding.rule_id == "cfg-unguarded-sink"
            && finding.severity == crate::patterns::Severity::High
    });
    assert!(
        high_sink_reported,
        "Source-derived os.system should produce a HIGH finding"
    );
}
// ─── C / C++ precision tests ───────────────────────────────────────
#[test]
fn cpp_cout_not_a_sink() {
    // Streaming a literal to std::cout must not yield a cfg-unguarded-sink
    // finding.
    let src = br#"
#include <iostream>
int main() {
std::cout << "hello" << std::endl;
return 0;
}
"#;
    let grammar = Language::from(tree_sitter_cpp::LANGUAGE);
    let all_findings = parse_and_run_all(src, "cpp", grammar);
    let sink_hits = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        sink_hits.is_empty(),
        "std::cout should not produce an unguarded-sink finding; got {:?}",
        sink_hits
    );
}
#[test]
fn cpp_printf_constant_no_finding() {
    // printf is called with a literal format string only; the fixture is
    // parsed with the C grammar and no cfg-unguarded-sink finding is expected.
    let src = br#"
#include <stdio.h>
int main() {
printf("hello\n");
return 0;
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_run_all(src, "c", grammar);
    let unguarded = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "printf with constant args should be suppressed; got {:?}",
        unguarded
    );
}
#[test]
fn cpp_system_with_getenv_produces_finding() {
    // The result of getenv is handed to system(); the fixture is parsed with
    // the C grammar and at least one cfg-unguarded-sink finding is expected.
    let src = br#"
#include <stdlib.h>
int main() {
char* input = getenv("USER_CMD");
system(input);
return 0;
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_run_all(src, "c", grammar);
    let sink_hits = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        !sink_hits.is_empty(),
        "system(getenv(...)) should produce an unguarded-sink finding"
    );
}
// ─── Unreachable + unguarded dedup test ─────────────────────────────
#[test]
fn unreachable_sink_suppresses_unguarded() {
    // A sink placed after `return` may be reported as cfg-unreachable-sink,
    // but no cfg-unguarded-sink finding may share a span with such a report.
    let src = br#"
fn main() {
return;
std::process::Command::new("sh").arg("x").status().unwrap();
}
"#;
    let grammar = Language::from(tree_sitter_rust::LANGUAGE);
    let all_findings = parse_and_run_all(src, "rust", grammar);
    let unreachable_sinks = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unreachable-sink")
        .collect::<Vec<_>>();
    let unguarded_at_same_span = all_findings
        .iter()
        .filter(|finding| {
            finding.rule_id == "cfg-unguarded-sink"
                && unreachable_sinks
                    .iter()
                    .any(|dead| dead.span == finding.span)
        })
        .collect::<Vec<_>>();
    assert!(
        unguarded_at_same_span.is_empty(),
        "cfg-unguarded-sink should be suppressed when cfg-unreachable-sink fires on same span; got {:?}",
        unguarded_at_same_span
    );
}
// ─── Fix 3: Wrapper resource names (curlx_fopen/curlx_fclose) ──────
#[test]
fn curlx_fopen_with_curlx_fclose_no_leak() {
    // A handle opened via curlx_fopen and closed via curlx_fclose must not be
    // reported as cfg-resource-leak.
    let src = br#"
void process() {
FILE *fp = curlx_fopen("file.txt", "r");
curlx_fclose(fp);
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", grammar);
    let leaks = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leaks.is_empty(),
        "curlx_fopen + curlx_fclose should not produce a resource leak; got {:?}",
        leaks
    );
}
// ─── Fix 4: freopen exclusion ───────────────────────────────────────
#[test]
fn freopen_not_treated_as_acquire() {
    // A bare freopen call on stderr must not be reported as
    // cfg-resource-leak.
    let src = br#"
void redirect_stderr() {
freopen("/dev/null", "w", stderr);
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", grammar);
    let leaks = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leaks.is_empty(),
        "freopen should not produce a resource leak finding; got {:?}",
        leaks
    );
}
// ─── Fix 5: Struct field ownership transfer ─────────────────────────
#[test]
fn struct_field_ownership_transfer_no_leak() {
    // The fopen result is stored into a field of the caller-supplied struct;
    // no cfg-resource-leak finding is expected.
    let src = br#"
void open_stream(struct session *s) {
FILE *fp = fopen("data.txt", "r");
s->stream = fp;
s->fopened = 1;
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", grammar);
    let leaks = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leaks.is_empty(),
        "Struct field ownership transfer should suppress resource leak; got {:?}",
        leaks
    );
}
// ─── Fix 6: Linked-list / global insertion ──────────────────────────
#[test]
fn linked_list_insertion_no_leak() {
    // A malloc'd node is linked into cfg->variables; no cfg-resource-leak
    // finding is expected.
    let src = br#"
void add_var(struct config *cfg, const char *name) {
struct var *p = malloc(sizeof(struct var));
p->next = cfg->variables;
cfg->variables = p;
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_analyse(&resources::ResourceMisuse, src, "c", grammar);
    let leaks = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leaks.is_empty(),
        "Linked-list insertion should suppress resource leak; got {:?}",
        leaks
    );
}
// ─── Fix 2: Preproc dangling-else CFG recovery ─────────────────────
#[test]
fn preproc_ifdef_does_not_orphan_subsequent_code() {
    // An #ifdef block ends in a dangling `else` whose body sits after the
    // #endif. Every CFG node built from this fixture must still be reachable
    // from the entry node.
    let src = br#"
void process() {
int x = 1;
#ifdef _WIN32
if (x) {
x = 2;
} else
#endif
{
x = 3;
}
free(x);
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&grammar).unwrap();
    let syntax_tree = ts_parser.parse(&src[..], None).unwrap();
    let (graph, entry_node, _) = build_cfg(&syntax_tree, src, "c", "test.c", None);
    let reachable_nodes = dominators::reachable_set(&graph, entry_node);
    // Nodes in the graph that the entry node cannot reach.
    let unreachable_count = graph.node_count() - reachable_nodes.len();
    assert!(
        unreachable_count == 0,
        "Expected all nodes reachable after preproc block, but {} nodes are unreachable",
        unreachable_count
    );
}
// ─── Fix 1: Break in loop keeps post-loop code reachable ────────────
#[test]
fn break_in_loop_post_loop_reachable() {
    // `while(1)` with a conditional break: the statement after the loop must
    // stay connected, so every CFG node has to be reachable from the entry.
    let src = br#"
void process() {
int x = 0;
while(1) {
if(x) break;
x = x + 1;
}
free(x);
}
"#;
    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&grammar).unwrap();
    let syntax_tree = ts_parser.parse(&src[..], None).unwrap();
    let (graph, entry_node, _) = build_cfg(&syntax_tree, src, "c", "test.c", None);
    let reachable_nodes = dominators::reachable_set(&graph, entry_node);
    // Nodes in the graph that the entry node cannot reach.
    let unreachable_count = graph.node_count() - reachable_nodes.len();
    assert!(
        unreachable_count == 0,
        "Expected all nodes reachable after break in loop, but {} nodes are unreachable",
        unreachable_count
    );
}
// ─── PART 2A: One-hop constant binding trace ────────────────────────
#[test]
fn python_one_hop_constant_binding_no_finding() {
    // `cmd` is bound to a string literal one statement before it is used in
    // the subprocess.run argv; no cfg-unguarded-sink finding is expected.
    let src = br#"
import subprocess
def check():
cmd = "git"
subprocess.run([cmd, "status"])
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(src, "python", grammar);
    let unguarded = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "One-hop constant binding should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
// ─── PART 2B: Exec-path guard rules ─────────────────────────────────
#[test]
fn exec_path_guard_suppresses_unguarded_sink() {
    // `bin` comes from the environment, is passed to resolve_binary(&bin),
    // and is then used as an argument of Command::new("sh"). With that call
    // in between, no cfg-unguarded-sink finding is expected.
    let src = br#"
use std::process::Command;
fn main() {
let bin = std::env::var("BIN").unwrap();
resolve_binary(&bin);
Command::new("sh").arg(&bin).status().unwrap();
}"#;
    let grammar = Language::from(tree_sitter_rust::LANGUAGE);
    let all_findings = parse_and_analyse(&guards::UnguardedSink, src, "rust", grammar);
    let unguarded = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "resolve_binary guard should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
// ─── PART 2C: Evidence-based severity in cfg-only mode ──────────────
#[test]
fn cfg_only_no_taint_produces_low_severity() {
    // Runs UnguardedSink with taint_active = false (cfg-only mode) and no
    // taint findings, then asserts that no cfg-unguarded-sink finding is
    // rated MEDIUM or HIGH.
    let src = br#"
use std::process::Command;
fn process_data() {
let x = compute_something();
Command::new("sh").arg(&x).status().unwrap();
}"#;
    let grammar = Language::from(tree_sitter_rust::LANGUAGE);
    let mut ts_parser = tree_sitter::Parser::new();
    ts_parser.set_language(&grammar).unwrap();
    let syntax_tree = ts_parser.parse(&src[..], None).unwrap();
    let (graph, entry, summaries) = build_cfg(&syntax_tree, src, "rust", "test.rs", None);
    let lang = Lang::from_slug("rust").unwrap();
    let ctx = AnalysisContext {
        cfg: &graph,
        entry,
        lang,
        file_path: "test.rs",
        source_bytes: src,
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
        analysis_rules: None,
        // cfg-only mode
        taint_active: false,
    };
    let all_findings = guards::UnguardedSink.run(&ctx);
    let medium_or_high = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-unguarded-sink")
        .filter(|finding| {
            matches!(
                finding.severity,
                crate::patterns::Severity::Medium | crate::patterns::Severity::High
            )
        })
        .collect::<Vec<_>>();
    assert!(
        medium_or_high.is_empty(),
        "cfg-only mode without taint should produce LOW severity, not MEDIUM/HIGH; got {:?}",
        medium_or_high
    );
}
// ─── PART 4B: FileResponse ownership transfer ──────────────────────
#[test]
fn file_response_ownership_transfer_no_leak() {
    // The opened file is returned wrapped in FileResponse(f); no
    // cfg-resource-leak finding is expected.
    let src = br#"
def serve_file():
f = open("report.pdf", "rb")
return FileResponse(f)
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_analyse(&resources::ResourceMisuse, src, "python", grammar);
    let leaks = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leaks.is_empty(),
        "FileResponse should suppress cfg-resource-leak; got {:?}",
        leaks
    );
}
// ─── PART 4C: Lock-not-released refinement ──────────────────────────
#[test]
fn python_lock_constructor_only_no_finding() {
    // threading.Lock() is constructed but .acquire() is never called; no
    // cfg-lock-not-released finding is expected.
    let src = br#"
import threading
def setup():
lock = threading.Lock()
do_work()
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_analyse(&resources::ResourceMisuse, src, "python", grammar);
    let lock_hits = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-lock-not-released")
        .collect::<Vec<_>>();
    assert!(
        lock_hits.is_empty(),
        "Lock constructor without acquire should not produce cfg-lock-not-released; got {:?}",
        lock_hits
    );
}
// ─── PART 4A: signal.connect exclusion ──────────────────────────────
#[test]
fn python_signal_connect_not_treated_as_db_acquire() {
    // signal.connect(handler) must not be reported as cfg-resource-leak.
    let src = br#"
def setup():
signal.connect(handler)
do_work()
"#;
    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_analyse(&resources::ResourceMisuse, src, "python", grammar);
    let leaks = all_findings
        .iter()
        .filter(|finding| finding.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leaks.is_empty(),
        "signal.connect should not be treated as db acquire; got {:?}",
        leaks
    );
}

View file

@ -3,9 +3,40 @@ use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
use crate::cfg::StmtKind;
use crate::labels::DataLabel;
use crate::patterns::Severity;
use std::collections::HashSet;
/// CFG analysis pass registered under the name `"unreachable-code"`.
///
/// Its `run` computes the set of nodes reachable from the CFG entry and then
/// walks every node of the graph to build its findings.
pub struct UnreachableCode;
/// Collect every name used by calls to configured event-handler registrars.
///
/// A `StmtKind::Call` node counts as a handler registration when its callee,
/// compared ASCII-case-insensitively, ends with one of the entries in
/// `analysis_rules.event_handlers`. All `uses` of such a node are returned,
/// because any of them may be the callback being registered.
///
/// Returns an empty set when no analysis rules are supplied or when no event
/// handlers are configured.
fn event_handler_callbacks(ctx: &AnalysisContext) -> HashSet<String> {
    let mut callbacks = HashSet::new();
    let handlers = match ctx.analysis_rules {
        Some(rules) if !rules.event_handlers.is_empty() => &rules.event_handlers,
        _ => return callbacks,
    };

    // Lowercase the configured suffixes once up front instead of allocating a
    // fresh lowercase copy of every handler for every call node.
    let handler_suffixes: Vec<String> = handlers
        .iter()
        .map(|h| h.to_ascii_lowercase())
        .collect();

    for idx in ctx.cfg.node_indices() {
        let info = &ctx.cfg[idx];
        if info.kind != StmtKind::Call {
            continue;
        }
        let Some(callee) = &info.callee else {
            continue;
        };
        let callee_lower = callee.to_ascii_lowercase();
        let is_handler = handler_suffixes
            .iter()
            .any(|suffix| callee_lower.ends_with(suffix.as_str()));
        if is_handler {
            // The callback is typically passed to the call, so every name this
            // call node uses is treated as a potential callback.
            callbacks.extend(info.uses.iter().cloned());
        }
    }
    callbacks
}
impl CfgAnalysis for UnreachableCode {
fn name(&self) -> &'static str {
"unreachable-code"
@ -13,6 +44,7 @@ impl CfgAnalysis for UnreachableCode {
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
let reachable = dominators::reachable_set(ctx.cfg, ctx.entry);
let handler_callbacks = event_handler_callbacks(ctx);
let mut findings = Vec::new();
for idx in ctx.cfg.node_indices() {
@ -27,6 +59,13 @@ impl CfgAnalysis for UnreachableCode {
continue;
}
// Suppress findings for nodes inside event handler callbacks
if let Some(func_name) = &info.enclosing_func
&& handler_callbacks.contains(func_name)
{
continue;
}
let (rule_id, title, severity) = match info.label {
Some(DataLabel::Sanitizer(_)) => (
"cfg-unreachable-sanitizer",
@ -43,7 +82,9 @@ impl CfgAnalysis for UnreachableCode {
),
_ => {
// Check if it's a guard/auth call
if super::is_guard_call(info, ctx.lang) || super::is_auth_call(info, ctx.lang) {
if super::is_guard_call(info, ctx.lang, ctx.analysis_rules)
|| super::is_auth_call(info, ctx.lang)
{
(
"cfg-unreachable-guard",
"Unreachable guard/auth check",