mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
Feat/configurable sanitizers and js precision (#32)
* chore: Exclude CLAUDE.md from Cargo.toml * feat: Add configurable analysis rules and CLI commands for custom sanitizers and terminators * feat: Enhance resource management and analysis efficiency - Implemented parallel summary merging in `scan_filesystem` using rayon for improved performance. - Introduced `GlobalSummaries::merge()` for efficient merging of summaries. - Optimized file reading and hashing to eliminate redundant I/O operations. - Added `should_scan_with_hash()` and `upsert_file_with_hash()` methods to streamline file processing. - Enhanced taint analysis with in-place mutations to reduce memory allocations. - Updated resource acquisition patterns to exclude false positives for `freopen` and wrapper functions. * feat: Implement severity downgrade for findings in non-production paths and add source kind inference * feat: Update versioning information in SECURITY.md for new stable line * feat: Update categories in Cargo.toml to include parser-implementations and text-processing * feat: Update dependencies in Cargo.lock for improved compatibility and performance * feat: Update dependencies in Cargo.lock and Cargo.toml for improved compatibility
This commit is contained in:
parent
f96a89e7c1
commit
19b578c5c4
37 changed files with 3775 additions and 432 deletions
|
|
@ -2,15 +2,75 @@ use super::dominators::{self, dominates};
|
|||
use super::rules;
|
||||
use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence, is_entry_point_func};
|
||||
use crate::cfg::StmtKind;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
use crate::labels::{Cap, DataLabel, RuntimeLabelRule};
|
||||
use crate::patterns::Severity;
|
||||
use petgraph::graph::NodeIndex;
|
||||
|
||||
pub struct UnguardedSink;
|
||||
|
||||
/// Check whether **all** arguments to the sink are constants (no taint-capable
|
||||
/// variable flows). Extends the inline callee-part check by tracing one hop
|
||||
/// through the CFG: if a used variable is defined by a node that itself has
|
||||
/// empty `uses` and no Source label, the definition is treated as a constant
|
||||
/// binding (e.g. `let cmd = "git"; Command::new(cmd)`).
|
||||
fn is_all_args_constant(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
|
||||
let sink_info = &ctx.cfg[sink];
|
||||
let callee_desc = sink_info.callee.as_deref().unwrap_or("");
|
||||
let callee_parts: Vec<&str> = callee_desc.split(['.', ':']).collect();
|
||||
let sink_func = sink_info.enclosing_func.as_deref();
|
||||
|
||||
sink_info.uses.iter().all(|u| {
|
||||
// Part of the callee name itself → constant
|
||||
if callee_parts.contains(&u.as_str()) {
|
||||
return true;
|
||||
}
|
||||
// One-hop trace: find the defining node in the same function
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
let info = &ctx.cfg[idx];
|
||||
if info.enclosing_func.as_deref() != sink_func {
|
||||
continue;
|
||||
}
|
||||
if info.defines.as_deref() == Some(u.as_str()) {
|
||||
// If the defining node has no uses (pure constant) and is not
|
||||
// a Source, the variable is constant.
|
||||
if info.uses.is_empty() && !matches!(info.label, Some(DataLabel::Source(_))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
})
|
||||
}
|
||||
|
||||
/// Check if a callee matches any of the runtime label rules that are sanitizers.
|
||||
fn match_config_sanitizer(callee: &str, extra: &[RuntimeLabelRule]) -> Option<Cap> {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in extra {
|
||||
let cap = match rule.label {
|
||||
DataLabel::Sanitizer(c) => c,
|
||||
_ => continue,
|
||||
};
|
||||
for m in &rule.matchers {
|
||||
let ml = m.to_ascii_lowercase();
|
||||
if ml.ends_with('_') {
|
||||
if callee_lower.starts_with(&ml) {
|
||||
return Some(cap);
|
||||
}
|
||||
} else if callee_lower.ends_with(&ml) {
|
||||
return Some(cap);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Find all nodes in the CFG that are calls to guard functions.
|
||||
fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
|
||||
let guard_rules = rules::guard_rules(ctx.lang);
|
||||
let config_rules = ctx
|
||||
.analysis_rules
|
||||
.map(|r| r.extra_labels.as_slice())
|
||||
.unwrap_or(&[]);
|
||||
let mut result = Vec::new();
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
|
|
@ -19,6 +79,13 @@ fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
|
|||
continue;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
// Check config sanitizer rules first
|
||||
if let Some(cap) = match_config_sanitizer(callee, config_rules) {
|
||||
result.push((idx, cap));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Then check built-in guard rules
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in guard_rules {
|
||||
let matched = rule.matchers.iter().any(|m| {
|
||||
|
|
@ -174,6 +241,13 @@ impl CfgAnalysis for UnguardedSink {
|
|||
|
||||
let has_taint = taint_confirms_sink(ctx, *sink);
|
||||
let source_derived = sink_arg_is_source_derived(ctx, *sink);
|
||||
|
||||
// If sink args are all constants (including one-hop constant bindings)
|
||||
// and taint didn't confirm, this is a false positive — skip it.
|
||||
if is_all_args_constant(ctx, *sink) && !has_taint && !source_derived {
|
||||
continue;
|
||||
}
|
||||
|
||||
let param_only = sink_arg_is_parameter_only(ctx, *sink);
|
||||
let in_entrypoint = sink_in_entrypoint(ctx, *sink);
|
||||
|
||||
|
|
@ -183,6 +257,9 @@ impl CfgAnalysis for UnguardedSink {
|
|||
} else if param_only && !in_entrypoint {
|
||||
// Wrapper function consuming only parameters → LOW
|
||||
(Severity::Low, Confidence::Low)
|
||||
} else if !ctx.taint_active && !source_derived {
|
||||
// CFG-only mode without taint confirmation → LOW
|
||||
(Severity::Low, Confidence::Low)
|
||||
} else if in_entrypoint && !param_only {
|
||||
// Entrypoint with non-parameter args but no taint confirmation → MEDIUM
|
||||
(Severity::Medium, Confidence::Medium)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ mod tests;
|
|||
pub mod unreachable;
|
||||
|
||||
use crate::cfg::{FuncSummaries, NodeInfo, StmtKind};
|
||||
use crate::labels::DataLabel;
|
||||
use crate::labels::{DataLabel, LangAnalysisRules};
|
||||
use crate::patterns::Severity;
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
|
|
@ -51,6 +51,11 @@ pub struct AnalysisContext<'a> {
|
|||
#[allow(dead_code)]
|
||||
pub global_summaries: Option<&'a GlobalSummaries>,
|
||||
pub taint_findings: &'a [taint::Finding],
|
||||
pub analysis_rules: Option<&'a LangAnalysisRules>,
|
||||
/// Whether full taint analysis was active for this file (global summaries
|
||||
/// existed and taint engine ran). When false, structural findings without
|
||||
/// taint confirmation should be treated with lower confidence.
|
||||
pub taint_active: bool,
|
||||
}
|
||||
|
||||
pub trait CfgAnalysis {
|
||||
|
|
@ -87,6 +92,20 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
|||
true
|
||||
});
|
||||
|
||||
// ── Dedup: suppress cfg-unguarded-sink when cfg-unreachable-sink covers the span ──
|
||||
let unreachable_spans: HashSet<(usize, usize)> = findings
|
||||
.iter()
|
||||
.filter(|f| f.rule_id == "cfg-unreachable-sink")
|
||||
.map(|f| f.span)
|
||||
.collect();
|
||||
|
||||
findings.retain(|f| {
|
||||
if f.rule_id == "cfg-unguarded-sink" && unreachable_spans.contains(&f.span) {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
scoring::score_findings(&mut findings, ctx);
|
||||
findings.sort_by(|a, b| {
|
||||
b.score
|
||||
|
|
@ -97,11 +116,36 @@ pub fn run_all(ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
|||
}
|
||||
|
||||
/// Helper: check whether a node is a guard call (validate, sanitize, check, etc.).
|
||||
pub(crate) fn is_guard_call(info: &NodeInfo, lang: Lang) -> bool {
|
||||
pub(crate) fn is_guard_call(
|
||||
info: &NodeInfo,
|
||||
lang: Lang,
|
||||
analysis_rules: Option<&LangAnalysisRules>,
|
||||
) -> bool {
|
||||
if info.kind != StmtKind::Call {
|
||||
return false;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
// Check config sanitizer rules
|
||||
if let Some(extras) = analysis_rules {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in &extras.extra_labels {
|
||||
if !matches!(rule.label, DataLabel::Sanitizer(_)) {
|
||||
continue;
|
||||
}
|
||||
for m in &rule.matchers {
|
||||
let ml = m.to_ascii_lowercase();
|
||||
if ml.ends_with('_') {
|
||||
if callee_lower.starts_with(&ml) {
|
||||
return true;
|
||||
}
|
||||
} else if callee_lower.ends_with(&ml) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check built-in guard rules
|
||||
let guard_rules = rules::guard_rules(lang);
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
for rule in guard_rules {
|
||||
|
|
|
|||
|
|
@ -8,8 +8,13 @@ use std::collections::HashSet;
|
|||
|
||||
pub struct ResourceMisuse;
|
||||
|
||||
/// Find nodes matching acquire patterns for a given resource pair.
|
||||
fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<NodeIndex> {
|
||||
/// Find nodes matching acquire patterns for a given resource pair,
|
||||
/// excluding any that match `exclude_patterns`.
|
||||
fn find_acquire_nodes(
|
||||
ctx: &AnalysisContext,
|
||||
acquire_patterns: &[&str],
|
||||
exclude_patterns: &[&str],
|
||||
) -> Vec<NodeIndex> {
|
||||
ctx.cfg
|
||||
.node_indices()
|
||||
.filter(|&idx| {
|
||||
|
|
@ -19,6 +24,16 @@ fn find_acquire_nodes(ctx: &AnalysisContext, acquire_patterns: &[&str]) -> Vec<N
|
|||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
// Check exclusions first — if the callee matches an exclude
|
||||
// pattern, it is NOT an acquire even if it also matches an
|
||||
// acquire pattern (e.g. `freopen` ends with `fopen`).
|
||||
let excluded = exclude_patterns.iter().any(|p| {
|
||||
let pl = p.to_ascii_lowercase();
|
||||
callee_lower.ends_with(&pl) || callee_lower == pl
|
||||
});
|
||||
if excluded {
|
||||
return false;
|
||||
}
|
||||
acquire_patterns.iter().any(|p| {
|
||||
let pl = p.to_ascii_lowercase();
|
||||
callee_lower.ends_with(&pl) || callee_lower == pl
|
||||
|
|
@ -113,6 +128,204 @@ fn all_paths_pass_through(
|
|||
true
|
||||
}
|
||||
|
||||
/// Check whether the acquired variable is stored into a struct field (ownership
|
||||
/// transfer) downstream of the acquire node. Patterns recognised:
|
||||
/// - `ptr->field = var` (C arrow operator)
|
||||
/// - `obj.field = var` (C dot / generic field store)
|
||||
/// - `list->next = ...` (linked-list insertion)
|
||||
///
|
||||
/// If the variable is transferred, there is no leak — the receiving struct is
|
||||
/// responsible for the lifetime.
|
||||
fn is_ownership_transferred(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
|
||||
let acquired_var = match &ctx.cfg[acquire].defines {
|
||||
Some(v) => v.clone(),
|
||||
None => return false,
|
||||
};
|
||||
|
||||
// BFS through CFG successors looking for a node whose span text
|
||||
// mentions the acquired variable in a struct-field store context.
|
||||
use std::collections::VecDeque;
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
for succ in ctx.cfg.neighbors(acquire) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let info = &ctx.cfg[node];
|
||||
let (start, end) = info.span;
|
||||
|
||||
// Check the source text at this node's span for the acquired variable
|
||||
// appearing in a struct-field store context.
|
||||
let references_var = info.uses.iter().any(|u| u == &acquired_var)
|
||||
|| info.defines.as_ref().is_some_and(|d| d == &acquired_var);
|
||||
|
||||
if references_var && start < end && end <= ctx.source_bytes.len() {
|
||||
let span_text = &ctx.source_bytes[start..end];
|
||||
// `->` anywhere in span means pointer-to-member store
|
||||
if span_text.windows(2).any(|w| w == b"->") {
|
||||
return true;
|
||||
}
|
||||
// `.field = var` pattern (but not `==`)
|
||||
if has_dot_field_assignment(span_text) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// If the variable is truly redefined (not a field write), stop
|
||||
// following this path. A true redefinition is when `defines` matches
|
||||
// but the span doesn't contain `->` or `.field =` patterns.
|
||||
if info.defines.as_ref().is_some_and(|d| d == &acquired_var) {
|
||||
let is_field_write = if start < end && end <= ctx.source_bytes.len() {
|
||||
let span_text = &ctx.source_bytes[start..end];
|
||||
span_text.windows(2).any(|w| w == b"->") || has_dot_field_assignment(span_text)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
if !is_field_write {
|
||||
continue; // genuine redefinition — stop this path
|
||||
}
|
||||
}
|
||||
|
||||
for succ in ctx.cfg.neighbors(node) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Check if `span_text` contains a dot-field assignment pattern like
/// `obj.field = var`.
///
/// For each `.` in the text, the scan requires a non-empty run of identifier
/// bytes (ASCII alphanumerics or `_`), then optional ASCII whitespace, then a
/// single `=`. The following shapes are deliberately rejected:
/// - `obj.method(...)` — no `=` after the name,
/// - `a.b == c`        — comparison, `=` followed by `=`,
/// - `a.b => c`        — hash/arrow syntax, `=` followed by `>`,
/// - `$s .= $t`        — concatenation-assign; there is no field name
///   between the `.` and the `=`.
fn has_dot_field_assignment(span_text: &[u8]) -> bool {
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    span_text
        .iter()
        .enumerate()
        .filter(|&(_, &b)| b == b'.')
        .any(|(dot, _)| {
            let after_dot = &span_text[dot + 1..];

            // A field store needs an actual field name after the dot.
            let name_len = after_dot.iter().take_while(|&&b| is_ident(b)).count();
            if name_len == 0 {
                return false;
            }

            let rest = &after_dot[name_len..];
            let gap = rest.iter().take_while(|b| b.is_ascii_whitespace()).count();

            match &rest[gap..] {
                // `==` and `=>` are not assignments.
                [b'=', b'=' | b'>', ..] => false,
                [b'=', ..] => true,
                _ => false,
            }
        })
}
|
||||
|
||||
/// Check whether the acquired variable is consumed by an ownership-taking
|
||||
/// function (e.g. `FileResponse(f)`, `send_file(f)`) downstream of the
|
||||
/// acquire node. These functions take ownership of the file handle so there
|
||||
/// is no leak.
|
||||
fn is_consumed_by_owner(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
|
||||
static CONSUMING_SINKS: &[&str] = &[
|
||||
"fileresponse",
|
||||
"streaminghttpresponse",
|
||||
"send_file",
|
||||
"make_response",
|
||||
];
|
||||
|
||||
let acquired_var = match &ctx.cfg[acquire].defines {
|
||||
Some(v) => v.clone(),
|
||||
None => return false,
|
||||
};
|
||||
|
||||
use std::collections::VecDeque;
|
||||
let mut visited = HashSet::new();
|
||||
let mut queue = VecDeque::new();
|
||||
for succ in ctx.cfg.neighbors(acquire) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(node) = queue.pop_front() {
|
||||
let info = &ctx.cfg[node];
|
||||
|
||||
// Check Call nodes with callee that matches a consuming sink
|
||||
if info.kind == StmtKind::Call
|
||||
&& let Some(callee) = &info.callee
|
||||
{
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
let is_consuming = CONSUMING_SINKS.iter().any(|s| callee_lower.ends_with(s));
|
||||
if is_consuming && info.uses.iter().any(|u| u == &acquired_var) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Also check the span text for consuming calls — handles cases where
|
||||
// the call is embedded in a return statement (e.g. `return FileResponse(f)`)
|
||||
if info.uses.iter().any(|u| u == &acquired_var) {
|
||||
let (start, end) = info.span;
|
||||
if start < end && end <= ctx.source_bytes.len() {
|
||||
let span_lower: Vec<u8> = ctx.source_bytes[start..end]
|
||||
.iter()
|
||||
.map(|b| b.to_ascii_lowercase())
|
||||
.collect();
|
||||
if CONSUMING_SINKS
|
||||
.iter()
|
||||
.any(|s| span_lower.windows(s.len()).any(|w| w == s.as_bytes()))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for succ in ctx.cfg.neighbors(node) {
|
||||
if visited.insert(succ) {
|
||||
queue.push_back(succ);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// For mutex pairs, check that an explicit `.acquire()` or `.lock()` call
|
||||
/// exists on the acquired variable in the CFG. If only the constructor
|
||||
/// (e.g. `threading.Lock()`) is observed without acquire, skip the finding.
|
||||
fn has_explicit_lock_acquire(ctx: &AnalysisContext, acquire: NodeIndex) -> bool {
|
||||
let acquired_var = match &ctx.cfg[acquire].defines {
|
||||
Some(v) => v.clone(),
|
||||
None => return false,
|
||||
};
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
let info = &ctx.cfg[idx];
|
||||
if info.kind != StmtKind::Call {
|
||||
continue;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
let is_lock_call = callee_lower.ends_with(".acquire")
|
||||
|| callee_lower.ends_with(".lock")
|
||||
|| callee_lower == "pthread_mutex_lock";
|
||||
if is_lock_call && info.uses.iter().any(|u| u == &acquired_var) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
impl CfgAnalysis for ResourceMisuse {
|
||||
fn name(&self) -> &'static str {
|
||||
"resource-misuse"
|
||||
|
|
@ -128,11 +341,18 @@ impl CfgAnalysis for ResourceMisuse {
|
|||
let mut findings = Vec::new();
|
||||
|
||||
for pair in pairs {
|
||||
let acquire_nodes = find_acquire_nodes(ctx, pair.acquire);
|
||||
let acquire_nodes = find_acquire_nodes(ctx, pair.acquire, pair.exclude_acquire);
|
||||
let release_nodes = find_release_nodes(ctx, pair.release);
|
||||
|
||||
for &acquire in &acquire_nodes {
|
||||
if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit) {
|
||||
if !release_on_all_exit_paths(ctx, acquire, &release_nodes, exit)
|
||||
&& !is_ownership_transferred(ctx, acquire)
|
||||
&& !is_consumed_by_owner(ctx, acquire)
|
||||
{
|
||||
// For mutex pairs, require an explicit .acquire()/.lock() call
|
||||
if pair.resource_name == "mutex" && !has_explicit_lock_acquire(ctx, acquire) {
|
||||
continue;
|
||||
}
|
||||
let info = &ctx.cfg[acquire];
|
||||
let callee_desc = info.callee.as_deref().unwrap_or("(acquire)");
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,9 @@ pub struct EntryPointRule {
|
|||
pub struct ResourcePair {
|
||||
pub acquire: &'static [&'static str],
|
||||
pub release: &'static [&'static str],
|
||||
/// Patterns that look like acquire calls (e.g. `freopen` ends with `fopen`)
|
||||
/// but should NOT be treated as acquisitions.
|
||||
pub exclude_acquire: &'static [&'static str],
|
||||
pub resource_name: &'static str,
|
||||
}
|
||||
|
||||
|
|
@ -47,6 +50,16 @@ static COMMON_GUARDS: &[GuardRule] = &[
|
|||
matchers: &["url_encode", "encode_uri", "urlencode"],
|
||||
applies_to_sink_caps: Cap::URL_ENCODE,
|
||||
},
|
||||
GuardRule {
|
||||
matchers: &[
|
||||
"which",
|
||||
"resolve_binary",
|
||||
"find_program",
|
||||
"lookup_path",
|
||||
"shutil.which",
|
||||
],
|
||||
applies_to_sink_caps: Cap::SHELL_ESCAPE,
|
||||
},
|
||||
];
|
||||
|
||||
pub fn guard_rules(_lang: Lang) -> &'static [GuardRule] {
|
||||
|
|
@ -168,21 +181,25 @@ static C_RESOURCES: &[ResourcePair] = &[
|
|||
ResourcePair {
|
||||
acquire: &["malloc", "calloc", "realloc"],
|
||||
release: &["free"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "memory",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["fopen"],
|
||||
release: &["fclose"],
|
||||
acquire: &["fopen", "fdopen", "curlx_fopen", "curlx_fdopen"],
|
||||
release: &["fclose", "curlx_fclose"],
|
||||
exclude_acquire: &["freopen", "curlx_freopen"],
|
||||
resource_name: "file handle",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["open"],
|
||||
release: &["close"],
|
||||
exclude_acquire: &["freopen", "curlx_freopen"],
|
||||
resource_name: "file descriptor",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &["pthread_mutex_lock"],
|
||||
release: &["pthread_mutex_unlock"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "mutex",
|
||||
},
|
||||
];
|
||||
|
|
@ -191,11 +208,13 @@ static GO_RESOURCES: &[ResourcePair] = &[
|
|||
ResourcePair {
|
||||
acquire: &["os.Open", "os.Create", "os.OpenFile"],
|
||||
release: &[".Close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "file handle",
|
||||
},
|
||||
ResourcePair {
|
||||
acquire: &[".Lock"],
|
||||
release: &[".Unlock"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "mutex",
|
||||
},
|
||||
];
|
||||
|
|
@ -205,6 +224,7 @@ static RUST_RESOURCES: &[ResourcePair] = &[
|
|||
ResourcePair {
|
||||
acquire: &["alloc"],
|
||||
release: &["dealloc"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "raw memory",
|
||||
},
|
||||
];
|
||||
|
|
@ -217,10 +237,93 @@ static JAVA_RESOURCES: &[ResourcePair] = &[ResourcePair {
|
|||
"openConnection",
|
||||
],
|
||||
release: &[".close"],
|
||||
exclude_acquire: &[],
|
||||
resource_name: "stream/connection",
|
||||
}];
|
||||
|
||||
static EMPTY_RESOURCES: &[ResourcePair] = &[];
|
||||
/// Acquire/release pairs for Python; returned by `resource_pairs` for
/// `Lang::Python`.
///
/// `find_acquire_nodes` compares `acquire` and `exclude_acquire` patterns
/// against the lower-cased callee as suffixes (or exact matches), and an
/// exclusion wins over an acquire match.
static PYTHON_RESOURCES: &[ResourcePair] = &[
    // Files: any callee ending in `open`.
    ResourcePair {
        acquire: &["open"],
        release: &[".close"],
        exclude_acquire: &[],
        resource_name: "file handle",
    },
    // Sockets.
    ResourcePair {
        acquire: &["socket.socket", "socket"],
        release: &[".close"],
        exclude_acquire: &[],
        resource_name: "socket",
    },
    // Database connections and cursors; signal/event `connect` calls are
    // excluded so they are not reported as connections.
    ResourcePair {
        acquire: &["connect", "cursor"],
        release: &[".close"],
        exclude_acquire: &["signal.connect", "event.connect", ".register"],
        resource_name: "db connection",
    },
    // Lock constructors. Pairs named "mutex" are only reported by
    // `ResourceMisuse` when `has_explicit_lock_acquire` also holds.
    ResourcePair {
        acquire: &["threading.Lock", "threading.RLock"],
        release: &[".release"],
        exclude_acquire: &[],
        resource_name: "mutex",
    },
];
|
||||
|
||||
/// Acquire/release pairs for Ruby; returned by `resource_pairs` for
/// `Lang::Ruby`.
///
/// `find_acquire_nodes` compares `acquire` and `exclude_acquire` patterns
/// against the lower-cased callee as suffixes (or exact matches).
static RUBY_RESOURCES: &[ResourcePair] = &[
    // Files opened via `File.open` or any callee ending in `open`.
    ResourcePair {
        acquire: &["File.open", "open"],
        release: &[".close"],
        exclude_acquire: &[],
        resource_name: "file handle",
    },
    // TCP/UDP sockets.
    ResourcePair {
        acquire: &["TCPSocket.new", "UDPSocket.new"],
        release: &[".close"],
        exclude_acquire: &[],
        resource_name: "socket",
    },
    // Mutex lock/unlock. Pairs named "mutex" are only reported by
    // `ResourceMisuse` when `has_explicit_lock_acquire` also holds.
    ResourcePair {
        acquire: &[".lock"],
        release: &[".unlock"],
        exclude_acquire: &[],
        resource_name: "mutex",
    },
];
|
||||
|
||||
/// Acquire/release pairs for PHP; returned by `resource_pairs` for
/// `Lang::Php`.
///
/// `find_acquire_nodes` compares `acquire` and `exclude_acquire` patterns
/// against the lower-cased callee as suffixes (or exact matches), and an
/// exclusion wins over an acquire match.
static PHP_RESOURCES: &[ResourcePair] = &[
    // File handles; `freopen` is listed as an exclusion for this pair.
    ResourcePair {
        acquire: &["fopen"],
        release: &["fclose"],
        exclude_acquire: &["freopen"],
        resource_name: "file handle",
    },
    // mysqli connections.
    ResourcePair {
        acquire: &["mysqli_connect"],
        release: &["mysqli_close"],
        exclude_acquire: &[],
        resource_name: "db connection",
    },
    // cURL handles.
    ResourcePair {
        acquire: &["curl_init"],
        release: &["curl_close"],
        exclude_acquire: &[],
        resource_name: "curl handle",
    },
];
|
||||
|
||||
/// Acquire/release pairs for JavaScript and TypeScript; returned by
/// `resource_pairs` for `Lang::JavaScript | Lang::TypeScript`.
///
/// `find_acquire_nodes` compares `acquire` and `exclude_acquire` patterns
/// against the lower-cased callee as suffixes (or exact matches).
static JS_RESOURCES: &[ResourcePair] = &[
    // Raw file descriptors from the `fs` module.
    ResourcePair {
        acquire: &["fs.open", "fs.openSync"],
        release: &["fs.close", "fs.closeSync"],
        exclude_acquire: &[],
        resource_name: "file descriptor",
    },
    // Read/write streams.
    ResourcePair {
        acquire: &["createReadStream", "createWriteStream"],
        release: &[".close", ".destroy"],
        exclude_acquire: &[],
        resource_name: "stream",
    },
];
|
||||
|
||||
pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
|
||||
match lang {
|
||||
|
|
@ -229,6 +332,9 @@ pub fn resource_pairs(lang: Lang) -> &'static [ResourcePair] {
|
|||
Lang::Go => GO_RESOURCES,
|
||||
Lang::Rust => RUST_RESOURCES,
|
||||
Lang::Java => JAVA_RESOURCES,
|
||||
_ => EMPTY_RESOURCES,
|
||||
Lang::Python => PYTHON_RESOURCES,
|
||||
Lang::Ruby => RUBY_RESOURCES,
|
||||
Lang::Php => PHP_RESOURCES,
|
||||
Lang::JavaScript | Lang::TypeScript => JS_RESOURCES,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
|
|||
let mut parser = tree_sitter::Parser::new();
|
||||
parser.set_language(&ts_lang).unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
|
||||
let lang = Lang::from_slug(lang_str).unwrap();
|
||||
let ctx = AnalysisContext {
|
||||
cfg: &cfg,
|
||||
|
|
@ -25,6 +25,8 @@ fn parse_and_analyse<A: CfgAnalysis>(
|
|||
func_summaries: &summaries,
|
||||
global_summaries: None,
|
||||
taint_findings: &[],
|
||||
analysis_rules: None,
|
||||
taint_active: true,
|
||||
};
|
||||
analysis.run(&ctx)
|
||||
}
|
||||
|
|
@ -34,7 +36,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
|
|||
let mut parser = tree_sitter::Parser::new();
|
||||
parser.set_language(&ts_lang).unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
|
||||
let lang = Lang::from_slug(lang_str).unwrap();
|
||||
let ctx = AnalysisContext {
|
||||
cfg: &cfg,
|
||||
|
|
@ -45,6 +47,8 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
|
|||
func_summaries: &summaries,
|
||||
global_summaries: None,
|
||||
taint_findings: &[],
|
||||
analysis_rules: None,
|
||||
taint_active: true,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -59,7 +63,7 @@ fn parse_and_run_all_with_taint(
|
|||
let mut parser = tree_sitter::Parser::new();
|
||||
parser.set_language(&ts_lang).unwrap();
|
||||
let tree = parser.parse(src, None).unwrap();
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs");
|
||||
let (cfg, entry, summaries) = build_cfg(&tree, src, lang_str, "test.rs", None);
|
||||
let lang = Lang::from_slug(lang_str).unwrap();
|
||||
let ctx = AnalysisContext {
|
||||
cfg: &cfg,
|
||||
|
|
@ -70,6 +74,8 @@ fn parse_and_run_all_with_taint(
|
|||
func_summaries: &summaries,
|
||||
global_summaries: None,
|
||||
taint_findings,
|
||||
analysis_rules: None,
|
||||
taint_active: true,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -144,7 +150,7 @@ fn unreachable_detects_orphaned_nodes() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
// All nodes in linear code should be reachable
|
||||
let reachable = dominators::reachable_set(&cfg, entry);
|
||||
|
|
@ -469,7 +475,7 @@ fn reachable_set_contains_all_connected_nodes() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
let reachable = dominators::reachable_set(&cfg, entry);
|
||||
|
||||
|
|
@ -493,7 +499,7 @@ fn find_exit_node_exists() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, _, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
let exit = dominators::find_exit_node(&cfg);
|
||||
assert!(exit.is_some(), "Should find an exit node");
|
||||
|
|
@ -512,7 +518,7 @@ fn shortest_distance_basic() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg, entry, _) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
|
||||
let exit = dominators::find_exit_node(&cfg).unwrap();
|
||||
let dist = dominators::shortest_distance(&cfg, entry, exit);
|
||||
|
|
@ -656,7 +662,7 @@ fn taint_and_unguarded_sink_deduped() {
|
|||
.set_language(&Language::from(tree_sitter_rust::LANGUAGE))
|
||||
.unwrap();
|
||||
let tree = parser.parse(src as &[u8], None).unwrap();
|
||||
let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs");
|
||||
let (cfg_graph, entry, _summaries) = build_cfg(&tree, src, "rust", "test.rs", None);
|
||||
let _lang = Lang::from_slug("rust").unwrap();
|
||||
|
||||
// Find a sink node to create a synthetic taint finding
|
||||
|
|
@ -674,6 +680,7 @@ fn taint_and_unguarded_sink_deduped() {
|
|||
sink: sink_node,
|
||||
source: entry,
|
||||
path: vec![entry, sink_node],
|
||||
source_kind: crate::labels::SourceKind::UserInput,
|
||||
}];
|
||||
|
||||
let findings = parse_and_run_all_with_taint(
|
||||
|
|
@ -719,3 +726,831 @@ fn process_star_without_web_params_no_auth_gap() {
|
|||
auth_findings
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Resource leak tests (additional languages) ────────────────────
|
||||
|
||||
#[test]
|
||||
fn resource_leak_python_open_without_close() {
|
||||
let src = br#"
|
||||
def process():
|
||||
f = open("data.txt")
|
||||
data = f.read()
|
||||
"#;
|
||||
|
||||
let findings = parse_and_analyse(
|
||||
&resources::ResourceMisuse,
|
||||
src,
|
||||
"python",
|
||||
Language::from(tree_sitter_python::LANGUAGE),
|
||||
);
|
||||
|
||||
let leak_findings: Vec<_> = findings
|
||||
.iter()
|
||||
.filter(|f| f.rule_id == "cfg-resource-leak")
|
||||
.collect();
|
||||
assert!(
|
||||
!leak_findings.is_empty(),
|
||||
"Should detect open() without close() in Python"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resource_leak_php_fopen_without_fclose() {
|
||||
let src = br#"<?php
|
||||
function read_file() {
|
||||
$fp = fopen("data.txt", "r");
|
||||
$data = fread($fp, 1024);
|
||||
}
|
||||
"#;
|
||||
|
||||
let findings = parse_and_analyse(
|
||||
&resources::ResourceMisuse,
|
||||
src,
|
||||
"php",
|
||||
Language::from(tree_sitter_php::LANGUAGE_PHP),
|
||||
);
|
||||
|
||||
let leak_findings: Vec<_> = findings
|
||||
.iter()
|
||||
.filter(|f| f.rule_id == "cfg-resource-leak")
|
||||
.collect();
|
||||
assert!(
|
||||
!leak_findings.is_empty(),
|
||||
"Should detect fopen() without fclose() in PHP"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resource_leak_js_open_without_close() {
|
||||
let src = br#"
|
||||
function readFile() {
|
||||
var fd = fs.openSync("data.txt", "r");
|
||||
var data = fs.readSync(fd, buf, 0, 100, 0);
|
||||
}
|
||||
"#;
|
||||
|
||||
let findings = parse_and_analyse(
|
||||
&resources::ResourceMisuse,
|
||||
src,
|
||||
"javascript",
|
||||
Language::from(tree_sitter_javascript::LANGUAGE),
|
||||
);
|
||||
|
||||
let leak_findings: Vec<_> = findings
|
||||
.iter()
|
||||
.filter(|f| f.rule_id == "cfg-resource-leak")
|
||||
.collect();
|
||||
assert!(
|
||||
!leak_findings.is_empty(),
|
||||
"Should detect fs.openSync() without fs.closeSync() in JS"
|
||||
);
|
||||
}
|
||||
|
||||
// ─── JS CFG precision tests ────────────────────────────────────────
|
||||
|
||||
/// A JavaScript `throw` must behave as a block terminator: it is expected to
/// show up as a `Return`-kind CFG node, and an `eval` call placed directly
/// after it must not be reachable from the entry node.
#[test]
fn js_throw_terminates_block() {
    let src = br#"
function fail() {
    throw new Error("fatal");
    eval("dead code");
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", None);

    // A Return-kind node whose source text starts with `throw` proves the
    // statement was modelled as a terminator.
    let throw_modelled = cfg.node_indices().any(|idx| {
        let node = &cfg[idx];
        node.kind == crate::cfg::StmtKind::Return
            && node.span.0 > 0
            && src[node.span.0..].starts_with(b"throw")
    });
    assert!(
        throw_modelled,
        "throw statement should create a Return-kind node"
    );

    // The `eval` call may be absent from the CFG altogether; if it is present
    // it must not be reachable.
    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);
    let eval_is_reachable = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].callee.as_deref().is_some_and(|c| c == "eval"))
        .any(|idx| reachable.contains(&idx));
    assert!(
        !eval_is_reachable,
        "eval after throw should be unreachable"
    );
}
|
||||
|
||||
/// A callee listed in `LangAnalysisRules::terminators` (`process.exit` here)
/// must stop control flow, leaving the following `eval` call unreachable.
#[test]
fn configured_terminator_stops_flow() {
    let src = br#"
function handler() {
    process.exit(1);
    eval("dangerous");
}
"#;

    let rules = crate::labels::LangAnalysisRules {
        extra_labels: vec![],
        terminators: vec!["process.exit".into()],
        event_handlers: vec![],
    };

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "javascript", "test.js", Some(&rules));

    let reachable = crate::cfg_analysis::dominators::reachable_set(&cfg, entry);

    // No `eval` node at all is also acceptable: the builder may simply not
    // emit statements that follow a terminator.
    let eval_is_reachable = cfg
        .node_indices()
        .filter(|&idx| cfg[idx].callee.as_deref().is_some_and(|c| c == "eval"))
        .any(|idx| reachable.contains(&idx));
    assert!(
        !eval_is_reachable,
        "eval should be unreachable after process.exit terminator"
    );
}
|
||||
|
||||
// ─── Href classification tests ─────────────────────────────────────
|
||||
|
||||
/// Assigning to `location.href` must yield at least one CFG node labelled as
/// a `DataLabel::Sink`.
#[test]
fn location_href_assignment_is_sink() {
    let src = br#"
function redirect(url) {
    location.href = url;
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);

    let sink_present = cfg
        .node_indices()
        .map(|idx| &cfg[idx].label)
        .any(|label| matches!(label, Some(crate::labels::DataLabel::Sink(_))));
    assert!(sink_present, "location.href = url should produce a Sink node");
}
|
||||
|
||||
/// Assigning a constant to `el.href` (an arbitrary element, not `location`)
/// must not produce any `DataLabel::Sink` node.
#[test]
fn a_href_assignment_is_not_sink() {
    let src = br#"
function setLink(el) {
    el.href = "/about";
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, _entry, _summaries) = build_cfg(&tree, src, "javascript", "test.js", None);

    let sink_present = cfg
        .node_indices()
        .map(|idx| &cfg[idx].label)
        .any(|label| matches!(label, Some(crate::labels::DataLabel::Sink(_))));
    assert!(
        !sink_present,
        "el.href = '/about' should NOT produce a Sink node"
    );
}
|
||||
|
||||
// ─── Config sanitizer tests ────────────────────────────────────────
|
||||
|
||||
/// When `escapeHtml` is registered as an `HTML_ESCAPE` sanitizer through the
/// runtime rules, routing the input through it before the `innerHTML`
/// assignment must not raise `cfg-unguarded-sink`.
#[test]
fn config_sanitizer_suppresses_unguarded_sink() {
    let src = br#"
function render(input) {
    var safe = escapeHtml(input);
    document.body.innerHTML = safe;
}
"#;
    let lang_slug = "javascript";

    // Runtime configuration declaring the custom sanitizer.
    let sanitizer_rule = crate::labels::RuntimeLabelRule {
        matchers: vec!["escapeHtml".into()],
        label: crate::labels::DataLabel::Sanitizer(crate::labels::Cap::HTML_ESCAPE),
    };
    let rules = crate::labels::LangAnalysisRules {
        extra_labels: vec![sanitizer_rule],
        terminators: vec![],
        event_handlers: vec![],
    };

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    // NOTE(review): the path is "test.rs" although the fixture is JavaScript —
    // confirm whether this is intentional before changing it.
    let (cfg, entry, summaries) = build_cfg(&tree, src, lang_slug, "test.rs", Some(&rules));

    let ctx = AnalysisContext {
        cfg: &cfg,
        entry,
        lang: Lang::from_slug(lang_slug).unwrap(),
        file_path: "test.rs",
        source_bytes: src,
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
        analysis_rules: Some(&rules),
        taint_active: true,
    };

    let all_findings = run_all(&ctx);
    let unguarded: Vec<_> = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect();

    assert!(
        unguarded.is_empty(),
        "escapeHtml config sanitizer should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
|
||||
|
||||
// ─── Python precision tests ────────────────────────────────────────
|
||||
|
||||
/// `subprocess.run` invoked with a list of string literals only must not be
/// reported as `cfg-unguarded-sink`.
#[test]
fn python_constant_subprocess_no_finding() {
    let py_source = br#"
import subprocess

def build():
    subprocess.run(["make", "clean"])
"#;

    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(py_source, "python", grammar);

    let unguarded = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "subprocess.run with constant list args should not be flagged; got {:?}",
        unguarded
    );
}
|
||||
|
||||
/// Same constant-argument check as above, with `["git", "status"]`: no
/// `cfg-unguarded-sink` finding is expected.
#[test]
fn python_constant_git_status_no_finding() {
    let py_source = br#"
import subprocess

def check():
    subprocess.run(["git", "status"])
"#;

    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(py_source, "python", grammar);

    let unguarded = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "subprocess.run with constant git args should not be flagged; got {:?}",
        unguarded
    );
}
|
||||
|
||||
/// A value read from `sys.argv` and passed to `os.system` must produce a
/// `cfg-unguarded-sink` finding with `Severity::High`.
#[test]
fn python_tainted_os_system_produces_finding() {
    let py_source = br#"
import sys
import os

def run():
    cmd = sys.argv[1]
    os.system(cmd)
"#;

    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(py_source, "python", grammar);

    let high_sink_reported = all_findings.iter().any(|f| {
        f.rule_id == "cfg-unguarded-sink" && f.severity == crate::patterns::Severity::High
    });
    assert!(
        high_sink_reported,
        "Source-derived os.system should produce a HIGH finding"
    );
}
|
||||
|
||||
// ─── C++ precision tests ───────────────────────────────────────────
|
||||
|
||||
/// Streaming a literal to `std::cout` must not be reported as
/// `cfg-unguarded-sink`.
#[test]
fn cpp_cout_not_a_sink() {
    let cpp_source = br#"
#include <iostream>
int main() {
    std::cout << "hello" << std::endl;
    return 0;
}
"#;

    let grammar = Language::from(tree_sitter_cpp::LANGUAGE);
    let all_findings = parse_and_run_all(cpp_source, "cpp", grammar);

    let sink_findings = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        sink_findings.is_empty(),
        "std::cout should not produce an unguarded-sink finding; got {:?}",
        sink_findings
    );
}
|
||||
|
||||
/// `printf` called with a string literal only: the test expects no
/// `cfg-unguarded-sink` finding (constant-argument suppression). The fixture
/// is analysed with the C grammar.
#[test]
fn cpp_printf_constant_no_finding() {
    let c_source = br#"
#include <stdio.h>
int main() {
    printf("hello\n");
    return 0;
}
"#;

    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_run_all(c_source, "c", grammar);

    let unguarded = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "printf with constant args should be suppressed; got {:?}",
        unguarded
    );
}
|
||||
|
||||
/// The result of `getenv` flowing into `system` must be reported as
/// `cfg-unguarded-sink`. The fixture is analysed with the C grammar.
#[test]
fn cpp_system_with_getenv_produces_finding() {
    let c_source = br#"
#include <stdlib.h>
int main() {
    char* input = getenv("USER_CMD");
    system(input);
    return 0;
}
"#;

    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let all_findings = parse_and_run_all(c_source, "c", grammar);

    let sink_reported = all_findings
        .iter()
        .any(|f| f.rule_id == "cfg-unguarded-sink");
    assert!(
        sink_reported,
        "system(getenv(...)) should produce an unguarded-sink finding"
    );
}
|
||||
|
||||
// ─── Unreachable + unguarded dedup test ─────────────────────────────
|
||||
|
||||
/// A sink located after an unconditional `return` should be reported only as
/// `cfg-unreachable-sink`; no `cfg-unguarded-sink` finding may share a span
/// with an unreachable-sink finding.
#[test]
fn unreachable_sink_suppresses_unguarded() {
    let rust_source = br#"
fn main() {
    return;
    std::process::Command::new("sh").arg("x").status().unwrap();
}
"#;

    let grammar = Language::from(tree_sitter_rust::LANGUAGE);
    let all_findings = parse_and_run_all(rust_source, "rust", grammar);

    // Spans already covered by an unreachable-sink finding.
    let unreachable_spans: Vec<_> = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unreachable-sink")
        .map(|f| &f.span)
        .collect();

    let unguarded_at_same_span: Vec<_> = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .filter(|f| unreachable_spans.iter().any(|span| **span == f.span))
        .collect();

    assert!(
        unguarded_at_same_span.is_empty(),
        "cfg-unguarded-sink should be suppressed when cfg-unreachable-sink fires on same span; got {:?}",
        unguarded_at_same_span
    );
}
|
||||
|
||||
// ─── Fix 3: Wrapper resource names (curlx_fopen/curlx_fclose) ──────
|
||||
|
||||
/// Wrapper-named acquire/release pairs (`curlx_fopen` / `curlx_fclose`) must
/// be matched up, so no `cfg-resource-leak` is reported.
#[test]
fn curlx_fopen_with_curlx_fclose_no_leak() {
    let c_source = br#"
void process() {
    FILE *fp = curlx_fopen("file.txt", "r");
    curlx_fclose(fp);
}
"#;

    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, c_source, "c", grammar);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "curlx_fopen + curlx_fclose should not produce a resource leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 4: freopen exclusion ───────────────────────────────────────
|
||||
|
||||
/// A bare `freopen` call on `stderr` must not be counted as a resource
/// acquisition, so no `cfg-resource-leak` is expected.
#[test]
fn freopen_not_treated_as_acquire() {
    let c_source = br#"
void redirect_stderr() {
    freopen("/dev/null", "w", stderr);
}
"#;

    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, c_source, "c", grammar);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "freopen should not produce a resource leak finding; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 5: Struct field ownership transfer ─────────────────────────
|
||||
|
||||
/// Storing a freshly opened `FILE *` into a struct field (`s->stream = fp`)
/// must suppress `cfg-resource-leak` for that handle.
#[test]
fn struct_field_ownership_transfer_no_leak() {
    let c_source = br#"
void open_stream(struct session *s) {
    FILE *fp = fopen("data.txt", "r");
    s->stream = fp;
    s->fopened = 1;
}
"#;

    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, c_source, "c", grammar);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "Struct field ownership transfer should suppress resource leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 6: Linked-list / global insertion ──────────────────────────
|
||||
|
||||
/// A `malloc`ed node that is linked into a list reachable from a parameter
/// (`cfg->variables = p`) must not be reported as `cfg-resource-leak`.
#[test]
fn linked_list_insertion_no_leak() {
    let c_source = br#"
void add_var(struct config *cfg, const char *name) {
    struct var *p = malloc(sizeof(struct var));
    p->next = cfg->variables;
    cfg->variables = p;
}
"#;

    let grammar = Language::from(tree_sitter_c::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, c_source, "c", grammar);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "Linked-list insertion should suppress resource leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── Fix 2: Preproc dangling-else CFG recovery ─────────────────────
|
||||
|
||||
/// An `#ifdef` block that ends in a dangling `else` must not disconnect the
/// code that follows it: every CFG node has to stay reachable from the entry.
#[test]
fn preproc_ifdef_does_not_orphan_subsequent_code() {
    let src = br#"
void process() {
    int x = 1;
#ifdef _WIN32
    if (x) {
        x = 2;
    } else
#endif
    {
        x = 3;
    }
    free(x);
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_c::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "c", "test.c", None);

    // Nodes missing from the reachable set were orphaned by the CFG builder.
    let reachable = dominators::reachable_set(&cfg, entry);
    let total_nodes = cfg.node_count();
    let unreachable_count = total_nodes - reachable.len();
    assert!(
        unreachable_count == 0,
        "Expected all nodes reachable after preproc block, but {} nodes are unreachable",
        unreachable_count
    );
}
|
||||
|
||||
// ─── Fix 1: Break in loop keeps post-loop code reachable ────────────
|
||||
|
||||
/// A `break` inside `while(1)` must connect to the code after the loop, so
/// every CFG node (including the trailing `free(x)`) stays reachable.
#[test]
fn break_in_loop_post_loop_reachable() {
    let src = br#"
void process() {
    int x = 0;
    while(1) {
        if(x) break;
        x = x + 1;
    }
    free(x);
}
"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_c::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, _) = build_cfg(&tree, src, "c", "test.c", None);

    let reachable = dominators::reachable_set(&cfg, entry);
    let total_nodes = cfg.node_count();
    let unreachable_count = total_nodes - reachable.len();
    assert!(
        unreachable_count == 0,
        "Expected all nodes reachable after break in loop, but {} nodes are unreachable",
        unreachable_count
    );
}
|
||||
|
||||
// ─── PART 2A: One-hop constant binding trace ────────────────────────
|
||||
|
||||
/// A variable bound to a string literal one assignment earlier
/// (`cmd = "git"`) still counts as constant, so `subprocess.run([cmd, ...])`
/// must not raise `cfg-unguarded-sink`.
#[test]
fn python_one_hop_constant_binding_no_finding() {
    let py_source = br#"
import subprocess

def check():
    cmd = "git"
    subprocess.run([cmd, "status"])
"#;

    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let all_findings = parse_and_run_all(py_source, "python", grammar);

    let unguarded = all_findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "One-hop constant binding should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
|
||||
|
||||
// ─── PART 2B: Exec-path guard rules ─────────────────────────────────
|
||||
|
||||
/// Calling `resolve_binary(&bin)` before handing `bin` to `Command` is
/// expected to act as a guard, so `UnguardedSink` reports nothing.
#[test]
fn exec_path_guard_suppresses_unguarded_sink() {
    let rust_source = br#"
use std::process::Command;
fn main() {
    let bin = std::env::var("BIN").unwrap();
    resolve_binary(&bin);
    Command::new("sh").arg(&bin).status().unwrap();
}"#;

    let grammar = Language::from(tree_sitter_rust::LANGUAGE);
    let findings = parse_and_analyse(&guards::UnguardedSink, rust_source, "rust", grammar);

    let unguarded = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .collect::<Vec<_>>();
    assert!(
        unguarded.is_empty(),
        "resolve_binary guard should suppress cfg-unguarded-sink; got {:?}",
        unguarded
    );
}
|
||||
|
||||
// ─── PART 2C: Evidence-based severity in cfg-only mode ──────────────
|
||||
|
||||
/// With taint analysis switched off (`taint_active: false`) and no
/// source-derived evidence, `UnguardedSink` must not emit any
/// `cfg-unguarded-sink` finding at Medium or High severity.
#[test]
fn cfg_only_no_taint_produces_low_severity() {
    let src = br#"
use std::process::Command;
fn process_data() {
    let x = compute_something();
    Command::new("sh").arg(&x).status().unwrap();
}"#;

    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src as &[u8], None).unwrap();
    let (cfg, entry, summaries) = build_cfg(&tree, src, "rust", "test.rs", None);

    let ctx = AnalysisContext {
        cfg: &cfg,
        entry,
        lang: Lang::from_slug("rust").unwrap(),
        file_path: "test.rs",
        source_bytes: src,
        func_summaries: &summaries,
        global_summaries: None,
        taint_findings: &[],
        analysis_rules: None,
        // cfg-only mode: no taint engine results are available.
        taint_active: false,
    };
    let findings = guards::UnguardedSink.run(&ctx);

    let medium_or_high: Vec<_> = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-unguarded-sink")
        .filter(|f| {
            f.severity == crate::patterns::Severity::High
                || f.severity == crate::patterns::Severity::Medium
        })
        .collect();
    assert!(
        medium_or_high.is_empty(),
        "cfg-only mode without taint should produce LOW severity, not MEDIUM/HIGH; got {:?}",
        medium_or_high
    );
}
|
||||
|
||||
// ─── PART 4B: FileResponse ownership transfer ──────────────────────
|
||||
|
||||
/// Returning an opened file wrapped in `FileResponse(f)` hands the handle
/// over to the response, so no `cfg-resource-leak` is expected.
#[test]
fn file_response_ownership_transfer_no_leak() {
    let py_source = br#"
def serve_file():
    f = open("report.pdf", "rb")
    return FileResponse(f)
"#;

    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, py_source, "python", grammar);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "FileResponse should suppress cfg-resource-leak; got {:?}",
        leak_findings
    );
}
|
||||
|
||||
// ─── PART 4C: Lock-not-released refinement ──────────────────────────
|
||||
|
||||
/// Merely constructing a `threading.Lock()` (no `.acquire()` call) must not
/// raise `cfg-lock-not-released`.
#[test]
fn python_lock_constructor_only_no_finding() {
    let py_source = br#"
import threading

def setup():
    lock = threading.Lock()
    do_work()
"#;

    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, py_source, "python", grammar);

    let lock_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-lock-not-released")
        .collect::<Vec<_>>();
    assert!(
        lock_findings.is_empty(),
        "Lock constructor without acquire should not produce cfg-lock-not-released; got {:?}",
        lock_findings
    );
}
|
||||
|
||||
// ─── PART 4A: signal.connect exclusion ──────────────────────────────
|
||||
|
||||
/// `signal.connect(handler)` registers a callback; it must not be mistaken
/// for a database connection acquire, so no `cfg-resource-leak` is expected.
#[test]
fn python_signal_connect_not_treated_as_db_acquire() {
    let py_source = br#"
def setup():
    signal.connect(handler)
    do_work()
"#;

    let grammar = Language::from(tree_sitter_python::LANGUAGE);
    let findings = parse_and_analyse(&resources::ResourceMisuse, py_source, "python", grammar);

    let leak_findings = findings
        .iter()
        .filter(|f| f.rule_id == "cfg-resource-leak")
        .collect::<Vec<_>>();
    assert!(
        leak_findings.is_empty(),
        "signal.connect should not be treated as db acquire; got {:?}",
        leak_findings
    );
}
|
||||
|
|
|
|||
|
|
@ -3,9 +3,40 @@ use super::{AnalysisContext, CfgAnalysis, CfgFinding, Confidence};
|
|||
use crate::cfg::StmtKind;
|
||||
use crate::labels::DataLabel;
|
||||
use crate::patterns::Severity;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// CFG analysis pass registered under the name `"unreachable-code"`; see its
/// `CfgAnalysis` implementation below.
pub struct UnreachableCode;
|
||||
|
||||
/// Collect function names that appear as arguments to configured event handler calls.
|
||||
fn event_handler_callbacks(ctx: &AnalysisContext) -> HashSet<String> {
|
||||
let mut callbacks = HashSet::new();
|
||||
let handlers = match ctx.analysis_rules {
|
||||
Some(rules) if !rules.event_handlers.is_empty() => &rules.event_handlers,
|
||||
_ => return callbacks,
|
||||
};
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
let info = &ctx.cfg[idx];
|
||||
if info.kind != StmtKind::Call {
|
||||
continue;
|
||||
}
|
||||
if let Some(callee) = &info.callee {
|
||||
let callee_lower = callee.to_ascii_lowercase();
|
||||
let is_handler = handlers
|
||||
.iter()
|
||||
.any(|h| callee_lower.ends_with(&h.to_ascii_lowercase()));
|
||||
if is_handler {
|
||||
// The callback function is typically used within the call — any function
|
||||
// that appears as `uses` of this call node is a potential callback.
|
||||
for u in &info.uses {
|
||||
callbacks.insert(u.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
callbacks
|
||||
}
|
||||
|
||||
impl CfgAnalysis for UnreachableCode {
|
||||
fn name(&self) -> &'static str {
|
||||
"unreachable-code"
|
||||
|
|
@ -13,6 +44,7 @@ impl CfgAnalysis for UnreachableCode {
|
|||
|
||||
fn run(&self, ctx: &AnalysisContext) -> Vec<CfgFinding> {
|
||||
let reachable = dominators::reachable_set(ctx.cfg, ctx.entry);
|
||||
let handler_callbacks = event_handler_callbacks(ctx);
|
||||
let mut findings = Vec::new();
|
||||
|
||||
for idx in ctx.cfg.node_indices() {
|
||||
|
|
@ -27,6 +59,13 @@ impl CfgAnalysis for UnreachableCode {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Suppress findings for nodes inside event handler callbacks
|
||||
if let Some(func_name) = &info.enclosing_func
|
||||
&& handler_callbacks.contains(func_name)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let (rule_id, title, severity) = match info.label {
|
||||
Some(DataLabel::Sanitizer(_)) => (
|
||||
"cfg-unreachable-sanitizer",
|
||||
|
|
@ -43,7 +82,9 @@ impl CfgAnalysis for UnreachableCode {
|
|||
),
|
||||
_ => {
|
||||
// Check if it's a guard/auth call
|
||||
if super::is_guard_call(info, ctx.lang) || super::is_auth_call(info, ctx.lang) {
|
||||
if super::is_guard_call(info, ctx.lang, ctx.analysis_rules)
|
||||
|| super::is_auth_call(info, ctx.lang)
|
||||
{
|
||||
(
|
||||
"cfg-unreachable-guard",
|
||||
"Unreachable guard/auth check",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue