Critical bug fixes and recall improvements (#68)

This commit is contained in:
Eli Peter 2026-05-11 12:42:39 -04:00 committed by GitHub
parent 7d0e7320e2
commit 55247b7fcd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
352 changed files with 60069 additions and 900 deletions

File diff suppressed because it is too large Load diff

View file

@ -208,6 +208,13 @@ pub struct AnalysisContext<'a> {
/// in a callback the per-body CFG can't observe. When `None`, no
/// closure-based suppression is applied.
pub closure_released_var_names: Option<&'a std::collections::HashSet<String>>,
/// Class-level constant scalars discovered for this file, keyed by
/// the unqualified field name (Java `static final TYPE NAME = LIT;`).
/// Used by `cfg_analysis::guards` to treat identifiers referencing
/// these fields as compile-time constants for the
/// `cfg-unguarded-sink` all-args-constant check. `None` outside Java
/// and on call sites that have not threaded the map through.
pub class_constant_scalars: Option<&'a std::collections::HashMap<String, String>>,
}
pub trait CfgAnalysis {

View file

@ -10,6 +10,43 @@ use std::collections::HashSet;
pub struct ResourceMisuse;
/// Tells `obj.connect("event-name", handler)` style event-handler
/// registrations apart from genuine database-connection acquires.
///
/// A call is treated as a handler registration when both hold:
///
/// * the first argument is a string literal that does not contain
///   `://` (i.e. it does not look like a `scheme://` URL), and
/// * a second positional argument exists and references at least one
///   identifier (the callable being registered).
///
/// SQLAlchemy `engine.connect()` and `sqlite3.connect("path.db")` pass
/// either no arguments or a single string, so they fail the second
/// condition and the leak check still fires for them.
///
/// This lives outside the static `exclude_acquire` list because that
/// list only matches on callee substrings, whereas this check has to
/// inspect the argument shape recorded on the call node.
fn is_event_handler_register_shape(info: &crate::cfg::NodeInfo) -> bool {
    let call = &info.call;

    // Argument 0 must be a string literal, and must not be a URL-like
    // connection string.
    let names_an_event = matches!(
        call.arg_string_literals.first(),
        Some(Some(event_name)) if !event_name.contains("://")
    );

    // Argument 1 must mention at least one identifier. A bare name
    // (`callback`) is recorded as `["callback"]`, and a member access
    // (`self._on_status`) as `["self", "_on_status"]`; both count as a
    // handler. A DB connect either has no second positional argument
    // or passes a non-identifier value (e.g. the string literal in
    // `connect("user", "pass", ...)`), which is recorded as an empty
    // `arg_uses[1]`.
    // NOTE(review): a DB connect whose second argument is a variable
    // (e.g. `connect("user", pw)`) would also match this shape --
    // confirm that trade-off is intended.
    let passes_a_handler = matches!(
        call.arg_uses.get(1),
        Some(handler_idents) if !handler_idents.is_empty()
    );

    names_an_event && passes_a_handler
}
/// Find nodes matching acquire patterns for a given resource pair,
/// excluding any that match `exclude_patterns`.
fn find_acquire_nodes(
@ -517,6 +554,21 @@ impl CfgAnalysis for ResourceMisuse {
if ctx.cfg[acquire].managed_resource {
continue;
}
// Suppress `obj.connect("event-name", callback)` event-
// handler registrations that share the `connect` /
// `cursor` callee suffix with real DB acquires. Sphinx
// app.connect("config-inited", on_init), Flask blueprint
// handlers, and MQTT client.connect("topic", on_msg) all
// pass a string literal event name plus a callable
// identifier; SQLAlchemy `engine.connect()` and
// `sqlite3.connect("path.db")` either have no args or a
// single string arg. Gated on the `db connection`
// resource name so file/socket/mutex pairs are untouched.
if pair.resource_name == "db connection"
&& is_event_handler_register_shape(&ctx.cfg[acquire])
{
continue;
}
// SAFE-FOR-FIELD-LHS (Go only): skip member-expression
// LHS acquires. `b.cpuprof = os.Create(...)` transfers
// ownership to the containing struct; closure
@ -598,3 +650,83 @@ impl CfgAnalysis for ResourceMisuse {
findings
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cfg::{CallMeta, NodeInfo, StmtKind};

    /// Builds a synthetic `obj.connect(...)` call node carrying the
    /// given per-argument string literals and identifier uses.
    fn call_node(arg_string_literals: Vec<Option<String>>, arg_uses: Vec<Vec<String>>) -> NodeInfo {
        let call = CallMeta {
            callee: Some("obj.connect".into()),
            arg_string_literals,
            arg_uses,
            ..Default::default()
        };
        NodeInfo {
            kind: StmtKind::Call,
            call,
            ..Default::default()
        }
    }

    /// An argument slot holding the string literal `text`.
    fn lit(text: &str) -> Option<String> {
        Some(text.to_owned())
    }

    /// The identifier list recorded for one argument slot.
    fn idents(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| part.to_string()).collect()
    }

    #[test]
    fn event_handler_shape_recognises_sphinx_connect() {
        // app.connect("config-inited", _on_init)
        let node = call_node(
            vec![lit("config-inited"), None],
            vec![vec![], idents(&["_on_init"])],
        );
        assert!(is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_recognises_self_method_callback() {
        // client.connect("device/+", self._on_status)
        let node = call_node(
            vec![lit("device/+"), None],
            vec![vec![], idents(&["self", "_on_status"])],
        );
        assert!(is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_url_first_arg() {
        // engine.connect("postgres://localhost/mydb")
        let node = call_node(vec![lit("postgres://localhost/mydb")], vec![vec![]]);
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_oracle_string_args() {
        // cx_Oracle.connect("user", "pass", "dsn") -- every argument is
        // a literal, so `arg_uses[1]` holds no identifier.
        let node = call_node(
            vec![lit("user"), lit("pass"), lit("dsn")],
            vec![vec![], vec![], vec![]],
        );
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_no_args() {
        // engine.connect()
        let node = call_node(Vec::new(), Vec::new());
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_single_string_arg() {
        // sqlite3.connect("path.db")
        let node = call_node(vec![lit("path.db")], vec![vec![]]);
        assert!(!is_event_handler_register_shape(&node));
    }

    #[test]
    fn event_handler_shape_rejects_ident_first_arg() {
        // signal.connect(receiver_func, sender=...) -- already covered
        // by the static exclude list entry `signal.connect`, but the
        // shape check must reject it too because the first argument is
        // not a string literal.
        let node = call_node(vec![None], vec![idents(&["receiver_func"])]);
        assert!(!is_event_handler_register_shape(&node));
    }
}

View file

@ -35,6 +35,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
analysis.run(&ctx)
}
@ -65,6 +66,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
run_all(&ctx)
}
@ -100,6 +102,7 @@ fn parse_and_run_all_with_taint(
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
run_all(&ctx)
}
@ -219,6 +222,7 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
type_facts: facts.as_ref().map(|f| &f.type_facts),
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
analysis.run(&ctx)
}
@ -1235,6 +1239,7 @@ fn config_sanitizer_suppresses_unguarded_sink() {
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
let findings = run_all(&ctx);
@ -1715,6 +1720,7 @@ fn cfg_only_no_taint_produces_low_severity() {
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
class_constant_scalars: None,
};
let findings = guards::UnguardedSink.run(&ctx);