Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@ -19,19 +19,29 @@ fn sanitize_desc(s: &str) -> String {
/// convergence node where all execution paths join before leaving the function.
///
/// **Invariant:** Only terminal exits carry the complete merged lifecycle state
/// needed for leak analysis. Return nodes are intermediate (they flow into the
/// terminal exit) and must NOT be analyzed for terminal resource state.
///
/// Detection is purely topological: a node inside a function is terminal when
/// it has no successor within the same function scope. This works for both
/// per-body graphs (Exit node is a sink) and legacy supergraphs (the
/// synthesized Return's successor is the file-level Exit with
/// needed for leak analysis. Return nodes are intermediate in per-body graphs
/// (they flow into the synthetic Exit node) but become terminal in legacy
/// supergraphs (their successor is the file-level Exit with
/// `enclosing_func = None`).
///
/// Detection combines a kind filter with a topological check. Only nodes
/// whose `StmtKind` actually terminates execution (`Exit`, `Return`, `Throw`)
/// are considered, and of those only the ones with no successor in the same
/// function scope qualify. The kind filter matters because a nested function
/// literal (e.g. `obj.fn = () => {...}`) gets a placeholder in the parent
/// graph, leaving dangling Seq nodes behind; without the filter those nodes
/// would be misclassified as terminal exits and produce spurious
/// resource-leak findings at the function-literal span.
fn is_terminal_function_exit(
idx: petgraph::graph::NodeIndex,
info: &crate::cfg::NodeInfo,
cfg: &Cfg,
) -> bool {
if !matches!(
info.kind,
StmtKind::Exit | StmtKind::Return | StmtKind::Throw
) {
return false;
}
info.ast.enclosing_func.is_some()
&& !cfg
.neighbors_directed(idx, petgraph::Direction::Outgoing)
@ -62,6 +72,7 @@ pub struct StateFinding {
/// `state-unauthed-access` finding is suppressed on those spans because
/// the user-controlled input has already been proved unable to escape
/// into a privileged location.
#[allow(clippy::too_many_arguments)]
pub fn extract_findings(
result: &DataflowResult<ProductState, TransferEvent>,
cfg: &Cfg,
@ -70,6 +81,7 @@ pub fn extract_findings(
func_summaries: &crate::cfg::FuncSummaries,
enable_auth: bool,
path_safe_suppressed_sink_spans: &std::collections::HashSet<(usize, usize)>,
closure_released_var_names: Option<&std::collections::HashSet<String>>,
) -> Vec<StateFinding> {
let mut findings = Vec::new();
@ -195,6 +207,23 @@ pub fn extract_findings(
continue;
}
// Suppress leaks for variables whose release call lives in a
// nested closure (callback / event handler) outside this
// body's CFG. Common JS/TS shape:
// const ws = new WebSocket(url);
// socket.on("close", () => ws.close());
// The per-body resource analysis cannot observe the close
// inside the registered handler body; without this gate the
// handle reads as a definite leak. Match by variable name —
// closure-captured handles share the binding name with the
// handle in the outer scope.
if closure_released_var_names
.map(|s| s.contains(var_name))
.unwrap_or(false)
{
continue;
}
// Prefer direct acquire node span; fall back to proxy span
// from ResourceMethodSummary (cross-body resource tracking).
let acquire_span = acquire_node
@ -557,6 +586,7 @@ mod tests {
&HashMap::new(),
false,
&std::collections::HashSet::new(),
None,
);
assert_eq!(findings.len(), 1);
@ -617,6 +647,7 @@ mod tests {
&HashMap::new(),
false,
&std::collections::HashSet::new(),
None,
);
assert!(findings.is_empty());
@ -751,6 +782,7 @@ mod tests {
&HashMap::new(),
false,
&std::collections::HashSet::new(),
None,
);
assert!(
@ -816,6 +848,7 @@ mod tests {
&HashMap::new(),
false,
&std::collections::HashSet::new(),
None,
);
assert_eq!(