Added Cap::DATA_EXFIL and taint false-positive (FP) and false-negative (FN) fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@ -678,12 +678,30 @@ fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
if info.kind == StmtKind::If {
if let Some(cond_text) = &info.condition_text {
let kind = classify_condition(cond_text);
// For `AllowlistCheck`, also confirm a target identifier was
// extractable. When the receiver-method form carries a
// string-literal arg (`filePath.includes("/")`,
// `path.contains("..")`), `extract_allowlist_target` returns
// `None` because the argument isn't an identifier. Those
// shapes are presence-checks, not real allowlist tests against
// a collection variable, and shouldn't dominate every
// downstream sink as a structural guard with `Cap::all()`.
// `classify_condition` itself stays unchanged (an existing
// test locks in its broad return for the receiver-method form,
// and the SSA branch-narrowing layer reads the kind for its
// own purposes).
let allowlist_has_target = if kind == PredicateKind::AllowlistCheck {
crate::taint::path_state::classify_condition_with_target(cond_text)
.1
.is_some()
} else {
true
};
if matches!(
kind,
PredicateKind::AllowlistCheck
| PredicateKind::TypeCheck
| PredicateKind::ValidationCall
) {
PredicateKind::TypeCheck | PredicateKind::ValidationCall,
) || (kind == PredicateKind::AllowlistCheck && allowlist_has_target)
{
result.push((idx, Cap::all()));
} else if cond_indirect_validator_callee(info, ctx).is_some() {
// Indirect-validator pattern:
@ -995,7 +1013,25 @@ impl CfgAnalysis for UnguardedSink {
// is the only other operand. The simpler `is_all_args_constant`
// check above rejects that mixed shape because it forbids real
// parameters in operand position.
if !has_taint && ssa_all_sink_operands_const_or_param(ctx, *sink) {
//
// Exemption: shell-array gate filters. The
// `extract_shell_array_payload_idents` detector recognises
// `[<shell>, "-c", <payload>]` arrays at any call site and emits a
// `Sink(SHELL_ESCAPE)` label with `destination_uses` narrowed to
// the payload-element idents. When the array shape itself is the
// gate, an unrelated reassign-to-const elsewhere in the body
// (`const flag = true; if (flag) {}`) does not erase the
// shell-exec intent — the construction of `[bash, -c, x]` is by
// itself the dangerous operation. Skip this suppression so the
// structural finding survives in closed-world contexts where no
// taint source has been resolved yet.
let has_shell_array_gate = sink_info.call.gate_filters.iter().any(|gf| {
gf.label_caps.contains(Cap::SHELL_ESCAPE) && gf.destination_uses.is_some()
});
if !has_taint
&& !has_shell_array_gate
&& ssa_all_sink_operands_const_or_param(ctx, *sink)
{
continue;
}

View file

@ -125,6 +125,13 @@ pub struct AnalysisContext<'a> {
/// the function-declaration level, the gap only matters when the
/// auth call has to live inside the body.
pub auth_decorators: &'a [String],
/// Names of variables whose `.close()` / release calls live in a
/// nested closure body somewhere else in the file (e.g.
/// `socket.on("close", () => ws.close())`). ResourceMisuse uses this
/// to suppress `cfg-resource-leak` for handles whose cleanup happens
/// in a callback the per-body CFG can't observe. When `None`, no
/// closure-based suppression is applied.
pub closure_released_var_names: Option<&'a std::collections::HashSet<String>>,
}
pub trait CfgAnalysis {

View file

@ -442,6 +442,23 @@ impl CfgAnalysis for ResourceMisuse {
if pair.resource_name == "mutex" && !has_explicit_lock_acquire(ctx, acquire) {
continue;
}
// Suppress when a sibling closure / event handler in
// this file releases the same variable. Common JS/TS
// shape: `const ws = new WebSocket(url);
// socket.on("close", () => ws.close())`. The release
// node lives in a nested body the per-body CFG can't
// see, so the structural "no release on this exit
// path" check fires erroneously. Match by acquired
// variable name; closure captures share the binding
// name with the outer handle.
if let Some(acq_var) = ctx.cfg[acquire].taint.defines.as_deref()
&& ctx
.closure_released_var_names
.map(|s| s.contains(acq_var))
.unwrap_or(false)
{
continue;
}
let info = &ctx.cfg[acquire];
let callee_desc = info.call.callee.as_deref().unwrap_or("(acquire)");

View file

@ -33,6 +33,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
body_const_facts: None,
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
};
analysis.run(&ctx)
}
@ -61,6 +62,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
body_const_facts: None,
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
};
run_all(&ctx)
}
@ -94,6 +96,7 @@ fn parse_and_run_all_with_taint(
body_const_facts: None,
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
};
run_all(&ctx)
}
@ -211,6 +214,7 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
body_const_facts: facts.as_ref(),
type_facts: facts.as_ref().map(|f| &f.type_facts),
auth_decorators: &[],
closure_released_var_names: None,
};
analysis.run(&ctx)
}
@ -1225,6 +1229,7 @@ fn config_sanitizer_suppresses_unguarded_sink() {
body_const_facts: None,
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
};
let findings = run_all(&ctx);
@ -1703,6 +1708,7 @@ fn cfg_only_no_taint_produces_low_severity() {
body_const_facts: None,
type_facts: None,
auth_decorators: &[],
closure_released_var_names: None,
};
let findings = guards::UnguardedSink.run(&ctx);