Added Cap::DATA_EXFIL and taint false-positive (FP) and false-negative (FN) fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@@ -678,12 +678,30 @@ fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
if info.kind == StmtKind::If {
if let Some(cond_text) = &info.condition_text {
let kind = classify_condition(cond_text);
// For `AllowlistCheck`, also confirm a target identifier was
// extractable. When the receiver-method form carries a
// string-literal arg (`filePath.includes("/")`,
// `path.contains("..")`), `extract_allowlist_target` returns
// `None` because the argument isn't an identifier. Those
// shapes are presence-checks, not real allowlist tests against
// a collection variable, and shouldn't dominate every
// downstream sink as a structural guard with `Cap::all()`.
// `classify_condition` itself stays unchanged (an existing
// test locks in its broad return for the receiver-method form,
// and the SSA branch-narrowing layer reads the kind for its
// own purposes).
let allowlist_has_target = if kind == PredicateKind::AllowlistCheck {
crate::taint::path_state::classify_condition_with_target(cond_text)
.1
.is_some()
} else {
true
};
if matches!(
kind,
PredicateKind::AllowlistCheck
| PredicateKind::TypeCheck
| PredicateKind::ValidationCall
) {
PredicateKind::TypeCheck | PredicateKind::ValidationCall,
) || (kind == PredicateKind::AllowlistCheck && allowlist_has_target)
{
result.push((idx, Cap::all()));
} else if cond_indirect_validator_callee(info, ctx).is_some() {
// Indirect-validator pattern:
@@ -995,7 +1013,25 @@ impl CfgAnalysis for UnguardedSink {
// is the only other operand. The simpler `is_all_args_constant`
// check above rejects that mixed shape because it forbids real
// parameters in operand position.
if !has_taint && ssa_all_sink_operands_const_or_param(ctx, *sink) {
//
// Exemption: shell-array gate filters. The
// `extract_shell_array_payload_idents` detector recognises
// `[<shell>, "-c", <payload>]` arrays at any call site and emits a
// `Sink(SHELL_ESCAPE)` label with `destination_uses` narrowed to
// the payload-element idents. When the array shape itself is the
// gate, an unrelated reassign-to-const elsewhere in the body
// (`const flag = true; if (flag) {}`) does not erase the
// shell-exec intent — the construction of `[bash, -c, x]` is by
// itself the dangerous operation. Skip this suppression so the
// structural finding survives in closed-world contexts where no
// taint source has been resolved yet.
let has_shell_array_gate = sink_info.call.gate_filters.iter().any(|gf| {
gf.label_caps.contains(Cap::SHELL_ESCAPE) && gf.destination_uses.is_some()
});
if !has_taint
&& !has_shell_array_gate
&& ssa_all_sink_operands_const_or_param(ctx, *sink)
{
continue;
}