Added Cap::DATA_EXFIL and taint false-positive (FP) and false-negative (FN) fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@ -1382,6 +1382,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let empty_succs = HashMap::new();
@ -1441,6 +1443,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let empty_succs = HashMap::new();
@ -1573,6 +1577,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = make_finding(n0, n1);
@ -1680,6 +1686,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
// Finding path goes through B0 → B1 → B3
@ -1826,6 +1834,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = Finding {
@ -1938,6 +1948,8 @@ mod tests {
exception_edges: vec![(b0, b2)],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
@ -2004,6 +2016,8 @@ mod tests {
exception_edges: vec![(b0, b2)],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
@ -2111,6 +2125,8 @@ mod tests {
exception_edges: vec![(b1, b2)],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = Finding {

View file

@ -389,6 +389,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -434,6 +436,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -515,6 +519,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -577,6 +583,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -657,6 +665,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -728,6 +738,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -762,6 +774,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -818,6 +832,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -898,6 +914,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -976,6 +994,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);
@ -1011,6 +1031,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let info = analyse_loops(&ssa);

View file

@ -379,6 +379,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = Finding {
@ -452,6 +454,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = Finding {
@ -554,6 +558,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let ctx = SymexContext {
@ -614,6 +620,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let ctx = SymexContext {

View file

@ -353,6 +353,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let witness = state.get_sink_witness(&finding, &ssa);
@ -393,6 +395,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
@ -430,6 +434,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
@ -470,6 +476,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
state.widen_at_loop_head(BlockId(0), &ssa);
@ -513,6 +521,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
state.widen_at_loop_head(BlockId(0), &ssa);
@ -556,6 +566,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
state.widen_at_loop_head(BlockId(0), &ssa);

View file

@ -1012,6 +1012,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
}
}
@ -1591,6 +1593,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
let ctx = make_summary_ctx(&gs);
@ -1659,6 +1662,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
let ctx = make_summary_ctx(&gs);
@ -1727,6 +1731,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
let ctx = make_summary_ctx(&gs);
@ -1790,6 +1795,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
let ctx = make_summary_ctx(&gs);
@ -1853,6 +1859,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
let ctx = make_summary_ctx(&gs);
@ -2050,6 +2057,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
@ -2128,6 +2136,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
@ -2207,6 +2216,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
// Second "send", in ns B, also with same arity → ambiguous bare-name
@ -2236,6 +2246,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
// Also register the type-qualified name so Attempt 1 can find it
@ -2265,6 +2276,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
@ -2343,6 +2355,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
@ -2423,6 +2436,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
insert_java_summary(
@ -2451,6 +2465,7 @@ mod tests {
field_points_to: Default::default(),
return_path_facts: smallvec::SmallVec::new(),
typed_call_receivers: vec![],
param_to_gate_filters: vec![],
},
);
// No "HttpClient.send" summary registered, disambiguation has 0 exact matches

View file

@ -204,8 +204,15 @@ fn sink_cap(finding: &Finding, cfg: &Cfg) -> Cap {
/// Select a witness payload string based on the vulnerability class.
fn witness_payload(cap: Cap) -> &'static str {
// Check bits in priority order (most specific first)
if cap.intersects(Cap::CODE_EXEC) {
// Check bits in priority order (most specific first).
//
// `DATA_EXFIL` is checked before the action-class caps (CODE_EXEC, SQL,
// etc.) because a data-exfil sink reflects what the *attacker reads*,
// not what they *do*: the witness needs to look like a leaked secret
// ("<SESSION_TOKEN>") rather than an injected payload ("' OR 1=1 --").
if cap.intersects(Cap::DATA_EXFIL) {
"<SESSION_TOKEN>"
} else if cap.intersects(Cap::CODE_EXEC) {
"require('child_process').execSync('id')"
} else if cap.intersects(Cap::HTML_ESCAPE) {
"<script>alert('xss')</script>"
@ -639,9 +646,21 @@ mod tests {
witness_payload(Cap::DESERIALIZE),
"malicious_serialized_object"
);
assert_eq!(witness_payload(Cap::DATA_EXFIL), "<SESSION_TOKEN>");
assert_eq!(witness_payload(Cap::CRYPTO), "TAINTED"); // fallback
}
#[test]
fn test_witness_payload_data_exfil_wins_over_action_caps() {
    // When a sink is multi-gated (for example the body slot of a `fetch`
    // call), the cap set can hold DATA_EXFIL, the leak class, together
    // with an underlying action cap such as SSRF.
    let leak_plus_action = Cap::DATA_EXFIL | Cap::SSRF;

    // The attacker is reading data here, not writing it, so the witness
    // has to be the leaked value (a session token) and not an injection
    // payload.
    assert_eq!(witness_payload(leak_plus_action), "<SESSION_TOKEN>");
}
#[test]
fn test_witness_payload_code_exec_separate_from_xss() {
// CODE_EXEC must return a code-execution payload, not an XSS one.
@ -776,6 +795,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = Finding {
@ -831,6 +852,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let cfg = Cfg::new();
let finding = Finding {
@ -892,6 +915,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = Finding {
@ -954,6 +979,8 @@ mod tests {
exception_edges: vec![],
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let finding = Finding {