Added Cap::DATA_EXFIL and taint false-positive (FP) and false-negative (FN) fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@ -752,6 +752,7 @@ mod tests {
exception_edges: Vec::new(),
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
(ssa, cfg)
@ -766,6 +767,47 @@ mod tests {
assert_eq!(d.validated_false, 0);
}
/// Regression guard for cap routing: a `DemandState` constructed from
/// `Cap::DATA_EXFIL` must carry that cap back out unchanged, just as it
/// would for any other cap. The backwards engine is meant to treat the
/// demand as opaque bits; if a later change narrows `caps` (for example
/// through a hardcoded mask), the data-exfiltration bit would be dropped
/// during the walk and this test fails.
#[test]
fn demand_state_roundtrips_data_exfil_cap() {
    // A demand built from the single cap holds exactly that cap.
    let solo = DemandState::new(Cap::DATA_EXFIL);
    assert_eq!(solo.caps, Cap::DATA_EXFIL);
    assert!(solo.caps.contains(Cap::DATA_EXFIL));

    // Sanity check on a combined demand: DATA_EXFIL and SSRF (the pair
    // that most often co-occurs on outbound HTTP gates) must both
    // remain set.
    let paired = DemandState::new(Cap::DATA_EXFIL | Cap::SSRF);
    for cap in [Cap::DATA_EXFIL, Cap::SSRF] {
        assert!(paired.caps.contains(cap));
    }
}
/// End-to-end check that the backwards driver confirms a flow from a
/// `DATA_EXFIL`-labelled source when the sink demands *only*
/// `DATA_EXFIL`. Mirrors `driver_walks_source_to_sink`, but pins the
/// demanded cap so that any future change which alters the caps carried
/// along the walk shows up in the reported `sink_caps`.
#[test]
fn driver_walks_data_exfil_source_to_sink() {
    let (ssa, mut cfg) = build_trivial_source_body();
    let source_idx = NodeIndex::new(0);
    let sink_idx = NodeIndex::new(1);

    // Attach a Source(DATA_EXFIL) label to the source CFG node so the
    // cap-matching path is exercised with this specific bit.
    cfg[source_idx].taint.labels.push(DataLabel::Source(Cap::DATA_EXFIL));

    let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript);
    let flows = analyse_sink_backwards(&ctx, SsaValue(1), sink_idx, Cap::DATA_EXFIL);

    assert_eq!(flows.len(), 1, "exactly one DATA_EXFIL flow expected");
    // Length was just checked, so the first entry is the only flow.
    let flow = &flows[0];
    assert!(flow.is_confirmation(), "must confirm at the source");
    assert_eq!(flow.sink_caps, Cap::DATA_EXFIL);
}
#[test]
fn backward_transfer_source_terminates() {
let (ssa, _cfg) = build_trivial_source_body();
@ -800,6 +842,7 @@ mod tests {
exception_edges: Vec::new(),
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let demand = DemandState::new(Cap::all());
let (step, next) = backward_transfer(&ssa, SsaValue(0), &demand);
@ -832,6 +875,7 @@ mod tests {
exception_edges: Vec::new(),
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let demand = DemandState::new(Cap::all());
let (step, _next) = backward_transfer(&ssa, SsaValue(0), &demand);
@ -919,6 +963,7 @@ mod tests {
exception_edges: Vec::new(),
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let demand = DemandState::new(Cap::all());
@ -1007,6 +1052,7 @@ mod tests {
exception_edges: Vec::new(),
field_interner: crate::ssa::ir::FieldInterner::default(),
field_writes: std::collections::HashMap::new(),
synthetic_externals: std::collections::HashSet::new(),
};
let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript);