Added Cap::DATA_EXFIL and taint false-positive (FP) and false-negative (FN) fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@ -0,0 +1,19 @@
{
"description": "Container-taint DATA_EXFIL: tokens array pushed with req.cookies.session is JSON-stringified into a fetch body. The SSA heap Elements slot carries the cap from `tokens.push(...)` to the sink-side `collect_tainted_sink_values` heap-load, so DATA_EXFIL must fire on the body field even though `payload` itself is not directly tainted by an Assign.",
"tags": ["taint", "data-exfil", "fetch", "container", "heap-elements", "cookie", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [12, 17],
"notes": "tokens.push(req.cookies.session) → JSON.stringify({batch: tokens}) → fetch body. Heap Elements taint must round-trip through the container."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [12, 17],
"notes": "fetch URL is a fixed literal — body taint must not surface as SSRF."
}
]
}

View file

@ -0,0 +1,21 @@
var express = require('express');
var app = express();
// Container-taint DATA_EXFIL: push a Sensitive cookie source into an
// array, then send the JSON-stringified batch as the outbound `fetch`
// body. The SSA heap model marks the array's `Elements` slot tainted at
// the `tokens.push(...)` write; the sink-side
// `collect_tainted_sink_values` loads the same slot and observes the cap,
// so DATA_EXFIL must fire on the body channel even though the body var
// (`payload`) is not directly tainted. Pairs with `array_push_taint.js`
// (same shape, different sink: XSS).
app.post('/batch', function(req, res) {
var tokens = [];
tokens.push(req.cookies.session); // source: session cookie enters the container
var payload = JSON.stringify({ batch: tokens }); // taint reaches `payload` only via the array elements
fetch('https://analytics.internal/track', { // destination is a fixed literal URL
method: 'POST',
body: payload, // sink: outbound request body
});
res.status(204).end();
});

View file

@ -0,0 +1,19 @@
{
"description": "Async/await DATA_EXFIL parity: an `await fetch(URL, {body: ...})` call with a Sensitive cookie source must fire DATA_EXFIL on the body field (no SSRF — destination is a fixed literal). Awaits do not strip taint; the cap split is preserved across the await edge identically to the synchronous fetch path.",
"tags": ["taint", "data-exfil", "fetch", "async", "await", "cookie", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [13, 16],
"notes": "req.cookies.session → JSON.stringify into await fetch body. Await must not silence the cap."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [13, 16],
"notes": "fetch URL is a fixed literal — body taint must not fire as SSRF."
}
]
}

View file

@ -0,0 +1,18 @@
var express = require('express');
var app = express();
// Async/await DATA_EXFIL: `await fetch(...)` must preserve the cap
// split. The destination URL is a fixed string literal (so SSRF must
// NOT fire) but a Sensitive cookie source threads through the body
// channel of the awaited call, so `Cap::DATA_EXFIL` MUST fire on the
// body field. Awaiting a Promise does not strip taint; the SSA lowering
// preserves chained await values across `.then`/`await` edges
// identically to the synchronous fetch case.
app.post('/sync-async', async function (req, res) {
var sid = req.cookies.session; // source: session cookie
await fetch('https://analytics.internal/track', { // destination is a fixed literal URL
method: 'POST',
body: JSON.stringify({ session: sid }), // sink: cookie value serialised into the outbound body
});
res.status(204).end();
});

View file

@ -0,0 +1,13 @@
{
"description": "Constructor cap narrowing: env secret flowing through `new Stripe(key)` must not propagate FILE_IO into the wrapper, so SDK-method-returned property values written to a file do not flag a phantom path-traversal flow.",
"tags": ["taint", "file_io", "constructor", "sdk", "negative", "regression-fp"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [9, 16],
"notes": "process.env.STRIPE_SECRET_KEY → new Stripe(key) → stripe.prices.create() → price.id → fs.writeFileSync — wrapper-object construction strips FILE_IO."
}
]
}

View file

@ -0,0 +1,17 @@
// Constructor cap narrowing: a third-party SDK client constructed from an
// env-derived secret returns objects whose string properties are
// SDK-generated, not derived from the secret in any path-shaped sense.
// `Cap::all()` flowing through `new Stripe(key)` must drop FILE_IO so
// downstream `fs.writeFileSync` of an SDK property does not flag a phantom
// path-traversal flow.
var fs = require('fs');
var key = process.env.STRIPE_SECRET_KEY; // source: env-derived secret
var stripe = new Stripe(key); // NOTE(review): `Stripe` is never imported here; presumably fine for a scan-only fixture, confirm
async function setup() {
var price = await stripe.prices.create({ unit_amount: 9599 });
var line = 'PRICE_ID="' + price.id + '"'; // only an SDK-returned property is interpolated
fs.writeFileSync('./out.env', line); // path argument is a fixed literal; `line` is the written content
}
setup();

View file

@ -0,0 +1,19 @@
{
"description": "Session-id forwarder: req.cookies.session (Sensitive-tier source) flows into a fixed-URL fetch body. SSRF must NOT fire (destination is hardcoded), but Cap::DATA_EXFIL MUST fire — auth-bearing operator state is leaving the process via the outbound payload. Pairs with fetch_tainted_body_safe.js (Plain source, silenced) to assert the source-sensitivity gate routes per-tier rather than globally.",
"tags": ["taint", "data-exfil", "fetch", "sensitivity-gate", "cookie", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [10, 17],
"notes": "fetch URL is a fixed literal — body taint must not fire as SSRF."
},
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [10, 17],
"notes": "req.cookies.session → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the body field."
}
]
}

View file

@ -0,0 +1,18 @@
var express = require('express');
var app = express();
// Session-id forwarder: an internal handler proxies the user's session
// cookie into the body of an outbound request to a fixed analytics URL.
// The destination is hardcoded so SSRF must NOT fire, but the source is
// Sensitive-tier (cookie carries auth material) so Cap::DATA_EXFIL MUST
// fire — operator-bound state is leaving the process via the request
// payload.
app.get('/sync', function(req, res) {
var sid = req.cookies.session; // source: session cookie
fetch('https://analytics.internal/track', { // destination is a fixed literal URL
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ session: sid }), // sink: cookie value serialised into the outbound body
});
res.status(204).end();
});

View file

@ -1,6 +1,6 @@
{
"description": "fetch() with a fixed destination URL and an attacker-controlled body. SSRF must NOT fire (destination is not attacker-influenced) and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) MUST fire on the body field.",
"tags": ["taint", "data-exfil", "fetch", "destination-aware", "cap-attribution"],
"description": "fetch() with a fixed destination URL and a plain user-input body (req.body.message). SSRF must NOT fire (destination is not attacker-influenced) and DATA_EXFIL must NOT fire either: plain user input echoed back via an outbound body is not a cross-boundary disclosure (the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL).",
"tags": ["taint", "data-exfil", "fetch", "destination-aware", "cap-attribution", "sensitivity-gate"],
"modes": ["full"],
"expected": [
{
@ -11,9 +11,9 @@
},
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"must_not_match": true,
"line_range": [7, 14],
"notes": "Body field carries req.body.message → must fire DATA_EXFIL (sensitive data leaving the process via outbound request payload)."
"notes": "Body source is plain user input (req.body.message → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources (cookies, headers, env, db, file) — plain user input echoed into a request body is not data exfiltration. See fetch_body_user_input_silenced.js for the unit-level regression."
}
]
}