mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
Added Cap::DATA_EXFIL and taint false-positive (FP) and false-negative (FN) fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers
* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters
* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic
* feat: Introduce per-detector configuration for data exfiltration suppression
* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output
* feat: Add tainted body and URL handling for data exfiltration detection
* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go
* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients
* feat: Add synthetic externals handling for closure-captured variables in SSA
* feat: Implement closure-based suppression for resource leak findings
* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns
* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders
* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt
* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests
* feat: Add data exfiltration sinks for various languages and enhance documentation
* refactor: Simplify formatting and improve readability in various files
* refactor: Improve readability by simplifying conditional statements and adding clippy linting
* docs: Update CHANGELOG and comments for data exfiltration features and configuration
* docs: Clarify configuration instructions for data exfiltration trusted destinations
* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
19
tests/fixtures/real_world/python/taint/dict_set_data_exfil.expect.json
vendored
Normal file
19
tests/fixtures/real_world/python/taint/dict_set_data_exfil.expect.json
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"description": "Container-taint DATA_EXFIL: payload dict accumulates os.environ.get() secrets across multiple keys, then flows into requests.post(json=payload). The SSA heap Elements slot carries the cap from each `payload[k] = ...` store to the sink-side heap-load, so DATA_EXFIL must fire on the json field. Mirrors `array_push_data_exfil.js` / `map_assign_data_exfil.go` for cross-language container-taint coverage.",
|
||||
"tags": ["taint", "data-exfil", "requests", "container", "heap-elements", "env", "edge-case"],
|
||||
"modes": ["full"],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-data-exfiltration",
|
||||
"must_match": true,
|
||||
"line_range": [18, 22],
|
||||
"notes": "Dict population with env secrets, then requests.post(json=...). Container-taint round-trip must fire DATA_EXFIL on the json field."
|
||||
},
|
||||
{
|
||||
"rule_id": "taint-unsanitised-flow",
|
||||
"must_not_match": true,
|
||||
"line_range": [18, 22],
|
||||
"notes": "Destination URL is a fixed literal — body taint must not surface as SSRF."
|
||||
}
|
||||
]
|
||||
}
|
||||
23
tests/fixtures/real_world/python/taint/dict_set_data_exfil.py
vendored
Normal file
23
tests/fixtures/real_world/python/taint/dict_set_data_exfil.py
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import os
|
||||
import requests
|
||||
from flask import Flask, request
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
# Container-taint DATA_EXFIL: a dict accumulates env-config secrets across
|
||||
# keys, then is forwarded as the JSON body of an outbound POST to a fixed
|
||||
# URL. The Python SSA heap model marks the dict's `Elements` slot tainted
|
||||
# at every `payload[k] = ...` write; the sink-side
|
||||
# `collect_tainted_sink_values` heap-loads the same slot when checking the
|
||||
# `json` kwarg, so DATA_EXFIL must fire on the json field even though
|
||||
# `payload` itself is not directly tainted by an Assign. Pairs with
|
||||
# `httpx_async_post_data_exfil.py` (single-key dict literal — no
|
||||
# container-mutation step).
|
||||
@app.route('/upload-config', methods=['POST'])
|
||||
def upload_config():
|
||||
payload = {}
|
||||
payload['api_key'] = os.environ.get('UPSTREAM_API_KEY')
|
||||
payload['region'] = os.environ.get('UPSTREAM_REGION')
|
||||
requests.post('https://api.internal/ingest', json=payload)
|
||||
return 'ok'
|
||||
13
tests/fixtures/real_world/python/taint/httpx_async_post_data_exfil.expect.json
vendored
Normal file
13
tests/fixtures/real_world/python/taint/httpx_async_post_data_exfil.expect.json
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"description": "Async DATA_EXFIL via httpx.AsyncClient: os.environ.get() (EnvironmentConfig — Sensitive-tier) flows into the json kwarg of an async client.post() call against a fixed URL. The receiver type resolves to HttpClient so the gated DATA_EXFIL fires via the type-qualified `HttpClient.post` matcher; the destination is hardcoded so SSRF must NOT fire.",
|
||||
"tags": ["taint", "data-exfil", "httpx", "async", "type-qualified", "sensitivity-gate"],
|
||||
"modes": ["full"],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-data-exfiltration",
|
||||
"must_match": true,
|
||||
"line_range": [15, 20],
|
||||
"notes": "os.environ → SourceKind::EnvironmentConfig → Sensitivity::Sensitive — DATA_EXFIL fires on the json kwarg of HttpClient.post."
|
||||
}
|
||||
]
|
||||
}
|
||||
20
tests/fixtures/real_world/python/taint/httpx_async_post_data_exfil.py
vendored
Normal file
20
tests/fixtures/real_world/python/taint/httpx_async_post_data_exfil.py
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
import os
|
||||
from fastapi import FastAPI, Request
|
||||
import httpx
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
# Async data-exfil path: an `httpx.AsyncClient` instance dispatches a POST
|
||||
# whose `json` kwarg embeds an environment-config secret. The chained-call
|
||||
# normalization collapses `httpx.AsyncClient().post` to the gate matcher
|
||||
# `httpx.AsyncClient.post` so the gated DATA_EXFIL fires. Source is
|
||||
# Sensitivity::Sensitive (EnvironmentConfig) so DATA_EXFIL MUST fire on the
|
||||
# json kwarg; the destination URL is fixed so SSRF must NOT fire.
|
||||
@app.post('/sync-async')
|
||||
async def sync_async(req: Request):
|
||||
api_key = os.environ.get('UPSTREAM_API_KEY')
|
||||
await httpx.AsyncClient().post(
|
||||
'https://upstream.internal/ingest',
|
||||
json={'api_key': api_key},
|
||||
)
|
||||
return {'ok': True}
|
||||
13
tests/fixtures/real_world/python/taint/requests_post_session_token.expect.json
vendored
Normal file
13
tests/fixtures/real_world/python/taint/requests_post_session_token.expect.json
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"description": "Session-token forwarder: flask `session` (Sensitive-tier source) flows into a fixed-URL requests.post body. SSRF must NOT fire (destination is hardcoded), but Cap::DATA_EXFIL MUST fire — auth-bearing operator state is leaving the process via the outbound payload. Mirrors the JS fetch_session_forward case for Python.",
|
||||
"tags": ["taint", "data-exfil", "requests", "sensitivity-gate", "session", "cap-attribution"],
|
||||
"modes": ["full"],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-data-exfiltration",
|
||||
"must_match": true,
|
||||
"line_range": [12, 18],
|
||||
"notes": "session.get('user_token') → SourceKind::Cookie → Sensitivity::Sensitive — DATA_EXFIL fires on the json kwarg of requests.post."
|
||||
}
|
||||
]
|
||||
}
|
||||
18
tests/fixtures/real_world/python/taint/requests_post_session_token.py
vendored
Normal file
18
tests/fixtures/real_world/python/taint/requests_post_session_token.py
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
from flask import Flask, request, session
|
||||
import requests
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# Sensitive-source forwarder: the Flask session cookie carries auth material
|
||||
# and is being forwarded to a fixed analytics URL via the request body. The
|
||||
# destination is hardcoded so SSRF must NOT fire, but the source is
|
||||
# Sensitivity::Sensitive (session ↔ Cookie) so DATA_EXFIL MUST fire — the
|
||||
# auth-bearing operator state is leaving the process via the outbound payload.
|
||||
@app.route('/sync')
|
||||
def sync_session():
|
||||
sid = session.get('user_token')
|
||||
requests.post(
|
||||
'https://analytics.internal/track',
|
||||
json={'session': sid},
|
||||
)
|
||||
return '', 204
|
||||
19
tests/fixtures/real_world/python/taint/requests_post_url_tainted_ssrf_only.expect.json
vendored
Normal file
19
tests/fixtures/real_world/python/taint/requests_post_url_tainted_ssrf_only.expect.json
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"description": "requests.post(taintedUrl, json={fixed}) — destination-aware case for Python. URL is tainted (SSRF), body is fixed. SSRF must fire and the cross-boundary data-exfiltration class (Cap::DATA_EXFIL) must NOT fire — the two classes share the callee but cap attribution is per-position.",
|
||||
"tags": ["taint", "ssrf", "requests", "destination-aware", "cap-attribution"],
|
||||
"modes": ["full"],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-unsanitised-flow",
|
||||
"must_match": true,
|
||||
"line_range": [13, 18],
|
||||
"notes": "request.args.get('target') → requests.post(target, json={...}) — tainted URL fires SSRF."
|
||||
},
|
||||
{
|
||||
"rule_id": "taint-data-exfiltration",
|
||||
"must_not_match": true,
|
||||
"line_range": [13, 18],
|
||||
"notes": "Body json kwarg is a fixed literal — DATA_EXFIL must NOT fire on this site (regression guard for per-cap attribution)."
|
||||
}
|
||||
]
|
||||
}
|
||||
18
tests/fixtures/real_world/python/taint/requests_post_url_tainted_ssrf_only.py
vendored
Normal file
18
tests/fixtures/real_world/python/taint/requests_post_url_tainted_ssrf_only.py
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
from flask import Flask, request
|
||||
import requests
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# URL-only taint: the destination URL is attacker-controlled but the body is
|
||||
# a fixed literal. SSRF must fire on the URL flow. DATA_EXFIL must NOT fire
|
||||
# because no body kwarg carries taint (regression guard for per-cap
|
||||
# attribution — the two classes share the callee but cap routing is per
|
||||
# argument position).
|
||||
@app.route('/proxy', methods=['POST'])
|
||||
def proxy():
|
||||
target = request.args.get('target')
|
||||
requests.post(
|
||||
target,
|
||||
json={'event': 'proxy_call'},
|
||||
)
|
||||
return '', 204
|
||||
13
tests/fixtures/real_world/python/taint/requests_post_user_input_silenced.expect.json
vendored
Normal file
13
tests/fixtures/real_world/python/taint/requests_post_user_input_silenced.expect.json
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"description": "requests.post() with a fixed destination URL and a plain user-input body (request.form). DATA_EXFIL must NOT fire: plain user input echoed back via an outbound body is not a cross-boundary disclosure (the source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL). Pairs with requests_post_session_token.py to assert per-tier routing for Python.",
|
||||
"tags": ["taint", "data-exfil", "requests", "sensitivity-gate", "cap-attribution"],
|
||||
"modes": ["full"],
|
||||
"expected": [
|
||||
{
|
||||
"rule_id": "taint-data-exfiltration",
|
||||
"must_not_match": true,
|
||||
"line_range": [13, 19],
|
||||
"notes": "Body source is plain user input (request.form → Sensitivity::Plain). DATA_EXFIL fires only on Sensitive-tier sources (cookies, sessions, headers, env) — plain user input echoed into a request body is not data exfiltration."
|
||||
}
|
||||
]
|
||||
}
|
||||
19
tests/fixtures/real_world/python/taint/requests_post_user_input_silenced.py
vendored
Normal file
19
tests/fixtures/real_world/python/taint/requests_post_user_input_silenced.py
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
from flask import Flask, request
|
||||
import requests
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# Plain user input forwarded into a fixed-URL request body. The destination
|
||||
# is hardcoded so SSRF must NOT fire. DATA_EXFIL must NOT fire either: the
|
||||
# source is Sensitivity::Plain (request.form is raw user input) and the
|
||||
# source-sensitivity gate suppresses Plain-tier sources for Cap::DATA_EXFIL.
|
||||
# Forwarding the user's own data to telemetry is not a cross-boundary
|
||||
# disclosure — it is exactly what the API gateway pattern does.
|
||||
@app.route('/forward', methods=['POST'])
|
||||
def forward_message():
|
||||
payload = request.form.get('message')
|
||||
requests.post(
|
||||
'https://telemetry.internal/forward',
|
||||
data={'message': payload},
|
||||
)
|
||||
return '', 204
|
||||
Loading…
Add table
Add a link
Reference in a new issue