nyx/tests/fetch_data_exfil_integration_tests.rs
Eli Peter 58f1794a4e
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers

* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters

* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic

* feat: Introduce per-detector configuration for data exfiltration suppression

* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output

* feat: Add tainted body and URL handling for data exfiltration detection

* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go

* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients

* feat: Add synthetic externals handling for closure-captured variables in SSA

* feat: Implement closure-based suppression for resource leak findings

* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns

* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders

* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt

* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests

* feat: Add data exfiltration sinks for various languages and enhance documentation

* refactor: Simplify formatting and improve readability in various files

* refactor: Improve readability by simplifying conditional statements and adding clippy linting

* docs: Update CHANGELOG and comments for data exfiltration features and configuration

* docs: Clarify configuration instructions for data exfiltration trusted destinations

* docs: Enhance comments for evidence routing logic in data exfiltration
2026-05-01 10:59:52 -04:00

227 lines
8.3 KiB
Rust

//! Integration tests for the `Cap::DATA_EXFIL` detector class.
//!
//! Validates per-cap attribution at multi-gate call sites: a single `fetch`
//! call carries both an SSRF gate (URL flow) and a DATA_EXFIL gate (body /
//! headers / json flow), and a tainted body must not surface as SSRF and
//! vice versa. Also sanity-checks the SARIF output so the new finding
//! class produces a distinct rule id.
//!
//! `DATA_EXFIL` is gated on source sensitivity: only `Sensitive`-tier
//! sources (cookies, headers, env, db rows, file reads) trigger the cap.
//! Plain user input echoed back into a body is *not* data exfiltration —
//! the user already controls the value. See
//! `fetch_body_user_input_silenced.js` for the negative regression.
mod common;
use common::scan_fixture_dir;
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::utils::config::AnalysisMode;
use std::path::PathBuf;
/// Returns the JavaScript fixture directory, `<crate root>/tests/fixtures/js`.
///
/// The crate root is taken from `CARGO_MANIFEST_DIR`, which is resolved at
/// compile time via `env!`.
fn js_fixture_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // Append one component at a time so the platform separator is used.
    dir.extend(["tests", "fixtures", "js"]);
    dir
}
/// Scans the whole JS fixture directory in `AnalysisMode::Full` and returns
/// only the diagnostics whose `path` ends with `file`.
fn diags_for(file: &str) -> Vec<Diag> {
    scan_fixture_dir(&js_fixture_dir(), AnalysisMode::Full)
        .into_iter()
        .filter(|diag| diag.path.ends_with(file))
        .collect()
}
/// A fixed-URL `fetch` whose body is tainted must be reported under the
/// `taint-data-exfiltration` rule and must not also produce a
/// `taint-unsanitised-flow` finding.
#[test]
fn fetch_body_data_exfil_emits_data_exfil_not_ssrf() {
    let diags = diags_for("fetch_body_data_exfil.js");

    // Number of findings whose rule id begins with the given prefix.
    let count_with_prefix =
        |prefix: &str| diags.iter().filter(|d| d.id.starts_with(prefix)).count();
    let exfil = count_with_prefix("taint-data-exfiltration");
    let plain_taint = count_with_prefix("taint-unsanitised-flow");

    // Rule ids of everything emitted for the fixture; printed on failure.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert!(
        exfil > 0,
        "expected at least one taint-data-exfiltration finding, got 0.\n\
         Diags: {:#?}",
        ids,
    );
    assert_eq!(
        plain_taint, 0,
        "fixed-URL fetch with tainted body must NOT emit SSRF \
         (taint-unsanitised-flow), got {plain_taint}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// Mirror image of the tainted-body case: a `fetch` whose URL is tainted must
/// produce a `taint-unsanitised-flow` (SSRF) finding and no
/// `taint-data-exfiltration` finding.
#[test]
fn fetch_ssrf_url_tainted_emits_ssrf_not_data_exfil() {
    let diags = diags_for("fetch_ssrf_url_tainted.js");

    // Tally both rule families in a single pass over the findings.
    let mut ssrf = 0usize;
    let mut exfil = 0usize;
    for d in &diags {
        if d.id.starts_with("taint-unsanitised-flow") {
            ssrf += 1;
        }
        if d.id.starts_with("taint-data-exfiltration") {
            exfil += 1;
        }
    }

    // Rule ids of everything emitted for the fixture; printed on failure.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();

    assert!(
        ssrf > 0,
        "expected at least one taint-unsanitised-flow (SSRF) finding, got 0.\n\
         Diags: {:#?}",
        ids,
    );
    assert_eq!(
        exfil, 0,
        "tainted-URL fetch must NOT emit DATA_EXFIL, got {exfil}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// Negative regression for the source-sensitivity gate.
///
/// The fixture sends plain attacker-controlled input (`req.body.message`) as
/// the body of a fixed-URL `fetch`. The user already controls that value, so
/// echoing it in an outbound payload is not a cross-boundary disclosure and
/// `Cap::DATA_EXFIL` must stay silent.
#[test]
fn fetch_body_plain_user_input_does_not_emit_data_exfil() {
    let diags = diags_for("fetch_body_user_input_silenced.js");

    // Gather the rule ids once: they drive both the count and the failure dump.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();
    let exfil = ids
        .iter()
        .filter(|id| id.starts_with("taint-data-exfiltration"))
        .count();

    assert_eq!(
        exfil, 0,
        "plain user input echoed into a fetch body must NOT emit \
         taint-data-exfiltration, got {exfil}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// Symex-witness regression guard for the cookie -> `fetch` body fixture.
///
/// For `Cap::DATA_EXFIL` the cap-specific payload selector
/// (`src/symex/witness.rs::witness_payload`) yields `<SESSION_TOKEN>`, and the
/// rendered witness (via `get_sink_witness`) substitutes it into the
/// string-renderable expression. The analyst therefore sees a leaked
/// credential-bearing payload rather than an injection.
///
/// The check is deliberately one-sided. It pins the shape of any witness that
/// is produced, but a finding with no witness (for example because the
/// expression tree was opaque) is accepted. The calibration suite already
/// covers the no-witness path.
#[test]
fn fetch_body_data_exfil_witness_mentions_session_token() {
    // Any one of these substrings is enough for a witness to pass.
    const ACCEPTED_MARKERS: [&str; 3] = ["<SESSION_TOKEN>", "body", "payload"];

    let diags = diags_for("fetch_body_data_exfil.js");

    // Walk evidence -> symbolic verdict -> witness, skipping findings that
    // lack any link of that chain.
    let exfil_witnesses = diags
        .iter()
        .filter(|d| d.id.starts_with("taint-data-exfiltration"))
        .filter_map(|d| {
            let evidence = d.evidence.as_ref()?;
            let symbolic = evidence.symbolic.as_ref()?;
            symbolic.witness.as_ref()
        });

    for w in exfil_witnesses {
        let mentions_payload = ACCEPTED_MARKERS.iter().any(|marker| w.contains(*marker));
        assert!(
            mentions_payload,
            "DATA_EXFIL witness must mention the leaked payload \
             (<SESSION_TOKEN>) or body/payload context. Got: {w:?}",
        );
    }
}
/// Type-based suppression guard.
///
/// A numeric body such as `parseInt(req.cookies.session_count)` cannot carry
/// session tokens, header secrets, or other credential material. Because
/// `is_type_safe_for_sink` includes `DATA_EXFIL` in its type-suppressible cap
/// mask, a proven-Int SSA value at the gate must silence the finding.
#[test]
fn fetch_body_int_value_does_not_emit_data_exfil() {
    let diags = diags_for("fetch_body_int_suppressed.js");

    let mut exfil = 0usize;
    for d in &diags {
        if d.id.starts_with("taint-data-exfiltration") {
            exfil += 1;
        }
    }

    // Rule ids of everything emitted for the fixture; printed on failure.
    let ids: Vec<_> = diags.iter().map(|d| &d.id).collect();
    assert_eq!(
        exfil, 0,
        "int-typed body must NOT emit taint-data-exfiltration, got {exfil}.\n\
         Diags: {:#?}",
        ids,
    );
}
/// SARIF sanity check over the whole JS fixture directory.
///
/// Verifies that `taint-data-exfiltration` and `taint-unsanitised-flow` each
/// appear as a rule and as the `ruleId` of at least one result, and that at
/// least one exfiltration result reports
/// `properties.data_exfil_field == "body"`.
#[test]
fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
    use nyx_scanner::output::build_sarif;

    /// True when the SARIF result's `ruleId` equals `rule`.
    fn has_rule(result: &serde_json::Value, rule: &str) -> bool {
        result["ruleId"].as_str() == Some(rule)
    }

    let dir = js_fixture_dir();
    let diags = scan_fixture_dir(&dir, AnalysisMode::Full);
    let sarif = build_sarif(&diags, &dir);
    // Everything inspected below lives under the first (only) run.
    let run = &sarif["runs"][0];

    // --- rule catalogue -------------------------------------------------
    let rules = run["tool"]["driver"]["rules"]
        .as_array()
        .expect("SARIF rules array");
    let rule_ids: Vec<&str> = rules
        .iter()
        .filter_map(|rule| rule["id"].as_str())
        .collect();
    assert!(
        rule_ids.iter().any(|id| *id == "taint-data-exfiltration"),
        "SARIF rules must contain taint-data-exfiltration, got: {rule_ids:?}"
    );
    assert!(
        rule_ids.iter().any(|id| *id == "taint-unsanitised-flow"),
        "SARIF rules must contain taint-unsanitised-flow, got: {rule_ids:?}"
    );

    // --- results --------------------------------------------------------
    // The cap-specific class has to keep its own rule id instead of being
    // folded back into the generic taint bucket, so both ids must show up.
    let results = run["results"].as_array().expect("SARIF results array");
    let exfil_results: Vec<&serde_json::Value> = results
        .iter()
        .filter(|r| has_rule(r, "taint-data-exfiltration"))
        .collect();
    let ssrf_results = results
        .iter()
        .filter(|r| has_rule(r, "taint-unsanitised-flow"))
        .count();
    assert!(
        !exfil_results.is_empty(),
        "expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {}",
        exfil_results.len(),
    );
    assert!(
        ssrf_results > 0,
        "expected >= 1 SARIF result with ruleId taint-unsanitised-flow, got {ssrf_results}",
    );

    // --- destination field ----------------------------------------------
    // The fixture set contains request-body exfiltration
    // (`fetch('/endpoint', { body: payload })`), so at least one result must
    // advertise `body` through `properties.data_exfil_field`. Results that
    // reach `headers` / `json` are not checked here.
    let body_field_seen = exfil_results
        .iter()
        .any(|r| matches!(r["properties"]["data_exfil_field"].as_str(), Some("body")));
    assert!(
        body_field_seen,
        "expected at least one taint-data-exfiltration SARIF result with \
         properties.data_exfil_field == \"body\". Results: {exfil_results:#?}",
    );
}