mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers
* feat: Implement cross-file data exfiltration detection with parameter-specific gate filters
* feat: Add calibration tests and refine DATA_EXFIL severity scoring logic
* feat: Introduce per-detector configuration for data exfiltration suppression
* feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output
* feat: Add tainted body and URL handling for data exfiltration detection
* feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go
* feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients
* feat: Add synthetic externals handling for closure-captured variables in SSA
* feat: Implement closure-based suppression for resource leak findings
* feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns
* feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders
* feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt
* feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests
* feat: Add data exfiltration sinks for various languages and enhance documentation
* refactor: Simplify formatting and improve readability in various files
* refactor: Improve readability by simplifying conditional statements and adding clippy linting
* docs: Update CHANGELOG and comments for data exfiltration features and configuration
* docs: Clarify configuration instructions for data exfiltration trusted destinations
* docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
|
|
@ -5,6 +5,12 @@
|
|||
//! headers / json flow), and a tainted body must not surface as SSRF and
|
||||
//! vice versa. Also sanity-checks the SARIF output so the new finding
|
||||
//! class produces a distinct rule id.
|
||||
//!
|
||||
//! `DATA_EXFIL` is gated on source sensitivity: only `Sensitive`-tier
|
||||
//! sources (cookies, headers, env, db rows, file reads) trigger the cap.
|
||||
//! Plain user input echoed back into a body is *not* data exfiltration —
|
||||
//! the user already controls the value. See
|
||||
//! `fetch_body_user_input_silenced.js` for the negative regression.
|
||||
|
||||
mod common;
|
||||
|
||||
|
|
@ -79,6 +85,87 @@ fn fetch_ssrf_url_tainted_emits_ssrf_not_data_exfil() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn fetch_body_plain_user_input_does_not_emit_data_exfil() {
    // Negative regression for the source-sensitivity gate: the fixture
    // routes plain attacker-controlled input (`req.body.message`) into the
    // body of a fixed-URL `fetch`. The user already controls that value, so
    // handing it back through the outbound payload is not a cross-boundary
    // disclosure and `Cap::DATA_EXFIL` must stay silent.
    let diags = diags_for("fetch_body_user_input_silenced.js");

    // Tally every finding whose rule id belongs to the data-exfil family.
    let mut exfil = 0usize;
    for diag in diags.iter() {
        if diag.id.starts_with("taint-data-exfiltration") {
            exfil += 1;
        }
    }

    // On failure, dump all rule ids so the unexpected finding is easy to spot.
    assert_eq!(
        exfil,
        0,
        "plain user input echoed into a fetch body must NOT emit \
         taint-data-exfiltration, got {exfil}.\n\
         Diags: {:#?}",
        diags.iter().map(|diag| &diag.id).collect::<Vec<_>>(),
    );
}
|
||||
|
||||
#[test]
fn fetch_body_data_exfil_witness_mentions_session_token() {
    // Regression guard for the symex witness on the cookie → fetch body
    // fixture. Whenever a DATA_EXFIL finding carries a witness (verdict
    // `Confirmed`, or Inconclusive but still witness-bearing), that witness
    // string has to surface the session-token payload. The cap-specific
    // selector in `src/symex/witness.rs::witness_payload` yields
    // `<SESSION_TOKEN>` for `Cap::DATA_EXFIL`, and the rendered witness
    // (via `get_sink_witness`) substitutes it into the string-renderable
    // expression, so the analyst can see the *leak* is a credential-bearing
    // payload rather than an injection.
    //
    // The check is intentionally one-sided. If symex produces no witness
    // for this flow (e.g. the expression tree was opaque), nothing is
    // asserted: the witness shape is locked, but a missing witness is not
    // a hard failure here because the calibration suite already covers the
    // no-witness path.
    let diags = diags_for("fetch_body_data_exfil.js");

    // Any one of these substrings is accepted as evidence that the witness
    // talks about the leaked payload.
    let accepted_markers = ["<SESSION_TOKEN>", "body", "payload"];

    for diag in diags.iter() {
        if !diag.id.starts_with("taint-data-exfiltration") {
            continue;
        }

        // Walk evidence → symbolic verdict → witness; any missing link
        // means this finding has no witness to validate.
        let witness = diag
            .evidence
            .as_ref()
            .and_then(|evidence| evidence.symbolic.as_ref())
            .and_then(|symbolic| symbolic.witness.as_ref());

        if let Some(w) = witness {
            assert!(
                accepted_markers.iter().any(|marker| w.contains(*marker)),
                "DATA_EXFIL witness must mention the leaked payload \
                 (<SESSION_TOKEN>) or body/payload context. Got: {w:?}",
            );
        }
    }
}
|
||||
|
||||
#[test]
fn fetch_body_int_value_does_not_emit_data_exfil() {
    // Type-based suppression guard. A numeric body such as
    // `parseInt(req.cookies.session_count)` is payload-incompatible: an int
    // cannot carry session tokens, header secrets, or other credential
    // material that would amount to a cross-boundary disclosure.
    // `is_type_safe_for_sink` includes `DATA_EXFIL` in its
    // type-suppressible cap mask, so a proven-Int SSA value at the gate is
    // expected to silence the finding.
    let diags = diags_for("fetch_body_int_suppressed.js");

    // Project to rule ids first, then count the data-exfil ones.
    let exfil = diags
        .iter()
        .map(|diag| &diag.id)
        .filter(|rule_id| rule_id.starts_with("taint-data-exfiltration"))
        .count();

    // On failure, list every rule id emitted for the fixture.
    assert_eq!(
        exfil,
        0,
        "int-typed body must NOT emit taint-data-exfiltration, got {exfil}.\n\
         Diags: {:#?}",
        diags.iter().map(|diag| &diag.id).collect::<Vec<_>>(),
    );
}
|
||||
|
||||
#[test]
|
||||
fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
|
||||
use nyx_scanner::output::build_sarif;
|
||||
|
|
@ -106,20 +193,35 @@ fn sarif_distinguishes_data_exfil_rule_id_from_ssrf() {
|
|||
let results = sarif["runs"][0]["results"]
|
||||
.as_array()
|
||||
.expect("SARIF results array");
|
||||
let exfil_results = results
|
||||
let exfil_results: Vec<&serde_json::Value> = results
|
||||
.iter()
|
||||
.filter(|r| r["ruleId"].as_str() == Some("taint-data-exfiltration"))
|
||||
.count();
|
||||
.collect();
|
||||
let ssrf_results = results
|
||||
.iter()
|
||||
.filter(|r| r["ruleId"].as_str() == Some("taint-unsanitised-flow"))
|
||||
.count();
|
||||
assert!(
|
||||
exfil_results >= 1,
|
||||
"expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {exfil_results}",
|
||||
!exfil_results.is_empty(),
|
||||
"expected >= 1 SARIF result with ruleId taint-data-exfiltration, got {}",
|
||||
exfil_results.len(),
|
||||
);
|
||||
assert!(
|
||||
ssrf_results >= 1,
|
||||
"expected >= 1 SARIF result with ruleId taint-unsanitised-flow, got {ssrf_results}",
|
||||
);
|
||||
|
||||
// Every DATA_EXFIL finding from the fixture set targets the request body
|
||||
// (`fetch('/endpoint', { body: payload })`), so SARIF must surface the
|
||||
// destination field via `properties.data_exfil_field`. At least one
|
||||
// result has to advertise `body`, fixtures that reach `headers` /
|
||||
// `json` are out of scope for this assertion but must not be silenced.
|
||||
let body_field_seen = exfil_results
|
||||
.iter()
|
||||
.any(|r| r["properties"]["data_exfil_field"].as_str() == Some("body"));
|
||||
assert!(
|
||||
body_field_seen,
|
||||
"expected at least one taint-data-exfiltration SARIF result with \
|
||||
properties.data_exfil_field == \"body\". Results: {exfil_results:#?}",
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue