mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
170
src/ast.rs
170
src/ast.rs
|
|
@ -145,6 +145,11 @@ fn resolve_file_rel(file_rel: &str, scan_root: Option<&Path>, fallback: &Path) -
|
|||
|
||||
/// Build a [`Diag`] from a taint [`Finding`], the CFG that produced it,
|
||||
/// the parsed tree (for byte→line/col conversion) and the file path.
|
||||
///
|
||||
/// Returns `None` when source-sensitivity gating fully suppresses the
|
||||
/// finding (the canonical case is a multi-gate `DATA_EXFIL` event whose
|
||||
/// contributing source is plain user input — see the
|
||||
/// `effective_caps` strip below).
|
||||
fn build_taint_diag(
|
||||
finding: &crate::taint::Finding,
|
||||
cfg_graph: &crate::cfg::Cfg,
|
||||
|
|
@ -152,7 +157,7 @@ fn build_taint_diag(
|
|||
path: &Path,
|
||||
src: &[u8],
|
||||
scan_root: Option<&Path>,
|
||||
) -> Diag {
|
||||
) -> Option<Diag> {
|
||||
let call_site_byte = cfg_graph[finding.sink].classification_span().0;
|
||||
let call_site_point = byte_offset_to_point(tree, call_site_byte);
|
||||
// `finding.source` should be a NodeIndex valid in this body's CFG, but
|
||||
|
|
@ -373,16 +378,63 @@ fn build_taint_diag(
|
|||
// SSA dispatch) when populated; fall back to the union of all sink-label
|
||||
// caps on the CFG node so legacy paths that build findings without
|
||||
// setting `effective_sink_caps` still pick the right rule id.
|
||||
let effective_caps = if finding.effective_sink_caps.is_empty() {
|
||||
let mut effective_caps = if finding.effective_sink_caps.is_empty() {
|
||||
crate::labels::Cap::from_bits_truncate(sink_caps_bits)
|
||||
} else {
|
||||
finding.effective_sink_caps
|
||||
};
|
||||
|
||||
// Source-sensitivity gate for `DATA_EXFIL`. Plain attacker input echoed
|
||||
// back into an outbound request body / headers / json is not data
|
||||
// exfiltration, the user already controls the value, surfacing it as a
|
||||
// leak is noise (the canonical false-positive class for API gateways
|
||||
// and telemetry forwarders that proxy `req.body`). A `DATA_EXFIL`
|
||||
// finding requires the contributing source to be at least `Sensitive`
|
||||
// (cookies, headers, env, db rows, file reads). Plain user-input
|
||||
// sources have the cap stripped so the finding either drops entirely
|
||||
// or downgrades to whatever non-`DATA_EXFIL` cap also applies (e.g.
|
||||
// SSRF on the URL position of the same `fetch` call).
|
||||
if effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
|
||||
&& finding.source_kind.sensitivity() < crate::labels::Sensitivity::Sensitive
|
||||
{
|
||||
effective_caps.remove(crate::labels::Cap::DATA_EXFIL);
|
||||
// The multi-gate dispatch produces one finding per (source, sink-cap)
|
||||
// pair, a body-flow finding's `effective_sink_caps` is exactly the
|
||||
// cap that fired (e.g. `DATA_EXFIL`). When that single cap is the
|
||||
// sensitivity-stripped one, the finding has no surviving rationale
|
||||
// and we drop it entirely rather than reroute it to the generic
|
||||
// `taint-unsanitised-flow` bucket (which would just re-emit the same
|
||||
// false positive under a different rule id). Findings with a
|
||||
// multi-cap `effective_sink_caps` keep their non-DATA_EXFIL caps and
|
||||
// are routed normally below.
|
||||
if finding.effective_sink_caps == crate::labels::Cap::DATA_EXFIL {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// DATA_EXFIL routing.
|
||||
//
|
||||
// Multi-gate dispatch (JS / Go) emits one event per cap, so by this
|
||||
// point each finding's `effective_sink_caps` carries exactly one bit
|
||||
// and the simple `DATA_EXFIL && !SSRF` test routes correctly. Flat-
|
||||
// rule paths (Java HTTP clients where type-qualified resolution
|
||||
// attaches both `SSRF` and `DATA_EXFIL` Sink labels to the same call,
|
||||
// e.g. `client.send(req)` covering both URL and body channels of the
|
||||
// request value) produce a single dual-cap event. In that case the
|
||||
// source's sensitivity tier disambiguates: a Sensitive source
|
||||
// (cookie, header, env, db, session) leaking into an outbound
|
||||
// request is canonically DATA_EXFIL even if the sink also carries
|
||||
// an SSRF label, because operator-bound state is not URL-shaped
|
||||
// attacker input. Plain user input keeps SSRF routing (the typical
|
||||
// user-controlled-URL pattern).
|
||||
let is_data_exfil_rule = effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
|
||||
&& !effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID)
|
||||
&& (!effective_caps.contains(crate::labels::Cap::SSRF)
|
||||
|| finding.source_kind.sensitivity() >= crate::labels::Sensitivity::Sensitive);
|
||||
|
||||
let diag_id = if effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID) {
|
||||
"rs.auth.missing_ownership_check.taint".to_string()
|
||||
} else if effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
|
||||
&& !effective_caps.contains(crate::labels::Cap::SSRF)
|
||||
{
|
||||
} else if is_data_exfil_rule {
|
||||
format!(
|
||||
"taint-data-exfiltration (source {}:{})",
|
||||
source_point.row + 1,
|
||||
|
|
@ -396,18 +448,86 @@ fn build_taint_diag(
|
|||
)
|
||||
};
|
||||
|
||||
// For `DATA_EXFIL` rules, look up which destination object-literal field
|
||||
// (`body` / `headers` / `json`) the tainted value reached. Each
|
||||
// [`crate::cfg::GateFilter`] carries `destination_uses` (var names) in
|
||||
// parallel with `destination_fields` (the field each var was bound to),
|
||||
// so we walk the gate filter whose `label_caps` includes `DATA_EXFIL`
|
||||
// and match the tainted var name from the last flow step. Falls back
|
||||
// to the first non-empty destination field on the matching filter when
|
||||
// the var-name match fails (e.g. the SSA sink event is reported on a
|
||||
// copy-propagated value whose name no longer matches the original
|
||||
// destination ident). `None` when the sink wasn't a destination-aware
|
||||
// gate (no object literal, or non-fetch sink).
|
||||
let data_exfil_field: Option<String> = if is_data_exfil_rule {
|
||||
let last_var = finding
|
||||
.flow_steps
|
||||
.last()
|
||||
.and_then(|s| s.var_name.as_deref());
|
||||
let filters = &cfg_graph[finding.sink].call.gate_filters;
|
||||
filters
|
||||
.iter()
|
||||
.find(|f| f.label_caps.contains(crate::labels::Cap::DATA_EXFIL))
|
||||
.and_then(|f| {
|
||||
if let (Some(uses), Some(var)) = (f.destination_uses.as_ref(), last_var)
|
||||
&& let Some(idx) = uses.iter().position(|u| u == var)
|
||||
{
|
||||
return f.destination_fields.get(idx).cloned();
|
||||
}
|
||||
f.destination_fields.first().cloned()
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// DATA_EXFIL severity calibration (Phase: detector ranking).
|
||||
//
|
||||
// Generic taint severity comes from `severity_for_source_kind`, which
|
||||
// maps Cookie/Header/Env to High because those sources are spicy
|
||||
// *as taint roots*. For `DATA_EXFIL` we are scoring the leak class,
|
||||
// not the source itself: not every Sensitive-tier source is a Secret.
|
||||
// Cookies and env carry credential / session material whose leakage
|
||||
// is an immediate disclosure (Secret-tier); request headers, file
|
||||
// reads, db rows, and caught exceptions are Sensitive but not
|
||||
// automatically secret, so they downgrade to Medium. Plain user
|
||||
// input is already stripped above by the source-sensitivity gate, so
|
||||
// the `_` arm here is reached only by Sensitive sources that are not
|
||||
// explicit secrets.
|
||||
let severity = if is_data_exfil_rule {
|
||||
match finding.source_kind {
|
||||
crate::labels::SourceKind::Cookie | crate::labels::SourceKind::EnvironmentConfig => {
|
||||
crate::patterns::Severity::High
|
||||
}
|
||||
_ => crate::patterns::Severity::Medium,
|
||||
}
|
||||
} else {
|
||||
severity_for_source_kind(finding.source_kind)
|
||||
};
|
||||
|
||||
// DATA_EXFIL: surface the destination field in the message so analysts
|
||||
// see at a glance whether the leak reached the request body, headers,
|
||||
// or json payload. Generic taint findings stay on the existing
|
||||
// "unsanitised … flows from … → …" template.
|
||||
let message = if is_data_exfil_rule {
|
||||
let suffix = data_exfil_field
|
||||
.as_deref()
|
||||
.map(|f| format!(" ({f} field)"))
|
||||
.unwrap_or_default();
|
||||
format!("sensitive data flows from {short_source} \u{2192} {sink_display}{suffix}")
|
||||
} else {
|
||||
format!("unsanitised {kind_label} flows from {short_source} \u{2192} {sink_display}")
|
||||
};
|
||||
|
||||
let mut diag = Diag {
|
||||
path: primary_path.clone(),
|
||||
line: primary_line,
|
||||
col: primary_col,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
severity,
|
||||
id: diag_id,
|
||||
category: FindingCategory::Security,
|
||||
path_validated: finding.path_validated,
|
||||
guard_kind: finding.guard_kind.map(|k| format!("{k:?}")),
|
||||
message: Some(format!(
|
||||
"unsanitised {kind_label} flows from {short_source} \u{2192} {sink_display}"
|
||||
)),
|
||||
message: Some(message),
|
||||
labels,
|
||||
confidence: None,
|
||||
evidence: Some(Evidence {
|
||||
|
|
@ -448,6 +568,7 @@ fn build_taint_diag(
|
|||
symbolic: finding.symbolic.clone(),
|
||||
sink_caps: sink_caps_bits,
|
||||
engine_notes: finding.engine_notes.clone(),
|
||||
data_exfil_field,
|
||||
..Default::default()
|
||||
}),
|
||||
rank_score: None,
|
||||
|
|
@ -467,7 +588,7 @@ fn build_taint_diag(
|
|||
ev.confidence_limiters = limiters;
|
||||
}
|
||||
|
||||
diag
|
||||
Some(diag)
|
||||
}
|
||||
|
||||
/// Resolve a file extension to a language slug (e.g. `"rust"`,
|
||||
|
|
@ -622,6 +743,8 @@ fn source_kind_label(sk: crate::labels::SourceKind) -> &'static str {
|
|||
use crate::labels::SourceKind;
|
||||
match sk {
|
||||
SourceKind::UserInput => "user input",
|
||||
SourceKind::Cookie => "cookie value",
|
||||
SourceKind::Header => "request header",
|
||||
SourceKind::EnvironmentConfig => "environment config",
|
||||
SourceKind::FileSystem => "file system data",
|
||||
SourceKind::Database => "database result",
|
||||
|
|
@ -1198,18 +1321,31 @@ impl<'a> ParsedFile<'a> {
|
|||
continue;
|
||||
}
|
||||
|
||||
out.push(build_taint_diag(
|
||||
if let Some(diag) = build_taint_diag(
|
||||
finding,
|
||||
body_cfg,
|
||||
&self.source.tree,
|
||||
self.source.path,
|
||||
self.source.bytes,
|
||||
scan_root,
|
||||
));
|
||||
) {
|
||||
out.push(diag);
|
||||
}
|
||||
}
|
||||
|
||||
// ── CFG structural analyses (per body) ─────────────────────────
|
||||
let taint_active = global_summaries.is_some() || !taint_results.is_empty();
|
||||
// Pre-compute, per body, the set of variable names whose
|
||||
// release / close calls live in a NESTED closure body inside
|
||||
// that body (e.g. `socket.on("close", () => ws.close())`).
|
||||
// Both the structural ResourceMisuse pass and the state-model
|
||||
// leak pass consult it to suppress findings whose cleanup is
|
||||
// registered as a callback the per-body CFG can't follow.
|
||||
// Only descendants count — sibling methods on the same class
|
||||
// don't share resource ownership.
|
||||
let closure_released_per_body =
|
||||
state::collect_closure_released_var_names(&self.file_cfg.bodies, caller_lang);
|
||||
let empty_set: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
for body in &self.file_cfg.bodies {
|
||||
let body_taint: Vec<_> = taint_results
|
||||
.iter()
|
||||
|
|
@ -1231,6 +1367,11 @@ impl<'a> ParsedFile<'a> {
|
|||
body_const_facts: body_const_facts.as_ref(),
|
||||
type_facts: body_const_facts.as_ref().map(|f| &f.type_facts),
|
||||
auth_decorators: &body.meta.auth_decorators,
|
||||
closure_released_var_names: Some(
|
||||
closure_released_per_body
|
||||
.get(&body.meta.id)
|
||||
.unwrap_or(&empty_set),
|
||||
),
|
||||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&self.source.tree, cf.span.0);
|
||||
|
|
@ -1307,6 +1448,11 @@ impl<'a> ParsedFile<'a> {
|
|||
&body.meta.auth_decorators,
|
||||
&path_safe_suppressed_spans,
|
||||
body_pointer_hints.as_ref(),
|
||||
Some(
|
||||
closure_released_per_body
|
||||
.get(&body.meta.id)
|
||||
.unwrap_or(&empty_set),
|
||||
),
|
||||
);
|
||||
|
||||
for sf in &state_findings {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue