mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
|
|
@ -239,18 +239,25 @@ fn lower_to_ssa_inner(
|
|||
|
||||
// 6. Rename variables (dominator tree preorder walk)
|
||||
let dom_tree_children = build_dom_tree_children(num_blocks, &doms, &block_graph);
|
||||
let (mut ssa_blocks, mut value_defs, cfg_node_map, field_interner, field_writes) =
|
||||
rename_variables(
|
||||
cfg,
|
||||
&blocks_nodes,
|
||||
&block_succs,
|
||||
&block_preds,
|
||||
&phi_placements,
|
||||
&dom_tree_children,
|
||||
&filtered_edges,
|
||||
&external_vars,
|
||||
&nop_nodes,
|
||||
);
|
||||
let (
|
||||
mut ssa_blocks,
|
||||
mut value_defs,
|
||||
cfg_node_map,
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
) = rename_variables(
|
||||
cfg,
|
||||
&blocks_nodes,
|
||||
&block_succs,
|
||||
&block_preds,
|
||||
&phi_placements,
|
||||
&dom_tree_children,
|
||||
&filtered_edges,
|
||||
&external_vars,
|
||||
formal_params,
|
||||
&nop_nodes,
|
||||
);
|
||||
|
||||
// 6b. Fill any missing phi operands with a shared Undef sentinel so
|
||||
// every phi has exactly one operand per predecessor. See
|
||||
|
|
@ -306,6 +313,7 @@ fn lower_to_ssa_inner(
|
|||
exception_edges,
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
};
|
||||
|
||||
// 9. Catch-block reachability invariant.
|
||||
|
|
@ -927,6 +935,7 @@ fn rename_variables(
|
|||
dom_tree_children: &[Vec<usize>],
|
||||
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
||||
external_vars: &[String],
|
||||
formal_params: &[String],
|
||||
nop_nodes: &HashSet<NodeIndex>,
|
||||
) -> (
|
||||
Vec<SsaBlock>,
|
||||
|
|
@ -934,6 +943,7 @@ fn rename_variables(
|
|||
HashMap<NodeIndex, SsaValue>,
|
||||
crate::ssa::ir::FieldInterner,
|
||||
HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
|
||||
HashSet<SsaValue>,
|
||||
) {
|
||||
let num_blocks = blocks_nodes.len();
|
||||
let mut next_value: u32 = 0;
|
||||
|
|
@ -1679,6 +1689,27 @@ fn rename_variables(
|
|||
// Inject synthetic Param instructions at START of block 0 for external variables.
|
||||
// These create SSA definitions so the rename pass can reference them.
|
||||
// Pre-seed var_stacks so process_block sees them.
|
||||
//
|
||||
// `external_vars` contains both real formal parameters and free / closure-
|
||||
// captured variables (variables read by the body but not declared as a
|
||||
// formal and not assigned anywhere). Both end up emitted as
|
||||
// [`SsaOp::Param`] in block 0; we record the SSA values that correspond
|
||||
// to free vars in `synthetic_externals` so downstream analyses (the JS/TS
|
||||
// handler-name auto-seed in particular) can avoid treating closure
|
||||
// captures as if they were parameters of the function under analysis.
|
||||
//
|
||||
// **Conservative behaviour when `formal_params` is empty.** Several
|
||||
// call sites (`lower_to_ssa`, `lower_to_ssa_scoped_nop`) don't supply
|
||||
// formal parameter names; in that case we cannot distinguish formals
|
||||
// from free vars structurally, so we leave `synthetic_externals` empty
|
||||
// and the auto-seed pass keeps its pre-fix behaviour of treating every
|
||||
// `Param` op as a candidate. Only callers that pass a non-empty
|
||||
// `formal_params` slice (`lower_to_ssa_with_params`, used by the
|
||||
// findings pipeline's per-function lowering) opt into the
|
||||
// closure-capture distinction.
|
||||
let mut synthetic_externals: HashSet<SsaValue> = HashSet::new();
|
||||
let formal_set: HashSet<&str> = formal_params.iter().map(|s| s.as_str()).collect();
|
||||
let track_synthetic = !formal_params.is_empty();
|
||||
if !external_vars.is_empty() {
|
||||
let entry_cfg_node = blocks_nodes[0][0];
|
||||
let mut synthetic_body = Vec::with_capacity(external_vars.len());
|
||||
|
|
@ -1691,7 +1722,8 @@ fn rename_variables(
|
|||
cfg_node: entry_cfg_node,
|
||||
block: BlockId(0),
|
||||
});
|
||||
let op = if is_receiver_name(var) {
|
||||
let is_receiver = is_receiver_name(var);
|
||||
let op = if is_receiver {
|
||||
SsaOp::SelfParam
|
||||
} else {
|
||||
let op = SsaOp::Param {
|
||||
|
|
@ -1700,6 +1732,28 @@ fn rename_variables(
|
|||
positional_idx += 1;
|
||||
op
|
||||
};
|
||||
// A non-receiver var is "synthetic" (a free / closure capture)
|
||||
// when it is *not* one of the function's declared formals AND
|
||||
// not a dotted access on a formal (`input.cmd` where `input` is
|
||||
// a formal — it represents a structural projection of the
|
||||
// formal, not a free variable; the auto-seed should still treat
|
||||
// it as part of the formal's own taint surface). Receivers are
|
||||
// intentionally excluded: `this` / `self` represent the implicit
|
||||
// receiver, which always belongs to the function.
|
||||
//
|
||||
// Only fire when the caller supplied formal-parameter names; see
|
||||
// the `track_synthetic` rationale above.
|
||||
let root_is_formal = var
|
||||
.split_once('.')
|
||||
.map(|(root, _)| formal_set.contains(root))
|
||||
.unwrap_or(false);
|
||||
if track_synthetic
|
||||
&& !is_receiver
|
||||
&& !formal_set.contains(var.as_str())
|
||||
&& !root_is_formal
|
||||
{
|
||||
synthetic_externals.insert(v);
|
||||
}
|
||||
synthetic_body.push(SsaInst {
|
||||
value: v,
|
||||
op,
|
||||
|
|
@ -1784,6 +1838,7 @@ fn rename_variables(
|
|||
cfg_node_map,
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue