mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
170
src/ast.rs
170
src/ast.rs
|
|
@ -145,6 +145,11 @@ fn resolve_file_rel(file_rel: &str, scan_root: Option<&Path>, fallback: &Path) -
|
|||
|
||||
/// Build a [`Diag`] from a taint [`Finding`], the CFG that produced it,
|
||||
/// the parsed tree (for byte→line/col conversion) and the file path.
|
||||
///
|
||||
/// Returns `None` when source-sensitivity gating fully suppresses the
|
||||
/// finding (the canonical case is a multi-gate `DATA_EXFIL` event whose
|
||||
/// contributing source is plain user input — see the
|
||||
/// `effective_caps` strip below).
|
||||
fn build_taint_diag(
|
||||
finding: &crate::taint::Finding,
|
||||
cfg_graph: &crate::cfg::Cfg,
|
||||
|
|
@ -152,7 +157,7 @@ fn build_taint_diag(
|
|||
path: &Path,
|
||||
src: &[u8],
|
||||
scan_root: Option<&Path>,
|
||||
) -> Diag {
|
||||
) -> Option<Diag> {
|
||||
let call_site_byte = cfg_graph[finding.sink].classification_span().0;
|
||||
let call_site_point = byte_offset_to_point(tree, call_site_byte);
|
||||
// `finding.source` should be a NodeIndex valid in this body's CFG, but
|
||||
|
|
@ -373,16 +378,63 @@ fn build_taint_diag(
|
|||
// SSA dispatch) when populated; fall back to the union of all sink-label
|
||||
// caps on the CFG node so legacy paths that build findings without
|
||||
// setting `effective_sink_caps` still pick the right rule id.
|
||||
let effective_caps = if finding.effective_sink_caps.is_empty() {
|
||||
let mut effective_caps = if finding.effective_sink_caps.is_empty() {
|
||||
crate::labels::Cap::from_bits_truncate(sink_caps_bits)
|
||||
} else {
|
||||
finding.effective_sink_caps
|
||||
};
|
||||
|
||||
// Source-sensitivity gate for `DATA_EXFIL`. Plain attacker input echoed
|
||||
// back into an outbound request body / headers / json is not data
|
||||
// exfiltration: the user already controls the value, so surfacing it as a
|
||||
// leak is noise (the canonical false-positive class for API gateways
|
||||
// and telemetry forwarders that proxy `req.body`). A `DATA_EXFIL`
|
||||
// finding requires the contributing source to be at least `Sensitive`
|
||||
// (cookies, headers, env, db rows, file reads). Plain user-input
|
||||
// sources have the cap stripped so the finding either drops entirely
|
||||
// or downgrades to whatever non-`DATA_EXFIL` cap also applies (e.g.
|
||||
// SSRF on the URL position of the same `fetch` call).
|
||||
if effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
|
||||
&& finding.source_kind.sensitivity() < crate::labels::Sensitivity::Sensitive
|
||||
{
|
||||
effective_caps.remove(crate::labels::Cap::DATA_EXFIL);
|
||||
// The multi-gate dispatch produces one finding per (source, sink-cap)
|
||||
// pair, so a body-flow finding's `effective_sink_caps` is exactly the
|
||||
// cap that fired (e.g. `DATA_EXFIL`). When that single cap is the
|
||||
// sensitivity-stripped one, the finding has no surviving rationale
|
||||
// and we drop it entirely rather than reroute it to the generic
|
||||
// `taint-unsanitised-flow` bucket (which would just re-emit the same
|
||||
// false positive under a different rule id). Findings with a
|
||||
// multi-cap `effective_sink_caps` keep their non-DATA_EXFIL caps and
|
||||
// are routed normally below.
|
||||
if finding.effective_sink_caps == crate::labels::Cap::DATA_EXFIL {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// DATA_EXFIL routing.
|
||||
//
|
||||
// Multi-gate dispatch (JS / Go) emits one event per cap, so by this
|
||||
// point each finding's `effective_sink_caps` carries exactly one bit
|
||||
// and the simple `DATA_EXFIL && !SSRF` test routes correctly. Flat-
|
||||
// rule paths (Java HTTP clients where type-qualified resolution
|
||||
// attaches both `SSRF` and `DATA_EXFIL` Sink labels to the same call,
|
||||
// e.g. `client.send(req)` covering both URL and body channels of the
|
||||
// request value) produce a single dual-cap event. In that case the
|
||||
// source's sensitivity tier disambiguates: a Sensitive source
|
||||
// (cookie, header, env, db, session) leaking into an outbound
|
||||
// request is canonically DATA_EXFIL even if the sink also carries
|
||||
// an SSRF label, because operator-bound state is not URL-shaped
|
||||
// attacker input. Plain user input keeps SSRF routing (the typical
|
||||
// user-controlled-URL pattern).
|
||||
let is_data_exfil_rule = effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
|
||||
&& !effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID)
|
||||
&& (!effective_caps.contains(crate::labels::Cap::SSRF)
|
||||
|| finding.source_kind.sensitivity() >= crate::labels::Sensitivity::Sensitive);
|
||||
|
||||
let diag_id = if effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID) {
|
||||
"rs.auth.missing_ownership_check.taint".to_string()
|
||||
} else if effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
|
||||
&& !effective_caps.contains(crate::labels::Cap::SSRF)
|
||||
{
|
||||
} else if is_data_exfil_rule {
|
||||
format!(
|
||||
"taint-data-exfiltration (source {}:{})",
|
||||
source_point.row + 1,
|
||||
|
|
@ -396,18 +448,86 @@ fn build_taint_diag(
|
|||
)
|
||||
};
|
||||
|
||||
// For `DATA_EXFIL` rules, look up which destination object-literal field
|
||||
// (`body` / `headers` / `json`) the tainted value reached. Each
|
||||
// [`crate::cfg::GateFilter`] carries `destination_uses` (var names) in
|
||||
// parallel with `destination_fields` (the field each var was bound to),
|
||||
// so we walk the gate filter whose `label_caps` includes `DATA_EXFIL`
|
||||
// and match the tainted var name from the last flow step. Falls back
|
||||
// to the first non-empty destination field on the matching filter when
|
||||
// the var-name match fails (e.g. the SSA sink event is reported on a
|
||||
// copy-propagated value whose name no longer matches the original
|
||||
// destination ident). `None` when the sink wasn't a destination-aware
|
||||
// gate (no object literal, or non-fetch sink).
|
||||
let data_exfil_field: Option<String> = if is_data_exfil_rule {
|
||||
let last_var = finding
|
||||
.flow_steps
|
||||
.last()
|
||||
.and_then(|s| s.var_name.as_deref());
|
||||
let filters = &cfg_graph[finding.sink].call.gate_filters;
|
||||
filters
|
||||
.iter()
|
||||
.find(|f| f.label_caps.contains(crate::labels::Cap::DATA_EXFIL))
|
||||
.and_then(|f| {
|
||||
if let (Some(uses), Some(var)) = (f.destination_uses.as_ref(), last_var)
|
||||
&& let Some(idx) = uses.iter().position(|u| u == var)
|
||||
{
|
||||
return f.destination_fields.get(idx).cloned();
|
||||
}
|
||||
f.destination_fields.first().cloned()
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// DATA_EXFIL severity calibration (Phase: detector ranking).
|
||||
//
|
||||
// Generic taint severity comes from `severity_for_source_kind`, which
|
||||
// maps Cookie/Header/Env to High because those sources are spicy
|
||||
// *as taint roots*. For `DATA_EXFIL` we are scoring the leak class,
|
||||
// not the source itself: not every Sensitive-tier source is a Secret.
|
||||
// Cookies and env carry credential / session material whose leakage
|
||||
// is an immediate disclosure (Secret-tier); request headers, file
|
||||
// reads, db rows, and caught exceptions are Sensitive but not
|
||||
// automatically secret, so they downgrade to Medium. Plain user
|
||||
// input is already stripped above by the source-sensitivity gate, so
|
||||
// the `_` arm here is reached only by Sensitive sources that are not
|
||||
// explicit secrets.
|
||||
let severity = if is_data_exfil_rule {
|
||||
match finding.source_kind {
|
||||
crate::labels::SourceKind::Cookie | crate::labels::SourceKind::EnvironmentConfig => {
|
||||
crate::patterns::Severity::High
|
||||
}
|
||||
_ => crate::patterns::Severity::Medium,
|
||||
}
|
||||
} else {
|
||||
severity_for_source_kind(finding.source_kind)
|
||||
};
|
||||
|
||||
// DATA_EXFIL: surface the destination field in the message so analysts
|
||||
// see at a glance whether the leak reached the request body, headers,
|
||||
// or json payload. Generic taint findings stay on the existing
|
||||
// "unsanitised … flows from … → …" template.
|
||||
let message = if is_data_exfil_rule {
|
||||
let suffix = data_exfil_field
|
||||
.as_deref()
|
||||
.map(|f| format!(" ({f} field)"))
|
||||
.unwrap_or_default();
|
||||
format!("sensitive data flows from {short_source} \u{2192} {sink_display}{suffix}")
|
||||
} else {
|
||||
format!("unsanitised {kind_label} flows from {short_source} \u{2192} {sink_display}")
|
||||
};
|
||||
|
||||
let mut diag = Diag {
|
||||
path: primary_path.clone(),
|
||||
line: primary_line,
|
||||
col: primary_col,
|
||||
severity: severity_for_source_kind(finding.source_kind),
|
||||
severity,
|
||||
id: diag_id,
|
||||
category: FindingCategory::Security,
|
||||
path_validated: finding.path_validated,
|
||||
guard_kind: finding.guard_kind.map(|k| format!("{k:?}")),
|
||||
message: Some(format!(
|
||||
"unsanitised {kind_label} flows from {short_source} \u{2192} {sink_display}"
|
||||
)),
|
||||
message: Some(message),
|
||||
labels,
|
||||
confidence: None,
|
||||
evidence: Some(Evidence {
|
||||
|
|
@ -448,6 +568,7 @@ fn build_taint_diag(
|
|||
symbolic: finding.symbolic.clone(),
|
||||
sink_caps: sink_caps_bits,
|
||||
engine_notes: finding.engine_notes.clone(),
|
||||
data_exfil_field,
|
||||
..Default::default()
|
||||
}),
|
||||
rank_score: None,
|
||||
|
|
@ -467,7 +588,7 @@ fn build_taint_diag(
|
|||
ev.confidence_limiters = limiters;
|
||||
}
|
||||
|
||||
diag
|
||||
Some(diag)
|
||||
}
|
||||
|
||||
/// Resolve a file extension to a language slug (e.g. `"rust"`,
|
||||
|
|
@ -622,6 +743,8 @@ fn source_kind_label(sk: crate::labels::SourceKind) -> &'static str {
|
|||
use crate::labels::SourceKind;
|
||||
match sk {
|
||||
SourceKind::UserInput => "user input",
|
||||
SourceKind::Cookie => "cookie value",
|
||||
SourceKind::Header => "request header",
|
||||
SourceKind::EnvironmentConfig => "environment config",
|
||||
SourceKind::FileSystem => "file system data",
|
||||
SourceKind::Database => "database result",
|
||||
|
|
@ -1198,18 +1321,31 @@ impl<'a> ParsedFile<'a> {
|
|||
continue;
|
||||
}
|
||||
|
||||
out.push(build_taint_diag(
|
||||
if let Some(diag) = build_taint_diag(
|
||||
finding,
|
||||
body_cfg,
|
||||
&self.source.tree,
|
||||
self.source.path,
|
||||
self.source.bytes,
|
||||
scan_root,
|
||||
));
|
||||
) {
|
||||
out.push(diag);
|
||||
}
|
||||
}
|
||||
|
||||
// ── CFG structural analyses (per body) ─────────────────────────
|
||||
let taint_active = global_summaries.is_some() || !taint_results.is_empty();
|
||||
// Pre-compute, per body, the set of variable names whose
|
||||
// release / close calls live in a NESTED closure body inside
|
||||
// that body (e.g. `socket.on("close", () => ws.close())`).
|
||||
// Both the structural ResourceMisuse pass and the state-model
|
||||
// leak pass consult it to suppress findings whose cleanup is
|
||||
// registered as a callback the per-body CFG can't follow.
|
||||
// Only descendants count — sibling methods on the same class
|
||||
// don't share resource ownership.
|
||||
let closure_released_per_body =
|
||||
state::collect_closure_released_var_names(&self.file_cfg.bodies, caller_lang);
|
||||
let empty_set: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
for body in &self.file_cfg.bodies {
|
||||
let body_taint: Vec<_> = taint_results
|
||||
.iter()
|
||||
|
|
@ -1231,6 +1367,11 @@ impl<'a> ParsedFile<'a> {
|
|||
body_const_facts: body_const_facts.as_ref(),
|
||||
type_facts: body_const_facts.as_ref().map(|f| &f.type_facts),
|
||||
auth_decorators: &body.meta.auth_decorators,
|
||||
closure_released_var_names: Some(
|
||||
closure_released_per_body
|
||||
.get(&body.meta.id)
|
||||
.unwrap_or(&empty_set),
|
||||
),
|
||||
};
|
||||
for cf in cfg_analysis::run_all(&cfg_ctx) {
|
||||
let point = byte_offset_to_point(&self.source.tree, cf.span.0);
|
||||
|
|
@ -1307,6 +1448,11 @@ impl<'a> ParsedFile<'a> {
|
|||
&body.meta.auth_decorators,
|
||||
&path_safe_suppressed_spans,
|
||||
body_pointer_hints.as_ref(),
|
||||
Some(
|
||||
closure_released_per_body
|
||||
.get(&body.meta.id)
|
||||
.unwrap_or(&empty_set),
|
||||
),
|
||||
);
|
||||
|
||||
for sf in &state_findings {
|
||||
|
|
|
|||
|
|
@ -1118,6 +1118,7 @@ fn clone_preserves_all_sub_structs() {
|
|||
arg_string_literals: vec![Some("lit".into())],
|
||||
destination_uses: None,
|
||||
gate_filters: Vec::new(),
|
||||
is_constructor: false,
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels: {
|
||||
|
|
|
|||
|
|
@ -373,11 +373,26 @@ pub(crate) fn first_member_label(
|
|||
if let Some(full) = member_expr_text(n, code) {
|
||||
// Try the full text first, then progressively strip the last segment
|
||||
// to match rules like "process.env" from "process.env.CMD".
|
||||
//
|
||||
// The strip-and-retry only ever yields a sound label for Sources:
|
||||
// `process.env.CMD` → strip → `process.env` makes sense because
|
||||
// the receiver itself IS the source. Sinks and Sanitizers, by
|
||||
// contrast, name the *operation* — `connection.query`, `eval`,
|
||||
// `exec` — and stripping a trailing segment to match them is
|
||||
// not semantically valid (e.g. `exec.start` should never be
|
||||
// treated as a SHELL_ESCAPE sink because of bare `exec`). We
|
||||
// accept any label on a full-text match (the behaviour callers
|
||||
// already depend on for Source/Sink labels alike), but only
|
||||
// accept Source labels after segment stripping.
|
||||
let mut candidate = full.as_str();
|
||||
let mut first = true;
|
||||
loop {
|
||||
if let Some(lbl) = classify(lang, candidate, extra_labels) {
|
||||
return Some(lbl);
|
||||
if first || matches!(lbl, DataLabel::Source(_)) {
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
first = false;
|
||||
match candidate.rsplit_once('.') {
|
||||
Some((prefix, _)) => candidate = prefix,
|
||||
None => break,
|
||||
|
|
|
|||
|
|
@ -38,25 +38,27 @@ pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Extract identifiers from specified fields of an object-literal argument.
|
||||
/// Extract `(field_name, ident_name)` pairs from specified fields of an
|
||||
/// object-literal argument.
|
||||
///
|
||||
/// Returns:
|
||||
/// * `Some(names)` if the positional argument at `index` IS an object literal
|
||||
/// (JS `object`, TS `object`, Python `dictionary`). `names` contains
|
||||
/// identifiers lifted from pair values whose key matches any entry in
|
||||
/// `fields` (case-sensitive; JS/TS identifiers). When no destination-field
|
||||
/// pairs are present, returns `Some(vec![])`, the sink is effectively
|
||||
/// silenced because no destination identifier exists.
|
||||
/// * `Some(pairs)` if the positional argument at `index` IS an object literal
|
||||
/// (JS `object`, TS `object`, Python `dictionary`). Each pair is
|
||||
/// `(field_name, ident_name)` where `field_name` is the matched key from
|
||||
/// `fields` and `ident_name` is an identifier lifted from that pair's
|
||||
/// value expression. When no destination-field pairs are present, returns
|
||||
/// `Some(vec![])`; the sink is effectively silenced because no destination
|
||||
/// identifier exists.
|
||||
/// * `None` if the arg is absent, is not an object literal (plain string
|
||||
/// / ident / expression), or has splat/spread children that break static
|
||||
/// per-field reasoning. Callers fall back to the whole-arg positional
|
||||
/// filter in this case.
|
||||
pub(super) fn extract_destination_field_idents(
|
||||
pub(super) fn extract_destination_field_pairs(
|
||||
call_node: Node,
|
||||
arg_index: usize,
|
||||
fields: &[&str],
|
||||
code: &[u8],
|
||||
) -> Option<Vec<String>> {
|
||||
) -> Option<Vec<(String, String)>> {
|
||||
if fields.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -71,7 +73,7 @@ pub(super) fn extract_destination_field_idents(
|
|||
return None;
|
||||
}
|
||||
|
||||
let mut out: Vec<String> = Vec::new();
|
||||
let mut out: Vec<(String, String)> = Vec::new();
|
||||
let mut c = arg.walk();
|
||||
for child in arg.named_children(&mut c) {
|
||||
match child.kind() {
|
||||
|
|
@ -88,8 +90,8 @@ pub(super) fn extract_destination_field_idents(
|
|||
let Some(name) = text_of(child, code) else {
|
||||
continue;
|
||||
};
|
||||
if fields.iter().any(|&f| f == name) && !out.contains(&name) {
|
||||
out.push(name);
|
||||
if fields.iter().any(|&f| f == name) && !out.iter().any(|(_, v)| v == &name) {
|
||||
out.push((name.clone(), name));
|
||||
}
|
||||
}
|
||||
"pair" => {
|
||||
|
|
@ -124,8 +126,8 @@ pub(super) fn extract_destination_field_idents(
|
|||
let mut paths: Vec<String> = Vec::new();
|
||||
collect_idents_with_paths(val_node, code, &mut idents, &mut paths);
|
||||
for name in paths.into_iter().chain(idents) {
|
||||
if !out.contains(&name) {
|
||||
out.push(name);
|
||||
if !out.iter().any(|(_, v)| v == &name) {
|
||||
out.push((key.clone(), name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -135,6 +137,62 @@ pub(super) fn extract_destination_field_idents(
|
|||
Some(out)
|
||||
}
|
||||
|
||||
/// Extract `(field_name, ident_name)` pairs from `keyword_argument` /
|
||||
/// `named_argument` children of a call whose keyword name matches one of
|
||||
/// `fields`. Used for languages where destination-bearing fields are passed
|
||||
/// as direct kwargs rather than wrapped in a dict literal, e.g. Python
|
||||
/// `requests.post(url, data=tainted, json=safe)` where `data` and `json` are
|
||||
/// `keyword_argument` siblings of the positional URL.
|
||||
///
|
||||
/// Returns the union of matching kwargs, preserving the kwarg name in the
|
||||
/// `field` slot so callers can still attribute findings per-field. Empty
|
||||
/// when no matching kwargs exist or the call has no `arguments` field.
|
||||
pub(super) fn extract_destination_kwarg_pairs(
|
||||
call_node: Node,
|
||||
fields: &[&str],
|
||||
code: &[u8],
|
||||
) -> Vec<(String, String)> {
|
||||
if fields.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let Some(args_node) = call_node.child_by_field_name("arguments") else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut out: Vec<(String, String)> = Vec::new();
|
||||
let mut cursor = args_node.walk();
|
||||
for child in args_node.named_children(&mut cursor) {
|
||||
let kind = child.kind();
|
||||
if kind != "keyword_argument" && kind != "named_argument" {
|
||||
continue;
|
||||
}
|
||||
let named_count = child.named_child_count();
|
||||
let name_node = child
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| child.named_child(0));
|
||||
let value_node = child
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32));
|
||||
let (Some(nn), Some(vn)) = (name_node, value_node) else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = text_of(nn, code) else {
|
||||
continue;
|
||||
};
|
||||
if !fields.iter().any(|&f| f == name) {
|
||||
continue;
|
||||
}
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(vn, code, &mut idents, &mut paths);
|
||||
for ident in paths.into_iter().chain(idents) {
|
||||
if !out.iter().any(|(_, v)| v == &ident) {
|
||||
out.push((name.clone(), ident));
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Extract the string-literal content at argument position `index` (0-based).
|
||||
/// Returns `None` if the argument is not a string literal or the index is out of range.
|
||||
pub(super) fn extract_const_string_arg(
|
||||
|
|
@ -144,7 +202,14 @@ pub(super) fn extract_const_string_arg(
|
|||
) -> Option<String> {
|
||||
let args = call_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let arg = args.named_children(&mut cursor).nth(index)?;
|
||||
let mut arg = args.named_children(&mut cursor).nth(index)?;
|
||||
// PHP / Go wrap each positional argument in an `argument` node; unwrap so
|
||||
// the kind-match below sees the inner literal.
|
||||
if arg.kind() == "argument" && arg.named_child_count() == 1 {
|
||||
if let Some(inner) = arg.named_child(0) {
|
||||
arg = inner;
|
||||
}
|
||||
}
|
||||
match arg.kind() {
|
||||
// `string` / `string_literal` cover JS/TS, Python, Java, PHP, C/C++, Ruby, Rust;
|
||||
// `interpreted_string_literal` / `raw_string_literal` cover Go's
|
||||
|
|
@ -177,6 +242,39 @@ pub(super) fn extract_const_string_arg(
|
|||
}
|
||||
}
|
||||
|
||||
/// Extract a macro-constant or `define`d identifier name at argument position
|
||||
/// `index` (0-based). Used for languages where activation values are
|
||||
/// preprocessor symbols rather than string literals — currently C, C++, and
|
||||
/// PHP define-constants like `CURLOPT_POSTFIELDS` whose syntactic form is an
|
||||
/// `identifier` / `name` node, not a `string`.
|
||||
///
|
||||
/// Returns `None` for any non-identifier shape so dynamic-activation
|
||||
/// semantics still apply when the activation arg is a runtime value
|
||||
/// (variable, expression, function call).
|
||||
pub(super) fn extract_const_macro_arg(
|
||||
call_node: Node,
|
||||
index: usize,
|
||||
code: &[u8],
|
||||
) -> Option<String> {
|
||||
let args = call_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let mut arg = args.named_children(&mut cursor).nth(index)?;
|
||||
if arg.kind() == "argument" && arg.named_child_count() == 1 {
|
||||
if let Some(inner) = arg.named_child(0) {
|
||||
arg = inner;
|
||||
}
|
||||
}
|
||||
match arg.kind() {
|
||||
// C/C++ identifier / PHP `name` node for define-style constants.
|
||||
// Scoped C++ identifiers (`Curl::OPT_POSTFIELDS`) and PHP namespaced
|
||||
// names also surface here so the dangerous_values match catches them.
|
||||
"identifier" | "name" | "qualified_name" | "scoped_identifier" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the value of a keyword argument from a call node (e.g. Python `shell=True`).
|
||||
/// Walks argument children looking for `keyword_argument` nodes, matches the keyword
|
||||
/// name, and extracts the value node text for literals.
|
||||
|
|
@ -1546,6 +1644,59 @@ pub(super) fn def_use(
|
|||
(None, uses, vec![])
|
||||
}
|
||||
|
||||
// for-in / for-of / Python `for x in iter:` ─────────────────────────
|
||||
//
|
||||
// Tree-sitter classifies these as `Kind::For` with a `left`/`right`
|
||||
// field pair (binding pattern + iterable). Without an explicit
|
||||
// arm here, the default branch collects every ident as a `use` and
|
||||
// never registers the iteration binding as a `define`, so taint
|
||||
// entering the iterable does not propagate into the body's
|
||||
// references to the binding (`for (const [a, b] of obj) { sink(a) }`
|
||||
// would lose the flow at `a`).
|
||||
//
|
||||
// C-style `for_statement` has no `left`/`right` fields (it uses
|
||||
// `initializer`/`condition`/`increment`), so this path falls through
|
||||
// to the default-collecting behaviour for those, preserving today's
|
||||
// semantics.
|
||||
Kind::For => {
|
||||
let left = ast.child_by_field_name("left");
|
||||
let right = ast.child_by_field_name("right");
|
||||
if left.is_none() && right.is_none() {
|
||||
// C-style for, defer to default ident collection.
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
return (None, uses, vec![]);
|
||||
}
|
||||
|
||||
let mut defs: Option<String> = None;
|
||||
let mut extra_defs: Vec<String> = Vec::new();
|
||||
let mut uses: Vec<String> = Vec::new();
|
||||
|
||||
if let Some(pat) = left {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
}
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
if let Some(val) = right {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(val, code, &mut idents, &mut paths);
|
||||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
}
|
||||
(defs, uses, extra_defs)
|
||||
}
|
||||
|
||||
// everything else – no definition, but may read vars
|
||||
_ => {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -1557,3 +1708,225 @@ pub(super) fn def_use(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One shell-array literal located by [`extract_shell_array_payload_idents`].
|
||||
///
|
||||
/// `arg_position` is the index of the call argument holding the literal,
|
||||
/// counted over the named children of the call's `arguments` node.
|
||||
/// `payload_idents` lists the dotted paths, then the bare identifiers, lifted
|
||||
/// from the array elements at positions 2+ (everything after the shell name
|
||||
/// and its command flag), deduplicated in first-seen order. The extractor
|
||||
/// skips arrays whose payload yields no identifiers, so its matches are never empty.
|
||||
#[derive(Debug, Clone)]
|
||||
pub(super) struct ShellArrayMatch {
|
||||
pub arg_position: usize,
|
||||
pub payload_idents: Vec<String>,
|
||||
}
|
||||
|
||||
/// Detect inline shell-execution array literals at a call site.
|
||||
///
|
||||
/// Recognises the pattern `[<shell>, "-c", <payload>]` (POSIX shells) and
|
||||
/// `[<cmd-shell>, "/c"|"/C", <payload>]` (Windows `cmd.exe`) appearing as
|
||||
/// either:
|
||||
/// * a direct positional argument of `call_node`, or
|
||||
/// * the value of any field within an object-literal positional argument
|
||||
/// (covers `container.exec({Cmd: ["bash", "-c", x]})` form).
|
||||
///
|
||||
/// Returns one [`ShellArrayMatch`] per detected shell-array. Empty when the
|
||||
/// call has no shell-array literals.
|
||||
///
|
||||
/// The shell-name list is intentionally narrow (POSIX shells + Windows
|
||||
/// `cmd.exe`/`powershell`) to avoid false positives on benign array literals
|
||||
/// like `["ls", "-la"]` or `["git", "rev-parse", "HEAD"]`, where element 0 is
|
||||
/// not a shell. Element 1 must be a literal `-c` (POSIX) or `/c`/`/C` (cmd);
|
||||
/// otherwise the array is not in shell-exec form regardless of element 0.
|
||||
///
|
||||
/// Identifiers from elements at positions 2+ are lifted via
|
||||
/// [`collect_idents_with_paths`] so template-literal interpolations
|
||||
/// (`` `echo ${x}` ``), member-expressions (`obj.field`), and bare idents are
|
||||
/// all captured. Dedup is preserved across array elements so a single ident
|
||||
/// referenced in multiple payload positions appears once.
|
||||
pub(super) fn extract_shell_array_payload_idents(
|
||||
call_node: Node,
|
||||
code: &[u8],
|
||||
) -> Vec<ShellArrayMatch> {
|
||||
let mut out = Vec::new();
|
||||
let Some(args_node) = call_node.child_by_field_name("arguments") else {
|
||||
return out;
|
||||
};
|
||||
let mut cursor = args_node.walk();
|
||||
for (idx, child) in args_node.named_children(&mut cursor).enumerate() {
|
||||
let kind = child.kind();
|
||||
// Splats break positional indexing; bail conservatively on the whole call.
|
||||
if kind == "spread_element"
|
||||
|| kind == "dictionary_splat"
|
||||
|| kind == "list_splat"
|
||||
|| kind == "splat_argument"
|
||||
|| kind == "hash_splat_argument"
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
if kind == "keyword_argument" || kind == "named_argument" {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Direct array-literal arg.
|
||||
if let Some(idents) = shell_array_payload_idents_of(child, code) {
|
||||
out.push(ShellArrayMatch {
|
||||
arg_position: idx,
|
||||
payload_idents: idents,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Object-literal arg whose field value is a shell-array literal.
|
||||
// Covers `container.exec({Cmd: [...]})` form. Field name is not
|
||||
// restricted to `Cmd` / `cmd`: the shell-shape itself is the gate,
|
||||
// and the payload extraction is per-array.
|
||||
if matches!(kind, "object" | "dictionary") {
|
||||
let mut cc = child.walk();
|
||||
for pair in child.named_children(&mut cc) {
|
||||
if pair.kind() != "pair" {
|
||||
continue;
|
||||
}
|
||||
let Some(val_node) = pair.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
let val_node = unwrap_parens(val_node);
|
||||
if let Some(idents) = shell_array_payload_idents_of(val_node, code) {
|
||||
out.push(ShellArrayMatch {
|
||||
arg_position: idx,
|
||||
payload_idents: idents,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// If `node` is an array literal of shape `[<shell>, "-c", *]` (POSIX shells)
|
||||
/// or `[<cmd-shell>, "/c", *]` (Windows cmd.exe), return the identifiers
|
||||
/// referenced in the payload elements (positions 2+). Otherwise return
|
||||
/// `None`. Returning `Some(vec![])` means the payload is a constant string
|
||||
/// — caller should still skip emitting a sink (no taint can reach a literal).
|
||||
fn shell_array_payload_idents_of(node: Node, code: &[u8]) -> Option<Vec<String>> {
|
||||
let node = unwrap_parens(node);
|
||||
if node.kind() != "array" {
|
||||
return None;
|
||||
}
|
||||
// Walk named children to skip commas and other trivia.
|
||||
let mut cursor = node.walk();
|
||||
let elems: Vec<Node> = node.named_children(&mut cursor).collect();
|
||||
if elems.len() < 3 {
|
||||
return None;
|
||||
}
|
||||
let shell = const_string_value(elems[0], code)?;
|
||||
if !is_known_shell(&shell) {
|
||||
return None;
|
||||
}
|
||||
let flag = const_string_value(elems[1], code)?;
|
||||
if !is_shell_command_flag(&shell, &flag) {
|
||||
return None;
|
||||
}
|
||||
// Lift identifiers from the payload elements (positions 2+). Constants
|
||||
// contribute nothing. An empty result means the entire payload is
|
||||
// statically benign.
|
||||
let mut idents: Vec<String> = Vec::new();
|
||||
let mut paths: Vec<String> = Vec::new();
|
||||
for elem in &elems[2..] {
|
||||
collect_idents_with_paths(*elem, code, &mut idents, &mut paths);
|
||||
}
|
||||
let mut combined = paths;
|
||||
combined.extend(idents);
|
||||
// Dedup (preserve first-seen order).
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
combined.retain(|s| seen.insert(s.clone()));
|
||||
if combined.is_empty() {
|
||||
// Static payload — no taint can reach it. Return None so the caller
|
||||
// does not emit a useless sink filter.
|
||||
return None;
|
||||
}
|
||||
Some(combined)
|
||||
}
|
||||
|
||||
/// Extract a constant string value from `node`, handling JS/TS `string` /
|
||||
/// `template_string` (no interpolation) forms. Returns `None` for dynamic
|
||||
/// values, identifiers, or expressions.
|
||||
fn const_string_value(node: Node, code: &[u8]) -> Option<String> {
|
||||
let node = unwrap_parens(node);
|
||||
match node.kind() {
|
||||
"string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => {
|
||||
let raw = text_of(node, code)?;
|
||||
if raw.len() >= 2 {
|
||||
Some(raw[1..raw.len() - 1].to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
"template_string" => {
|
||||
let mut c = node.walk();
|
||||
if node
|
||||
.named_children(&mut c)
|
||||
.any(|ch| ch.kind() == "template_substitution")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let raw = text_of(node, code)?;
|
||||
if raw.len() >= 2 {
|
||||
Some(raw[1..raw.len() - 1].to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Known shell executable names that activate the shell-array detector.
///
/// Scoped narrowly to POSIX shells + Windows command interpreters, listing
/// only canonical names so benign arrays like `["ls", ...]`, `["git", ...]`,
/// or `["python", ...]` do not match.
///
/// Only the final path component of `name` is compared, and the comparison
/// is exact (case-sensitive). Both `/` and `\` count as path separators, so
/// `/bin/bash` and `C:\Windows\System32\cmd.exe` are recognised alongside
/// the bare names. A name that ends in a separator has an empty final
/// component and therefore never matches.
fn is_known_shell(name: &str) -> bool {
    // Strip the directory prefix: `/bin/bash` → `bash`,
    // `C:\Windows\System32\cmd.exe` → `cmd.exe`.
    let leaf = name
        .rsplit(|c: char| matches!(c, '/' | '\\'))
        .next()
        .unwrap_or(name);
    matches!(
        leaf,
        "bash"
            | "sh"
            | "zsh"
            | "dash"
            | "ksh"
            | "fish"
            | "ash"
            | "tcsh"
            | "csh"
            | "cmd"
            | "cmd.exe"
            | "powershell"
            | "powershell.exe"
            | "pwsh"
            | "pwsh.exe"
    )
}
|
||||
|
||||
/// True when `flag` is the "execute the following string as a shell command"
/// switch for the given `shell`.
///
/// The shell is identified by the final path component of `shell`; both `/`
/// and `\` count as path separators.
///
/// * `cmd` / `cmd.exe`: `/c` or `/k`, compared case-insensitively.
/// * `powershell` / `powershell.exe` / `pwsh` / `pwsh.exe`: `-c`,
///   `-Command`, or `-EncodedCommand`, compared case-insensitively.
/// * Anything else is treated as a POSIX shell and must be given exactly
///   `-c` (case-sensitive).
///
/// This function does not check that `shell` is a recognised shell; callers
/// are expected to have established that separately.
fn is_shell_command_flag(shell: &str, flag: &str) -> bool {
    let leaf = shell
        .rsplit(|c: char| matches!(c, '/' | '\\'))
        .next()
        .unwrap_or(shell);

    // True when `flag` equals any of `switches`, ignoring ASCII case.
    let matches_any = |switches: &[&str]| switches.iter().any(|sw| flag.eq_ignore_ascii_case(sw));

    match leaf {
        "cmd" | "cmd.exe" => matches_any(&["/c", "/k"]),
        "powershell" | "powershell.exe" | "pwsh" | "pwsh.exe" => {
            matches_any(&["-c", "-command", "-encodedcommand"])
        }
        // POSIX shells: `-c` only, case-sensitive.
        _ => flag == "-c",
    }
}
|
||||
|
|
|
|||
317
src/cfg/mod.rs
317
src/cfg/mod.rs
|
|
@ -52,10 +52,11 @@ use literals::has_sql_placeholders;
|
|||
use literals::{
|
||||
arg0_kind_and_interpolation, call_ident_of, def_use, detect_go_replace_call_sanitizer,
|
||||
detect_rust_replace_chain_sanitizer, extract_arg_callees, extract_arg_string_literals,
|
||||
extract_arg_uses, extract_const_keyword_arg, extract_const_string_arg,
|
||||
extract_destination_field_idents, extract_kwargs, extract_literal_rhs, find_call_node,
|
||||
find_call_node_deep, find_chained_inner_call, has_keyword_arg, has_only_literal_args,
|
||||
is_parameterized_query_call, java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
extract_arg_uses, extract_const_keyword_arg, extract_const_macro_arg, extract_const_string_arg,
|
||||
extract_destination_field_pairs, extract_destination_kwarg_pairs, extract_kwargs,
|
||||
extract_literal_rhs, extract_shell_array_payload_idents, find_call_node, find_call_node_deep,
|
||||
find_chained_inner_call, has_keyword_arg, has_only_literal_args, is_parameterized_query_call,
|
||||
java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
js_chain_outer_method_for_inner, ruby_chain_arg0_for_method, walk_chain_inner_call_args,
|
||||
};
|
||||
use params::{
|
||||
|
|
@ -312,6 +313,15 @@ pub struct CallMeta {
|
|||
/// [`Self::destination_uses`]).
|
||||
#[serde(default)]
|
||||
pub gate_filters: Vec<GateFilter>,
|
||||
/// True when this call expression is a constructor invocation
|
||||
/// (e.g. JS/TS `new Stripe(key)`, PHP `new PDO(...)`). The SSA Call
|
||||
/// transfer uses this to narrow the constructed value's caps: a wrapper
|
||||
/// object instance is structurally not a path string, format string,
|
||||
/// URL component, or JSON input, so out-of-process side-effect bits
|
||||
/// (FILE_IO, FMT_STRING, URL_ENCODE, JSON_PARSE) on the arguments
|
||||
/// must not survive into the constructed object.
|
||||
#[serde(default)]
|
||||
pub is_constructor: bool,
|
||||
}
|
||||
|
||||
/// One gate's contribution at a call site whose callee matches multiple
|
||||
|
|
@ -329,6 +339,15 @@ pub struct GateFilter {
|
|||
/// considers SSA values whose `var_name` matches one of `names` (object-
|
||||
/// literal destination fields lifted at CFG time). `None` ⇒ whole arg.
|
||||
pub destination_uses: Option<Vec<String>>,
|
||||
/// Parallel to [`Self::destination_uses`]: for each entry, the
|
||||
/// destination object-literal field name (e.g. `"body"`, `"headers"`,
|
||||
/// `"json"`) where the corresponding ident was bound. Empty when
|
||||
/// `destination_uses` is `None` or the gate had no
|
||||
/// `object_destination_fields` configured. Consumed by diag rendering
|
||||
/// to embed the destination field in `DATA_EXFIL` messages and SARIF
|
||||
/// `properties.data_exfil_field`.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub destination_fields: Vec<String>,
|
||||
}
|
||||
|
||||
/// Taint-classification and variable-flow metadata.
|
||||
|
|
@ -450,6 +469,13 @@ pub struct NodeInfo {
|
|||
/// up the field's declared `TypeKind`. Strictly additive, when
|
||||
/// `None`, the legacy copy-prop semantics apply.
|
||||
pub member_field: Option<String>,
|
||||
/// True when this assignment / declaration's RHS is a function or
|
||||
/// lambda literal (`obj.handler = (e) => {...}`, `let f = function(){}`).
|
||||
/// State analysis uses this to suppress resource-ownership transfer:
|
||||
/// storing a function reference into a property does not move the
|
||||
/// resources captured by the closure body, so the lifecycle of those
|
||||
/// captures must remain unchanged on the assignment node.
|
||||
pub rhs_is_function_literal: bool,
|
||||
}
|
||||
|
||||
impl NodeInfo {
|
||||
|
|
@ -1564,6 +1590,92 @@ pub(super) fn push_node<'a>(
|
|||
let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
|
||||
let mut labels = classify_all(lang, &text, extra);
|
||||
|
||||
// Rust chain-text classification. The default `text` for a Rust
|
||||
// CallMethod is `{root_receiver}.{method}`, where `root_receiver`
|
||||
// is the leftmost identifier after walking through every nested
|
||||
// call/method receiver. That convention loses the intermediate
|
||||
// chain methods, so a body-binding chain like
|
||||
// `Client::post(url).body(payload).send()` reduces to
|
||||
// `Client::post.send` and rules keyed on `body.send` /
|
||||
// `RequestBuilder.body` cannot fire.
|
||||
//
|
||||
// Reclassify against the call-AST's source text (with paren groups
|
||||
// stripped) so suffix matchers covering chain shapes
|
||||
// (`body.send`, `body_string`, `Request::builder.body`, ...) attach.
|
||||
// Strictly additive: we union new labels with the existing ones,
|
||||
// never override. Limited to Rust to avoid disturbing the other
|
||||
// languages' chain conventions.
|
||||
if lang == "rust" {
|
||||
if let Some(cn) = find_call_node(ast, lang) {
|
||||
if let Some(chain_raw) = text_of(cn, code) {
|
||||
// Multi-line Rust chains (`Client::new()\n .post(url)\n
|
||||
// .body(p)\n .send()`) preserve interior whitespace in
|
||||
// the source slice, which would prevent suffix matchers
|
||||
// like `body.send` from firing. Strip whitespace before
|
||||
// normalizing paren groups, mirroring the same trick
|
||||
// used by `find_chained_inner_call` for JS/TS chains.
|
||||
let chain_compact: String =
|
||||
chain_raw.chars().filter(|c| !c.is_whitespace()).collect();
|
||||
let chain_text = crate::labels::normalize_chained_call_for_classify(&chain_compact);
|
||||
if chain_text != text {
|
||||
let chain_labels = classify_all(lang, &chain_text, extra);
|
||||
for l in chain_labels {
|
||||
if !labels.contains(&l) {
|
||||
labels.push(l);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also try classification against the chain with
|
||||
// trailing identity methods peeled. Rust chains often
|
||||
// end in `.unwrap()` / `.expect("...")` / `.await` /
|
||||
// `.clone()` etc., which obscure the body-bind verb
|
||||
// for suffix matchers. E.g. hyper's
|
||||
// `Request::builder().method(..).uri(..).body(p).unwrap()`
|
||||
// peels to `...body`, allowing a simpler `body` /
|
||||
// `Request::builder.body` matcher to fire.
|
||||
let peeled = crate::ssa::type_facts::peel_identity_suffix(&chain_text);
|
||||
if peeled != chain_text && peeled != text {
|
||||
let peeled_labels = classify_all(lang, &peeled, extra);
|
||||
for l in peeled_labels {
|
||||
if !labels.contains(&l) {
|
||||
labels.push(l);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pattern synthesis: the hyper request-builder chain
|
||||
// (`hyper::Request::builder().method(..).uri(..).body(p)`)
|
||||
// can interleave `.method`, `.uri`, `.header`, `.version`
|
||||
// etc. between `Request::builder` and the body-bind step.
|
||||
// Suffix matchers can't span those, so synthesise a
|
||||
// DATA_EXFIL sink whenever the chain begins with
|
||||
// `Request::builder` and ends in a body-binding verb.
|
||||
// Strictly additive: no labels are removed, only added,
|
||||
// and the synthesis only fires when an explicit Sink
|
||||
// hasn't already attached.
|
||||
let chain_for_synth = if peeled != chain_text {
|
||||
&peeled
|
||||
} else {
|
||||
&chain_text
|
||||
};
|
||||
if !labels
|
||||
.iter()
|
||||
.any(|l| matches!(l, DataLabel::Sink(c) if c.contains(crate::labels::Cap::DATA_EXFIL)))
|
||||
&& (chain_for_synth.contains("Request::builder.")
|
||||
|| chain_for_synth.contains("hyper::Request::builder."))
|
||||
{
|
||||
let last_seg =
|
||||
chain_for_synth.rsplit('.').next().unwrap_or(chain_for_synth);
|
||||
if matches!(
|
||||
last_seg,
|
||||
"body" | "body_mut" | "body_string" | "body_json" | "body_bytes"
|
||||
) {
|
||||
labels.push(DataLabel::Sink(crate::labels::Cap::DATA_EXFIL));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the outermost call didn't classify, try inner/nested calls.
|
||||
// E.g. `str(eval(expr))`, `str` is not a sink, but `eval` is.
|
||||
// When the callee is overridden, save the original for container ops
|
||||
|
|
@ -1727,7 +1839,23 @@ pub(super) fn push_node<'a>(
|
|||
let mut sink_payload_args: Option<Vec<usize>> = None;
|
||||
let mut destination_uses: Option<Vec<String>> = None;
|
||||
let mut gate_filters: Vec<GateFilter> = Vec::new();
|
||||
if labels.is_empty() {
|
||||
// Gates run when no flat `Sink` label is already present, OR when a
|
||||
// matching gate restricts the payload-arg set on top of an existing flat
|
||||
// sink. Source / Sanitizer labels are orthogonal — a callee like
|
||||
// Python's `requests.post` is a `Source` for its response object AND a
|
||||
// gated `Sink` for its URL/body argument positions; both should attach.
|
||||
//
|
||||
// Payload-arg refinement: when a flat sink matches a callee that ALSO
|
||||
// has a gate entry restricting `payload_args`, the gate's `payload_args`
|
||||
// are propagated to `sink_payload_args` so only those positions are
|
||||
// taint-checked. Example: `execSync(cmd, { env: process.env })` matches
|
||||
// the bare `execSync` flat `Sink(SHELL_ESCAPE)` AND the gate `=execSync`
|
||||
// with `payload_args: &[0]`; without the refinement, the flat rule's
|
||||
// implicit "all args" would flag `process.env` flowing into the options
|
||||
// object's `env` field. The gate's labels themselves are deduped so a
|
||||
// single capability never double-attributes.
|
||||
let has_sink_label = labels.iter().any(|l| matches!(l, DataLabel::Sink(_)));
|
||||
{
|
||||
let gate_call = call_ast.or_else(|| find_call_node_deep(ast, lang, 4));
|
||||
if let Some(cn) = gate_call {
|
||||
let gate_callee_text = if call_ast.is_some() {
|
||||
|
|
@ -1746,7 +1874,22 @@ pub(super) fn push_node<'a>(
|
|||
let matches = classify_gated_sink(
|
||||
lang,
|
||||
&gate_callee_text,
|
||||
|idx| extract_const_string_arg(cn, idx, code),
|
||||
|idx| {
|
||||
extract_const_string_arg(cn, idx, code).or_else(|| {
|
||||
// C/C++ preprocessor macros and PHP `define`d constants
|
||||
// surface as identifier nodes, not string literals.
|
||||
// Falling back to the macro-arg extractor for those
|
||||
// languages lets gates like `curl_easy_setopt` /
|
||||
// `curl_setopt` activate on a `CURLOPT_POSTFIELDS`
|
||||
// ident match instead of firing conservatively on
|
||||
// every positional arg.
|
||||
if matches!(lang, "c" | "cpp" | "c++" | "php") {
|
||||
extract_const_macro_arg(cn, idx, code)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
},
|
||||
|kw| extract_const_keyword_arg(cn, kw, code),
|
||||
|kw| has_keyword_arg(cn, kw, code),
|
||||
);
|
||||
|
|
@ -1758,11 +1901,23 @@ pub(super) fn push_node<'a>(
|
|||
// * a `GateFilter` carrying that gate's specific
|
||||
// `(label_caps, payload_args, destination_uses)` so
|
||||
// the SSA sink scan can attribute taint per-cap.
|
||||
//
|
||||
// When a flat sink already matches, gate labels are deduped
|
||||
// so the same capability isn't attributed twice (once flat,
|
||||
// once gated). Their `payload_args` still flow into
|
||||
// `sink_payload_args` so the gate's arg-position restriction
|
||||
// applies on top of the flat sink.
|
||||
let mut union_payload: Vec<usize> = Vec::new();
|
||||
for gm in &matches {
|
||||
labels.push(gm.label);
|
||||
if has_sink_label {
|
||||
if !labels.contains(&gm.label) {
|
||||
labels.push(gm.label);
|
||||
}
|
||||
} else {
|
||||
labels.push(gm.label);
|
||||
}
|
||||
|
||||
let payload_vec: Vec<usize> =
|
||||
let mut payload_vec: Vec<usize> =
|
||||
if gm.payload_args == crate::labels::ALL_ARGS_PAYLOAD {
|
||||
// Dynamic-activation sentinel: every positional arg is
|
||||
// conservatively a payload. Expand using the actual
|
||||
|
|
@ -1780,19 +1935,57 @@ pub(super) fn push_node<'a>(
|
|||
// checks to identifiers under those fields. Non-object
|
||||
// arg forms return `None` from the extractor and the gate
|
||||
// falls back to whole-arg positional filtering.
|
||||
//
|
||||
// The pair form preserves which object-literal field each
|
||||
// ident was bound to (e.g. `body` vs `headers` vs `json`)
|
||||
// so diag rendering can attribute `DATA_EXFIL` findings to
|
||||
// a specific destination field.
|
||||
let mut dest_uses: Option<Vec<String>> = None;
|
||||
let mut dest_fields: Vec<String> = Vec::new();
|
||||
if !gm.object_destination_fields.is_empty() {
|
||||
let mut all_pairs: Vec<(String, String)> = Vec::new();
|
||||
let mut had_object_match = false;
|
||||
for &pos in gm.payload_args {
|
||||
if let Some(names) = extract_destination_field_idents(
|
||||
if let Some(pairs) = extract_destination_field_pairs(
|
||||
cn,
|
||||
pos,
|
||||
gm.object_destination_fields,
|
||||
code,
|
||||
) {
|
||||
dest_uses = Some(names);
|
||||
all_pairs.extend(pairs);
|
||||
had_object_match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Direct kwargs: languages where destination-bearing
|
||||
// fields are passed as `keyword_argument` siblings of
|
||||
// the positional args (Python `data=`, Ruby kwargs).
|
||||
// SSA lowering folds kwarg idents into the implicit
|
||||
// args group at index `arity`, so we expand
|
||||
// `payload_vec` to include that position; the
|
||||
// `destination_filter` then narrows to the kwarg
|
||||
// ident's `var_name`.
|
||||
let kwarg_pairs =
|
||||
extract_destination_kwarg_pairs(cn, gm.object_destination_fields, code);
|
||||
if !kwarg_pairs.is_empty() {
|
||||
let arity = extract_arg_uses(cn, code).len();
|
||||
if !payload_vec.contains(&arity) {
|
||||
payload_vec.push(arity);
|
||||
}
|
||||
for pair in kwarg_pairs {
|
||||
if !all_pairs.iter().any(|(_, v)| v == &pair.1) {
|
||||
all_pairs.push(pair);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if had_object_match || !all_pairs.is_empty() {
|
||||
let (fields, vars): (Vec<String>, Vec<String>) =
|
||||
all_pairs.into_iter().unzip();
|
||||
dest_uses = Some(vars);
|
||||
dest_fields = fields;
|
||||
}
|
||||
}
|
||||
|
||||
let label_caps = match gm.label {
|
||||
|
|
@ -1809,6 +2002,7 @@ pub(super) fn push_node<'a>(
|
|||
label_caps,
|
||||
payload_args: payload_vec,
|
||||
destination_uses: dest_uses,
|
||||
destination_fields: dest_fields,
|
||||
});
|
||||
}
|
||||
if !union_payload.is_empty() {
|
||||
|
|
@ -1826,6 +2020,65 @@ pub(super) fn push_node<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// ── Inline shell-array sink synthesis ────────────────────────────────
|
||||
//
|
||||
// Recognise `[<shell>, "-c", <payload>]` (and `cmd /c <payload>`)
|
||||
// appearing as an argument to *any* call. The shell-array shape itself
|
||||
// is the gate, regardless of callee, so this fires through user-defined
|
||||
// wrappers like `execInContainer(id, ["bash", "-c", `echo ${tainted}`])`
|
||||
// without needing per-wrapper summary annotations. Only fires for JS/TS
|
||||
// because the array-literal grammar (`array` node) and shell-form usage
|
||||
// are JS/TS conventions; other languages use different shapes for
|
||||
// shell-exec wrappers.
|
||||
//
|
||||
// The inner array also covers Dockerode's
|
||||
// `container.exec({Cmd: [shell, "-c", payload]})`: the helper looks
|
||||
// inside object-literal args for shell-array values under any field.
|
||||
//
|
||||
// Existing FP carve-outs are preserved. `["ls", "-la"]` doesn't match
|
||||
// (element 0 is not a known shell). `untaintedArrayVariable` doesn't
|
||||
// match (variable, not literal). `execSync(cmd, { env: process.env })`
|
||||
// doesn't match (string + object args, no shell-array literal). When
|
||||
// the payload elements are constant strings the helper returns no
|
||||
// match, so a literal `["bash", "-c", "ls -la"]` doesn't fire either.
|
||||
if matches!(lang, "javascript" | "js" | "typescript" | "ts") {
|
||||
if let Some(cn) = call_ast.or_else(|| find_call_node_deep(ast, lang, 4)) {
|
||||
let shell_matches = extract_shell_array_payload_idents(cn, code);
|
||||
if !shell_matches.is_empty() {
|
||||
let shell_label = DataLabel::Sink(Cap::SHELL_ESCAPE);
|
||||
let already_has_shell_sink = labels.iter().any(|l| match l {
|
||||
DataLabel::Sink(c) => c.contains(Cap::SHELL_ESCAPE),
|
||||
_ => false,
|
||||
});
|
||||
if !already_has_shell_sink {
|
||||
labels.push(shell_label);
|
||||
}
|
||||
|
||||
let mut union_payload: Vec<usize> = sink_payload_args.clone().unwrap_or_default();
|
||||
for sm in shell_matches {
|
||||
if !union_payload.contains(&sm.arg_position) {
|
||||
union_payload.push(sm.arg_position);
|
||||
}
|
||||
gate_filters.push(GateFilter {
|
||||
label_caps: Cap::SHELL_ESCAPE,
|
||||
payload_args: vec![sm.arg_position],
|
||||
destination_uses: Some(sm.payload_idents),
|
||||
destination_fields: Vec::new(),
|
||||
});
|
||||
}
|
||||
if !union_payload.is_empty() {
|
||||
sink_payload_args = Some(union_payload);
|
||||
}
|
||||
// Legacy single-gate path: when this is the only gate filter,
|
||||
// populate the top-level destination_uses too so the SSA
|
||||
// fast-path stays consistent with the multi-gate behaviour.
|
||||
if gate_filters.len() == 1 {
|
||||
destination_uses = gate_filters[0].destination_uses.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pattern-based sanitizer synthesis: recognise a Rust
|
||||
// `param.replace(LIT, LIT)[.replace(LIT, LIT)]*` chain that provably strips
|
||||
// path-traversal or HTML metacharacters. The CFG collapses the whole
|
||||
|
|
@ -2296,6 +2549,20 @@ pub(super) fn push_node<'a>(
|
|||
// just bloat every labeled Call node.
|
||||
let callee_span = inner_callee_span.or(inner_text_span).filter(|s| *s != span);
|
||||
|
||||
// Constructor detection: a `new X(...)` call carries different cap
|
||||
// semantics than a plain function call. The SSA Call transfer uses
|
||||
// this flag to narrow the constructed value's caps so out-of-process
|
||||
// side-effect bits (FILE_IO, FMT_STRING, URL_ENCODE, JSON_PARSE) on
|
||||
// the arguments don't survive into a wrapper-object instance.
|
||||
// Recognised forms:
|
||||
// * JS/TS `new_expression`
|
||||
// * Java/C++ `object_creation_expression`
|
||||
// * PHP `object_creation_expression`
|
||||
let is_constructor = ast.kind() == "new_expression"
|
||||
|| ast.kind() == "object_creation_expression"
|
||||
|| call_ast
|
||||
.is_some_and(|cn| matches!(cn.kind(), "new_expression" | "object_creation_expression"));
|
||||
|
||||
let idx = g.add_node(NodeInfo {
|
||||
kind,
|
||||
call: CallMeta {
|
||||
|
|
@ -2311,6 +2578,7 @@ pub(super) fn push_node<'a>(
|
|||
arg_string_literals,
|
||||
destination_uses,
|
||||
gate_filters,
|
||||
is_constructor,
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels,
|
||||
|
|
@ -2339,6 +2607,7 @@ pub(super) fn push_node<'a>(
|
|||
is_eq_with_const: detect_eq_with_const(ast, lang),
|
||||
is_numeric_length_access: detect_numeric_length_access(ast, lang, code),
|
||||
member_field: detect_member_field_assignment(ast, code),
|
||||
rhs_is_function_literal: rhs_is_function_literal(ast, lang),
|
||||
});
|
||||
|
||||
debug!(
|
||||
|
|
@ -2404,7 +2673,10 @@ fn rhs_is_function_literal(ast: Node, lang: &str) -> bool {
|
|||
if candidate.is_none() {
|
||||
// Walk one level into declarations whose direct child is the
|
||||
// declarator (variable_declaration → variable_declarator →
|
||||
// value).
|
||||
// value), or expression-statement wrappers whose direct child is
|
||||
// an assignment_expression / assignment with a `right` field
|
||||
// (JS `expression_statement > assignment_expression`, Python
|
||||
// `expression_statement > assignment`).
|
||||
let mut cursor = ast.walk();
|
||||
for c in ast.children(&mut cursor) {
|
||||
if matches!(
|
||||
|
|
@ -2417,6 +2689,11 @@ fn rhs_is_function_literal(ast: Node, lang: &str) -> bool {
|
|||
if candidate.is_some() {
|
||||
break;
|
||||
}
|
||||
} else if matches!(lookup(lang, c.kind()), Kind::Assignment) {
|
||||
candidate = c.child_by_field_name("right");
|
||||
if candidate.is_some() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4417,7 +4694,23 @@ fn apply_promisify_labels(
|
|||
let Some(alias) = aliases.get(&callee) else {
|
||||
continue;
|
||||
};
|
||||
let wrapped_labels = classify_all(lang, &alias.wrapped, extra);
|
||||
// Inherit both flat and gated labels from the wrapped callee.
|
||||
// Gated sinks (e.g. `child_process.exec`) carry the same
|
||||
// capability semantics as flat sinks, just with arg-position
|
||||
// filtering at the call site; the promisify alias should
|
||||
// surface the wrapped function's sink class regardless of
|
||||
// which arm originally classified it.
|
||||
let mut wrapped_labels: Vec<crate::labels::DataLabel> =
|
||||
classify_all(lang, &alias.wrapped, extra)
|
||||
.into_iter()
|
||||
.collect();
|
||||
for gm in
|
||||
classify_gated_sink(lang, &alias.wrapped, |_| None, |_| None, |_| false).iter()
|
||||
{
|
||||
if !wrapped_labels.contains(&gm.label) {
|
||||
wrapped_labels.push(gm.label);
|
||||
}
|
||||
}
|
||||
if wrapped_labels.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -678,12 +678,30 @@ fn find_guard_nodes(ctx: &AnalysisContext) -> Vec<(NodeIndex, Cap)> {
|
|||
if info.kind == StmtKind::If {
|
||||
if let Some(cond_text) = &info.condition_text {
|
||||
let kind = classify_condition(cond_text);
|
||||
// For `AllowlistCheck`, also confirm a target identifier was
|
||||
// extractable. When the receiver-method form carries a
|
||||
// string-literal arg (`filePath.includes("/")`,
|
||||
// `path.contains("..")`), `extract_allowlist_target` returns
|
||||
// `None` because the argument isn't an identifier. Those
|
||||
// shapes are presence-checks, not real allowlist tests against
|
||||
// a collection variable, and shouldn't dominate every
|
||||
// downstream sink as a structural guard with `Cap::all()`.
|
||||
// `classify_condition` itself stays unchanged (an existing
|
||||
// test locks in its broad return for the receiver-method form,
|
||||
// and the SSA branch-narrowing layer reads the kind for its
|
||||
// own purposes).
|
||||
let allowlist_has_target = if kind == PredicateKind::AllowlistCheck {
|
||||
crate::taint::path_state::classify_condition_with_target(cond_text)
|
||||
.1
|
||||
.is_some()
|
||||
} else {
|
||||
true
|
||||
};
|
||||
if matches!(
|
||||
kind,
|
||||
PredicateKind::AllowlistCheck
|
||||
| PredicateKind::TypeCheck
|
||||
| PredicateKind::ValidationCall
|
||||
) {
|
||||
PredicateKind::TypeCheck | PredicateKind::ValidationCall,
|
||||
) || (kind == PredicateKind::AllowlistCheck && allowlist_has_target)
|
||||
{
|
||||
result.push((idx, Cap::all()));
|
||||
} else if cond_indirect_validator_callee(info, ctx).is_some() {
|
||||
// Indirect-validator pattern:
|
||||
|
|
@ -995,7 +1013,25 @@ impl CfgAnalysis for UnguardedSink {
|
|||
// is the only other operand. The simpler `is_all_args_constant`
|
||||
// check above rejects that mixed shape because it forbids real
|
||||
// parameters in operand position.
|
||||
if !has_taint && ssa_all_sink_operands_const_or_param(ctx, *sink) {
|
||||
//
|
||||
// Exemption: shell-array gate filters. The
|
||||
// `extract_shell_array_payload_idents` detector recognises
|
||||
// `[<shell>, "-c", <payload>]` arrays at any call site and emits a
|
||||
// `Sink(SHELL_ESCAPE)` label with `destination_uses` narrowed to
|
||||
// the payload-element idents. When the array shape itself is the
|
||||
// gate, an unrelated reassign-to-const elsewhere in the body
|
||||
// (`const flag = true; if (flag) {}`) does not erase the
|
||||
// shell-exec intent — the construction of `[bash, -c, x]` is by
|
||||
// itself the dangerous operation. Skip this suppression so the
|
||||
// structural finding survives in closed-world contexts where no
|
||||
// taint source has been resolved yet.
|
||||
let has_shell_array_gate = sink_info.call.gate_filters.iter().any(|gf| {
|
||||
gf.label_caps.contains(Cap::SHELL_ESCAPE) && gf.destination_uses.is_some()
|
||||
});
|
||||
if !has_taint
|
||||
&& !has_shell_array_gate
|
||||
&& ssa_all_sink_operands_const_or_param(ctx, *sink)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -125,6 +125,13 @@ pub struct AnalysisContext<'a> {
|
|||
/// the function-declaration level, the gap only matters when the
|
||||
/// auth call has to live inside the body.
|
||||
pub auth_decorators: &'a [String],
|
||||
/// Names of variables whose `.close()` / release calls live in a
|
||||
/// nested closure body somewhere else in the file (e.g.
|
||||
/// `socket.on("close", () => ws.close())`). ResourceMisuse uses this
|
||||
/// to suppress `cfg-resource-leak` for handles whose cleanup happens
|
||||
/// in a callback the per-body CFG can't observe. When `None`, no
|
||||
/// closure-based suppression is applied.
|
||||
pub closure_released_var_names: Option<&'a std::collections::HashSet<String>>,
|
||||
}
|
||||
|
||||
pub trait CfgAnalysis {
|
||||
|
|
|
|||
|
|
@ -442,6 +442,23 @@ impl CfgAnalysis for ResourceMisuse {
|
|||
if pair.resource_name == "mutex" && !has_explicit_lock_acquire(ctx, acquire) {
|
||||
continue;
|
||||
}
|
||||
// Suppress when a sibling closure / event handler in
|
||||
// this file releases the same variable. Common JS/TS
|
||||
// shape: `const ws = new WebSocket(url);
|
||||
// socket.on("close", () => ws.close())`. The release
|
||||
// node lives in a nested body the per-body CFG can't
|
||||
// see, so the structural "no release on this exit
|
||||
// path" check fires erroneously. Match by acquired
|
||||
// variable name; closure captures share the binding
|
||||
// name with the outer handle.
|
||||
if let Some(acq_var) = ctx.cfg[acquire].taint.defines.as_deref()
|
||||
&& ctx
|
||||
.closure_released_var_names
|
||||
.map(|s| s.contains(acq_var))
|
||||
.unwrap_or(false)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let info = &ctx.cfg[acquire];
|
||||
let callee_desc = info.call.callee.as_deref().unwrap_or("(acquire)");
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
|
|||
body_const_facts: None,
|
||||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
};
|
||||
analysis.run(&ctx)
|
||||
}
|
||||
|
|
@ -61,6 +62,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
|
|||
body_const_facts: None,
|
||||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -94,6 +96,7 @@ fn parse_and_run_all_with_taint(
|
|||
body_const_facts: None,
|
||||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
};
|
||||
run_all(&ctx)
|
||||
}
|
||||
|
|
@ -211,6 +214,7 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
|
|||
body_const_facts: facts.as_ref(),
|
||||
type_facts: facts.as_ref().map(|f| &f.type_facts),
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
};
|
||||
analysis.run(&ctx)
|
||||
}
|
||||
|
|
@ -1225,6 +1229,7 @@ fn config_sanitizer_suppresses_unguarded_sink() {
|
|||
body_const_facts: None,
|
||||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
};
|
||||
let findings = run_all(&ctx);
|
||||
|
||||
|
|
@ -1703,6 +1708,7 @@ fn cfg_only_no_taint_produces_low_severity() {
|
|||
body_const_facts: None,
|
||||
type_facts: None,
|
||||
auth_decorators: &[],
|
||||
closure_released_var_names: None,
|
||||
};
|
||||
let findings = guards::UnguardedSink.run(&ctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ pub fn handle_command(
|
|||
);
|
||||
}
|
||||
let _ = crate::utils::analysis_options::install(config.analysis.engine);
|
||||
let _ = crate::utils::detector_options::install(config.detectors.clone());
|
||||
};
|
||||
|
||||
match command {
|
||||
|
|
@ -293,6 +294,9 @@ pub fn handle_command(
|
|||
"analysis-engine runtime already installed; CLI engine flags ignored"
|
||||
);
|
||||
}
|
||||
// Detector knobs (currently `[detectors.data_exfil]`) are
|
||||
// resolved straight from config; no CLI overrides yet.
|
||||
let _ = crate::utils::detector_options::install(config.detectors.clone());
|
||||
|
||||
// ── --explain-engine: print resolved config and exit ────────
|
||||
if explain_engine {
|
||||
|
|
|
|||
|
|
@ -184,6 +184,7 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
|
|||
TypeKind::Url => 10,
|
||||
TypeKind::HttpClient => 11,
|
||||
TypeKind::LocalCollection => 12,
|
||||
TypeKind::RequestBuilder => 13,
|
||||
// the analysis DTO types carry per-field structural info that the
|
||||
// bitset domain can't represent. Collapse to Unknown so callers
|
||||
// still see "any type possible" rather than crashing on an
|
||||
|
|
@ -208,6 +209,7 @@ fn type_kind_from_index(idx: u32) -> Option<TypeKind> {
|
|||
10 => Some(TypeKind::Url),
|
||||
11 => Some(TypeKind::HttpClient),
|
||||
12 => Some(TypeKind::LocalCollection),
|
||||
13 => Some(TypeKind::RequestBuilder),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -610,6 +610,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2516,6 +2516,7 @@ fn ssa_summaries_round_trip() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
),
|
||||
(
|
||||
|
|
@ -2550,6 +2551,7 @@ fn ssa_summaries_round_trip() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
),
|
||||
];
|
||||
|
|
@ -2722,6 +2724,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)];
|
||||
idx.replace_ssa_summaries_for_file(&f, &hash_v1, &sums_v1)
|
||||
|
|
@ -2758,6 +2761,7 @@ fn ssa_summaries_hash_rescan_replaces_stale() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)];
|
||||
idx.replace_ssa_summaries_for_file(&f, &hash_v2, &sums_v2)
|
||||
|
|
@ -2815,6 +2819,7 @@ fn clear_drops_ssa_summaries_table() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)];
|
||||
idx.replace_ssa_summaries_for_file(&f, &hash, &sums)
|
||||
|
|
@ -2871,6 +2876,7 @@ fn make_test_callee_body(
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::new(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
},
|
||||
opt: crate::ssa::OptimizeResult {
|
||||
const_values: std::collections::HashMap::new(),
|
||||
|
|
@ -3086,6 +3092,7 @@ fn make_test_ssa_summary() -> crate::summary::ssa_summary::SsaFuncSummary {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3847,3 +3854,59 @@ fn ssa_summaries_pre_phase5_blob_decodes_with_empty_field_points_to() {
|
|||
"missing field_points_to must default to empty",
|
||||
);
|
||||
}
|
||||
|
||||
/// Pre-`param_to_gate_filters` blob compatibility: a summary serialised
/// before this field existed deserialises with the empty default.
///
/// `#[serde(default)]` on the field means old SQLite blobs round-trip
/// without a schema migration. The new field is stored inside the JSON
/// `summary` column, so SQL-level columns are unchanged.
#[test]
fn ssa_summaries_pre_gate_filters_blob_decodes_with_empty_param_to_gate_filters() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    // Hand-craft JSON without the `param_to_gate_filters` key. Every other
    // key listed here is present so that only the missing field is under test.
    let pre_gate_filters_json = r#"{
        "param_to_return": [],
        "param_to_sink": [],
        "source_caps": 0,
        "param_to_sink_param": [],
        "param_container_to_return": [],
        "param_to_container_store": [],
        "return_type": null,
        "return_abstract": null,
        "source_to_callback": [],
        "receiver_to_return": null,
        "receiver_to_sink": 0,
        "abstract_transfer": [],
        "param_return_paths": [],
        "return_path_facts": [],
        "typed_call_receivers": []
    }"#;
    // Decoding must succeed even though the key is absent ...
    let sum: SsaFuncSummary = serde_json::from_str(pre_gate_filters_json).unwrap();
    // ... and the absent field must come back as the empty default.
    assert!(
        sum.param_to_gate_filters.is_empty(),
        "missing param_to_gate_filters must default to empty",
    );
}
|
||||
|
||||
/// Round-trip check for a populated `param_to_gate_filters`.
///
/// A summary carrying one SSRF entry (position 0) and one DATA_EXFIL entry
/// (position 1) is serialised to JSON and decoded again; the decoded list
/// must equal the input exactly, so the per-position cap masks that keep the
/// SSRF-vs-DATA_EXFIL split intact across the function-summary boundary are
/// not lost or reordered.
#[test]
fn ssa_summaries_param_to_gate_filters_round_trip() {
    use crate::labels::Cap;
    use crate::summary::ssa_summary::SsaFuncSummary;

    // Only the field under test is populated; everything else stays default.
    let original = SsaFuncSummary {
        param_to_gate_filters: vec![(0, Cap::SSRF), (1, Cap::DATA_EXFIL)],
        ..SsaFuncSummary::default()
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: SsaFuncSummary = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(
        decoded.param_to_gate_filters,
        vec![(0, Cap::SSRF), (1, Cap::DATA_EXFIL)],
        "per-position cap masks must round-trip exactly",
    );
}
|
||||
|
|
|
|||
|
|
@ -218,6 +218,14 @@ pub struct Evidence {
|
|||
/// under-budget findings and skipped during serialization in that case.
|
||||
#[serde(default, skip_serializing_if = "smallvec::SmallVec::is_empty")]
|
||||
pub engine_notes: smallvec::SmallVec<[crate::engine_notes::EngineNote; 2]>,
|
||||
|
||||
/// For `Cap::DATA_EXFIL` findings, the destination object-literal field
|
||||
/// the tainted value reached (e.g. `"body"`, `"headers"`, `"json"`).
|
||||
/// `None` for non-exfil findings, for exfil findings whose payload arg
|
||||
/// was not an object literal, or when the sink was resolved through a
|
||||
/// summary path that did not preserve destination metadata.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub data_exfil_field: Option<String>,
|
||||
}
|
||||
|
||||
fn is_zero_u16(v: &u16) -> bool {
|
||||
|
|
@ -301,7 +309,15 @@ pub fn compute_confidence(diag: &Diag) -> Confidence {
|
|||
|
||||
let id = &diag.id;
|
||||
|
||||
let base = if id.starts_with("taint-") {
|
||||
let base = if id.starts_with("taint-data-exfiltration") {
|
||||
// DATA_EXFIL is calibrated independently from the generic taint path:
|
||||
// the value at risk is the leak of an *already-sensitive* source, not
|
||||
// the construction of an attacker payload, so the points-based scoring
|
||||
// tuned for code-exec / SSRF / SQLi over-credits these findings. Route
|
||||
// to a narrower decision tree that asks "did we corroborate a real
|
||||
// string body leaving the process?" instead.
|
||||
compute_data_exfil_confidence(diag)
|
||||
} else if id.starts_with("taint-") {
|
||||
compute_taint_confidence(diag)
|
||||
} else if id.starts_with("state-") {
|
||||
match id.as_str() {
|
||||
|
|
@ -458,13 +474,71 @@ fn compute_taint_confidence(diag: &Diag) -> Confidence {
|
|||
}
|
||||
}
|
||||
|
||||
/// Confidence routing for `taint-data-exfiltration` findings.
|
||||
///
|
||||
/// The generic taint scorer ranks DATA_EXFIL too aggressively: a Sensitive
|
||||
/// source plus a sink call is enough to push it into the Medium/High band,
|
||||
/// but the leak class needs corroboration that a real string body actually
|
||||
/// leaves the process (otherwise we surface every `fetch(..., {body: x})`
|
||||
/// where `x` happens to be Sensitive-tagged). This routing is deliberately
|
||||
/// capped at Medium and only fires Medium when the symbolic execution
|
||||
/// verdict confirms the path (abstract interpretation participates only as
|
||||
/// a sink-suppression filter inside SSA taint and does not surface a
|
||||
/// separate verdict here).
|
||||
///
|
||||
/// Routing:
|
||||
/// * Source < Sensitive → Low (caller already strips DATA_EXFIL for
|
||||
/// Plain sources, but defensively floor here).
|
||||
/// * Symbolic verdict `Confirmed` → Medium (symex produced a witness
|
||||
/// that a tainted string reaches the body argument).
|
||||
/// * Symbolic verdict `Inconclusive` / `NotAttempted` / no symbolic
|
||||
/// analysis → Low (instruction's "Inconclusive" tier; the `Confidence`
|
||||
/// enum has no separate Inconclusive variant so it floors to Low).
|
||||
/// * Symbolic verdict `Infeasible` → Low (path proven dead).
|
||||
///
|
||||
/// After routing, a `path_validated` guard on the diag drops the result
|
||||
/// one tier (Medium → Low; Low stays Low) and `apply_engine_notes_cap`
|
||||
/// applies the standard engine-notes cap.
|
||||
fn compute_data_exfil_confidence(diag: &Diag) -> Confidence {
|
||||
let ev = match &diag.evidence {
|
||||
Some(e) => e,
|
||||
None => return Confidence::Low,
|
||||
};
|
||||
|
||||
let is_sensitive = ev
|
||||
.source_kind
|
||||
.map(|k| k.sensitivity() >= crate::labels::Sensitivity::Sensitive)
|
||||
.unwrap_or(false);
|
||||
if !is_sensitive {
|
||||
return Confidence::Low;
|
||||
}
|
||||
|
||||
let mut base = match ev.symbolic.as_ref().map(|s| s.verdict) {
|
||||
Some(Verdict::Confirmed) => Confidence::Medium,
|
||||
Some(Verdict::Infeasible) => Confidence::Low,
|
||||
Some(Verdict::Inconclusive) | Some(Verdict::NotAttempted) | None => Confidence::Low,
|
||||
};
|
||||
|
||||
// Guarded flow: drop a tier. A validation predicate on the path means
|
||||
// the leak may be unreachable in practice, so the corroborated witness
|
||||
// is downgraded one step (Medium → Low; Low stays Low).
|
||||
if diag.path_validated && base > Confidence::Low {
|
||||
base = Confidence::Low;
|
||||
}
|
||||
|
||||
apply_engine_notes_cap(diag, base)
|
||||
}
|
||||
|
||||
/// Score a structured `SourceKind` value.
|
||||
///
|
||||
/// UserInput/Cookie/Header=+3, EnvironmentConfig=+2, Unknown/FileSystem=+1, Database/CaughtException=0.
|
||||
fn structured_source_kind_score(kind: crate::labels::SourceKind) -> i32 {
|
||||
use crate::labels::SourceKind;
|
||||
match kind {
|
||||
SourceKind::UserInput => 3,
|
||||
// Cookie / Header carry auth material, so score them at the same
|
||||
// ranking weight as direct user input rather than the lower
|
||||
// FileSystem/Database tiers.
|
||||
SourceKind::UserInput | SourceKind::Cookie | SourceKind::Header => 3,
|
||||
SourceKind::EnvironmentConfig => 2,
|
||||
SourceKind::Unknown | SourceKind::FileSystem => 1,
|
||||
SourceKind::Database | SourceKind::CaughtException => 0,
|
||||
|
|
@ -538,6 +612,8 @@ pub fn generate_explanation(diag: &Diag) -> Option<String> {
|
|||
use crate::labels::SourceKind;
|
||||
match kind {
|
||||
SourceKind::UserInput => "user input",
|
||||
SourceKind::Cookie => "cookie",
|
||||
SourceKind::Header => "request header",
|
||||
SourceKind::EnvironmentConfig => "environment/config",
|
||||
SourceKind::Database => "database",
|
||||
SourceKind::FileSystem => "file system",
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
|
||||
use crate::labels::{Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, SinkGate};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
|
|
@ -69,6 +69,33 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for C.
///
/// `curl_easy_setopt(handle, option, payload)` is libcurl's option-binding
/// interface; the option identifier at arg 1 selects which slot the payload
/// fills. `CURLOPT_POSTFIELDS` and `CURLOPT_COPYPOSTFIELDS` carry the
/// request body, while other CURLOPT_* constants designate URL / auth / TLS
/// behaviour and are not DATA_EXFIL-relevant. Gating on the macro identifier
/// keeps the rule from over-firing on `curl_easy_setopt(h, CURLOPT_URL, url)`
/// (covered separately by the `curl_easy_perform` SSRF flat sink).
///
/// Identifier-based activation is enabled via the macro-arg fallback in
/// `cfg::mod::classify_gated_sink` for `lang == "c"`. Header-parsing
/// libraries (e.g. libmicrohttpd, mongoose) lack a stable surface and are
/// left to project-specific config.
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
    callee_matcher: "curl_easy_setopt",
    // Arg 1 is the CURLOPT_* option identifier the gate inspects.
    arg_index: 1,
    // Only the two body-carrying options activate the gate.
    dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
    dangerous_prefixes: &[],
    label: DataLabel::Sink(Cap::DATA_EXFIL),
    // Option identifiers are compared with case sensitivity enabled.
    case_sensitive: true,
    // Arg 2 is the payload bound to the selected option.
    payload_args: &[2],
    keyword_name: None,
    dangerous_kwargs: &[],
    activation: GateActivation::ValueMatch,
}];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig};
|
||||
use crate::labels::{Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, SinkGate};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
|
|
@ -91,6 +91,28 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for C++.
///
/// Mirror of the C gate set: `curl_easy_setopt` with `CURLOPT_POSTFIELDS` /
/// `CURLOPT_COPYPOSTFIELDS` at arg 1 binds the request body at arg 2.
/// Identifier-based activation is enabled via the macro-arg fallback in
/// `cfg::mod::classify_gated_sink` for `lang == "cpp" / "c++"`. Modern C++
/// HTTP wrappers (cpr, Boost.Beast) layer over libcurl or directly over the
/// socket; their ergonomic surfaces differ enough that adding gates per
/// library is left for a follow-up driven by the corpus.
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
    callee_matcher: "curl_easy_setopt",
    // Arg 1 is the CURLOPT_* option identifier the gate inspects.
    arg_index: 1,
    // Only the two body-carrying options activate the gate.
    dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
    dangerous_prefixes: &[],
    label: DataLabel::Sink(Cap::DATA_EXFIL),
    // Option identifiers are compared with case sensitivity enabled.
    case_sensitive: true,
    // Arg 2 is the payload bound to the selected option.
    payload_args: &[2],
    keyword_name: None,
    dangerous_kwargs: &[],
    activation: GateActivation::ValueMatch,
}];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
380
src/labels/go.rs
380
src/labels/go.rs
|
|
@ -1,11 +1,13 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig, RuntimeLabelRule};
|
||||
use crate::labels::{
|
||||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||||
};
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
pub static RULES: &[LabelRule] = &[
|
||||
// ─────────── Sources ───────────
|
||||
LabelRule {
|
||||
matchers: &["os.Getenv"],
|
||||
matchers: &["os.Getenv", "os.LookupEnv", "os.Environ"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -16,8 +18,12 @@ pub static RULES: &[LabelRule] = &[
|
|||
"r.URL",
|
||||
"r.Body",
|
||||
"r.Header",
|
||||
"r.Header.Get",
|
||||
"r.Header.Values",
|
||||
"r.URL.Query",
|
||||
"r.URL.Query.Get",
|
||||
"r.Cookie",
|
||||
"r.Cookies",
|
||||
"Request.FormValue",
|
||||
"Request.URL",
|
||||
],
|
||||
|
|
@ -97,27 +103,20 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Outbound HTTP clients (SSRF) ───────────────────────────────────
|
||||
//
|
||||
// These are modeled as destination-aware gated sinks in `GATED_SINKS`
|
||||
// below. Flat Sink rules would over-flag every positional argument as
|
||||
// SSRF (so a tainted body in `http.Post(url, contentType, body)` would
|
||||
// fire SSRF on the body), and the gate machinery short-circuits when a
|
||||
// flat Sink label is already attached to the callee, blocking DATA_EXFIL
|
||||
// body-flow gates from running.
|
||||
//
|
||||
// `net.Dial` / `net.DialTimeout` keep their flat-sink modeling: the
|
||||
// first positional arg is the network address with no body / payload
|
||||
// companion, so the over-flag concern does not apply.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"http.Get",
|
||||
"http.Post",
|
||||
"http.Head",
|
||||
"http.NewRequest",
|
||||
"http.NewRequestWithContext",
|
||||
"net.Dial",
|
||||
"net.DialTimeout",
|
||||
// `http.DefaultClient` is the package-level default `*http.Client`.
|
||||
// Idiomatic Go SSRF sinks (Owncast CVE-2023-3188) use the
|
||||
// `http.DefaultClient.Get(url)` form rather than the bare
|
||||
// `http.Get(url)` helper, so the suffix-matched callee text needs
|
||||
// an explicit entry here, bare `Get/Post/Do/Head` would
|
||||
// over-match unrelated method names.
|
||||
"http.DefaultClient.Get",
|
||||
"http.DefaultClient.Post",
|
||||
"http.DefaultClient.Head",
|
||||
"http.DefaultClient.Do",
|
||||
"http.DefaultClient.PostForm",
|
||||
],
|
||||
matchers: &["net.Dial", "net.DialTimeout"],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
|
|
@ -135,6 +134,343 @@ pub static RULES: &[LabelRule] = &[
|
|||
},
|
||||
];
|
||||
|
||||
/// Argument-role-aware Go sinks. Two classes coexist on the outbound HTTP
/// surface, mirroring the JS/TS modeling:
///
/// * SSRF on the URL-bearing position of a one-shot request (`http.Get`,
///   `http.Head`, `http.Post`, `http.PostForm`, `http.NewRequest`,
///   `http.NewRequestWithContext`, and the `http.DefaultClient`
///   `Get` / `Head` / `Post` / `PostForm` forms).
/// * `Cap::DATA_EXFIL` on the body / payload position when the source is
///   Sensitive (cookies, headers, env, db reads). Gates fire only when
///   taint reaches the body argument, so a tainted URL alone never
///   activates DATA_EXFIL and a tainted body alone never activates SSRF.
///
/// `http.NewRequest` / `http.NewRequestWithContext` carry an SSRF gate on
/// their URL position only. In Go's two-step idiom the actual network
/// call happens at `client.Do(req)`; body taint flows from the body
/// argument through the returned `*http.Request` via default arg → return
/// propagation, and then activates the `http.DefaultClient.Do` DATA_EXFIL
/// gate below. Modeling NewRequest as a body propagator (rather than a
/// body sink) avoids duplicate findings on the idiomatic
/// `req, _ := http.NewRequest(...); client.Do(req)` shape.
pub static GATED_SINKS: &[SinkGate] = &[
    // ── SSRF gates (URL-bearing position) ────────────────────────────────
    // `http.Get(url)` — url is arg 0.
    SinkGate {
        callee_matcher: "http.Get",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.Head(url)` — url is arg 0.
    SinkGate {
        callee_matcher: "http.Head",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.Post(url, contentType, body)` — url is arg 0.
    SinkGate {
        callee_matcher: "http.Post",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.PostForm(url, data)` — url is arg 0.
    SinkGate {
        callee_matcher: "http.PostForm",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.NewRequest(method, url, body)` — url is arg 1.
    SinkGate {
        callee_matcher: "http.NewRequest",
        arg_index: 1,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[1],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.NewRequestWithContext(ctx, method, url, body)` — url is arg 2.
    SinkGate {
        callee_matcher: "http.NewRequestWithContext",
        arg_index: 2,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[2],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.DefaultClient.Get(url)` / `.Head(url)` — url is arg 0.
    SinkGate {
        callee_matcher: "http.DefaultClient.Get",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    SinkGate {
        callee_matcher: "http.DefaultClient.Head",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.DefaultClient.Post(url, contentType, body)` — url is arg 0.
    SinkGate {
        callee_matcher: "http.DefaultClient.Post",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.DefaultClient.PostForm(url, data)` — url is arg 0.
    SinkGate {
        callee_matcher: "http.DefaultClient.PostForm",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::SSRF),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // ── DATA_EXFIL gates (body-bearing position) ─────────────────────────
    // `http.Post(url, contentType, body)` — body is arg 2.
    SinkGate {
        callee_matcher: "http.Post",
        arg_index: 2,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[2],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.PostForm(url, data)` — `data` (arg 1) is `url.Values`. Form
    // bodies serialize the same operator state cookies / headers do, so a
    // tainted Sensitive value reaching the form payload is DATA_EXFIL.
    SinkGate {
        callee_matcher: "http.PostForm",
        arg_index: 1,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[1],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.DefaultClient.Do(req)` — `req` (arg 0) is the `*http.Request`
    // value. Body taint introduced via either `http.NewRequest(_, _, body)`
    // (default arg → return propagation) or a later `req.Body = body` field
    // write reaches this sink through the request value.
    SinkGate {
        callee_matcher: "http.DefaultClient.Do",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[0],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.DefaultClient.PostForm(url, data)` — same as `http.PostForm`
    // but invoked through the package-level default `*http.Client`.
    SinkGate {
        callee_matcher: "http.DefaultClient.PostForm",
        arg_index: 1,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[1],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `http.DefaultClient.Post(url, contentType, body)` — body is arg 2.
    SinkGate {
        callee_matcher: "http.DefaultClient.Post",
        arg_index: 2,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[2],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // ── Common third-party HTTP clients ─────────────────────────────────
    //
    // `go-resty/resty`: `client.R().SetBody(body).Post(url)` style.
    // `SetBody(body)` carries the body into the chained request; the
    // network call happens at the verb method. The body-carrying verb
    // methods gated here are Post / Put / Patch; each is a DATA_EXFIL
    // gate with `payload_args: &[]` (empty), which engages the
    // receiver-tainted fallback in `collect_tainted_sink_vars`. A
    // builder receiver carrying body taint from `SetBody` activates the
    // sink without us needing a positional body arg.
    // NOTE(review): Get / Delete / Send / Execute have no gate in this
    // table — confirm whether that omission is intentional.
    SinkGate {
        callee_matcher: "resty.Request.Post",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    SinkGate {
        callee_matcher: "resty.Request.Put",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    SinkGate {
        callee_matcher: "resty.Request.Patch",
        arg_index: 0,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    // `imroc/req`: `req.Post(url, req.BodyJSON(payload))`. The `BodyJSON`
    // / `BodyXML` helpers wrap a tainted payload and pass it as arg 1+ of
    // the verb call. Since the helper return value carries the body
    // taint, gating the verb on payload args 1 through 3 is treated as
    // sufficient.
    SinkGate {
        callee_matcher: "req.Post",
        arg_index: 1,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[1, 2, 3],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
    SinkGate {
        callee_matcher: "req.Put",
        arg_index: 1,
        dangerous_values: &[],
        dangerous_prefixes: &[],
        label: DataLabel::Sink(Cap::DATA_EXFIL),
        case_sensitive: false,
        payload_args: &[1, 2, 3],
        keyword_name: None,
        dangerous_kwargs: &[],
        activation: GateActivation::Destination {
            object_destination_fields: &[],
        },
    },
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
|
|
@ -31,6 +31,15 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Sensitive operator state: HTTP session attributes commonly carry
|
||||
// auth tokens / CSRF tokens / signed user ids. Routed through the
|
||||
// `Cookie` source-kind heuristic so DATA_EXFIL fires when these
|
||||
// values leave the process via an outbound request body.
|
||||
LabelRule {
|
||||
matchers: &["HttpSession.getAttribute", "session.getAttribute"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["HtmlUtils.htmlEscape", "StringEscapeUtils.escapeHtml4"],
|
||||
|
|
@ -121,6 +130,79 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Outbound HTTP egress points where a Sensitive source (cookie, header,
|
||||
// env, session attribute, db read) reaching the request body / payload
|
||||
// is a cross-boundary disclosure distinct from SSRF. The flat-rule
|
||||
// model relies on default arg → return propagation through builder
|
||||
// chains: `HttpRequest.newBuilder().uri(u).POST(BodyPublishers.ofString(p)).build()`
|
||||
// smears `p`-taint into the returned request, which then activates the
|
||||
// sink at `client.send(req)`.
|
||||
//
|
||||
// Type-qualified resolution maps `restTemplate.postForObject(...)` →
|
||||
// `HttpClient.postForObject` via the JAVA_HIERARCHY (RestTemplate,
|
||||
// OkHttpClient, WebClient, CloseableHttpClient all subtype HttpClient),
|
||||
// so a single set of `HttpClient.<method>` rules covers every framework
|
||||
// in scope. Plain user input is silenced by the source-sensitivity
|
||||
// gate in `effective_sink_caps`, so this fires only on cookies / headers
|
||||
// / env / session / db.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
// java.net.http: client.send(req) consumes a request that
|
||||
// carries body-taint via BodyPublishers.ofString/ofByteArray/
|
||||
// ofInputStream through the builder chain.
|
||||
"HttpClient.send",
|
||||
"HttpClient.sendAsync",
|
||||
// Spring RestTemplate verbs that take a body / entity.
|
||||
"postForObject",
|
||||
"postForEntity",
|
||||
"RestTemplate.exchange",
|
||||
"RestTemplate.put",
|
||||
"RestTemplate.patchForObject",
|
||||
// Apache HttpClient: httpClient.execute(req) where req is an
|
||||
// HttpPost / HttpPut / HttpPatch with .setEntity(StringEntity(p)).
|
||||
// CloseableHttpClient subtypes HttpClient so type-qualified
|
||||
// resolution rewrites client.execute → HttpClient.execute.
|
||||
"HttpClient.execute",
|
||||
// Spring WebClient body-binding step:
|
||||
// webClient.post().uri(u).bodyValue(payload).retrieve().
|
||||
// bodyValue is the explicit body-bind verb; default propagation
|
||||
// carries the tainted body into the chain return so the sink
|
||||
// attaches at the body-bind site itself (no cross-call needed).
|
||||
"bodyValue",
|
||||
// Apache HttpClient body-binding: the `setEntity` step on
|
||||
// HttpPost / HttpPut / HttpPatch mutates the request rather
|
||||
// than returning the builder, so the receiver's SSA value at
|
||||
// the later `httpClient.execute(req)` does not carry body
|
||||
// taint via the default smear (which threads through return
|
||||
// values, not field mutations). Firing DATA_EXFIL at the
|
||||
// setEntity call itself catches the body-binding directly.
|
||||
// The matcher is specific enough to avoid collisions —
|
||||
// `setEntity` is Apache-HttpClient-specific.
|
||||
"setEntity",
|
||||
// OkHttp builder body-binding shortcut: when the chain
|
||||
// doesn't roll through `.post(body).build()` (e.g. a helper
|
||||
// function returns the Builder mid-chain), `RequestBody`
|
||||
// is bound via `.post(body)` / `.put(body)` / `.patch(body)`
|
||||
// / `.delete(body)` directly on the Builder. These methods
|
||||
// also exist on unrelated classes (NIO, Streams) but in the
|
||||
// OkHttp idiom the receiver type is `Request.Builder`; the
|
||||
// receiver-type widening from `Request.Builder` → HttpClient
|
||||
// isn't currently modeled, so covering them would require suffix-name
|
||||
// matchers with some receiver-agnostic firing risk.
|
||||
// Conservative choice: omit these for v1 to avoid over-fire on
|
||||
// non-OkHttp `post`/`put`/`patch` calls.
|
||||
// OkHttp two-step: client.newCall(req).execute() / .enqueue().
|
||||
// Chain normalization strips `()` between dots so the tree-
|
||||
// sitter callee text `client.newCall(req).execute` matches the
|
||||
// suffix `newCall.execute` after normalization.
|
||||
"newCall.execute",
|
||||
"newCall.enqueue",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"readObject",
|
||||
|
|
|
|||
|
|
@ -98,6 +98,26 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional forwarding wrappers, telemetry / analytics / metrics dispatch.
|
||||
// Treating these as Sanitizer(DATA_EXFIL) encodes the project convention
|
||||
// that a payload routed through a named forwarding boundary is an
|
||||
// explicit, expected egress (the developer named the function), not the
|
||||
// accidental cross-boundary leak DATA_EXFIL is meant to catch. Users who
|
||||
// do not follow this convention can override per-project via
|
||||
// [analysis.languages.javascript] custom rules; the convention is
|
||||
// documented in docs/detectors/taint.md so projects can extend it.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"serializeForUpstream",
|
||||
"forwardPayload",
|
||||
"tracker.send",
|
||||
"analytics.track",
|
||||
"metrics.report",
|
||||
"logEvent",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional project-local HTML escapers. Suffix word-boundary match
|
||||
// fires on bare calls to locally defined helpers (`function escapeHtml(x)`
|
||||
// invoked as `escapeHtml(x)`) across codebases that follow the common
|
||||
|
|
@ -128,6 +148,23 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::URL_ENCODE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Shell-exec sinks. Qualified `child_process.*` and bare destructured-
|
||||
// import forms (`exec`, `execSync`, `execFile`, ...) are both modeled as
|
||||
// flat sinks here so module-aliased call sites like `cp.exec(...)`
|
||||
// (where `cp = require('child_process')`) still fire via suffix match.
|
||||
// The bare-form FPs that motivated tightening are addressed elsewhere:
|
||||
//
|
||||
// * `container.exec(...)` (Dockerode) and `exec.start(...)` (the
|
||||
// resulting `exec` handle) — `container.exec` is excluded via the
|
||||
// EXCLUDES list below; `exec.start` is suppressed by restricting
|
||||
// `first_member_label`'s suffix-strip-and-retry to `Source` labels
|
||||
// only (see `cfg/helpers.rs`).
|
||||
// * `execSync(cmd, { env: process.env })` flagging `process.env`
|
||||
// flowing into the options arg — addressed by the
|
||||
// `=exec`/`=execSync`/`=execFile`/... gates in `GATED_SINKS` below
|
||||
// which set `payload_args: &[0]`. The cfg pass propagates a gate's
|
||||
// payload_args restriction onto the matching flat sink so only arg
|
||||
// 0 (the command string) is taint-checked at the call site.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"child_process.exec",
|
||||
|
|
@ -136,8 +173,9 @@ pub static RULES: &[LabelRule] = &[
|
|||
"child_process.execFile",
|
||||
// Bare forms from destructured imports:
|
||||
// const { exec, execSync } = require('child_process')
|
||||
// Note: bare `exec` suffix-matches RegExp.prototype.exec() too,
|
||||
// but in practice tainted data rarely flows to regexp.exec().
|
||||
// and module-aliased calls like `cp.exec(...)`. Receiver-name
|
||||
// collisions (`container.exec`, etc.) are suppressed via
|
||||
// EXCLUDES; arg-position restriction comes from the `=*` gates.
|
||||
"exec",
|
||||
"execSync",
|
||||
"execFile",
|
||||
|
|
@ -250,16 +288,22 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ORM / query builder raw-SQL entry points
|
||||
// ORM / query builder raw-SQL entry points.
|
||||
//
|
||||
// `$queryRaw` / `$executeRaw` are tagged-template forms; the SQL is
|
||||
// assembled from a template literal so taint reaching arg 0 is the
|
||||
// injection vector and modeling them as flat sinks is correct.
|
||||
//
|
||||
// `$queryRawUnsafe` / `$executeRawUnsafe` accept positional bind
|
||||
// parameters: `tx.$queryRawUnsafe(sqlTemplate, p1, p2, ...)` binds
|
||||
// p1..pN as `$1..$N` (PostgreSQL prepared-statement params) and the SQL
|
||||
// template at arg 0 is the only injection point. These are modeled as
|
||||
// gated sinks below (`payload_args: &[0]`) so taint flowing only into
|
||||
// the bind params no longer fires. `sequelize.query` and `knex.raw`
|
||||
// also accept a separate bind-params object/array but the bind-params
|
||||
// interface is non-positional in those APIs, so they stay flat for now.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"sequelize.query",
|
||||
"knex.raw",
|
||||
"$queryRaw",
|
||||
"$queryRawUnsafe",
|
||||
"$executeRaw",
|
||||
"$executeRawUnsafe",
|
||||
],
|
||||
matchers: &["sequelize.query", "knex.raw", "$queryRaw", "$executeRaw"],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
|
|
@ -295,6 +339,17 @@ pub static EXCLUDES: &[&str] = &[
|
|||
"req.session.regenerate",
|
||||
"req.session.save",
|
||||
"req.session.reload",
|
||||
// Dockerode container API: `container.exec({ Cmd: [...] })` is the
|
||||
// canonical non-shell exec path (the Cmd array is passed directly to
|
||||
// the kernel via `execve`, no shell parsing). `exec.start(...)` is
|
||||
// the follow-on stream attach. Suffix-matching the bare `exec` rule
|
||||
// would otherwise classify every `<receiver>.exec(...)` method call
|
||||
// — including these — as a SHELL_ESCAPE sink. These patterns name
|
||||
// the Dockerode SDK methods specifically; if a project happens to
|
||||
// also expose its own `container.exec` shell wrapper, override via
|
||||
// [analysis.languages.javascript] custom rules.
|
||||
"container.exec",
|
||||
"exec.start",
|
||||
];
|
||||
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
|
|
@ -577,6 +632,128 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["body", "headers", "json"],
|
||||
},
|
||||
},
|
||||
// ── Shell-exec sinks (SHELL_ESCAPE) ──────────────────────────────────
|
||||
//
|
||||
// Only arg 0 (the command string) is a shell-injection payload.
|
||||
// `options.env` / `options.cwd` / etc. at arg 1+ are not. Bare forms
|
||||
// (`exec`, `execSync`, `execFile`, `execAsync`, `execPromise`) use the
|
||||
// `=` exact-only sigil so they match the destructured-import shape
|
||||
// (`const { exec } = require('child_process'); exec(cmd)`) without
|
||||
// colliding with any `<receiver>.exec` method (Dockerode's
|
||||
// `container.exec`, `RegExp.prototype.exec`, etc.).
|
||||
// Qualified `child_process.*` forms stay as flat sinks (see RULES above);
|
||||
// gates run only when no flat sink already classifies the call, so adding
|
||||
// them here would never fire. The bare destructured-import forms below
|
||||
// are the only place where shell-exec needs gating, since `classify_all`
|
||||
// can't safely register a bare `exec` rule without colliding with every
|
||||
// `<receiver>.exec` method (Dockerode `container.exec`,
|
||||
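// `RegExp.prototype.exec`, etc.).
// NOTE(review): RULES above still lists bare `exec` / `execSync` /
// `execFile` as flat sinks, and its comment says the cfg pass copies
// these gates' `payload_args` restriction onto the matching flat sink.
// That appears to conflict with "gates run only when no flat sink
// already classifies the call" — confirm which mechanism applies.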
|
||||
SinkGate {
|
||||
callee_matcher: "=exec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execSync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execFile",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execAsync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execPromise",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── Prisma raw-SQL with positional bind params (SQL_QUERY) ───────────
|
||||
//
|
||||
// `tx.$queryRawUnsafe(sqlTemplate, p1, p2, ...)` binds `p1..pN` as
|
||||
// PostgreSQL `$1..$N` prepared-statement parameters; only arg 0 (the
|
||||
// SQL template) is the injection vector. Flat sinks here flagged taint
|
||||
// flowing only into bind params, which is equivalent to a parameterised
|
||||
// query and not exploitable. Suffix-match (no `=` sigil) so
|
||||
// `tx.$queryRawUnsafe`, `prisma.$queryRawUnsafe`, etc. all qualify.
|
||||
SinkGate {
|
||||
callee_matcher: "$queryRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "$executeRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -320,6 +320,11 @@ static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::
|
|||
m.insert("ts", typescript::GATED_SINKS);
|
||||
m.insert("python", python::GATED_SINKS);
|
||||
m.insert("py", python::GATED_SINKS);
|
||||
m.insert("go", go::GATED_SINKS);
|
||||
m.insert("php", php::GATED_SINKS);
|
||||
m.insert("c", c::GATED_SINKS);
|
||||
m.insert("cpp", cpp::GATED_SINKS);
|
||||
m.insert("c++", cpp::GATED_SINKS);
|
||||
m
|
||||
});
|
||||
|
||||
|
|
@ -473,6 +478,10 @@ pub fn lookup(lang: &str, raw: &str) -> Kind {
|
|||
pub enum SourceKind {
|
||||
/// Direct user input (request params, argv, stdin, form data)
|
||||
UserInput,
|
||||
/// HTTP cookie value (carries session / auth material)
|
||||
Cookie,
|
||||
/// HTTP request header (may carry auth tokens, user-agent fingerprints)
|
||||
Header,
|
||||
/// Environment variables and configuration
|
||||
EnvironmentConfig,
|
||||
/// File system reads
|
||||
|
|
@ -485,10 +494,81 @@ pub enum SourceKind {
|
|||
Unknown,
|
||||
}
|
||||
|
||||
/// Sensitivity classification of a taint source. Drives detector classes
|
||||
/// like `DATA_EXFIL` that only fire when the source carries information
|
||||
/// the operator did not intend to leak. Plain user input echoed back into
|
||||
/// an outbound request is not data exfiltration: the user already controls
|
||||
/// it, so surfacing it as a leak is noise.
|
||||
///
|
||||
/// The threshold for `DATA_EXFIL` is `>= Sensitive`, so plain user input is
|
||||
/// suppressed. Projects that legitimately classify a request body as
|
||||
/// sensitive (e.g. an API gateway forwarding pre-authenticated user tokens
|
||||
/// out of a request body) can override via custom rules in `nyx.conf`,
|
||||
/// either by re-classifying the source or by adding a Sanitizer rule for
|
||||
/// `Cap::DATA_EXFIL` on the legitimate forwarding path.
///
/// Variants are declared in ascending order of sensitivity. The derived
/// `PartialOrd` / `Ord` compare by declaration order, so reordering the
/// variants would change the result of threshold comparisons.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub enum Sensitivity {
|
||||
/// Attacker-controlled but not secret in itself: request bodies, query
|
||||
/// strings, form fields, argv. Echoing this to an outbound request is
|
||||
/// not data exfiltration.
|
||||
Plain,
|
||||
/// Carries operator state that should not leak out: cookies,
|
||||
/// auth headers, env, file system reads, database rows.
|
||||
Sensitive,
|
||||
/// Reserved for future explicit secret classifications (API keys,
|
||||
/// credential stores, key material). No source currently produces
|
||||
/// this, but the threshold check in `effective_sink_caps` already
|
||||
/// handles it monotonically.
|
||||
Secret,
|
||||
}
|
||||
|
||||
impl SourceKind {
|
||||
/// Return the sensitivity tier this source kind belongs to. Drives the
|
||||
/// `Cap::DATA_EXFIL` cap-suppression decision in `ast.rs`.
///
/// Only `UserInput` maps to `Sensitivity::Plain`; every other kind listed
/// below maps to `Sensitivity::Sensitive`. No arm returns
/// `Sensitivity::Secret`.
|
||||
pub fn sensitivity(self) -> Sensitivity {
// Deliberately no wildcard arm: a newly added `SourceKind` variant
// must be classified here explicitly before the crate compiles.
|
||||
match self {
|
||||
// Plain user-controlled input: the user already has the data, so
|
||||
// surfacing it back to them via an outbound request is not a
|
||||
// disclosure.
|
||||
SourceKind::UserInput => Sensitivity::Plain,
|
||||
// Operator-bound state: leaking these via an outbound request
|
||||
// is a real cross-boundary disclosure.
|
||||
SourceKind::Cookie
|
||||
| SourceKind::Header
|
||||
| SourceKind::EnvironmentConfig
|
||||
| SourceKind::FileSystem
|
||||
| SourceKind::Database => Sensitivity::Sensitive,
|
||||
// Caught exceptions can carry stack traces, db errors, internal
|
||||
// paths; treat them as sensitive by default.
|
||||
SourceKind::CaughtException => Sensitivity::Sensitive,
|
||||
// Conservative default for unclassified sources: surface
|
||||
// findings rather than silently drop them.
|
||||
SourceKind::Unknown => Sensitivity::Sensitive,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Infer the source kind from capabilities and callee name.
|
||||
pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
||||
let cl = callee.to_ascii_lowercase();
|
||||
|
||||
// Cookie / Header are checked *before* the generic user-input bucket
|
||||
// because they imply higher sensitivity (auth material, session ids).
|
||||
// The generic UserInput substrings (`request`, `header`, `cookie`)
|
||||
// would otherwise swallow these.
|
||||
//
|
||||
// Session stores carry auth material (CSRF tokens, signed user ids) of
|
||||
// the same sensitivity tier as raw cookies, so route them through the
|
||||
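// `Cookie` arm. Note that `cl` is already lowercased, so the substring
|
||||
// test below also matches a capitalised `Session` (e.g. a
|
||||
// `requests.Session` constructor); nothing in this function excludes it.
// NOTE(review): presumably the source label rules are expected to keep
// such client constructors from being tagged as sources at all — confirm.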
|
||||
if cl.contains("cookie") || cl.contains("session") {
|
||||
return SourceKind::Cookie;
|
||||
}
|
||||
if cl.contains("header") {
|
||||
return SourceKind::Header;
|
||||
}
|
||||
|
||||
// User input patterns
|
||||
if cl.contains("argv")
|
||||
|| cl.contains("stdin")
|
||||
|
|
@ -498,11 +578,23 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
|| cl.contains("params")
|
||||
|| cl.contains("input")
|
||||
|| cl.contains("body")
|
||||
|| cl.contains("header")
|
||||
|| cl.contains("cookie")
|
||||
|| cl.contains("location")
|
||||
|| cl.contains("document.url")
|
||||
|| cl.contains("document.referrer")
|
||||
// PHP superglobals: the AST text preserves the `$` (member-text
|
||||
// extraction reads the `variable_name` node verbatim) so we match
|
||||
// both `$_POST` and the `_POST` form some collectors emit.
|
||||
// `$_REQUEST` already matches via the `request` substring above;
|
||||
// `$_COOKIE` / `$_SESSION` route through the Cookie tier earlier in
|
||||
// the function. `$_SERVER` is operator-state-bearing (auth headers
|
||||
// etc.) so it stays Sensitive by falling through to the Unknown
|
||||
// bucket.
|
||||
|| cl == "$_get"
|
||||
|| cl == "$_post"
|
||||
|| cl == "$_files"
|
||||
|| cl == "_get"
|
||||
|| cl == "_post"
|
||||
|| cl == "_files"
|
||||
{
|
||||
return SourceKind::UserInput;
|
||||
}
|
||||
|
|
@ -542,6 +634,8 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
|
||||
match kind {
|
||||
SourceKind::UserInput => crate::patterns::Severity::High,
|
||||
SourceKind::Cookie => crate::patterns::Severity::High,
|
||||
SourceKind::Header => crate::patterns::Severity::High,
|
||||
SourceKind::EnvironmentConfig => crate::patterns::Severity::High,
|
||||
SourceKind::FileSystem => crate::patterns::Severity::Medium,
|
||||
SourceKind::Database => crate::patterns::Severity::Medium,
|
||||
|
|
@ -986,11 +1080,20 @@ pub fn classify_gated_sink(
|
|||
None => return out,
|
||||
};
|
||||
|
||||
// Match against the original callee text AND a chain-normalised form
|
||||
// that strips `()` between dots so a chained construction like
|
||||
// `httpx.AsyncClient().post` matches a gate matcher of
|
||||
// `httpx.AsyncClient.post`. Mirrors the normalisation applied by
|
||||
// `classify` for flat label rules.
|
||||
let callee_bytes = callee_text.as_bytes();
|
||||
let normalized = normalize_chained_call(callee_text);
|
||||
let normalized_bytes = normalized.as_bytes();
|
||||
|
||||
for gate in *gates {
|
||||
let matcher = gate.callee_matcher.as_bytes();
|
||||
if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive) {
|
||||
if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive)
|
||||
&& !match_suffix_cs(normalized_bytes, matcher, gate.case_sensitive)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1473,26 +1576,69 @@ mod tests {
|
|||
// CVE Hunt Session 2 (Go CVE-2023-3188 Owncast SSRF):
|
||||
// `http.DefaultClient.Get/Post/Head/Do/PostForm` is the idiomatic Go
|
||||
// SSRF sink shape (`http.DefaultClient` is the package-level shared
|
||||
// `*http.Client`). Bare `Get`/`Post` matchers would over-match
|
||||
// unrelated method names; the explicit `http.DefaultClient.*` matcher
|
||||
// restricts the suffix-match to the stdlib helper while leaving
|
||||
// user-defined `myClient.Get` alone (no false positives).
|
||||
// `*http.Client`). These callees migrated from a flat `Sink(SSRF)`
|
||||
// rule to destination-aware gated sinks so that DATA_EXFIL gates can
|
||||
// coexist on the same callee (e.g. `http.DefaultClient.Post(url, _,
|
||||
// body)` carries SSRF on arg 0 and DATA_EXFIL on arg 2). The
|
||||
// assertions below check the gate registration rather than the flat
|
||||
// classifier output.
|
||||
#[test]
|
||||
fn classify_go_http_default_client_get_is_ssrf_sink() {
|
||||
let result = classify("go", "http.DefaultClient.Get", None);
|
||||
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
|
||||
fn classify_go_http_default_client_get_is_ssrf_gate() {
|
||||
let no_kw = |_: &str| None;
|
||||
let no_kw_present = |_: &str| false;
|
||||
let result = classify_gated_sink(
|
||||
"go",
|
||||
"http.DefaultClient.Get",
|
||||
|_| None,
|
||||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert!(
|
||||
result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
|
||||
"expected SSRF gate match, got {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_go_http_default_client_post_is_ssrf_sink() {
|
||||
let result = classify("go", "http.DefaultClient.Post", None);
|
||||
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
|
||||
fn classify_go_http_default_client_post_is_ssrf_and_data_exfil_gate() {
|
||||
let no_kw = |_: &str| None;
|
||||
let no_kw_present = |_: &str| false;
|
||||
let result = classify_gated_sink(
|
||||
"go",
|
||||
"http.DefaultClient.Post",
|
||||
|_| None,
|
||||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert!(
|
||||
result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
|
||||
"expected SSRF gate match, got {result:?}"
|
||||
);
|
||||
assert!(
|
||||
result
|
||||
.iter()
|
||||
.any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
|
||||
"expected DATA_EXFIL gate match, got {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_go_http_default_client_do_is_ssrf_sink() {
|
||||
let result = classify("go", "http.DefaultClient.Do", None);
|
||||
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
|
||||
fn classify_go_http_default_client_do_is_data_exfil_gate() {
|
||||
let no_kw = |_: &str| None;
|
||||
let no_kw_present = |_: &str| false;
|
||||
let result = classify_gated_sink(
|
||||
"go",
|
||||
"http.DefaultClient.Do",
|
||||
|_| None,
|
||||
no_kw,
|
||||
no_kw_present,
|
||||
);
|
||||
assert!(
|
||||
result
|
||||
.iter()
|
||||
.any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
|
||||
"expected DATA_EXFIL gate match, got {result:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig, RuntimeLabelRule};
|
||||
use crate::labels::{
|
||||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||||
};
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
|
|
@ -138,8 +140,67 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Body-bearing outbound HTTP verb methods on the major PHP HTTP clients.
|
||||
// Flat sinks here compose with the SSRF rule on `curl_exec` /
|
||||
// `file_get_contents` via multi-label classification. The
|
||||
// source-sensitivity gate in `effective_sink_caps` strips DATA_EXFIL
|
||||
// when the contributing source is `Plain` (`$_GET`, `$_POST`, `$_REQUEST`),
|
||||
// so this only fires for sensitive sources (cookies / sessions /
|
||||
// server-side state / env / file / db reads).
|
||||
//
|
||||
// Covered clients:
|
||||
// * `Guzzle\Client::post/put/patch` — guzzlehttp/guzzle
|
||||
// matched by suffix on the verb method (chained `$client->post(...)`).
|
||||
// * `Symfony\HttpClient::request` — symfony/http-client
|
||||
// request($method, $url, ['body' => $payload, 'json' => $data, ...])
|
||||
// * `Http::post` — Laravel HTTP facade (over Guzzle)
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Client.post",
|
||||
"Client.put",
|
||||
"Client.patch",
|
||||
"Client.request",
|
||||
"HttpClient.post",
|
||||
"HttpClient.put",
|
||||
"HttpClient.patch",
|
||||
"HttpClient.request",
|
||||
"Http.post",
|
||||
"Http.put",
|
||||
"Http.patch",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for PHP. Currently a single gate, for `curl_setopt`.
|
||||
///
|
||||
/// `curl_setopt($ch, CURLOPT_POSTFIELDS, $payload)` is the canonical
|
||||
/// non-OO PHP HTTP-egress payload binding. The activation arg (index 1) is
|
||||
/// a `define`d constant: `CURLOPT_POSTFIELDS` (and the byref-copying variant
|
||||
/// `CURLOPT_COPYPOSTFIELDS`) carry the request body, while other CURLOPT_*
|
||||
/// constants designate URL / auth / TLS / behaviour, none of which is
|
||||
/// DATA_EXFIL-relevant. Gating on the constant identifier keeps the rule
|
||||
/// from over-firing on `curl_setopt($ch, CURLOPT_URL, $url)` (covered
|
||||
/// elsewhere by the `curl_exec` SSRF flat sink).
|
||||
///
|
||||
/// Identifier-based activation is enabled via the macro-arg fallback in
|
||||
/// `cfg::mod::classify_gated_sink` for `lang == "php"`.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
|
||||
callee_matcher: "curl_setopt",
|
||||
// Index of the CURLOPT_* option constant in
// `curl_setopt($ch, option, value)`.
arg_index: 1,
|
||||
// Only these option constants activate the gate (matched by identifier,
// see the doc comment above).
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
// Index of the option value (`$payload` in the example above); presumably
// the only argument checked for taint once the gate activates.
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
}];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
"if_statement" => Kind::If,
|
||||
|
|
|
|||
|
|
@ -44,6 +44,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Session stores: session cookies / DRF / Django auth carry auth material
|
||||
// the operator did not intend to leak. `infer_source_kind` maps `session`
|
||||
// callees to `SourceKind::Cookie` (Sensitive) so flowing into an outbound
|
||||
// request payload fires `DATA_EXFIL`. Case-sensitive: lowercase `session`
|
||||
// here is the Flask global / Django request attribute; the capitalised
|
||||
// `requests.Session` constructor is a client object, not a source, and
|
||||
// must not be tagged.
|
||||
//
|
||||
// The matchers cover both attribute access (`request.session.user_id`,
|
||||
// resolved as the attribute text) and the bare `session.<method>`
|
||||
// pattern that follows `from flask import session`. The `=session`
|
||||
// exact-match form fires only when the call is the bare top-level
|
||||
// `session(...)` so accidental field projections like
|
||||
// `obj.client.session` (Phase 2 chained-receiver lowering) don't get
|
||||
// mis-labelled as sources.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"request.session",
|
||||
"flask_request.session",
|
||||
"flask.session",
|
||||
"django.contrib.sessions",
|
||||
"=session",
|
||||
"session.get",
|
||||
"session.pop",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// Django-specific sources (case-sensitive to avoid request.get() dict method FP)
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
|
|
@ -208,58 +236,25 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::FILE_IO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Outbound HTTP — flat SSRF sinks for read-shaped methods (GET / HEAD)
|
||||
// that don't carry a body. Body-bearing methods (POST / PUT / PATCH /
|
||||
// DELETE / request) are modelled via destination-aware gates in
|
||||
// GATED_SINKS so SSRF activation can be narrowed to the URL position
|
||||
// and the cross-boundary `DATA_EXFIL` cap can attach to body kwargs as
|
||||
// a separate gate. `urllib.request.urlopen` stays flat: its argument
|
||||
// is a Request object whose payload-vs-URL split happens at
|
||||
// `urllib.request.Request` construction (gated below).
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"urllib.request.urlopen",
|
||||
"requests.get",
|
||||
"requests.post",
|
||||
"requests.put",
|
||||
"requests.delete",
|
||||
"requests.patch",
|
||||
"requests.head",
|
||||
"requests.request",
|
||||
"httpx.get",
|
||||
"httpx.post",
|
||||
"httpx.put",
|
||||
"httpx.delete",
|
||||
"httpx.patch",
|
||||
"httpx.head",
|
||||
"httpx.request",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// aiohttp HTTP client, SSRF sinks
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"aiohttp.get",
|
||||
"aiohttp.post",
|
||||
"aiohttp.put",
|
||||
"aiohttp.delete",
|
||||
"aiohttp.request",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Type-qualified SSRF sinks: when the receiver is tracked as
|
||||
// TypeKind::HttpClient (e.g. `client = requests.Session()`,
|
||||
// `client = httpx.Client()`, or `s = aiohttp.ClientSession()`),
|
||||
// resolve_type_qualified_labels() constructs `"HttpClient.<method>"`
|
||||
// call texts so the receiver-name is no longer load-bearing. Matches
|
||||
// the existing Rust HttpClient.<method> sink set so both languages
|
||||
// stay in step on the type-aware SSRF model. Motivated by the
|
||||
// upstream LMDeploy CVE-2026-33626 shape:
|
||||
// client = requests.Session()
|
||||
// response = client.get(url, ...)
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"aiohttp.head",
|
||||
"HttpClient.get",
|
||||
"HttpClient.post",
|
||||
"HttpClient.put",
|
||||
"HttpClient.delete",
|
||||
"HttpClient.patch",
|
||||
"HttpClient.head",
|
||||
"HttpClient.request",
|
||||
"HttpClient.send",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
|
|
@ -332,6 +327,687 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
dangerous_kwargs: &[("shell", &["True", "true"])],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// ── Outbound HTTP clients (SSRF + cross-boundary data exfiltration) ───
|
||||
//
|
||||
// Body-bearing methods (POST / PUT / PATCH / DELETE / request) are
|
||||
// gated by destination so that:
|
||||
// * SSRF fires only when taint reaches the URL position (arg 0).
|
||||
// * `DATA_EXFIL` fires only when taint reaches a body kwarg (`data` /
|
||||
// `json` / `files` for requests / aiohttp; `content` / `data` /
|
||||
// `json` / `files` for httpx).
|
||||
// The pair lets a single `requests.post(taintedUrl, data=secret)` call
|
||||
// report SSRF on the URL flow and DATA_EXFIL on the body flow as
|
||||
// independent findings rather than a conflated combined cap.
|
||||
//
|
||||
// CFG-level kwarg-aware extraction (see `extract_destination_kwarg_pairs`)
|
||||
// walks `keyword_argument` siblings and routes matching idents into the
|
||||
// gate's `destination_uses` so the SSA sink scan only fires when the
|
||||
// body kwarg itself is tainted.
|
||||
//
|
||||
// The source-sensitivity gate in `ast.rs` strips DATA_EXFIL when the
|
||||
// contributing source is `Sensitivity::Plain` (raw `request.args`,
|
||||
// `request.form`), so plain user input forwarded to a POST body does
|
||||
// not surface — only sensitive sources (cookies, sessions, env, headers)
|
||||
// produce a DATA_EXFIL finding.
|
||||
SinkGate {
|
||||
callee_matcher: "requests.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.patch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.patch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.delete",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.delete",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// requests.request(method, url, ...) — note the URL is at arg 1, not
|
||||
// arg 0; method is at arg 0. Body kwargs at arg 2+ via kwarg expansion.
|
||||
SinkGate {
|
||||
callee_matcher: "requests.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// httpx — `content` is httpx's raw-bytes body kwarg; `data` covers
|
||||
// form-encoded; `json` covers JSON-encoded; `files` covers multipart.
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.patch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.patch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.delete",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.delete",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// httpx.request(method, url, ...) — same shape as requests.request.
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// Type-qualified variants: `requests.Session()`, `httpx.Client()`,
|
||||
// `httpx.AsyncClient()`, `aiohttp.ClientSession()` instances all resolve
|
||||
// to the synthetic `HttpClient.<method>` callee text via
|
||||
// `resolve_type_qualified_labels`. Covering both module-level and
|
||||
// type-qualified forms ensures `s = requests.Session(); s.post(url, data=x)`
|
||||
// and `client = httpx.AsyncClient(); await client.post(url, json=x)` both
|
||||
// fire SSRF on the URL and DATA_EXFIL on the body kwarg.
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.patch",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.patch",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.delete",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.delete",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "HttpClient.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// aiohttp module-level (`aiohttp.post`, `aiohttp.put`, etc.) — uncommon
|
||||
// in real code (idiomatic usage is `async with aiohttp.ClientSession()`),
|
||||
// covered for completeness. ClientSession.<method> dispatches via the
|
||||
// type-qualified `HttpClient.<method>` gates above.
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.put",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.put",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.request",
|
||||
arg_index: 2,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
// Chained-construction variants: `httpx.AsyncClient().post(url, json=x)`
|
||||
// / `httpx.Client().post(url, ...)` / `aiohttp.ClientSession().post(...)`.
|
||||
// Chain-normalisation strips `()` between dots so the callee text
|
||||
// becomes `httpx.AsyncClient.post`; gate matching applies to that
|
||||
// normalised form so the chained shape is covered without binding to
|
||||
// an intermediate variable.
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.AsyncClient.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.AsyncClient.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.Client.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "httpx.Client.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["content", "data", "json", "files"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.ClientSession.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "aiohttp.ClientSession.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json"],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.Session.post",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "requests.Session.post",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data", "json", "files"],
|
||||
},
|
||||
},
|
||||
// urllib.request.urlopen(req) — when req is a `urllib.request.Request`
|
||||
// built with the `data` kwarg, that kwarg becomes the POST body. The
|
||||
// gate fires on `Request(url, data=tainted)` directly: the constructor
|
||||
// does not egress, but the convention is that wrapping data in a Request
|
||||
// means egress is imminent (the urllib.request.Request → urlopen path).
|
||||
// This is a heuristic — the real egress happens at urlopen, but tracking
|
||||
// the data flow through the constructor is a fair static approximation.
|
||||
SinkGate {
|
||||
callee_matcher: "urllib.request.Request",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &["data"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,16 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Sensitive request state: cookies and session stores carry auth material
|
||||
// / CSRF tokens / signed user ids the operator did not intend to leak.
|
||||
// `infer_source_kind` routes substrings containing "cookie" or "session"
|
||||
// through `SourceKind::Cookie` (Sensitive), so flow into outbound request
|
||||
// payloads activates the `DATA_EXFIL` cap added below.
|
||||
LabelRule {
|
||||
matchers: &["request.cookies", "request.session", "cookies", "session"],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["CGI.escapeHTML", "ERB::Util.html_escape"],
|
||||
|
|
@ -135,6 +145,55 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ── Cross-boundary data exfiltration ──────────────────────────────────
|
||||
//
|
||||
// Body-bearing outbound HTTP verb methods. A flat Sink(DATA_EXFIL) here
|
||||
// composes with the SSRF rule above via multi-label classification:
|
||||
// `Net::HTTP.post(uri, payload)` reports SSRF on the URL flow (arg 0)
|
||||
// and DATA_EXFIL on the body flow (arg 1+) as separate findings. The
|
||||
// source-sensitivity gate in `effective_sink_caps` strips DATA_EXFIL
|
||||
// when the contributing source is `Plain` (raw `params`), so this only
|
||||
// fires for sensitive sources (cookies / session / env / headers /
|
||||
// file / db reads).
|
||||
//
|
||||
// Covered clients:
|
||||
// * `Net::HTTP.post(uri, data, headers)` — stdlib
|
||||
// * `Net::HTTP::Post.new(path)` body= setter — emitted as
|
||||
// `Net::HTTP::Post.body=` after Ruby setter normalisation; flat rule
|
||||
// ensures any tainted assignment to `.body` smears into the request
|
||||
// * `RestClient.post(url, payload, headers)` — rest-client gem
|
||||
// * `Faraday.post(url, body, headers)` — faraday
|
||||
// * `HTTParty.post(url, body: ..., headers: ...)` — already a Sink(SSRF)
|
||||
// above, DATA_EXFIL adds independently
|
||||
// * `Typhoeus.post(url, body: ...)` — typhoeus
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"Net::HTTP.post",
|
||||
"RestClient.post",
|
||||
"RestClient.put",
|
||||
"RestClient.patch",
|
||||
"Faraday.post",
|
||||
"Faraday.put",
|
||||
"Faraday.patch",
|
||||
"HTTParty.post",
|
||||
"HTTParty.put",
|
||||
"HTTParty.patch",
|
||||
"Typhoeus.post",
|
||||
"Typhoeus.put",
|
||||
"Typhoeus.patch",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Generic outbound-method suffix matchers for chained / typed receivers
|
||||
// (e.g. `client.post(payload)` where `client` is a configured Faraday or
|
||||
// RestClient instance). Suffix-match keeps the rule compact; source
|
||||
// sensitivity gates noise from plain user input.
|
||||
LabelRule {
|
||||
matchers: &["HttpClient.post", "HttpClient.put", "HttpClient.patch"],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["Marshal.load", "Marshal.restore", "YAML.load"],
|
||||
label: DataLabel::Sink(Cap::DESERIALIZE),
|
||||
|
|
|
|||
|
|
@ -19,6 +19,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Inbound HTTP request metadata: headers, cookies, query strings,
|
||||
// and body extractors. These only carry caller-supplied bytes when
|
||||
// the framework binds them (the framework-conditional rules attach
|
||||
// the same labels for axum / actix / rocket extractors). Including
|
||||
// the bare suffix matchers here means a `req.headers().get("h")`
|
||||
// chain in non-framework code (e.g. internal helpers that take an
|
||||
// `&HeaderMap`) still surfaces as a Source. `infer_source_kind`
|
||||
// routes these to `Header` / `Cookie` (Sensitive), enabling
|
||||
// DATA_EXFIL gating downstream.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
// Type-qualified (receiver typed as HttpRequest, HeaderMap, ...)
|
||||
"HttpRequest.headers",
|
||||
"HttpRequest.cookie",
|
||||
"HttpRequest.cookies",
|
||||
"Request.headers",
|
||||
"Request.cookies",
|
||||
"Request.uri",
|
||||
// Bare HeaderMap / cookie-jar accessors.
|
||||
"headers.get",
|
||||
"headers.get_all",
|
||||
"CookieJar.get",
|
||||
"CookieJar.get_private",
|
||||
"CookieJar.get_signed",
|
||||
],
|
||||
label: DataLabel::Source(Cap::all()),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ───────── Sanitizers ──────────
|
||||
LabelRule {
|
||||
matchers: &["html_escape::encode_safe", "sanitize_", "sanitize_html"],
|
||||
|
|
@ -75,6 +103,34 @@ pub static RULES: &[LabelRule] = &[
|
|||
"reqwest::Client.head",
|
||||
"reqwest::Client.patch",
|
||||
"reqwest::Client.request",
|
||||
// Chained constructor + verb form: `reqwest::Client::new()
|
||||
// .post(url)` reduces (via root-receiver collapse) to chain
|
||||
// text `Client::new.post`, so existing `Client.post` matchers
|
||||
// miss it. Cover the chained shape directly.
|
||||
"Client::new.get",
|
||||
"Client::new.post",
|
||||
"Client::new.put",
|
||||
"Client::new.delete",
|
||||
"Client::new.head",
|
||||
"Client::new.patch",
|
||||
"Client::new.request",
|
||||
// surf free verbs are themselves SSRF gates; the URL is
|
||||
// their first positional argument.
|
||||
"surf::get",
|
||||
"surf::post",
|
||||
"surf::put",
|
||||
"surf::delete",
|
||||
"surf::head",
|
||||
"surf::patch",
|
||||
"surf::connect",
|
||||
"surf::trace",
|
||||
// ureq free verbs are HTTP request initiators.
|
||||
"ureq::get",
|
||||
"ureq::post",
|
||||
"ureq::put",
|
||||
"ureq::delete",
|
||||
"ureq::patch",
|
||||
"ureq::head",
|
||||
// Type-qualified (receiver typed as HttpClient)
|
||||
"HttpClient.get",
|
||||
"HttpClient.post",
|
||||
|
|
@ -89,6 +145,68 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Cross-boundary data exfiltration sinks. Outbound HTTP egress where
|
||||
// a Sensitive source (env, header, cookie, file, db) reaching the
|
||||
// request body / payload is a leak distinct from SSRF. Plain user
|
||||
// input is silenced by the source-sensitivity gate, so these only
|
||||
// fire when the source carries operator-bound state.
|
||||
//
|
||||
// Body-binding methods on the request builder: `body`, `json`, `form`,
|
||||
// `multipart` (reqwest); `body_string`, `body_json`, `body_bytes`
|
||||
// (surf); `send_string`, `send_json`, `send_form` (ureq, which
|
||||
// combines body-bind and dispatch). Plus `.send()` on an HttpClient
|
||||
// / RequestBuilder, where the chain receiver is typed. Chain text
|
||||
// matchers like `body.send` cover the all-in-one form
|
||||
// `Client::post(url).body(payload).send()`.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
// Type-qualified terminal verbs (split form, typed receiver).
|
||||
"HttpClient.send",
|
||||
"HttpClient.execute",
|
||||
"RequestBuilder.send",
|
||||
// Type-qualified body-bind methods on a typed RequestBuilder.
|
||||
"RequestBuilder.body",
|
||||
"RequestBuilder.json",
|
||||
"RequestBuilder.form",
|
||||
"RequestBuilder.multipart",
|
||||
"RequestBuilder.body_string",
|
||||
"RequestBuilder.body_json",
|
||||
"RequestBuilder.body_bytes",
|
||||
"RequestBuilder.send_string",
|
||||
"RequestBuilder.send_json",
|
||||
"RequestBuilder.send_form",
|
||||
// surf / ureq method names that are unambiguous in Rust;
|
||||
// they only appear on HTTP request builders, so a bare-name
|
||||
// suffix matcher is safe.
|
||||
"body_string",
|
||||
"body_json",
|
||||
"body_bytes",
|
||||
"send_string",
|
||||
"send_json",
|
||||
"send_form",
|
||||
// Reqwest chain shapes. After paren-group strip the chain
|
||||
// text becomes `Client::post.body.send`, so the body-bind
|
||||
// verb sits before `.send` and a `body.send` suffix matcher
|
||||
// pins exfil-only firing to chains that actually bind a body.
|
||||
"body.send",
|
||||
"json.send",
|
||||
"form.send",
|
||||
"multipart.send",
|
||||
// hyper Request::builder().method(...).body(payload): the
|
||||
// body-bind step is the leak point. `.unwrap` is a common
|
||||
// trailing identity method; we cover both shapes.
|
||||
"Request::builder.body",
|
||||
"Request::builder.method.body",
|
||||
"Request::builder.method.body.unwrap",
|
||||
"Request::builder.body.unwrap",
|
||||
// Two-step reqwest where the user has a dedicated `Client`
|
||||
// variable and uses `.execute(req)` on it.
|
||||
"Client::new.send",
|
||||
"Client::new.execute",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"rusqlite::Connection.execute",
|
||||
|
|
|
|||
|
|
@ -92,6 +92,22 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional forwarding wrappers, telemetry / analytics / metrics dispatch.
|
||||
// See javascript.rs for rationale; mirrored here so TypeScript projects pick
|
||||
// up the same convention. Override per-project via
|
||||
// [analysis.languages.typescript] custom rules.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"serializeForUpstream",
|
||||
"forwardPayload",
|
||||
"tracker.send",
|
||||
"analytics.track",
|
||||
"metrics.report",
|
||||
"logEvent",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::DATA_EXFIL),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Conventional project-local HTML escapers. Suffix word-boundary match
|
||||
// fires on bare calls to locally defined helpers (`function escapeHtml(x)`
|
||||
// invoked as `escapeHtml(x)`) across codebases that follow the common
|
||||
|
|
@ -113,18 +129,21 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// Shell-exec sinks. Qualified `child_process.*` and bare forms are both
|
||||
// flat sinks; receiver-name collisions are handled via EXCLUDES; the
|
||||
// `=*` gates in `GATED_SINKS` below restrict checked args to arg 0
|
||||
// (command string) so `execSync(cmd, { env: process.env })` no longer
|
||||
// flags `process.env` flowing into the options object. See
|
||||
// javascript.rs for full rationale.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"child_process.exec",
|
||||
"child_process.execSync",
|
||||
"child_process.spawn",
|
||||
"child_process.execFile",
|
||||
// Bare forms from destructured imports:
|
||||
// const { exec, execSync } = require('child_process')
|
||||
"exec",
|
||||
"execSync",
|
||||
"execFile",
|
||||
// Common promisified wrappers around child_process.exec
|
||||
"execAsync",
|
||||
"execPromise",
|
||||
],
|
||||
|
|
@ -227,16 +246,12 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ORM / query builder raw-SQL entry points
|
||||
// ORM / query builder raw-SQL entry points. `$queryRawUnsafe` /
|
||||
// `$executeRawUnsafe` are gated below — only arg 0 (the SQL template) is
|
||||
// the injection vector; positional bind params are bound as `$1..$N`.
|
||||
// See javascript.rs for the full rationale.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"sequelize.query",
|
||||
"knex.raw",
|
||||
"$queryRaw",
|
||||
"$queryRawUnsafe",
|
||||
"$executeRaw",
|
||||
"$executeRawUnsafe",
|
||||
],
|
||||
matchers: &["sequelize.query", "knex.raw", "$queryRaw", "$executeRaw"],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
},
|
||||
|
|
@ -264,6 +279,9 @@ pub static EXCLUDES: &[&str] = &[
|
|||
"req.app",
|
||||
"req.route",
|
||||
"req.next",
|
||||
// Dockerode container API — see javascript.rs EXCLUDES for rationale.
|
||||
"container.exec",
|
||||
"exec.start",
|
||||
];
|
||||
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
|
|
@ -478,6 +496,113 @@ pub static GATED_SINKS: &[SinkGate] = &[
|
|||
object_destination_fields: &["body", "headers", "json"],
|
||||
},
|
||||
},
|
||||
// ── Shell-exec sinks (SHELL_ESCAPE) ──────────────────────────────────
|
||||
// See javascript.rs for the rationale. Only arg 0 (command string)
|
||||
// carries the shell-injection payload; bare forms use `=` exact-only
|
||||
// matching so they don't collide with any `<receiver>.exec` method.
|
||||
// Qualified `child_process.*` forms stay as flat sinks; gates only fire
|
||||
// when no flat sink classifies the call, so the bare destructured-import
|
||||
// forms below are the only place where shell-exec needs gating.
|
||||
SinkGate {
|
||||
callee_matcher: "=exec",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execSync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execFile",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execAsync",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "=execPromise",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// ── Prisma raw-SQL with positional bind params (SQL_QUERY) ───────────
|
||||
// See javascript.rs for rationale.
|
||||
SinkGate {
|
||||
callee_matcher: "$queryRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "$executeRawUnsafe",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SQL_QUERY),
|
||||
case_sensitive: true,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
|
|
|
|||
|
|
@ -207,6 +207,18 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
|
|||
props.insert("confidence".into(), json!(conf.to_string()));
|
||||
}
|
||||
|
||||
// `DATA_EXFIL` findings carry the destination object-literal
|
||||
// field the leak reached (`body` / `headers` / `json`); surface
|
||||
// it so SARIF consumers can pivot per-destination without
|
||||
// reparsing the message.
|
||||
if let Some(field) = d
|
||||
.evidence
|
||||
.as_ref()
|
||||
.and_then(|ev| ev.data_exfil_field.as_deref())
|
||||
{
|
||||
props.insert("data_exfil_field".into(), json!(field));
|
||||
}
|
||||
|
||||
// Alternative-path cross-references. When the dedup pass
|
||||
// at `taint::analyse_file` preserves both a validated and
|
||||
// an unvalidated flow for the same `(body, sink, source)`,
|
||||
|
|
|
|||
|
|
@ -666,6 +666,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: self.field_interner,
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -880,6 +882,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: FieldInterner::new(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let facts = analyse_body(&body, body_id());
|
||||
assert!(facts.is_trivial());
|
||||
|
|
|
|||
11
src/rank.rs
11
src/rank.rs
|
|
@ -206,7 +206,16 @@ pub fn rank_diags(diags: &mut [Diag]) {
|
|||
|
||||
/// Bonus based on analysis kind inferred from rule ID + evidence.
|
||||
fn analysis_kind_bonus(rule_id: &str, evidence: Option<&Evidence>) -> f64 {
|
||||
if rule_id.starts_with("taint-") {
|
||||
if rule_id.starts_with("taint-data-exfiltration") {
|
||||
// DATA_EXFIL ranks below SSRF / SQLi / CMDi: the leak class is
|
||||
// a softer signal than direct payload-driven exploitation, so
|
||||
// the taint-class bonus is trimmed (-3) to seat data-exfil
|
||||
// findings between general taint flows and AST/CFG patterns.
|
||||
// The source-kind bonus (`evidence_strength`) already separates
|
||||
// cookie / env / header from less attacker-relevant origins,
|
||||
// so this bonus is the only ranking discount applied.
|
||||
7.0
|
||||
} else if rule_id.starts_with("taint-") {
|
||||
// Taint-confirmed flow is the strongest signal
|
||||
10.0
|
||||
} else if rule_id.starts_with("state-") {
|
||||
|
|
|
|||
|
|
@ -1179,6 +1179,7 @@ fn type_kind_tag(k: &TypeKind) -> String {
|
|||
TypeKind::Url => "Url".into(),
|
||||
TypeKind::HttpClient => "HttpClient".into(),
|
||||
TypeKind::LocalCollection => "LocalCollection".into(),
|
||||
TypeKind::RequestBuilder => "RequestBuilder".into(),
|
||||
TypeKind::Dto(_) => "Dto".into(),
|
||||
}
|
||||
}
|
||||
|
|
@ -1872,6 +1873,7 @@ function consume() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2026,6 +2028,8 @@ async function recentAuditLogs() {
|
|||
exception_edges: vec![],
|
||||
field_interner,
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let facts = analyse_body(&body, BodyId(0));
|
||||
|
|
|
|||
|
|
@ -104,6 +104,14 @@ pub fn issue_categories(
|
|||
}
|
||||
|
||||
fn issue_category_label(rule_id: &str) -> &'static str {
|
||||
// `taint-data-exfiltration` and the legacy `taint-unsanitised-flow`
|
||||
// share the `taint` family token, but the exfil class targets a
|
||||
// different threat (sensitive data leaving the trust boundary, not
|
||||
// attacker payload entering it). Surface it as its own bucket so the
|
||||
// dashboard category badge matches the rule semantics.
|
||||
if rule_id.starts_with("taint-data-exfiltration") {
|
||||
return "Data Exfiltration";
|
||||
}
|
||||
match extract_family(rule_id) {
|
||||
"sqli" => "SQL Injection",
|
||||
"xss" => "Cross-Site Scripting",
|
||||
|
|
@ -221,6 +229,26 @@ mod tests {
|
|||
assert_eq!(out[2].count, 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn issue_category_label_routes_data_exfil_to_dedicated_bucket() {
|
||||
// `taint-data-exfiltration` shares the `taint` family token with
|
||||
// `taint-unsanitised-flow`, but exfil findings need their own
|
||||
// dashboard badge so analysts can pivot on the leak class.
|
||||
assert_eq!(
|
||||
issue_category_label("taint-data-exfiltration"),
|
||||
"Data Exfiltration"
|
||||
);
|
||||
assert_eq!(
|
||||
issue_category_label("taint-data-exfiltration (source 1:1)"),
|
||||
"Data Exfiltration"
|
||||
);
|
||||
// Generic taint findings stay in the broader bucket.
|
||||
assert_eq!(
|
||||
issue_category_label("taint-unsanitised-flow"),
|
||||
"Tainted Flow"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn issue_category_label_recognises_simple_families() {
|
||||
assert_eq!(
|
||||
|
|
|
|||
|
|
@ -445,6 +445,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)],
|
||||
)
|
||||
|
|
@ -516,6 +517,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
},
|
||||
false,
|
||||
false,
|
||||
|
|
@ -538,6 +541,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
},
|
||||
true,
|
||||
true,
|
||||
|
|
@ -560,6 +565,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
},
|
||||
true,
|
||||
false,
|
||||
|
|
@ -656,6 +663,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
)],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -217,6 +217,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -638,6 +638,8 @@ mod tests {
|
|||
exception_edges: Vec::new(),
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -215,6 +215,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let (eliminated, copy_map) = copy_propagate(&mut body, &cfg);
|
||||
|
|
@ -296,6 +298,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let (eliminated, copy_map) = copy_propagate(&mut body, &cfg);
|
||||
|
|
@ -366,6 +370,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
(cfg, body)
|
||||
}
|
||||
|
|
@ -488,6 +494,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, _map) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 0, "two-operand Assign is not a copy");
|
||||
|
|
@ -567,6 +575,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, _) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 1, "v1 should be eliminated");
|
||||
|
|
@ -664,6 +674,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, _map) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 1);
|
||||
|
|
@ -712,6 +724,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let (eliminated, map) = copy_propagate(&mut body, &cfg);
|
||||
assert_eq!(eliminated, 0);
|
||||
|
|
|
|||
|
|
@ -217,6 +217,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -265,6 +267,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -314,6 +318,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -359,6 +365,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -396,6 +404,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -460,6 +470,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: interner,
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -527,6 +539,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: interner,
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -587,6 +601,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -637,6 +653,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -724,6 +742,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
@ -801,6 +821,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let removed = eliminate_dead_defs(&mut body, &cfg);
|
||||
|
|
|
|||
|
|
@ -788,6 +788,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
@ -835,6 +837,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
@ -885,6 +889,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
@ -913,6 +919,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let errs = check_structural_invariants(&body);
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use crate::ssa::type_facts::TypeKind;
|
|||
use petgraph::graph::NodeIndex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
/// Unique identifier for an SSA value (one per definition point).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
|
|
@ -353,6 +353,26 @@ pub struct SsaBody {
|
|||
/// cleanly with an empty map (no migration needed).
|
||||
#[serde(default)]
|
||||
pub field_writes: HashMap<SsaValue, (SsaValue, FieldId)>,
|
||||
/// SSA values that lowering injected for **free / closure-captured**
|
||||
/// variables (variables referenced by the body but not declared as
|
||||
/// formal parameters and not assigned within the body).
|
||||
///
|
||||
/// Lowering models every external use as an [`SsaOp::Param`] in block
|
||||
/// 0 so the rename pass can reference it. Real formal parameters and
|
||||
/// closure captures end up using the same op variant; this side-table
|
||||
/// distinguishes the two so downstream analyses (in particular the
|
||||
/// JS/TS handler-name auto-seed in
|
||||
/// [`crate::taint::ssa_transfer`]) can avoid treating closure
|
||||
/// captures as if they were the function's own parameters. Without
|
||||
/// this distinction, a lambda body that references an out-of-scope
|
||||
/// `userId` / `cmd` / `payload` would have the synthetic Param
|
||||
/// auto-seeded as `UserInput`, producing a phantom source on the
|
||||
/// enclosing function's declaration line.
|
||||
///
|
||||
/// `#[serde(default)]` for backward compatibility with summary blobs
|
||||
/// produced before this field existed.
|
||||
#[serde(default)]
|
||||
pub synthetic_externals: HashSet<SsaValue>,
|
||||
}
|
||||
|
||||
impl SsaBody {
|
||||
|
|
@ -560,6 +580,7 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: FieldInterner::new(),
|
||||
field_writes: HashMap::new(),
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
let fid = body.intern_field("mu");
|
||||
body.blocks[0].body.push(SsaInst {
|
||||
|
|
|
|||
|
|
@ -239,18 +239,25 @@ fn lower_to_ssa_inner(
|
|||
|
||||
// 6. Rename variables (dominator tree preorder walk)
|
||||
let dom_tree_children = build_dom_tree_children(num_blocks, &doms, &block_graph);
|
||||
let (mut ssa_blocks, mut value_defs, cfg_node_map, field_interner, field_writes) =
|
||||
rename_variables(
|
||||
cfg,
|
||||
&blocks_nodes,
|
||||
&block_succs,
|
||||
&block_preds,
|
||||
&phi_placements,
|
||||
&dom_tree_children,
|
||||
&filtered_edges,
|
||||
&external_vars,
|
||||
&nop_nodes,
|
||||
);
|
||||
let (
|
||||
mut ssa_blocks,
|
||||
mut value_defs,
|
||||
cfg_node_map,
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
) = rename_variables(
|
||||
cfg,
|
||||
&blocks_nodes,
|
||||
&block_succs,
|
||||
&block_preds,
|
||||
&phi_placements,
|
||||
&dom_tree_children,
|
||||
&filtered_edges,
|
||||
&external_vars,
|
||||
formal_params,
|
||||
&nop_nodes,
|
||||
);
|
||||
|
||||
// 6b. Fill any missing phi operands with a shared Undef sentinel so
|
||||
// every phi has exactly one operand per predecessor. See
|
||||
|
|
@ -306,6 +313,7 @@ fn lower_to_ssa_inner(
|
|||
exception_edges,
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
};
|
||||
|
||||
// 9. Catch-block reachability invariant.
|
||||
|
|
@ -927,6 +935,7 @@ fn rename_variables(
|
|||
dom_tree_children: &[Vec<usize>],
|
||||
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
||||
external_vars: &[String],
|
||||
formal_params: &[String],
|
||||
nop_nodes: &HashSet<NodeIndex>,
|
||||
) -> (
|
||||
Vec<SsaBlock>,
|
||||
|
|
@ -934,6 +943,7 @@ fn rename_variables(
|
|||
HashMap<NodeIndex, SsaValue>,
|
||||
crate::ssa::ir::FieldInterner,
|
||||
HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
|
||||
HashSet<SsaValue>,
|
||||
) {
|
||||
let num_blocks = blocks_nodes.len();
|
||||
let mut next_value: u32 = 0;
|
||||
|
|
@ -1679,6 +1689,27 @@ fn rename_variables(
|
|||
// Inject synthetic Param instructions at START of block 0 for external variables.
|
||||
// These create SSA definitions so the rename pass can reference them.
|
||||
// Pre-seed var_stacks so process_block sees them.
|
||||
//
|
||||
// `external_vars` contains both real formal parameters and free / closure-
|
||||
// captured variables (variables read by the body but not declared as a
|
||||
// formal and not assigned anywhere). Both end up emitted as
|
||||
// [`SsaOp::Param`] in block 0; we record the SSA values that correspond
|
||||
// to free vars in `synthetic_externals` so downstream analyses (the JS/TS
|
||||
// handler-name auto-seed in particular) can avoid treating closure
|
||||
// captures as if they were parameters of the function under analysis.
|
||||
//
|
||||
// **Conservative behaviour when `formal_params` is empty.** Several
|
||||
// call sites (`lower_to_ssa`, `lower_to_ssa_scoped_nop`) don't supply
|
||||
// formal parameter names; in that case we cannot distinguish formals
|
||||
// from free vars structurally, so we leave `synthetic_externals` empty
|
||||
// and the auto-seed pass keeps its pre-fix behaviour of treating every
|
||||
// `Param` op as a candidate. Only callers that pass a non-empty
|
||||
// `formal_params` slice (`lower_to_ssa_with_params`, used by the
|
||||
// findings pipeline's per-function lowering) opt into the
|
||||
// closure-capture distinction.
|
||||
let mut synthetic_externals: HashSet<SsaValue> = HashSet::new();
|
||||
let formal_set: HashSet<&str> = formal_params.iter().map(|s| s.as_str()).collect();
|
||||
let track_synthetic = !formal_params.is_empty();
|
||||
if !external_vars.is_empty() {
|
||||
let entry_cfg_node = blocks_nodes[0][0];
|
||||
let mut synthetic_body = Vec::with_capacity(external_vars.len());
|
||||
|
|
@ -1691,7 +1722,8 @@ fn rename_variables(
|
|||
cfg_node: entry_cfg_node,
|
||||
block: BlockId(0),
|
||||
});
|
||||
let op = if is_receiver_name(var) {
|
||||
let is_receiver = is_receiver_name(var);
|
||||
let op = if is_receiver {
|
||||
SsaOp::SelfParam
|
||||
} else {
|
||||
let op = SsaOp::Param {
|
||||
|
|
@ -1700,6 +1732,28 @@ fn rename_variables(
|
|||
positional_idx += 1;
|
||||
op
|
||||
};
|
||||
// A non-receiver var is "synthetic" (a free / closure capture)
|
||||
// when it is *not* one of the function's declared formals AND
|
||||
// not a dotted access on a formal (`input.cmd` where `input` is
|
||||
// a formal — it represents a structural projection of the
|
||||
// formal, not a free variable; the auto-seed should still treat
|
||||
// it as part of the formal's own taint surface). Receivers are
|
||||
// intentionally excluded: `this` / `self` represent the implicit
|
||||
// receiver, which always belongs to the function.
|
||||
//
|
||||
// Only fire when the caller supplied formal-parameter names; see
|
||||
// the `track_synthetic` rationale above.
|
||||
let root_is_formal = var
|
||||
.split_once('.')
|
||||
.map(|(root, _)| formal_set.contains(root))
|
||||
.unwrap_or(false);
|
||||
if track_synthetic
|
||||
&& !is_receiver
|
||||
&& !formal_set.contains(var.as_str())
|
||||
&& !root_is_formal
|
||||
{
|
||||
synthetic_externals.insert(v);
|
||||
}
|
||||
synthetic_body.push(SsaInst {
|
||||
value: v,
|
||||
op,
|
||||
|
|
@ -1784,6 +1838,7 @@ fn rename_variables(
|
|||
cfg_node_map,
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -417,6 +417,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -440,6 +440,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let cfg: Cfg = Graph::new();
|
||||
let const_values = HashMap::new();
|
||||
|
|
|
|||
|
|
@ -25,6 +25,15 @@ pub enum TypeKind {
|
|||
FileHandle,
|
||||
Url,
|
||||
HttpClient,
|
||||
/// A pre-network HTTP request builder produced by `Client::post(url)`,
|
||||
/// `surf::post(url)`, `Request::builder()`, `ureq::post(url)`, etc.
|
||||
/// The body-bind methods (`body`, `json`, `form`, `multipart`,
|
||||
/// `body_string`, `body_json`, `body_bytes`) and terminal verbs
|
||||
/// (`send`, `send_string`, `send_json`, `send_form`) are sinks for
|
||||
/// `DATA_EXFIL` when receiver-typed. Distinct from `HttpClient` so
|
||||
/// type-qualified resolution can attach builder-only rules without
|
||||
/// over-firing on plain client objects.
|
||||
RequestBuilder,
|
||||
/// A local, in-memory collection (HashMap, HashSet, Vec, etc.).
|
||||
/// The auth sink gate uses this so calls like `map.insert(...)`
|
||||
/// are treated as bookkeeping rather than cross-tenant sinks. No
|
||||
|
|
@ -76,6 +85,7 @@ impl TypeKind {
|
|||
Self::DatabaseConnection => Some("DatabaseConnection"),
|
||||
Self::FileHandle => Some("FileHandle"),
|
||||
Self::Url => Some("URL"),
|
||||
Self::RequestBuilder => Some("RequestBuilder"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -180,9 +190,10 @@ impl TypeFactResult {
|
|||
///
|
||||
/// Suppression policy:
|
||||
/// * [`TypeKind::Int`] (and float, treated as numeric): suppresses
|
||||
/// `SQL_QUERY`, `FILE_IO`, `SHELL_ESCAPE`, `HTML_ESCAPE`, `SSRF` ,
|
||||
/// numeric values cannot carry the metacharacters required to drive
|
||||
/// any of these injection classes.
|
||||
/// `SQL_QUERY`, `FILE_IO`, `SHELL_ESCAPE`, `HTML_ESCAPE`, `SSRF`,
|
||||
/// `DATA_EXFIL` — numeric values cannot carry the metacharacters
|
||||
/// required to drive any of these injection classes, nor can they
|
||||
/// encode credentials/tokens that meaningfully constitute leakage.
|
||||
/// * [`TypeKind::Bool`]: suppresses every type-suppressible bit —
|
||||
/// `true`/`false` cannot carry a payload of any kind.
|
||||
pub fn is_type_safe_for_sink(
|
||||
|
|
@ -191,8 +202,12 @@ pub fn is_type_safe_for_sink(
|
|||
type_facts: &TypeFactResult,
|
||||
) -> bool {
|
||||
use crate::labels::Cap;
|
||||
let type_suppressible =
|
||||
Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE | Cap::HTML_ESCAPE | Cap::SSRF;
|
||||
let type_suppressible = Cap::SQL_QUERY
|
||||
| Cap::FILE_IO
|
||||
| Cap::SHELL_ESCAPE
|
||||
| Cap::HTML_ESCAPE
|
||||
| Cap::SSRF
|
||||
| Cap::DATA_EXFIL;
|
||||
if !sink_caps.intersects(type_suppressible) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -224,6 +239,13 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
"newHttpClient" | "newBuilder" if callee.contains("HttpClient") => {
|
||||
Some(TypeKind::HttpClient)
|
||||
}
|
||||
// Apache HttpClient idiomatic factory:
|
||||
// `CloseableHttpClient client = HttpClients.createDefault();`
|
||||
// `HttpClients` contains the substring `HttpClient` so this
|
||||
// doesn't widen to unrelated `createDefault` calls.
|
||||
"createDefault" | "custom" if callee.contains("HttpClient") => {
|
||||
Some(TypeKind::HttpClient)
|
||||
}
|
||||
"OkHttpClient" | "WebClient" | "RestTemplate" => Some(TypeKind::HttpClient),
|
||||
"getConnection" => Some(TypeKind::DatabaseConnection),
|
||||
"MongoClient" => Some(TypeKind::DatabaseConnection),
|
||||
|
|
@ -340,6 +362,10 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
// so the auth sink gate recognises
|
||||
// `let x = factory_fn(); x.insert(..)`.
|
||||
Some(TypeKind::LocalCollection)
|
||||
} else if is_rust_request_builder_constructor(base) {
|
||||
// HTTP request-builder constructors across reqwest, surf,
|
||||
// ureq, hyper. See [`is_rust_request_builder_constructor`].
|
||||
Some(TypeKind::RequestBuilder)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
@ -449,6 +475,54 @@ fn is_rust_local_collection_constructor(base: &str) -> bool {
|
|||
})
|
||||
}
|
||||
|
||||
/// Reports whether the peeled Rust callee `base` names a known HTTP
/// request-builder constructor / factory.
///
/// Matching is purely textual and looks only at the tail of `base`:
/// * surf free verbs: `surf::post`, `surf::get`, ... (see `SURF_VERBS`);
/// * ureq free verbs: `ureq::post`, ... (see `UREQ_VERBS`);
/// * hyper's `Request::builder`, with any path prefix, so
///   `hyper::Request::builder` is covered too;
/// * reqwest verb calls on a client, spelled either `Client::<verb>` or
///   `Client::new.<verb>` (the chain text left after CFG receiver
///   collapse).
///
/// `Client::new` on its own is deliberately not matched: it produces the
/// client, not a builder.
fn is_rust_request_builder_constructor(base: &str) -> bool {
    // surf free verbs that return a Request (which acts as a builder).
    const SURF_VERBS: &[&str] = &[
        "post", "get", "put", "delete", "patch", "head", "connect", "trace",
    ];
    // ureq free verbs that return a Request.
    const UREQ_VERBS: &[&str] = &["post", "get", "put", "delete", "patch", "head"];
    // reqwest verb-on-instance calls that return a RequestBuilder.
    const REQWEST_CLIENT_VERBS: &[&str] =
        &["post", "get", "put", "delete", "patch", "head", "request"];

    /// True when `base` ends with `{prefix}{verb}` for some `verb` in
    /// `verbs`. Peeling the verb off first and then checking the prefix
    /// avoids building a temporary `String` per candidate.
    fn ends_with_call(base: &str, prefix: &str, verbs: &[&str]) -> bool {
        verbs.iter().any(|verb| {
            base.strip_suffix(*verb)
                .is_some_and(|head| head.ends_with(prefix))
        })
    }

    ends_with_call(base, "surf::", SURF_VERBS)
        || ends_with_call(base, "ureq::", UREQ_VERBS)
        // A single suffix test also covers the fully-qualified
        // `hyper::Request::builder` spelling.
        || base.ends_with("Request::builder")
        || ends_with_call(base, "Client::new.", REQWEST_CLIENT_VERBS)
        || ends_with_call(base, "Client::", REQWEST_CLIENT_VERBS)
}
|
||||
|
||||
pub fn is_identity_method(callee: &str) -> bool {
|
||||
let suffix = callee.rsplit(['.', ':']).next().unwrap_or(callee);
|
||||
matches!(
|
||||
|
|
@ -1076,6 +1150,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let consts = HashMap::from([
|
||||
|
|
@ -1189,6 +1265,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let consts = HashMap::new();
|
||||
|
|
@ -1220,9 +1298,10 @@ mod tests {
|
|||
}
|
||||
|
||||
/// Int-typed values must suppress every type-suppressible
|
||||
/// cap, including the freshly-added `SSRF` bit. Numeric IDs
|
||||
/// cannot rewrite a URL host, cannot form path traversal sequences,
|
||||
/// cannot carry SQL/HTML/shell metacharacters.
|
||||
/// cap, including the freshly-added `SSRF` and `DATA_EXFIL` bits.
|
||||
/// Numeric IDs cannot rewrite a URL host, cannot form path
|
||||
/// traversal sequences, cannot carry SQL/HTML/shell metacharacters,
|
||||
/// and do not encode credentials worth exfiltrating.
|
||||
#[test]
|
||||
fn int_suppresses_every_type_suppressible_cap() {
|
||||
use crate::labels::Cap;
|
||||
|
|
@ -1236,6 +1315,7 @@ mod tests {
|
|||
Cap::SHELL_ESCAPE,
|
||||
Cap::HTML_ESCAPE,
|
||||
Cap::SSRF,
|
||||
Cap::DATA_EXFIL,
|
||||
] {
|
||||
assert!(
|
||||
is_type_safe_for_sink(&[SsaValue(0)], cap, &result),
|
||||
|
|
@ -1271,6 +1351,7 @@ mod tests {
|
|||
Cap::SHELL_ESCAPE,
|
||||
Cap::HTML_ESCAPE,
|
||||
Cap::SSRF,
|
||||
Cap::DATA_EXFIL,
|
||||
] {
|
||||
assert!(
|
||||
is_type_safe_for_sink(&[SsaValue(0)], cap, &result),
|
||||
|
|
@ -1307,14 +1388,14 @@ mod tests {
|
|||
/// `is_type_safe_for_sink` requires an intentional matrix edit + a
|
||||
/// test update. Truth values:
|
||||
///
|
||||
/// | TypeKind | SQL | FILE | SHELL | HTML | SSRF | CODE_EXEC | DESERIALIZE |
|
||||
/// |-----------|-----|------|-------|------|------|-----------|-------------|
|
||||
/// | Int | Y | Y | Y | Y | Y | N | N |
|
||||
/// | Bool | Y | Y | Y | Y | Y | N | N |
|
||||
/// | String | N | N | N | N | N | N | N |
|
||||
/// | Url | N | N | N | N | N | N | N |
|
||||
/// | Object | N | N | N | N | N | N | N |
|
||||
/// | Unknown | N | N | N | N | N | N | N |
|
||||
/// | TypeKind | SQL | FILE | SHELL | HTML | SSRF | DATA_EXFIL | CODE_EXEC | DESERIALIZE |
|
||||
/// |-----------|-----|------|-------|------|------|------------|-----------|-------------|
|
||||
/// | Int | Y | Y | Y | Y | Y | Y | N | N |
|
||||
/// | Bool | Y | Y | Y | Y | Y | Y | N | N |
|
||||
/// | String | N | N | N | N | N | N | N | N |
|
||||
/// | Url | N | N | N | N | N | N | N | N |
|
||||
/// | Object | N | N | N | N | N | N | N | N |
|
||||
/// | Unknown | N | N | N | N | N | N | N | N |
|
||||
#[test]
|
||||
fn type_kind_cap_suppression_matrix() {
|
||||
use crate::labels::Cap;
|
||||
|
|
@ -1324,40 +1405,41 @@ mod tests {
|
|||
("SHELL_ESCAPE", Cap::SHELL_ESCAPE),
|
||||
("HTML_ESCAPE", Cap::HTML_ESCAPE),
|
||||
("SSRF", Cap::SSRF),
|
||||
("DATA_EXFIL", Cap::DATA_EXFIL),
|
||||
("CODE_EXEC", Cap::CODE_EXEC),
|
||||
("DESERIALIZE", Cap::DESERIALIZE),
|
||||
];
|
||||
// (kind_name, kind, [suppress for each cap in `caps` order])
|
||||
let rows: &[(&str, TypeKind, [bool; 7])] = &[
|
||||
let rows: &[(&str, TypeKind, [bool; 8])] = &[
|
||||
(
|
||||
"Int",
|
||||
TypeKind::Int,
|
||||
[true, true, true, true, true, false, false],
|
||||
[true, true, true, true, true, true, false, false],
|
||||
),
|
||||
(
|
||||
"Bool",
|
||||
TypeKind::Bool,
|
||||
[true, true, true, true, true, false, false],
|
||||
[true, true, true, true, true, true, false, false],
|
||||
),
|
||||
(
|
||||
"String",
|
||||
TypeKind::String,
|
||||
[false, false, false, false, false, false, false],
|
||||
[false, false, false, false, false, false, false, false],
|
||||
),
|
||||
(
|
||||
"Url",
|
||||
TypeKind::Url,
|
||||
[false, false, false, false, false, false, false],
|
||||
[false, false, false, false, false, false, false, false],
|
||||
),
|
||||
(
|
||||
"Object",
|
||||
TypeKind::Object,
|
||||
[false, false, false, false, false, false, false],
|
||||
[false, false, false, false, false, false, false, false],
|
||||
),
|
||||
(
|
||||
"Unknown",
|
||||
TypeKind::Unknown,
|
||||
[false, false, false, false, false, false, false],
|
||||
[false, false, false, false, false, false, false, false],
|
||||
),
|
||||
];
|
||||
for (kind_name, kind, expected) in rows {
|
||||
|
|
@ -1389,6 +1471,7 @@ mod tests {
|
|||
Cap::SHELL_ESCAPE,
|
||||
Cap::HTML_ESCAPE,
|
||||
Cap::SSRF,
|
||||
Cap::DATA_EXFIL,
|
||||
Cap::CODE_EXEC,
|
||||
Cap::DESERIALIZE,
|
||||
] {
|
||||
|
|
@ -1487,6 +1570,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let consts = HashMap::new();
|
||||
|
|
|
|||
|
|
@ -19,19 +19,29 @@ fn sanitize_desc(s: &str) -> String {
|
|||
/// convergence node where all execution paths join before leaving the function.
|
||||
///
|
||||
/// **Invariant:** Only terminal exits carry the complete merged lifecycle state
|
||||
/// needed for leak analysis. Return nodes are intermediate (they flow into the
|
||||
/// terminal exit) and must NOT be analyzed for terminal resource state.
|
||||
///
|
||||
/// Detection is purely topological: a node inside a function is terminal when
|
||||
/// it has no successor within the same function scope. This works for both
|
||||
/// per-body graphs (Exit node is a sink) and legacy supergraphs (the
|
||||
/// synthesized Return's successor is the file-level Exit with
|
||||
/// needed for leak analysis. Return nodes are intermediate in per-body graphs
|
||||
/// (they flow into the synthetic Exit node) but become terminal in legacy
|
||||
/// supergraphs (their successor is the file-level Exit with
|
||||
/// `enclosing_func = None`).
|
||||
///
|
||||
/// Detection combines a kind filter with a topological check. Only nodes
|
||||
/// whose `StmtKind` actually terminates execution (`Exit`, `Return`, `Throw`)
|
||||
/// are considered, then we require that they have no successor in the same
|
||||
/// function scope. Without the kind filter, dangling Seq nodes left behind
|
||||
/// when nested function literals (e.g. `obj.fn = () => {...}`) get a
|
||||
/// placeholder in the parent graph would be misclassified as terminal exits
|
||||
/// and produce spurious resource-leak findings at the function-literal span.
|
||||
fn is_terminal_function_exit(
|
||||
idx: petgraph::graph::NodeIndex,
|
||||
info: &crate::cfg::NodeInfo,
|
||||
cfg: &Cfg,
|
||||
) -> bool {
|
||||
if !matches!(
|
||||
info.kind,
|
||||
StmtKind::Exit | StmtKind::Return | StmtKind::Throw
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
info.ast.enclosing_func.is_some()
|
||||
&& !cfg
|
||||
.neighbors_directed(idx, petgraph::Direction::Outgoing)
|
||||
|
|
@ -62,6 +72,7 @@ pub struct StateFinding {
|
|||
/// `state-unauthed-access` finding is suppressed on those spans because
|
||||
/// the user-controlled input has already been proved unable to escape
|
||||
/// into a privileged location.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn extract_findings(
|
||||
result: &DataflowResult<ProductState, TransferEvent>,
|
||||
cfg: &Cfg,
|
||||
|
|
@ -70,6 +81,7 @@ pub fn extract_findings(
|
|||
func_summaries: &crate::cfg::FuncSummaries,
|
||||
enable_auth: bool,
|
||||
path_safe_suppressed_sink_spans: &std::collections::HashSet<(usize, usize)>,
|
||||
closure_released_var_names: Option<&std::collections::HashSet<String>>,
|
||||
) -> Vec<StateFinding> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
|
|
@ -195,6 +207,23 @@ pub fn extract_findings(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Suppress leaks for variables whose release call lives in a
|
||||
// nested closure (callback / event handler) outside this
|
||||
// body's CFG. Common JS/TS shape:
|
||||
// const ws = new WebSocket(url);
|
||||
// socket.on("close", () => ws.close());
|
||||
// The per-body resource analysis cannot observe the close
|
||||
// inside the registered handler body; without this gate the
|
||||
// handle reads as a definite leak. Match by variable name —
|
||||
// closure-captured handles share the binding name with the
|
||||
// handle in the outer scope.
|
||||
if closure_released_var_names
|
||||
.map(|s| s.contains(var_name))
|
||||
.unwrap_or(false)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Prefer direct acquire node span; fall back to proxy span
|
||||
// from ResourceMethodSummary (cross-body resource tracking).
|
||||
let acquire_span = acquire_node
|
||||
|
|
@ -557,6 +586,7 @@ mod tests {
|
|||
&HashMap::new(),
|
||||
false,
|
||||
&std::collections::HashSet::new(),
|
||||
None,
|
||||
);
|
||||
|
||||
assert_eq!(findings.len(), 1);
|
||||
|
|
@ -617,6 +647,7 @@ mod tests {
|
|||
&HashMap::new(),
|
||||
false,
|
||||
&std::collections::HashSet::new(),
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(findings.is_empty());
|
||||
|
|
@ -751,6 +782,7 @@ mod tests {
|
|||
&HashMap::new(),
|
||||
false,
|
||||
&std::collections::HashSet::new(),
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(
|
||||
|
|
@ -816,6 +848,7 @@ mod tests {
|
|||
&HashMap::new(),
|
||||
false,
|
||||
&std::collections::HashSet::new(),
|
||||
None,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
|
|
|||
|
|
@ -77,6 +77,13 @@ pub fn run_state_analysis(
|
|||
// m.Lock()`) and routes them through `chain_proxies` instead. Pass
|
||||
// `None` to disable, strict-additive.
|
||||
ptr_proxy_hints: Option<&std::collections::HashMap<String, crate::pointer::PtrProxyHint>>,
|
||||
// Names of variables whose `.close()`/release calls live in a nested
|
||||
// closure (event handler, deferred callback) that the per-body CFG
|
||||
// can't observe directly. Used to suppress resource-leak findings
|
||||
// for handles whose cleanup is registered as a callback (`ws.on(
|
||||
// "close", () => ws2.close())`). Pass `None` for languages or
|
||||
// shapes that don't need this.
|
||||
closure_released_var_names: Option<&std::collections::HashSet<String>>,
|
||||
) -> Vec<StateFinding> {
|
||||
let _span = tracing::debug_span!("run_state_analysis").entered();
|
||||
|
||||
|
|
@ -116,9 +123,99 @@ pub fn run_state_analysis(
|
|||
func_summaries,
|
||||
enable_auth,
|
||||
path_safe_suppressed_sink_spans,
|
||||
closure_released_var_names,
|
||||
)
|
||||
}
|
||||
|
||||
/// Build a per-body map of variable names whose release calls
|
||||
/// (`.close`, `.destroy`, `.end`, `.release`, …) appear inside a
|
||||
/// **descendant** body (a closure / event handler nested inside the
|
||||
/// body that opens the handle).
|
||||
///
|
||||
/// Returned: `body_id → set of var names released somewhere inside
|
||||
/// that body's nested-closure subtree`. Used by the structural
|
||||
/// ResourceMisuse pass and the state-model leak pass to suppress
|
||||
/// findings whose cleanup lives in a callback the per-body CFG can't
|
||||
/// follow (`socket.on("close", () => ws.close())`).
|
||||
///
|
||||
/// Restricted to descendants — sibling methods on the same class
|
||||
/// don't share resource ownership, so a release in `queryAndClose`
|
||||
/// must NOT silence a leak in sibling `queryAndLeak`. Only true
|
||||
/// nested-closure parent / child relationships participate.
|
||||
pub fn collect_closure_released_var_names(
|
||||
bodies: &[crate::cfg::BodyCfg],
|
||||
lang: Lang,
|
||||
) -> std::collections::HashMap<crate::cfg::BodyId, std::collections::HashSet<String>> {
|
||||
use crate::cfg::{BodyId, StmtKind};
|
||||
use petgraph::visit::IntoNodeReferences;
|
||||
|
||||
// Step 1: collect releases per body. Only nested (non-toplevel)
|
||||
// closures are eligible — top-level bodies' own releases are
|
||||
// already tracked by the dataflow.
|
||||
let pairs = rules::resource_pairs(lang);
|
||||
let mut per_body: std::collections::HashMap<BodyId, std::collections::HashSet<String>> =
|
||||
std::collections::HashMap::new();
|
||||
for body in bodies {
|
||||
if body.meta.parent_body_id.is_none() {
|
||||
continue;
|
||||
}
|
||||
let mut local = std::collections::HashSet::new();
|
||||
for (_idx, info) in body.graph.node_references() {
|
||||
if info.kind != StmtKind::Call {
|
||||
continue;
|
||||
}
|
||||
let Some(callee) = info.call.callee.as_deref() else {
|
||||
continue;
|
||||
};
|
||||
let cl = callee.to_ascii_lowercase();
|
||||
let is_release = pairs.iter().any(|p| {
|
||||
p.release.iter().any(|r| {
|
||||
let rl = r.to_ascii_lowercase();
|
||||
if let Some(method) = rl.strip_prefix('.') {
|
||||
cl.ends_with(&format!(".{method}"))
|
||||
} else {
|
||||
cl == rl || cl.ends_with(&format!(".{rl}"))
|
||||
}
|
||||
})
|
||||
});
|
||||
if !is_release {
|
||||
continue;
|
||||
}
|
||||
if let Some(rcv) = info.call.receiver.as_deref() {
|
||||
local.insert(rcv.to_string());
|
||||
} else if let Some((rcv, _)) = callee.rsplit_once('.')
|
||||
&& !rcv.is_empty()
|
||||
{
|
||||
local.insert(rcv.to_string());
|
||||
}
|
||||
}
|
||||
if !local.is_empty() {
|
||||
per_body.insert(body.meta.id, local);
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: roll up into ancestor bodies. Walk each non-top body's
|
||||
// parent chain and union its release set into every ancestor's
|
||||
// entry. Class methods at the same nesting level (siblings under a
|
||||
// class body) do not roll up into each other — they have distinct
|
||||
// BodyId entries and the chain only flows through `parent_body_id`.
|
||||
let mut rollup: std::collections::HashMap<BodyId, std::collections::HashSet<String>> =
|
||||
std::collections::HashMap::new();
|
||||
let by_id: std::collections::HashMap<BodyId, &crate::cfg::BodyCfg> =
|
||||
bodies.iter().map(|b| (b.meta.id, b)).collect();
|
||||
for body in bodies {
|
||||
let Some(local) = per_body.get(&body.meta.id) else {
|
||||
continue;
|
||||
};
|
||||
let mut cur = body.meta.parent_body_id;
|
||||
while let Some(pid) = cur {
|
||||
rollup.entry(pid).or_default().extend(local.iter().cloned());
|
||||
cur = by_id.get(&pid).and_then(|b| b.meta.parent_body_id);
|
||||
}
|
||||
}
|
||||
rollup
|
||||
}
|
||||
|
||||
/// Build resource method summaries by pre-scanning all method bodies for known
|
||||
/// resource acquire/release operations. Only creates summaries for methods whose
|
||||
/// bodies actually contain matching operations, never infers from names alone.
|
||||
|
|
|
|||
|
|
@ -635,6 +635,19 @@ impl DefaultTransfer<'_> {
|
|||
fn apply_assignment(&self, _node_idx: NodeIndex, info: &NodeInfo, state: &mut ProductState) {
|
||||
// Ownership transfer: if `defines` reassigns a tracked resource
|
||||
// variable from a `uses` variable, transfer the lifecycle.
|
||||
//
|
||||
// Skip when the RHS is a function or lambda literal: storing a
|
||||
// closure into a property (`ws.onclose = () => { ... }`,
|
||||
// `obj.handler = function(){...}`) does not move ownership of the
|
||||
// resources the closure body references — those identifiers appear
|
||||
// in `info.taint.uses` only because `def_use` walks the literal's
|
||||
// body, not because the assignment itself reads them. Without this
|
||||
// gate, the first OPEN-tracked capture inside the closure body gets
|
||||
// marked MOVED and the property's symbol becomes the new OPEN
|
||||
// owner, which then surfaces as a spurious leak on the property.
|
||||
if info.rhs_is_function_literal {
|
||||
return;
|
||||
}
|
||||
if let Some(ref def) = info.taint.defines
|
||||
&& let Some(def_sym) = self.get_sym(info, def)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -158,6 +158,39 @@ pub struct SsaFuncSummary {
|
|||
/// (caller_param_index, sink_arg_position, sink_caps).
|
||||
#[serde(default)]
|
||||
pub param_to_sink_param: Vec<(usize, usize, Cap)>,
|
||||
/// Per-parameter gate-filter cap masks lifted from inner multi-gate
|
||||
/// sink call sites.
|
||||
///
|
||||
/// When a function body contains a callee whose
|
||||
/// [`crate::cfg::CallMeta::gate_filters`] carries more than one entry
|
||||
/// (e.g. `fetch` is both an `SSRF` gate on the URL arg and a
|
||||
/// `DATA_EXFIL` gate on the body arg), the multi-gate dispatch in
|
||||
/// [`super::super::collect_block_events`] cap-narrows the event's
|
||||
/// `sink_caps` to the specific gate's `label_caps`. Each
|
||||
/// `(param_idx, label_caps)` entry records that this function's
|
||||
/// parameter `param_idx` flowed into a gated sink whose narrowed
|
||||
/// caps were `label_caps`.
|
||||
///
|
||||
/// Cross-file callers consume this list to preserve per-position cap
|
||||
/// attribution through wrapper functions: a wrapper
|
||||
/// `fn forward(url, body) { fetch(url, {body}) }` records
|
||||
/// `[(0, SSRF), (1, DATA_EXFIL)]` so a caller of `forward` splits
|
||||
/// URL-tainted SSRF findings from body-tainted DATA_EXFIL findings
|
||||
/// instead of conflating both caps onto every parameter.
|
||||
///
|
||||
/// `Vec<(param_idx, label_caps)>` is sufficient at cross-file
|
||||
/// granularity: the corresponding `payload_args` and
|
||||
/// `destination_uses` are intra-file context that does not survive
|
||||
/// the function-summary boundary (field idents reference SSA
|
||||
/// values from the callee body).
|
||||
///
|
||||
/// Empty (the default) for callees whose internal sinks carry zero
|
||||
/// or one gate filter; the existing
|
||||
/// [`Self::param_to_sink`] /
|
||||
/// [`Self::param_to_sink_param`] machinery already records those
|
||||
/// cases without per-position cap conflict.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub param_to_gate_filters: Vec<(usize, Cap)>,
|
||||
/// Parameter indices whose container identity flows to the return value
|
||||
/// (e.g., function returns the same container it received as input).
|
||||
///
|
||||
|
|
|
|||
|
|
@ -441,6 +441,7 @@ fn ssa_summary_serde_round_trip_identity() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -473,6 +474,7 @@ fn ssa_summary_serde_round_trip_strip_bits() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -502,6 +504,7 @@ fn ssa_summary_serde_round_trip_add_bits() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -538,6 +541,7 @@ fn ssa_summary_serde_round_trip_all_variants() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -576,6 +580,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
gs.insert_ssa(key.clone(), v1.clone());
|
||||
assert_eq!(gs.get_ssa(&key), Some(&v1));
|
||||
|
|
@ -602,6 +607,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
gs.insert_ssa(key.clone(), v2.clone());
|
||||
assert_eq!(gs.get_ssa(&key), Some(&v2));
|
||||
|
|
@ -648,6 +654,7 @@ fn global_summaries_merge_with_ssa_entries() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let sum_b = SsaFuncSummary {
|
||||
param_to_return: vec![],
|
||||
|
|
@ -670,6 +677,7 @@ fn global_summaries_merge_with_ssa_entries() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
|
||||
gs1.insert_ssa(key_a.clone(), sum_a.clone());
|
||||
|
|
@ -716,6 +724,7 @@ fn global_summaries_is_empty_considers_ssa() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -745,6 +754,7 @@ fn ssa_summary_serde_round_trip_param_to_sink_param() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -789,6 +799,7 @@ fn ssa_summary_serde_round_trip_container_fields() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -843,6 +854,7 @@ fn ssa_summary_serde_round_trip_return_abstract() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
@ -916,6 +928,8 @@ fn make_callee_body(
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
},
|
||||
opt: crate::ssa::OptimizeResult {
|
||||
const_values: std::collections::HashMap::new(),
|
||||
|
|
@ -1361,6 +1375,7 @@ fn global_summaries_resolve_body_requires_body_present() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
// Don't insert body
|
||||
|
|
@ -3504,6 +3519,7 @@ fn cf4_return_path_transform_serde_round_trip() {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
let json = serde_json::to_string(&summary).unwrap();
|
||||
let back: SsaFuncSummary = serde_json::from_str(&json).unwrap();
|
||||
|
|
|
|||
|
|
@ -1382,6 +1382,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let empty_succs = HashMap::new();
|
||||
|
|
@ -1441,6 +1443,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let empty_succs = HashMap::new();
|
||||
|
|
@ -1573,6 +1577,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = make_finding(n0, n1);
|
||||
|
|
@ -1680,6 +1686,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
// Finding path goes through B0 → B1 → B3
|
||||
|
|
@ -1826,6 +1834,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -1938,6 +1948,8 @@ mod tests {
|
|||
exception_edges: vec![(b0, b2)],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
|
||||
|
|
@ -2004,6 +2016,8 @@ mod tests {
|
|||
exception_edges: vec![(b0, b2)],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let mut exc_succs: HashMap<BlockId, SmallVec<[BlockId; 2]>> = HashMap::new();
|
||||
|
|
@ -2111,6 +2125,8 @@ mod tests {
|
|||
exception_edges: vec![(b1, b2)],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
|
|||
|
|
@ -389,6 +389,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -434,6 +436,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -515,6 +519,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -577,6 +583,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -657,6 +665,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -728,6 +738,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -762,6 +774,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -818,6 +832,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -898,6 +914,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -976,6 +994,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
@ -1011,6 +1031,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let info = analyse_loops(&ssa);
|
||||
|
|
|
|||
|
|
@ -379,6 +379,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -452,6 +454,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -554,6 +558,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let ctx = SymexContext {
|
||||
|
|
@ -614,6 +620,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let ctx = SymexContext {
|
||||
|
|
|
|||
|
|
@ -353,6 +353,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let witness = state.get_sink_witness(&finding, &ssa);
|
||||
|
|
@ -393,6 +395,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
|
||||
|
|
@ -430,6 +434,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
assert_eq!(state.get_sink_witness(&finding, &ssa), None);
|
||||
|
|
@ -470,6 +476,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
state.widen_at_loop_head(BlockId(0), &ssa);
|
||||
|
|
@ -513,6 +521,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
state.widen_at_loop_head(BlockId(0), &ssa);
|
||||
|
|
@ -556,6 +566,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
state.widen_at_loop_head(BlockId(0), &ssa);
|
||||
|
|
|
|||
|
|
@ -1012,6 +1012,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1591,6 +1593,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1659,6 +1662,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1727,6 +1731,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1790,6 +1795,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -1853,6 +1859,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
let ctx = make_summary_ctx(&gs);
|
||||
|
|
@ -2050,6 +2057,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2128,6 +2136,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2207,6 +2216,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
// Second "send", in ns B, also with same arity → ambiguous bare-name
|
||||
|
|
@ -2236,6 +2246,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
// Also register the type-qualified name so Attempt 1 can find it
|
||||
|
|
@ -2265,6 +2276,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2343,6 +2355,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
|
||||
|
|
@ -2423,6 +2436,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
insert_java_summary(
|
||||
|
|
@ -2451,6 +2465,7 @@ mod tests {
|
|||
field_points_to: Default::default(),
|
||||
return_path_facts: smallvec::SmallVec::new(),
|
||||
typed_call_receivers: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
},
|
||||
);
|
||||
// No "HttpClient.send" summary registered, disambiguation has 0 exact matches
|
||||
|
|
|
|||
|
|
@ -204,8 +204,15 @@ fn sink_cap(finding: &Finding, cfg: &Cfg) -> Cap {
|
|||
|
||||
/// Select a witness payload string based on the vulnerability class.
|
||||
fn witness_payload(cap: Cap) -> &'static str {
|
||||
// Check bits in priority order (most specific first)
|
||||
if cap.intersects(Cap::CODE_EXEC) {
|
||||
// Check bits in priority order (most specific first).
|
||||
//
|
||||
// `DATA_EXFIL` is checked before the action-class caps (CODE_EXEC, SQL,
|
||||
// etc.) because a data-exfil sink reflects what the *attacker reads*,
|
||||
// not what they *do*: the witness needs to look like a leaked secret
|
||||
// ("<SESSION_TOKEN>") rather than an injected payload ("' OR 1=1 --").
|
||||
if cap.intersects(Cap::DATA_EXFIL) {
|
||||
"<SESSION_TOKEN>"
|
||||
} else if cap.intersects(Cap::CODE_EXEC) {
|
||||
"require('child_process').execSync('id')"
|
||||
} else if cap.intersects(Cap::HTML_ESCAPE) {
|
||||
"<script>alert('xss')</script>"
|
||||
|
|
@ -639,9 +646,21 @@ mod tests {
|
|||
witness_payload(Cap::DESERIALIZE),
|
||||
"malicious_serialized_object"
|
||||
);
|
||||
assert_eq!(witness_payload(Cap::DATA_EXFIL), "<SESSION_TOKEN>");
|
||||
assert_eq!(witness_payload(Cap::CRYPTO), "TAINTED"); // fallback
|
||||
}
|
||||
|
||||
#[test]
fn test_witness_payload_data_exfil_wins_over_action_caps() {
    // When `DATA_EXFIL` is present together with another cap (SSRF here),
    // the chosen witness must be the leaked-secret placeholder rather than
    // the payload any other cap in the set would select on its own.
    let payload = witness_payload(Cap::DATA_EXFIL | Cap::SSRF);
    assert_eq!(payload, "<SESSION_TOKEN>");
}
|
||||
|
||||
#[test]
|
||||
fn test_witness_payload_code_exec_separate_from_xss() {
|
||||
// CODE_EXEC must return a code-execution payload, not an XSS one.
|
||||
|
|
@ -776,6 +795,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -831,6 +852,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let cfg = Cfg::new();
|
||||
let finding = Finding {
|
||||
|
|
@ -892,6 +915,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
@ -954,6 +979,8 @@ mod tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let finding = Finding {
|
||||
|
|
|
|||
|
|
@ -752,6 +752,7 @@ mod tests {
|
|||
exception_edges: Vec::new(),
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
(ssa, cfg)
|
||||
|
|
@ -766,6 +767,47 @@ mod tests {
|
|||
assert_eq!(d.validated_false, 0);
|
||||
}
|
||||
|
||||
/// Regression guard for `Cap::DATA_EXFIL` in the demand state.
///
/// Pins that `DemandState::new` stores the requested caps unchanged in its
/// `caps` field: first for `DATA_EXFIL` on its own, then for `DATA_EXFIL`
/// combined with `SSRF`. A change that masks the bit out at construction
/// time fails here.
#[test]
fn demand_state_roundtrips_data_exfil_cap() {
    // Single-cap demand: the stored value is exactly what was requested.
    let only_exfil = DemandState::new(Cap::DATA_EXFIL);
    assert_eq!(only_exfil.caps, Cap::DATA_EXFIL);
    assert!(only_exfil.caps.contains(Cap::DATA_EXFIL));

    // Combined demand: both bits must still be present afterwards.
    let exfil_and_ssrf = DemandState::new(Cap::DATA_EXFIL | Cap::SSRF);
    for bit in [Cap::DATA_EXFIL, Cap::SSRF] {
        assert!(exfil_and_ssrf.caps.contains(bit));
    }
}
|
||||
|
||||
/// End-to-end check of the backwards driver with a demand of exactly
/// `Cap::DATA_EXFIL`.
///
/// Starts from the body returned by `build_trivial_source_body`, labels CFG
/// node 0 as a `DATA_EXFIL` source, and runs `analyse_sink_backwards` with
/// `Cap::DATA_EXFIL` as the only demanded cap. The walk must yield a single
/// flow that is a confirmation and whose `sink_caps` equals `DATA_EXFIL`
/// exactly, so a change that drops the bit or adds unrelated bits along the
/// way is caught.
#[test]
fn driver_walks_data_exfil_source_to_sink() {
    let (ssa, mut cfg) = build_trivial_source_body();

    // Attach a Source(DATA_EXFIL) label to the source node so the walk has a
    // label carrying this exact cap to match against.
    let source_node = NodeIndex::new(0);
    cfg[source_node]
        .taint
        .labels
        .push(DataLabel::Source(Cap::DATA_EXFIL));

    // NOTE(review): SsaValue(1) / node 1 are presumably the sink value and
    // sink node of the trivial body; confirm against build_trivial_source_body.
    let sink_node = NodeIndex::new(1);
    let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript);
    let flows = analyse_sink_backwards(&ctx, SsaValue(1), sink_node, Cap::DATA_EXFIL);

    assert_eq!(flows.len(), 1, "exactly one DATA_EXFIL flow expected");
    let flow = &flows[0];
    assert!(flow.is_confirmation(), "must confirm at the source");
    assert_eq!(flow.sink_caps, Cap::DATA_EXFIL);
}
|
||||
|
||||
#[test]
|
||||
fn backward_transfer_source_terminates() {
|
||||
let (ssa, _cfg) = build_trivial_source_body();
|
||||
|
|
@ -800,6 +842,7 @@ mod tests {
|
|||
exception_edges: Vec::new(),
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let demand = DemandState::new(Cap::all());
|
||||
let (step, next) = backward_transfer(&ssa, SsaValue(0), &demand);
|
||||
|
|
@ -832,6 +875,7 @@ mod tests {
|
|||
exception_edges: Vec::new(),
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let demand = DemandState::new(Cap::all());
|
||||
let (step, _next) = backward_transfer(&ssa, SsaValue(0), &demand);
|
||||
|
|
@ -919,6 +963,7 @@ mod tests {
|
|||
exception_edges: Vec::new(),
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let demand = DemandState::new(Cap::all());
|
||||
|
|
@ -1007,6 +1052,7 @@ mod tests {
|
|||
exception_edges: Vec::new(),
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript);
|
||||
|
|
|
|||
|
|
@ -4026,6 +4026,45 @@ pub(super) fn transfer_inst(
|
|||
}
|
||||
}
|
||||
|
||||
// Constructor cap narrowing: a `new X(...)` call returns an object
|
||||
// instance, not a string. Caps that name a string-shaped sink
|
||||
// pattern (path argument, format string, URL component, JSON
|
||||
// input) cannot fire on a wrapper object, so they must not
|
||||
// survive the construction. Without this narrowing, a tainted
|
||||
// argument to `new SdkClient(secret)` propagates `Cap::all()`
|
||||
// into the wrapper, every method call on the wrapper inherits
|
||||
// those bits via receiver propagation, and any downstream
|
||||
// `fs.write*` / `printf` / `JSON.parse` on a string property
|
||||
// returned by an SDK method (e.g. `client.create().id`) flags
|
||||
// a phantom flow that has no real path-traversal etc. payload.
|
||||
//
|
||||
// Caps preserved (legitimately travel through wrappers):
|
||||
// - SHELL_ESCAPE / SQL_QUERY / CODE_EXEC / DESERIALIZE: a
|
||||
// wrapper that captures a tainted command/query string can
|
||||
// replay it via methods, the bit must survive the wrap.
|
||||
// - SSRF / DATA_EXFIL: URL/payload concerns persist on URL or
|
||||
// content-bearing objects.
|
||||
// - UNAUTHORIZED_ID: ownership obligation persists on a
|
||||
// wrapper that carries a request-bound identifier.
|
||||
// - ENV_VAR: provenance marker, never a sink trigger by
|
||||
// itself.
|
||||
// - HTML_ESCAPE: kept for safety, conservative dual concern
|
||||
// (a wrapper used as a string in template rendering).
|
||||
// - CRYPTO: kept conservatively.
|
||||
//
|
||||
// Caps stripped on construction:
|
||||
// - FILE_IO: path strings only.
|
||||
// - FMT_STRING: printf-style format args only.
|
||||
// - URL_ENCODE: URL components only.
|
||||
// - JSON_PARSE: parser inputs only.
|
||||
if info.call.is_constructor && !return_bits.is_empty() {
|
||||
let strip = Cap::FILE_IO | Cap::FMT_STRING | Cap::URL_ENCODE | Cap::JSON_PARSE;
|
||||
return_bits &= !strip;
|
||||
if return_bits.is_empty() {
|
||||
return_origins.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Write result
|
||||
if return_bits.is_empty() {
|
||||
state.remove(inst.value);
|
||||
|
|
@ -4314,16 +4353,41 @@ pub(super) fn transfer_inst(
|
|||
// summary-extraction mode so baseline probes keep their
|
||||
// intrinsic-source contract. Gate is set by the caller, e.g.
|
||||
// always-on for JS/TS, only AnonymousFunction bodies for Java.
|
||||
//
|
||||
// The `Param` branch fires for both real formal parameters and
|
||||
// synthetic externals injected by lowering for free / closure-
|
||||
// captured variables (`SsaBody.synthetic_externals`). Only real
|
||||
// formals should receive the heuristic seed: a closure capturing
|
||||
// an out-of-scope `userId` / `cmd` / `payload` is NOT a handler
|
||||
// entry point — the variable is supplied by the enclosing scope
|
||||
// and seeding it here produces phantom sources anchored to the
|
||||
// function's declaration line.
|
||||
if transfer.auto_seed_handler_params
|
||||
&& !seeded_from_scope
|
||||
&& matches!(&inst.op, SsaOp::Param { .. })
|
||||
&& !ssa.synthetic_externals.contains(&inst.value)
|
||||
{
|
||||
if let Some(var_name) = ssa
|
||||
.value_defs
|
||||
.get(inst.value.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
{
|
||||
if crate::labels::is_js_ts_handler_param_name(var_name) {
|
||||
// Direct match: the Param's name itself is a handler
|
||||
// identifier (e.g. `input`, `cmd`, `userId`).
|
||||
//
|
||||
// Root-prefix match: dotted-path Params produced by
|
||||
// lowering for member-expression uses inside the body
|
||||
// (`input.cmd` — an unbacked phantom Param) inherit the
|
||||
// seed when their *root* is a handler-param formal.
|
||||
// Without this, the field-aware suppression downstream
|
||||
// sees `input.cmd` as a "clean field" and strips
|
||||
// `input`'s taint, even though `input.cmd` is just a
|
||||
// structural projection of the auto-seeded formal.
|
||||
let root_is_handler = var_name
|
||||
.split_once('.')
|
||||
.map(|(root, _)| crate::labels::is_js_ts_handler_param_name(root))
|
||||
.unwrap_or(false);
|
||||
if crate::labels::is_js_ts_handler_param_name(var_name) || root_is_handler {
|
||||
let origin = TaintOrigin {
|
||||
node: inst.cfg_node,
|
||||
source_kind: SourceKind::UserInput,
|
||||
|
|
@ -5245,6 +5309,15 @@ fn collect_block_events(
|
|||
let sink_info = resolve_sink_info(info, transfer);
|
||||
let mut sink_caps = sink_info.caps;
|
||||
|
||||
// [detectors.data_exfil] enabled toggle. When the detector class is
|
||||
// disabled per-project, strip Cap::DATA_EXFIL from sink_caps so no
|
||||
// taint-data-exfiltration event is emitted regardless of which gate
|
||||
// would have fired. Strict-additive: defaults to enabled, no effect
|
||||
// for projects that don't opt in.
|
||||
if !crate::utils::detector_options::current().data_exfil.enabled {
|
||||
sink_caps &= !Cap::DATA_EXFIL;
|
||||
}
|
||||
|
||||
// Type-qualified sink resolution: when normal sink resolution found nothing,
|
||||
// try using the receiver's inferred type to construct a qualified callee name.
|
||||
if sink_caps.is_empty() {
|
||||
|
|
@ -5324,50 +5397,83 @@ fn collect_block_events(
|
|||
for &(cb_idx, src_caps) in &resolved.source_to_callback {
|
||||
let cb_name = info.arg_callees.get(cb_idx).and_then(|ac| ac.as_ref());
|
||||
if let Some(cb_callee) = cb_name {
|
||||
if let Some(cb_resolved) =
|
||||
resolve_callee(transfer, cb_callee, caller_func, 0)
|
||||
{
|
||||
let matching_sink_caps = cb_resolved
|
||||
.param_to_sink
|
||||
.iter()
|
||||
.filter(|(_, caps)| !(src_caps & *caps).is_empty())
|
||||
.fold(Cap::empty(), |acc, (_, c)| acc | *c);
|
||||
if !matching_sink_caps.is_empty() {
|
||||
let source_kind =
|
||||
crate::labels::infer_source_kind(src_caps, callee);
|
||||
let origin = TaintOrigin {
|
||||
node: inst.cfg_node,
|
||||
source_kind,
|
||||
source_span: None,
|
||||
};
|
||||
// Pick callback-path sink sites.
|
||||
// The callback callee's `param_to_sink_sites`
|
||||
// drives attribution when available; cap-only
|
||||
// fallback yields `primary_sink_site = None`.
|
||||
let cb_tainted: Vec<(
|
||||
SsaValue,
|
||||
Cap,
|
||||
SmallVec<[TaintOrigin; 2]>,
|
||||
)> = vec![(
|
||||
// First try the standard summary-based resolution
|
||||
// path (covers user-defined functions and built-ins
|
||||
// that landed in label-derived summaries upstream).
|
||||
// If that yields no matching sink caps, fall back
|
||||
// to gated-sink classification on the callback
|
||||
// callee's name — gated sinks (e.g.
|
||||
// `child_process.exec` post-fix) carry their
|
||||
// payload positions in the gate, not in any
|
||||
// summary, and the callback pipeline still needs
|
||||
// those positions to pair source caps against
|
||||
// param_to_sink.
|
||||
let cb_resolved = resolve_callee(transfer, cb_callee, caller_func, 0);
|
||||
let mut matching_sink_caps = Cap::empty();
|
||||
let cb_param_to_sink_sites: Vec<(usize, SmallVec<[SinkSite; 1]>)> =
|
||||
if let Some(ref r) = cb_resolved {
|
||||
matching_sink_caps = r
|
||||
.param_to_sink
|
||||
.iter()
|
||||
.filter(|(_, caps)| !(src_caps & *caps).is_empty())
|
||||
.fold(Cap::empty(), |acc, (_, c)| acc | *c);
|
||||
r.param_to_sink_sites.clone()
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
if matching_sink_caps.is_empty() {
|
||||
// Gate-fallback: classify_gated_sink yields the
|
||||
// callback callee's payload positions + sink
|
||||
// caps directly when the name matches a gated
|
||||
// sink rule.
|
||||
let lang_str = transfer.lang.as_str();
|
||||
let gates = crate::labels::classify_gated_sink(
|
||||
lang_str,
|
||||
cb_callee,
|
||||
|_| None,
|
||||
|_| None,
|
||||
|_| false,
|
||||
);
|
||||
for gm in gates.iter() {
|
||||
if let DataLabel::Sink(bits) = gm.label {
|
||||
if !(src_caps & bits).is_empty() {
|
||||
matching_sink_caps |= bits;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !matching_sink_caps.is_empty() {
|
||||
let source_kind =
|
||||
crate::labels::infer_source_kind(src_caps, callee);
|
||||
let origin = TaintOrigin {
|
||||
node: inst.cfg_node,
|
||||
source_kind,
|
||||
source_span: None,
|
||||
};
|
||||
// Pick callback-path sink sites.
|
||||
// The callback callee's `param_to_sink_sites`
|
||||
// drives attribution when available; cap-only
|
||||
// fallback yields `primary_sink_site = None`.
|
||||
let cb_tainted: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)> =
|
||||
vec![(
|
||||
inst.value,
|
||||
src_caps & matching_sink_caps,
|
||||
SmallVec::from_elem(origin, 1),
|
||||
)];
|
||||
let cb_sites = pick_primary_sink_sites_from_resolved(
|
||||
matching_sink_caps,
|
||||
&cb_resolved.param_to_sink_sites,
|
||||
);
|
||||
emit_ssa_taint_events(
|
||||
events,
|
||||
inst.cfg_node,
|
||||
cb_tainted,
|
||||
matching_sink_caps,
|
||||
false,
|
||||
None,
|
||||
true,
|
||||
cb_sites,
|
||||
);
|
||||
}
|
||||
let cb_sites = pick_primary_sink_sites_from_resolved(
|
||||
matching_sink_caps,
|
||||
&cb_param_to_sink_sites,
|
||||
);
|
||||
emit_ssa_taint_events(
|
||||
events,
|
||||
inst.cfg_node,
|
||||
cb_tainted,
|
||||
matching_sink_caps,
|
||||
false,
|
||||
None,
|
||||
true,
|
||||
cb_sites,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5563,8 +5669,62 @@ fn collect_block_events(
|
|||
// loop with the legacy `(sink_caps, info.call.sink_payload_args,
|
||||
// info.call.destination_uses)` triple, preserving prior behavior
|
||||
// for every non-multi-gate site.
|
||||
//
|
||||
// Cross-file wrapper case: when the resolved callee summary carries
|
||||
// [`SinkInfo::param_to_gate_filters`] (the wrapper's body contains
|
||||
// an inner multi-gate sink whose per-position cap split was lifted
|
||||
// at extraction time), expand one filter pass per `(param_idx,
|
||||
// label_caps)` entry restricted to that single arg position. This
|
||||
// preserves SSRF-vs-DATA_EXFIL attribution across a
|
||||
// `fn forward(url, body) { fetch(url, {body}) }` wrapper that is
|
||||
// NOT itself a known gated sink.
|
||||
//
|
||||
// Params NOT covered by `param_to_gate_filters` retain coverage
|
||||
// via their `param_to_sink` entry, expanded per-position so the
|
||||
// emitted event's `sink_caps` reflects the param-specific cap
|
||||
// mask rather than the aggregate union. This matters for
|
||||
// wrappers that mix gated sinks with label-based sinks
|
||||
// (e.g. `fn dispatch(cmd, url) { execSync(cmd); fetch(url) }`),
|
||||
// where param 0 reaches a non-gated SHELL_ESCAPE sink and the
|
||||
// gate-filter list only carries the SSRF gate for param 1.
|
||||
let multi_gate = info.call.gate_filters.len() > 1;
|
||||
let summary_per_position = !multi_gate && !sink_info.param_to_gate_filters.is_empty();
|
||||
type FilterEntry<'a> = (Cap, Option<&'a [usize]>, Option<&'a [String]>);
|
||||
// Per-position dispatch source for the summary-per-position branch.
|
||||
// First, every entry from `param_to_gate_filters` (cap-narrowed by
|
||||
// the inner gate); then, for any param_to_sink index NOT mentioned
|
||||
// in `param_to_gate_filters`, an entry using that param's
|
||||
// `param_to_sink` cap mask.
|
||||
struct PerPosEntry {
|
||||
idx: [usize; 1],
|
||||
caps: Cap,
|
||||
}
|
||||
let per_position_entries: Vec<PerPosEntry> = if summary_per_position {
|
||||
let mut out: Vec<PerPosEntry> =
|
||||
Vec::with_capacity(sink_info.param_to_gate_filters.len());
|
||||
for (idx, caps) in &sink_info.param_to_gate_filters {
|
||||
out.push(PerPosEntry {
|
||||
idx: [*idx],
|
||||
caps: *caps,
|
||||
});
|
||||
}
|
||||
for (idx, caps) in &sink_info.param_to_sink {
|
||||
if sink_info
|
||||
.param_to_gate_filters
|
||||
.iter()
|
||||
.any(|(i, _)| *i == *idx)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
out.push(PerPosEntry {
|
||||
idx: [*idx],
|
||||
caps: *caps,
|
||||
});
|
||||
}
|
||||
out
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
let filter_iter: smallvec::SmallVec<[FilterEntry<'_>; 2]> = if multi_gate {
|
||||
info.call
|
||||
.gate_filters
|
||||
|
|
@ -5577,11 +5737,37 @@ fn collect_block_events(
|
|||
)
|
||||
})
|
||||
.collect()
|
||||
} else if summary_per_position {
|
||||
per_position_entries
|
||||
.iter()
|
||||
.map(|e| (sink_caps & e.caps, Some(e.idx.as_slice()), None))
|
||||
.collect()
|
||||
} else {
|
||||
smallvec::smallvec![(sink_caps, None, None)]
|
||||
};
|
||||
|
||||
for (filter_caps, positions_override, destination_override) in filter_iter {
|
||||
let mut filter_caps = filter_caps;
|
||||
|
||||
// Per-filter destination allowlist for DATA_EXFIL. When this
|
||||
// filter would emit Cap::DATA_EXFIL and the call's destination
|
||||
// arg has a trusted static prefix (configured via
|
||||
// detectors.data_exfil.trusted_destinations), drop the bit
|
||||
// for this filter only. Other gates on the same call site
|
||||
// (notably SSRF) are unaffected. Mirrors the semantics of
|
||||
// is_call_data_exfil_destination_trusted but operates per-gate
|
||||
// so a multi-gate fetch site keeps SSRF attribution while
|
||||
// dropping DATA_EXFIL when the destination is trusted.
|
||||
if filter_caps.intersects(Cap::DATA_EXFIL) {
|
||||
if let SsaOp::Call { ref args, .. } = inst.op {
|
||||
if let Some(ref abs) = state.abstract_state {
|
||||
if is_call_data_exfil_destination_trusted(inst, args, abs, cfg) {
|
||||
filter_caps &= !Cap::DATA_EXFIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if filter_caps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -6464,6 +6650,15 @@ struct SinkInfo {
|
|||
/// coordinates. Used to attribute findings to the dangerous
|
||||
/// callee-internal instruction.
|
||||
param_to_sink_sites: Vec<(usize, SmallVec<[SinkSite; 1]>)>,
|
||||
/// Per-parameter gate-filter cap masks lifted from the callee's
|
||||
/// inner multi-gate sink call sites. Mirrors
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary::param_to_gate_filters`].
|
||||
/// When non-empty, the dispatcher in [`collect_block_events`]
|
||||
/// expands one filter pass per `(param_idx, label_caps)` entry so
|
||||
/// a wrapper carrying multiple gate classes (e.g. SSRF on the URL
|
||||
/// arg + DATA_EXFIL on the body arg) attributes findings per cap
|
||||
/// instead of joining them.
|
||||
param_to_gate_filters: Vec<(usize, Cap)>,
|
||||
}
|
||||
|
||||
fn resolve_sink_info(info: &NodeInfo, transfer: &SsaTaintTransfer) -> SinkInfo {
|
||||
|
|
@ -6479,6 +6674,7 @@ fn resolve_sink_info(info: &NodeInfo, transfer: &SsaTaintTransfer) -> SinkInfo {
|
|||
caps: label_sink_caps,
|
||||
param_to_sink: vec![],
|
||||
param_to_sink_sites: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -6500,6 +6696,7 @@ fn resolve_sink_info(info: &NodeInfo, transfer: &SsaTaintTransfer) -> SinkInfo {
|
|||
caps: r.sink_caps,
|
||||
param_to_sink: r.param_to_sink,
|
||||
param_to_sink_sites: r.param_to_sink_sites,
|
||||
param_to_gate_filters: r.param_to_gate_filters,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -6525,6 +6722,7 @@ fn resolve_sink_info(info: &NodeInfo, transfer: &SsaTaintTransfer) -> SinkInfo {
|
|||
caps: r.sink_caps,
|
||||
param_to_sink: r.param_to_sink,
|
||||
param_to_sink_sites: r.param_to_sink_sites,
|
||||
param_to_gate_filters: r.param_to_gate_filters,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -6533,6 +6731,7 @@ fn resolve_sink_info(info: &NodeInfo, transfer: &SsaTaintTransfer) -> SinkInfo {
|
|||
caps: Cap::empty(),
|
||||
param_to_sink: vec![],
|
||||
param_to_sink_sites: vec![],
|
||||
param_to_gate_filters: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -7383,6 +7582,16 @@ fn is_abstract_safe_for_sink(
|
|||
}
|
||||
}
|
||||
|
||||
// DATA_EXFIL, destination allowlist via configured trusted prefixes.
|
||||
// Mirrors the SSRF prefix-lock above but consults the user-configured
|
||||
// [detectors.data_exfil] table's trusted_destinations key. Strict-
|
||||
// additive: when no destinations are configured this is a no-op.
|
||||
if sink_caps.intersects(Cap::DATA_EXFIL)
|
||||
&& is_inst_data_exfil_destination_trusted(inst, abs, cfg)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// SHELL_ESCAPE, static-map finite-domain safety. When every tainted
|
||||
// payload value is proved by the static-HashMap-lookup analysis to come
|
||||
// from a bounded set of metacharacter-free literals, the call cannot
|
||||
|
|
@ -7509,6 +7718,15 @@ fn is_call_abstract_safe(
|
|||
}
|
||||
}
|
||||
|
||||
// DATA_EXFIL, destination-allowlist match. Mirrors the SSRF arm above
|
||||
// for the Call path. Strict-additive: a no-op when
|
||||
// detectors.data_exfil.trusted_destinations is empty.
|
||||
if sink_caps.intersects(Cap::DATA_EXFIL)
|
||||
&& is_call_data_exfil_destination_trusted(inst, args, abs, cfg)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// SHELL_ESCAPE, static-map finite-domain safety on every non-empty arg
|
||||
// group. Mirrors the non-Call path so suppression fires regardless of
|
||||
// which branch the sink detector took.
|
||||
|
|
@ -7785,6 +8003,118 @@ fn is_static_map_shell_safe(
|
|||
})
|
||||
}
|
||||
|
||||
/// Decides whether a proven destination prefix is covered by the
/// `DATA_EXFIL` destination allowlist.
///
/// `prefix` is the statically known leading part of an outbound destination
/// URL (taken from the abstract string domain or from a literal recorded on
/// the CFG node); `trusted` is the list of user-configured trusted
/// destinations. The result is `true` when `prefix` starts with at least one
/// non-empty entry of `trusted`.
///
/// Matching is a plain string-prefix comparison with no URL parsing:
///
/// * an empty `prefix` never matches, since it proves nothing about the
///   destination;
/// * an empty entry in `trusted` is ignored, because it would otherwise
///   accept every URL;
/// * entries should therefore be origin-pinned with a trailing separator
///   (e.g. `https://api.internal/`) so a longer host name that merely shares
///   the same leading characters is not accepted.
fn is_string_prefix_trusted_destination(prefix: &str, trusted: &[String]) -> bool {
    !prefix.is_empty()
        && trusted
            .iter()
            .filter(|entry| !entry.is_empty())
            .any(|entry| prefix.starts_with(entry.as_str()))
}
|
||||
|
||||
/// Check whether the call site's destination argument (positional arg 0) is
|
||||
/// a known trusted destination per
|
||||
/// [`crate::utils::detector_options::DataExfilDetectorOptions::trusted_destinations`].
|
||||
///
|
||||
/// Returns `true` when the URL argument has a static prefix matching one
|
||||
/// of the configured trusted entries. Three sources are consulted in
|
||||
/// order:
|
||||
///
|
||||
/// 1. The CFG node's syntactic literal (`info.call.arg_string_literals[0]`),
|
||||
/// populated for any positional argument that is a syntactic string
|
||||
/// literal at the call site. Catches the common case
|
||||
/// `fetch('https://api.internal/...', {...})` whose URL never enters
|
||||
/// the abstract domain because it is not bound to an identifier.
|
||||
/// 2. The inline template-literal prefix attached to the call node
|
||||
/// directly (matches the SSRF prefix-lock fallback).
|
||||
/// 3. The abstract string-domain prefix of arg 0's SSA value group.
|
||||
/// Catches identifier-bound URLs like
|
||||
/// `let url = \`https://api.internal/${id}\`; fetch(url, {...})`.
|
||||
///
|
||||
/// Returns `false` when no trusted destinations are configured.
|
||||
fn is_call_data_exfil_destination_trusted(
|
||||
inst: &SsaInst,
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
abs: &AbstractState,
|
||||
cfg: &Cfg,
|
||||
) -> bool {
|
||||
let opts = crate::utils::detector_options::current();
|
||||
let trusted = &opts.data_exfil.trusted_destinations;
|
||||
if trusted.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let node_info = &cfg[inst.cfg_node];
|
||||
if let Some(Some(lit)) = node_info.call.arg_string_literals.first() {
|
||||
if is_string_prefix_trusted_destination(lit, trusted) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let Some(prefix) = node_info.string_prefix.as_deref() {
|
||||
if is_string_prefix_trusted_destination(prefix, trusted) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let Some(first_arg) = args.first() {
|
||||
if !first_arg.is_empty()
|
||||
&& first_arg.iter().all(|v| {
|
||||
abs.get(*v)
|
||||
.string
|
||||
.prefix
|
||||
.as_deref()
|
||||
.is_some_and(|p| is_string_prefix_trusted_destination(p, trusted))
|
||||
})
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Non-Call variant of [`is_call_data_exfil_destination_trusted`]: used by
|
||||
/// [`is_abstract_safe_for_sink`] where the destination is read off the
|
||||
/// instruction's own used SSA values rather than a positional Call arg
|
||||
/// list. Falls back to the node-attached `string_prefix` when no abstract
|
||||
/// fact is available.
|
||||
fn is_inst_data_exfil_destination_trusted(inst: &SsaInst, abs: &AbstractState, cfg: &Cfg) -> bool {
|
||||
let opts = crate::utils::detector_options::current();
|
||||
let trusted = &opts.data_exfil.trusted_destinations;
|
||||
if trusted.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let node_info = &cfg[inst.cfg_node];
|
||||
if let Some(prefix) = node_info.string_prefix.as_deref() {
|
||||
if is_string_prefix_trusted_destination(prefix, trusted) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
let used = inst_use_values(inst);
|
||||
if used.is_empty() {
|
||||
return false;
|
||||
}
|
||||
used.iter().all(|v| {
|
||||
abs.get(*v)
|
||||
.string
|
||||
.prefix
|
||||
.as_deref()
|
||||
.is_some_and(|p| is_string_prefix_trusted_destination(p, trusted))
|
||||
})
|
||||
}
|
||||
|
||||
/// SSRF safety: prefix includes scheme + full host + path separator.
|
||||
///
|
||||
/// Soundness: if the prefix contains `scheme://host/`, the attacker cannot
|
||||
|
|
@ -8026,6 +8356,21 @@ struct ResolvedSummary {
|
|||
/// retained; in that case `param_to_sink` alone still drives sink
|
||||
/// detection.
|
||||
param_to_sink_sites: Vec<(usize, SmallVec<[SinkSite; 1]>)>,
|
||||
/// Per-parameter gate-filter cap masks lifted from the callee's
|
||||
/// inner multi-gate sink call sites. Mirrors
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary::param_to_gate_filters`].
|
||||
///
|
||||
/// Each `(param_idx, label_caps)` entry says "this caller-side
|
||||
/// parameter flows to a callee-internal gated sink whose narrowed
|
||||
/// caps are `label_caps`". When non-empty, the multi-gate dispatch
|
||||
/// in [`collect_block_events`] expands one filter pass per entry so
|
||||
/// the emitted event's `sink_caps` reflect the gate-specific cap
|
||||
/// rather than the aggregate union, preserving SSRF-vs-DATA_EXFIL
|
||||
/// (and similar) attribution through wrapper functions.
|
||||
///
|
||||
/// Empty for label, local-summary, FuncSummary, and interop paths;
|
||||
/// these forms do not retain per-gate cap detail.
|
||||
param_to_gate_filters: Vec<(usize, Cap)>,
|
||||
propagates_taint: bool,
|
||||
propagating_params: Vec<usize>,
|
||||
/// Parameter indices whose container identity flows to return value.
|
||||
|
|
@ -8229,18 +8574,34 @@ fn resolve_callee_full(
|
|||
param_return_paths: vec![],
|
||||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
});
|
||||
}
|
||||
// Try label classification for the bound function (by leaf name)
|
||||
// Try label classification for the bound function (by leaf name).
|
||||
// Consult both flat rules (`classify_all`) and gated sinks: a
|
||||
// callback bound to a gated sink (e.g. passing
|
||||
// `child_process.exec` directly as the callback) still needs to
|
||||
// surface its `Sink` capability so the source/callback pairing
|
||||
// logic can match `param_to_sink` against the caller's source.
|
||||
// The gate's `payload_args` translate directly into
|
||||
// `param_to_sink` index entries.
|
||||
let labels = crate::labels::classify_all(
|
||||
transfer.lang.as_str(),
|
||||
&real_key.name,
|
||||
transfer.extra_labels,
|
||||
);
|
||||
if !labels.is_empty() {
|
||||
let gate_matches = crate::labels::classify_gated_sink(
|
||||
transfer.lang.as_str(),
|
||||
&real_key.name,
|
||||
|_| None,
|
||||
|_| None,
|
||||
|_| false,
|
||||
);
|
||||
if !labels.is_empty() || !gate_matches.is_empty() {
|
||||
let mut source_caps = Cap::empty();
|
||||
let mut sanitizer_caps = Cap::empty();
|
||||
let mut sink_caps = Cap::empty();
|
||||
let mut param_to_sink: Vec<(usize, Cap)> = vec![];
|
||||
for lbl in &labels {
|
||||
match lbl {
|
||||
DataLabel::Source(bits) => source_caps |= *bits,
|
||||
|
|
@ -8248,11 +8609,25 @@ fn resolve_callee_full(
|
|||
DataLabel::Sink(bits) => sink_caps |= *bits,
|
||||
}
|
||||
}
|
||||
for gm in gate_matches.iter() {
|
||||
if let DataLabel::Sink(bits) = gm.label {
|
||||
sink_caps |= bits;
|
||||
// Map the gate's payload_args to per-param sink entries
|
||||
// so source-to-callback pairing can match by index.
|
||||
// Skip the dynamic-activation sentinel — without a
|
||||
// concrete arity we can't enumerate positions here.
|
||||
if gm.payload_args != crate::labels::ALL_ARGS_PAYLOAD {
|
||||
for &idx in gm.payload_args {
|
||||
param_to_sink.push((idx, bits));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return Some(ResolvedSummary {
|
||||
source_caps,
|
||||
sanitizer_caps,
|
||||
sink_caps,
|
||||
param_to_sink: vec![],
|
||||
param_to_sink,
|
||||
param_to_sink_sites: vec![],
|
||||
propagates_taint: false,
|
||||
propagating_params: vec![],
|
||||
|
|
@ -8270,6 +8645,7 @@ fn resolve_callee_full(
|
|||
param_return_paths: vec![],
|
||||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -8414,6 +8790,7 @@ fn resolve_callee_full(
|
|||
param_return_paths: vec![],
|
||||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
});
|
||||
}
|
||||
} else {
|
||||
|
|
@ -8463,6 +8840,7 @@ fn resolve_callee_full(
|
|||
param_return_paths: vec![],
|
||||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
};
|
||||
match widened.len() {
|
||||
0 => {}
|
||||
|
|
@ -8533,6 +8911,7 @@ fn resolve_callee_full(
|
|||
param_return_paths: vec![],
|
||||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -8714,6 +9093,7 @@ fn convert_ssa_to_resolved_for_caller(
|
|||
param_return_paths: ssa_sum.param_return_paths.clone(),
|
||||
points_to: ssa_sum.points_to.clone(),
|
||||
field_points_to: ssa_sum.field_points_to.clone(),
|
||||
param_to_gate_filters: ssa_sum.param_to_gate_filters.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -8810,6 +9190,20 @@ fn merge_resolved_summaries_fanout(
|
|||
}
|
||||
}
|
||||
|
||||
// param_to_gate_filters: dedup-union (idx, caps) pairs. Each
|
||||
// implementer may carry its own per-position cap split; the union
|
||||
// preserves cap attribution from any implementer reachable via
|
||||
// virtual dispatch.
|
||||
for (idx, caps) in r.param_to_gate_filters {
|
||||
if !acc
|
||||
.param_to_gate_filters
|
||||
.iter()
|
||||
.any(|&(i, c)| i == idx && c == caps)
|
||||
{
|
||||
acc.param_to_gate_filters.push((idx, caps));
|
||||
}
|
||||
}
|
||||
|
||||
// SSA-precision fields: drop on any disagreement.
|
||||
if acc.return_type != r.return_type {
|
||||
acc.return_type = None;
|
||||
|
|
|
|||
|
|
@ -753,6 +753,8 @@ fn origin_sort_key(o: &TaintOrigin) -> (usize, usize, u8, usize) {
|
|||
crate::labels::SourceKind::Database => 3,
|
||||
crate::labels::SourceKind::CaughtException => 4,
|
||||
crate::labels::SourceKind::Unknown => 5,
|
||||
crate::labels::SourceKind::Cookie => 6,
|
||||
crate::labels::SourceKind::Header => 7,
|
||||
};
|
||||
(span_start, span_end, kind_tag, o.node.index())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -387,6 +387,15 @@ pub fn extract_ssa_func_summary_full(
|
|||
let mut param_to_return = Vec::new();
|
||||
let mut param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)> = Vec::new();
|
||||
let mut param_to_sink_param = Vec::new();
|
||||
// Per-param gate-filter cap masks lifted from inner multi-gate sink calls.
|
||||
// Populated when the per-param probe reaches a sink whose CFG node carries
|
||||
// [`crate::cfg::CallMeta::gate_filters`] with more than one entry; the
|
||||
// multi-gate dispatch in `collect_block_events` has already cap-narrowed
|
||||
// `event.sink_caps` to the matching gate's `label_caps`, so we record the
|
||||
// pair as-is. Cross-file callers consume this list to preserve per-position
|
||||
// cap attribution through wrapper functions like
|
||||
// `fn forward(url, body) { fetch(url, {body}) }`.
|
||||
let mut param_to_gate_filters: Vec<(usize, Cap)> = Vec::new();
|
||||
// Per-param return-path decomposition. Populated only when the param
|
||||
// has ≥2 distinct return-block predicate hashes, a single-return-path
|
||||
// callee is already precise via `param_to_return`.
|
||||
|
|
@ -541,6 +550,28 @@ pub fn extract_ssa_func_summary_full(
|
|||
for pos in extract_sink_arg_positions(event, ssa) {
|
||||
param_to_sink_param.push((idx, pos, event.sink_caps));
|
||||
}
|
||||
// Per-position gate-filter cap lifting.
|
||||
//
|
||||
// When the sink callee carries multiple gate filters (e.g. `fetch`
|
||||
// is both an SSRF gate on the URL arg and a `DATA_EXFIL` gate on
|
||||
// the body arg), the multi-gate dispatch has already filtered
|
||||
// `event.sink_caps` down to the specific gate's `label_caps` for
|
||||
// this probe. Recording `(idx, event.sink_caps)` preserves that
|
||||
// narrowing across the function-summary boundary so a caller of
|
||||
// the wrapper splits SSRF from DATA_EXFIL findings instead of
|
||||
// joining them under a single union.
|
||||
//
|
||||
// Single-gate / no-gate sinks are skipped; the existing
|
||||
// `param_to_sink` machinery already records those without
|
||||
// per-position cap conflict.
|
||||
if !event.sink_caps.is_empty()
|
||||
&& cfg[event.sink_node].call.gate_filters.len() > 1
|
||||
&& !param_to_gate_filters
|
||||
.iter()
|
||||
.any(|&(i, c)| i == idx && c == event.sink_caps)
|
||||
{
|
||||
param_to_gate_filters.push((idx, event.sink_caps));
|
||||
}
|
||||
if event.sink_caps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -641,6 +672,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_to_sink,
|
||||
source_caps,
|
||||
param_to_sink_param,
|
||||
param_to_gate_filters,
|
||||
param_container_to_return,
|
||||
param_to_container_store,
|
||||
return_type,
|
||||
|
|
|
|||
|
|
@ -85,6 +85,8 @@ mod cross_file_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
},
|
||||
opt: crate::ssa::OptimizeResult {
|
||||
const_values: std::collections::HashMap::new(),
|
||||
|
|
@ -832,6 +834,8 @@ mod primary_sink_location_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -963,6 +967,8 @@ mod goto_succ_propagation_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
|
||||
let cfg: Cfg = Graph::new();
|
||||
|
|
@ -1053,6 +1059,8 @@ mod goto_succ_propagation_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let cfg: Cfg = Graph::new();
|
||||
let interner = SymbolInterner::new();
|
||||
|
|
@ -1112,6 +1120,8 @@ mod goto_succ_propagation_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1298,6 +1308,8 @@ mod goto_succ_propagation_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1423,6 +1435,8 @@ mod receiver_candidates_field_proj_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: interner,
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1508,6 +1522,8 @@ mod receiver_candidates_field_proj_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: interner,
|
||||
field_writes: std::collections::HashMap::new(),
|
||||
|
||||
synthetic_externals: std::collections::HashSet::new(),
|
||||
};
|
||||
let cands =
|
||||
super::super::receiver_candidates_for_type_lookup(SsaValue(0), Some(&body), Lang::Go);
|
||||
|
|
@ -1550,6 +1566,7 @@ mod fanout_merge_tests {
|
|||
param_return_paths: vec![],
|
||||
points_to: Default::default(),
|
||||
field_points_to: Default::default(),
|
||||
param_to_gate_filters: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1909,6 +1926,7 @@ mod field_write_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
(body, cache_id)
|
||||
}
|
||||
|
|
@ -2206,6 +2224,7 @@ mod field_write_tests {
|
|||
m.insert(SsaValue(2), (SsaValue(0), cache_id));
|
||||
m
|
||||
},
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
|
||||
// v0 is Const → empty pt, the hook should not insert anything.
|
||||
|
|
@ -2437,6 +2456,8 @@ mod container_elem_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
|
||||
// Run pointer analysis first to confirm the result of `shift()`
|
||||
|
|
@ -2575,6 +2596,8 @@ mod container_elem_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(7));
|
||||
|
|
@ -2715,6 +2738,8 @@ mod container_elem_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner: crate::ssa::ir::FieldInterner::default(),
|
||||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
|
||||
let interner = SymbolInterner::new();
|
||||
|
|
@ -2838,6 +2863,8 @@ mod cross_call_field_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner,
|
||||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(7));
|
||||
(body, cache_id, pf)
|
||||
|
|
@ -3210,6 +3237,8 @@ mod field_taint_origin_cap_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner,
|
||||
field_writes: HashMap::new(),
|
||||
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
(body, cache_id, cfg, n_proj)
|
||||
}
|
||||
|
|
@ -3533,6 +3562,7 @@ mod pointer_lattice_worklist_tests {
|
|||
exception_edges: vec![],
|
||||
field_interner,
|
||||
field_writes,
|
||||
synthetic_externals: HashSet::new(),
|
||||
};
|
||||
|
||||
let mut interner = SymbolInterner::new();
|
||||
|
|
|
|||
|
|
@ -712,6 +712,10 @@ pub struct Config {
|
|||
pub output: OutputConfig,
|
||||
pub performance: PerformanceConfig,
|
||||
pub analysis: AnalysisRulesConfig,
|
||||
/// Per-detector knobs ([detectors.*] in nyx.conf). Currently exposes
|
||||
/// `[detectors.data_exfil]` for cross-boundary leak suppression.
|
||||
#[serde(default)]
|
||||
pub detectors: crate::utils::detector_options::DetectorOptions,
|
||||
pub server: ServerConfig,
|
||||
pub runs: RunsConfig,
|
||||
pub profiles: HashMap<String, ScanProfile>,
|
||||
|
|
@ -1018,6 +1022,17 @@ pub(crate) fn merge_configs(mut default: Config, user: Config) -> Config {
|
|||
default.profiles.insert(name, profile);
|
||||
}
|
||||
|
||||
// --- DetectorOptions ---
|
||||
// Wholesale replace: each `[detectors.*]` field uses #[serde(default)],
|
||||
// so any omitted field already inherits the documented defaults during
|
||||
// user-config deserialization. trusted_destinations is union-merged so
|
||||
// the user adds to (rather than replaces) any future built-in defaults.
|
||||
default.detectors.data_exfil.enabled = user.detectors.data_exfil.enabled;
|
||||
extend_dedup(
|
||||
&mut default.detectors.data_exfil.trusted_destinations,
|
||||
user.detectors.data_exfil.trusted_destinations,
|
||||
);
|
||||
|
||||
// --- AnalysisRulesConfig ---
|
||||
// Engine options: wholesale replace. User's engine block is already
|
||||
// serde-merged with defaults (via #[serde(default)] per field), so any
|
||||
|
|
|
|||
129
src/utils/detector_options.rs
Normal file
129
src/utils/detector_options.rs
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
//! Per-detector runtime options.
|
||||
//!
|
||||
//! Mirrors the install/current pattern in [`crate::utils::analysis_options`]
|
||||
//! but for detector-class knobs that live under `[detectors.*]` in
|
||||
//! `nyx.conf`. Engine code that wants to consult a detector option calls
|
||||
//! [`current`]; the CLI installs a resolved value before the scan starts.
|
||||
//!
|
||||
//! The first knobs covered here are the [`Cap::DATA_EXFIL`][crate::labels::Cap::DATA_EXFIL]
|
||||
//! suppression layers:
|
||||
//!
|
||||
//! * `enabled` — turn the cap off entirely per-project so legitimate
|
||||
//! forwarding pipelines don't surface findings.
|
||||
//! * `trusted_destinations` — destination URL prefixes that suppress the
|
||||
//! cap when a sink's URL argument has a static prefix matching one of
|
||||
//! them. Uses the same prefix-lock plumbing the SSRF suppression has.
|
||||
//!
|
||||
//! Defaults are conservative: detector enabled, no trusted destinations.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::RwLock;
|
||||
|
||||
/// Options for the `Cap::DATA_EXFIL` suppression layers.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct DataExfilDetectorOptions {
|
||||
/// When `false`, the entire data-exfiltration detector class is
|
||||
/// suppressed for the project. Sink-time filters drop
|
||||
/// [`crate::labels::Cap::DATA_EXFIL`] from sink caps before event
|
||||
/// emission, so no `taint-data-exfiltration` findings reach output.
|
||||
pub enabled: bool,
|
||||
/// URL prefixes treated as trusted destinations for outbound
|
||||
/// requests. When a sink's destination argument has a proven static
|
||||
/// prefix (from the abstract string domain or an inline literal)
|
||||
/// that begins with one of these entries, the
|
||||
/// [`crate::labels::Cap::DATA_EXFIL`] bit is dropped before event
|
||||
/// emission. Mirrors the SSRF prefix-lock semantics.
|
||||
pub trusted_destinations: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for DataExfilDetectorOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: true,
|
||||
trusted_destinations: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Top-level `[detectors]` block.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct DetectorOptions {
|
||||
pub data_exfil: DataExfilDetectorOptions,
|
||||
}
|
||||
|
||||
/// Process-wide slot holding the active detector options. It starts out as
/// `None`, is filled by [`install`] / [`reinstall`], and is cleared again by
/// [`_reset_for_tests`]; [`current`] falls back to the defaults while empty.
static RUNTIME: RwLock<Option<DetectorOptions>> = RwLock::new(None);
|
||||
|
||||
/// Install the process-wide detector options. First-wins: subsequent calls
|
||||
/// are a no-op and return `false`. The CLI calls this once per process at
|
||||
/// scan start; library consumers that never install pick up
|
||||
/// [`DetectorOptions::default`] via [`current`].
|
||||
pub fn install(opts: DetectorOptions) -> bool {
|
||||
let mut guard = RUNTIME.write().expect("detector options RwLock poisoned");
|
||||
if guard.is_some() {
|
||||
return false;
|
||||
}
|
||||
*guard = Some(opts);
|
||||
true
|
||||
}
|
||||
|
||||
/// Replace the installed options unconditionally. Mirrors
|
||||
/// [`crate::utils::analysis_options::reinstall`] for the server's
|
||||
/// per-request resolution path.
|
||||
pub fn reinstall(opts: DetectorOptions) {
|
||||
*RUNTIME.write().expect("detector options RwLock poisoned") = Some(opts);
|
||||
}
|
||||
|
||||
/// Read the active options. Returns the installed runtime when present,
|
||||
/// otherwise [`DetectorOptions::default`].
|
||||
pub fn current() -> DetectorOptions {
|
||||
RUNTIME
|
||||
.read()
|
||||
.expect("detector options RwLock poisoned")
|
||||
.clone()
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Test helper: clear the installed runtime so a subsequent [`install`]
|
||||
/// takes effect. Used only in tests that exercise different detector
|
||||
/// configurations within the same process.
|
||||
#[doc(hidden)]
|
||||
pub fn _reset_for_tests() {
|
||||
*RUNTIME.write().expect("detector options RwLock poisoned") = None;
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The documented defaults: detector enabled, nothing trusted.
    #[test]
    fn defaults_match_documented() {
        let DetectorOptions { data_exfil } = DetectorOptions::default();
        assert!(data_exfil.enabled);
        assert!(data_exfil.trusted_destinations.is_empty());
    }

    /// Serialising to TOML and parsing the result back yields an equal value.
    #[test]
    fn toml_roundtrip() {
        let data_exfil = DataExfilDetectorOptions {
            enabled: false,
            trusted_destinations: vec![
                String::from("https://api.internal/"),
                String::from("https://telemetry."),
            ],
        };
        let original = DetectorOptions { data_exfil };
        let encoded = toml::to_string(&original).unwrap();
        let decoded: DetectorOptions = toml::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// A document without any `data_exfil` table still deserialises, and the
    /// detector stays enabled.
    #[test]
    fn missing_section_uses_defaults() {
        let parsed: DetectorOptions = toml::from_str(r#"# empty"#).unwrap();
        assert!(parsed.data_exfil.enabled);
    }
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
pub mod analysis_options;
|
||||
pub mod config;
|
||||
pub mod detector_options;
|
||||
pub(crate) mod ext;
|
||||
pub mod path;
|
||||
pub mod project;
|
||||
|
|
@ -8,4 +9,5 @@ pub(crate) mod snippet;
|
|||
|
||||
pub use analysis_options::{AnalysisOptions, SymexOptions};
|
||||
pub use config::Config;
|
||||
pub use detector_options::{DataExfilDetectorOptions, DetectorOptions};
|
||||
pub use project::{detect_frameworks, get_project_info};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue