mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Added Cap::DATA_EXFIL and taint fp and fn fixes on real repos (#59)
* feat: Enhance data exfiltration detection with source sensitivity gating for cookies and headers * feat: Implement cross-file data exfiltration detection with parameter-specific gate filters * feat: Add calibration tests and refine DATA_EXFIL severity scoring logic * feat: Introduce per-detector configuration for data exfiltration suppression * feat: Enhance DATA_EXFIL findings with destination field tracking in diagnostics and SARIF output * feat: Add tainted body and URL handling for data exfiltration detection * feat: Add integration tests and fixtures for DATA_EXFIL and SSRF detection in Go * feat: Add Java integration tests and fixtures for DATA_EXFIL detection across multiple HTTP clients * feat: Add synthetic externals handling for closure-captured variables in SSA * feat: Implement closure-based suppression for resource leak findings * feat: Add regression guards for shell-injection and taint propagation in for-of destructure patterns * feat: Implement constructor cap narrowing for data exfiltration detection in HTTP request builders * feat: Add gated sinks for data exfiltration detection in C and C++ using curl_easy_setopt * feat: Implement DATA_EXFIL cap parity for backwards analysis and add integration tests * feat: Add data exfiltration sinks for various languages and enhance documentation * refactor: Simplify formatting and improve readability in various files * refactor: Improve readability by simplifying conditional statements and adding clippy linting * docs: Update CHANGELOG and comments for data exfiltration features and configuration * docs: Clarify configuration instructions for data exfiltration trusted destinations * docs: Enhance comments for evidence routing logic in data exfiltration
This commit is contained in:
parent
a438886217
commit
58f1794a4e
189 changed files with 8421 additions and 383 deletions
|
|
@ -1118,6 +1118,7 @@ fn clone_preserves_all_sub_structs() {
|
|||
arg_string_literals: vec![Some("lit".into())],
|
||||
destination_uses: None,
|
||||
gate_filters: Vec::new(),
|
||||
is_constructor: false,
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels: {
|
||||
|
|
|
|||
|
|
@ -373,11 +373,26 @@ pub(crate) fn first_member_label(
|
|||
if let Some(full) = member_expr_text(n, code) {
|
||||
// Try the full text first, then progressively strip the last segment
|
||||
// to match rules like "process.env" from "process.env.CMD".
|
||||
//
|
||||
// The strip-and-retry only ever yields a sound label for Sources:
|
||||
// `process.env.CMD` → strip → `process.env` makes sense because
|
||||
// the receiver itself IS the source. Sinks and Sanitizers, by
|
||||
// contrast, name the *operation* — `connection.query`, `eval`,
|
||||
// `exec` — and stripping a trailing segment to match them is
|
||||
// not semantically valid (e.g. `exec.start` should never be
|
||||
// treated as a SHELL_ESCAPE sink because of bare `exec`). We
|
||||
// accept any label on a full-text match (the behaviour callers
|
||||
// already depend on for Source/Sink labels alike), but only
|
||||
// accept Source labels after segment stripping.
|
||||
let mut candidate = full.as_str();
|
||||
let mut first = true;
|
||||
loop {
|
||||
if let Some(lbl) = classify(lang, candidate, extra_labels) {
|
||||
return Some(lbl);
|
||||
if first || matches!(lbl, DataLabel::Source(_)) {
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
first = false;
|
||||
match candidate.rsplit_once('.') {
|
||||
Some((prefix, _)) => candidate = prefix,
|
||||
None => break,
|
||||
|
|
|
|||
|
|
@ -38,25 +38,27 @@ pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Extract identifiers from specified fields of an object-literal argument.
|
||||
/// Extract `(field_name, ident_name)` pairs from specified fields of an
|
||||
/// object-literal argument.
|
||||
///
|
||||
/// Returns:
|
||||
/// * `Some(names)` if the positional argument at `index` IS an object literal
|
||||
/// (JS `object`, TS `object`, Python `dictionary`). `names` contains
|
||||
/// identifiers lifted from pair values whose key matches any entry in
|
||||
/// `fields` (case-sensitive; JS/TS identifiers). When no destination-field
|
||||
/// pairs are present, returns `Some(vec![])`, the sink is effectively
|
||||
/// silenced because no destination identifier exists.
|
||||
/// * `Some(pairs)` if the positional argument at `index` IS an object literal
|
||||
/// (JS `object`, TS `object`, Python `dictionary`). Each pair is
|
||||
/// `(field_name, ident_name)` where `field_name` is the matched key from
|
||||
/// `fields` and `ident_name` is an identifier lifted from that pair's
|
||||
/// value expression. When no destination-field pairs are present, returns
|
||||
/// `Some(vec![])`, the sink is effectively silenced because no destination
|
||||
/// identifier exists.
|
||||
/// * `None` if the arg is absent, is not an object literal (plain string
|
||||
/// / ident / expression), or has splat/spread children that break static
|
||||
/// per-field reasoning. Callers fall back to the whole-arg positional
|
||||
/// filter in this case.
|
||||
pub(super) fn extract_destination_field_idents(
|
||||
pub(super) fn extract_destination_field_pairs(
|
||||
call_node: Node,
|
||||
arg_index: usize,
|
||||
fields: &[&str],
|
||||
code: &[u8],
|
||||
) -> Option<Vec<String>> {
|
||||
) -> Option<Vec<(String, String)>> {
|
||||
if fields.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -71,7 +73,7 @@ pub(super) fn extract_destination_field_idents(
|
|||
return None;
|
||||
}
|
||||
|
||||
let mut out: Vec<String> = Vec::new();
|
||||
let mut out: Vec<(String, String)> = Vec::new();
|
||||
let mut c = arg.walk();
|
||||
for child in arg.named_children(&mut c) {
|
||||
match child.kind() {
|
||||
|
|
@ -88,8 +90,8 @@ pub(super) fn extract_destination_field_idents(
|
|||
let Some(name) = text_of(child, code) else {
|
||||
continue;
|
||||
};
|
||||
if fields.iter().any(|&f| f == name) && !out.contains(&name) {
|
||||
out.push(name);
|
||||
if fields.iter().any(|&f| f == name) && !out.iter().any(|(_, v)| v == &name) {
|
||||
out.push((name.clone(), name));
|
||||
}
|
||||
}
|
||||
"pair" => {
|
||||
|
|
@ -124,8 +126,8 @@ pub(super) fn extract_destination_field_idents(
|
|||
let mut paths: Vec<String> = Vec::new();
|
||||
collect_idents_with_paths(val_node, code, &mut idents, &mut paths);
|
||||
for name in paths.into_iter().chain(idents) {
|
||||
if !out.contains(&name) {
|
||||
out.push(name);
|
||||
if !out.iter().any(|(_, v)| v == &name) {
|
||||
out.push((key.clone(), name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -135,6 +137,62 @@ pub(super) fn extract_destination_field_idents(
|
|||
Some(out)
|
||||
}
|
||||
|
||||
/// Extract `(field_name, ident_name)` pairs from `keyword_argument` /
|
||||
/// `named_argument` children of a call whose keyword name matches one of
|
||||
/// `fields`. Used for languages where destination-bearing fields are passed
|
||||
/// as direct kwargs rather than wrapped in a dict literal, e.g. Python
|
||||
/// `requests.post(url, data=tainted, json=safe)` where `data` and `json` are
|
||||
/// `keyword_argument` siblings of the positional URL.
|
||||
///
|
||||
/// Returns the union of matching kwargs, preserving the kwarg name in the
|
||||
/// `field` slot so callers can still attribute findings per-field. Empty
|
||||
/// when no matching kwargs exist or the call has no `arguments` field.
|
||||
pub(super) fn extract_destination_kwarg_pairs(
|
||||
call_node: Node,
|
||||
fields: &[&str],
|
||||
code: &[u8],
|
||||
) -> Vec<(String, String)> {
|
||||
if fields.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let Some(args_node) = call_node.child_by_field_name("arguments") else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut out: Vec<(String, String)> = Vec::new();
|
||||
let mut cursor = args_node.walk();
|
||||
for child in args_node.named_children(&mut cursor) {
|
||||
let kind = child.kind();
|
||||
if kind != "keyword_argument" && kind != "named_argument" {
|
||||
continue;
|
||||
}
|
||||
let named_count = child.named_child_count();
|
||||
let name_node = child
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| child.named_child(0));
|
||||
let value_node = child
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32));
|
||||
let (Some(nn), Some(vn)) = (name_node, value_node) else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = text_of(nn, code) else {
|
||||
continue;
|
||||
};
|
||||
if !fields.iter().any(|&f| f == name) {
|
||||
continue;
|
||||
}
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(vn, code, &mut idents, &mut paths);
|
||||
for ident in paths.into_iter().chain(idents) {
|
||||
if !out.iter().any(|(_, v)| v == &ident) {
|
||||
out.push((name.clone(), ident));
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Extract the string-literal content at argument position `index` (0-based).
|
||||
/// Returns `None` if the argument is not a string literal or the index is out of range.
|
||||
pub(super) fn extract_const_string_arg(
|
||||
|
|
@ -144,7 +202,14 @@ pub(super) fn extract_const_string_arg(
|
|||
) -> Option<String> {
|
||||
let args = call_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let arg = args.named_children(&mut cursor).nth(index)?;
|
||||
let mut arg = args.named_children(&mut cursor).nth(index)?;
|
||||
// PHP / Go wrap each positional argument in an `argument` node; unwrap so
|
||||
// the kind-match below sees the inner literal.
|
||||
if arg.kind() == "argument" && arg.named_child_count() == 1 {
|
||||
if let Some(inner) = arg.named_child(0) {
|
||||
arg = inner;
|
||||
}
|
||||
}
|
||||
match arg.kind() {
|
||||
// `string` / `string_literal` cover JS/TS, Python, Java, PHP, C/C++, Ruby, Rust;
|
||||
// `interpreted_string_literal` / `raw_string_literal` cover Go's
|
||||
|
|
@ -177,6 +242,39 @@ pub(super) fn extract_const_string_arg(
|
|||
}
|
||||
}
|
||||
|
||||
/// Extract a macro-constant or `define`d identifier name at argument position
|
||||
/// `index` (0-based). Used for languages where activation values are
|
||||
/// preprocessor symbols rather than string literals — currently C, C++, and
|
||||
/// PHP define-constants like `CURLOPT_POSTFIELDS` whose syntactic form is an
|
||||
/// `identifier` / `name` node, not a `string`.
|
||||
///
|
||||
/// Returns `None` for any non-identifier shape so dynamic-activation
|
||||
/// semantics still apply when the activation arg is a runtime value
|
||||
/// (variable, expression, function call).
|
||||
pub(super) fn extract_const_macro_arg(
|
||||
call_node: Node,
|
||||
index: usize,
|
||||
code: &[u8],
|
||||
) -> Option<String> {
|
||||
let args = call_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let mut arg = args.named_children(&mut cursor).nth(index)?;
|
||||
if arg.kind() == "argument" && arg.named_child_count() == 1 {
|
||||
if let Some(inner) = arg.named_child(0) {
|
||||
arg = inner;
|
||||
}
|
||||
}
|
||||
match arg.kind() {
|
||||
// C/C++ identifier / PHP `name` node for define-style constants.
|
||||
// Scoped C++ identifiers (`Curl::OPT_POSTFIELDS`) and PHP namespaced
|
||||
// names also surface here so the dangerous_values match catches them.
|
||||
"identifier" | "name" | "qualified_name" | "scoped_identifier" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the value of a keyword argument from a call node (e.g. Python `shell=True`).
|
||||
/// Walks argument children looking for `keyword_argument` nodes, matches the keyword
|
||||
/// name, and extracts the value node text for literals.
|
||||
|
|
@ -1546,6 +1644,59 @@ pub(super) fn def_use(
|
|||
(None, uses, vec![])
|
||||
}
|
||||
|
||||
// for-in / for-of / Python `for x in iter:` ─────────────────────────
|
||||
//
|
||||
// Tree-sitter classifies these as `Kind::For` with a `left`/`right`
|
||||
// field pair (binding pattern + iterable). Without an explicit
|
||||
// arm here, the default branch collects every ident as a `use` and
|
||||
// never registers the iteration binding as a `define`, so taint
|
||||
// entering the iterable does not propagate into the body's
|
||||
// references to the binding (`for (const [a, b] of obj) { sink(a) }`
|
||||
// would lose the flow at `a`).
|
||||
//
|
||||
// C-style `for_statement` has no `left`/`right` fields (it uses
|
||||
// `initializer`/`condition`/`increment`), so this path falls through
|
||||
// to the default-collecting behaviour for those, preserving today's
|
||||
// semantics.
|
||||
Kind::For => {
|
||||
let left = ast.child_by_field_name("left");
|
||||
let right = ast.child_by_field_name("right");
|
||||
if left.is_none() && right.is_none() {
|
||||
// C-style for, defer to default ident collection.
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
return (None, uses, vec![]);
|
||||
}
|
||||
|
||||
let mut defs: Option<String> = None;
|
||||
let mut extra_defs: Vec<String> = Vec::new();
|
||||
let mut uses: Vec<String> = Vec::new();
|
||||
|
||||
if let Some(pat) = left {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
}
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
if let Some(val) = right {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(val, code, &mut idents, &mut paths);
|
||||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
}
|
||||
(defs, uses, extra_defs)
|
||||
}
|
||||
|
||||
// everything else – no definition, but may read vars
|
||||
_ => {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -1557,3 +1708,225 @@ pub(super) fn def_use(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One match from [`extract_shell_array_payload_idents`].
///
/// `arg_position` is the positional argument index of the call where the
/// shell-array literal was found. `payload_idents` is the union of
/// identifiers (and dotted paths) lifted from the array's payload elements
/// (positions 2+, i.e. everything after the shell name and its
/// command flag). `payload_idents` is never empty: an array whose payload
/// references no identifiers (a constant command string) produces no match
/// at all, so no SHELL_ESCAPE finding is possible for it.
|
||||
#[derive(Debug, Clone)]
|
||||
pub(super) struct ShellArrayMatch {
|
||||
pub arg_position: usize,
|
||||
pub payload_idents: Vec<String>,
|
||||
}
|
||||
|
||||
/// Detect inline shell-execution array literals at a call site.
|
||||
///
|
||||
/// Recognises the pattern `[<shell>, "-c", <payload>]` (POSIX shells) and
|
||||
/// `[<cmd-shell>, "/c"|"/C", <payload>]` (Windows `cmd.exe`) appearing as
|
||||
/// either:
|
||||
/// * a direct positional argument of `call_node`, or
|
||||
/// * the value of any field within an object-literal positional argument
|
||||
/// (covers `container.exec({Cmd: ["bash", "-c", x]})` form).
|
||||
///
|
||||
/// Returns one [`ShellArrayMatch`] per detected shell-array. Empty when the
|
||||
/// call has no shell-array literals.
|
||||
///
|
||||
/// The shell-name list is intentionally narrow (POSIX shells + Windows
|
||||
/// `cmd.exe`/`powershell`) to avoid false positives on benign array literals
|
||||
/// like `["ls", "-la"]` or `["git", "rev-parse", "HEAD"]`, where element 0 is
|
||||
/// not a shell. Element 1 must be a literal `-c` (POSIX) or `/c`/`/C` (cmd);
|
||||
/// otherwise the array is not in shell-exec form regardless of element 0.
|
||||
///
|
||||
/// Identifiers from elements at positions 2+ are lifted via
|
||||
/// [`collect_idents_with_paths`] so template-literal interpolations
|
||||
/// (`` `echo ${x}` ``), member-expressions (`obj.field`), and bare idents are
|
||||
/// all captured. Dedup is preserved across array elements so a single ident
|
||||
/// referenced in multiple payload positions appears once.
|
||||
pub(super) fn extract_shell_array_payload_idents(
|
||||
call_node: Node,
|
||||
code: &[u8],
|
||||
) -> Vec<ShellArrayMatch> {
|
||||
let mut out = Vec::new();
|
||||
let Some(args_node) = call_node.child_by_field_name("arguments") else {
|
||||
return out;
|
||||
};
|
||||
let mut cursor = args_node.walk();
|
||||
for (idx, child) in args_node.named_children(&mut cursor).enumerate() {
|
||||
let kind = child.kind();
|
||||
// Splats break positional indexing; bail conservatively on the whole call.
|
||||
if kind == "spread_element"
|
||||
|| kind == "dictionary_splat"
|
||||
|| kind == "list_splat"
|
||||
|| kind == "splat_argument"
|
||||
|| kind == "hash_splat_argument"
|
||||
{
|
||||
return Vec::new();
|
||||
}
|
||||
if kind == "keyword_argument" || kind == "named_argument" {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Direct array-literal arg.
|
||||
if let Some(idents) = shell_array_payload_idents_of(child, code) {
|
||||
out.push(ShellArrayMatch {
|
||||
arg_position: idx,
|
||||
payload_idents: idents,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Object-literal arg whose field value is a shell-array literal.
|
||||
// Covers `container.exec({Cmd: [...]})` form. Field name is not
|
||||
// restricted to `Cmd` / `cmd`: the shell-shape itself is the gate,
|
||||
// and the payload extraction is per-array.
|
||||
if matches!(kind, "object" | "dictionary") {
|
||||
let mut cc = child.walk();
|
||||
for pair in child.named_children(&mut cc) {
|
||||
if pair.kind() != "pair" {
|
||||
continue;
|
||||
}
|
||||
let Some(val_node) = pair.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
let val_node = unwrap_parens(val_node);
|
||||
if let Some(idents) = shell_array_payload_idents_of(val_node, code) {
|
||||
out.push(ShellArrayMatch {
|
||||
arg_position: idx,
|
||||
payload_idents: idents,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// If `node` is an array literal of shape `[<shell>, "-c", *]` (POSIX shells)
|
||||
/// or `[<cmd-shell>, "/c", *]` (Windows cmd.exe), return the identifiers
|
||||
/// referenced in the payload elements (positions 2+). Otherwise return
|
||||
/// `None`. Returning `Some(vec![])` means the payload is a constant string
|
||||
/// — caller should still skip emitting a sink (no taint can reach a literal).
|
||||
fn shell_array_payload_idents_of(node: Node, code: &[u8]) -> Option<Vec<String>> {
|
||||
let node = unwrap_parens(node);
|
||||
if node.kind() != "array" {
|
||||
return None;
|
||||
}
|
||||
// Walk named children to skip commas and other trivia.
|
||||
let mut cursor = node.walk();
|
||||
let elems: Vec<Node> = node.named_children(&mut cursor).collect();
|
||||
if elems.len() < 3 {
|
||||
return None;
|
||||
}
|
||||
let shell = const_string_value(elems[0], code)?;
|
||||
if !is_known_shell(&shell) {
|
||||
return None;
|
||||
}
|
||||
let flag = const_string_value(elems[1], code)?;
|
||||
if !is_shell_command_flag(&shell, &flag) {
|
||||
return None;
|
||||
}
|
||||
// Lift identifiers from the payload elements (positions 2+). Constants
|
||||
// contribute nothing. An empty result means the entire payload is
|
||||
// statically benign.
|
||||
let mut idents: Vec<String> = Vec::new();
|
||||
let mut paths: Vec<String> = Vec::new();
|
||||
for elem in &elems[2..] {
|
||||
collect_idents_with_paths(*elem, code, &mut idents, &mut paths);
|
||||
}
|
||||
let mut combined = paths;
|
||||
combined.extend(idents);
|
||||
// Dedup (preserve first-seen order).
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
combined.retain(|s| seen.insert(s.clone()));
|
||||
if combined.is_empty() {
|
||||
// Static payload — no taint can reach it. Return None so the caller
|
||||
// does not emit a useless sink filter.
|
||||
return None;
|
||||
}
|
||||
Some(combined)
|
||||
}
|
||||
|
||||
/// Extract a constant string value from `node`, handling JS/TS `string` /
|
||||
/// `template_string` (no interpolation) forms. Returns `None` for dynamic
|
||||
/// values, identifiers, or expressions.
|
||||
fn const_string_value(node: Node, code: &[u8]) -> Option<String> {
|
||||
let node = unwrap_parens(node);
|
||||
match node.kind() {
|
||||
"string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => {
|
||||
let raw = text_of(node, code)?;
|
||||
if raw.len() >= 2 {
|
||||
Some(raw[1..raw.len() - 1].to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
"template_string" => {
|
||||
let mut c = node.walk();
|
||||
if node
|
||||
.named_children(&mut c)
|
||||
.any(|ch| ch.kind() == "template_substitution")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let raw = text_of(node, code)?;
|
||||
if raw.len() >= 2 {
|
||||
Some(raw[1..raw.len() - 1].to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Final path component of a shell executable name.
///
/// Both `/` and `\` are treated as separators so that `/bin/bash` yields
/// `bash` and `C:\Windows\System32\cmd.exe` yields `cmd.exe`. The input is
/// raw source text of a string literal, so an escaped `\\` separator still
/// splits correctly.
fn shell_leaf(name: &str) -> &str {
    name.rsplit(['/', '\\']).next().unwrap_or(name)
}

/// True when `leaf` names the Windows command interpreter. Windows file
/// names are case-insensitive, so the comparison ignores ASCII case.
fn is_cmd_interpreter(leaf: &str) -> bool {
    ["cmd", "cmd.exe"]
        .iter()
        .any(|known| leaf.eq_ignore_ascii_case(known))
}

/// True when `leaf` names a PowerShell executable (ASCII case-insensitive).
fn is_powershell_interpreter(leaf: &str) -> bool {
    ["powershell", "powershell.exe", "pwsh", "pwsh.exe"]
        .iter()
        .any(|known| leaf.eq_ignore_ascii_case(known))
}

/// Known shell executable names that activate the shell-array detector.
///
/// Scoped narrowly to POSIX shells plus the Windows command interpreters,
/// listing only canonical names so benign arrays like `["ls", ...]`,
/// `["git", ...]`, or `["python", ...]` do not match. Any directory prefix
/// (with `/` or `\` separators) is ignored. POSIX shell names must match
/// exactly; `cmd` / PowerShell names match regardless of ASCII case.
fn is_known_shell(name: &str) -> bool {
    let leaf = shell_leaf(name);
    matches!(
        leaf,
        "bash" | "sh" | "zsh" | "dash" | "ksh" | "fish" | "ash" | "tcsh" | "csh"
    ) || is_cmd_interpreter(leaf)
        || is_powershell_interpreter(leaf)
}

/// True when `flag` is the "execute the following string as a shell command"
/// switch for the given `shell`.
///
/// * `cmd` / `cmd.exe`: `/c`, `/C`, `/k`, `/K`.
/// * PowerShell (`powershell`, `pwsh`, with or without `.exe`): `-c`,
///   `-Command`, `-EncodedCommand`, compared ignoring ASCII case because
///   PowerShell parameter names are case-insensitive.
/// * Anything else is treated as a POSIX shell: exactly `-c`.
///
/// `shell` may carry a directory prefix; it is reduced to its leaf name the
/// same way [`is_known_shell`] does, so the two functions always agree on
/// which interpreter family a name belongs to.
fn is_shell_command_flag(shell: &str, flag: &str) -> bool {
    let leaf = shell_leaf(shell);
    if is_cmd_interpreter(leaf) {
        return matches!(flag, "/c" | "/C" | "/k" | "/K");
    }
    if is_powershell_interpreter(leaf) {
        return ["-c", "-command", "-encodedcommand"]
            .iter()
            .any(|known| flag.eq_ignore_ascii_case(known));
    }
    // POSIX shells.
    flag == "-c"
}
|
||||
|
|
|
|||
317
src/cfg/mod.rs
317
src/cfg/mod.rs
|
|
@ -52,10 +52,11 @@ use literals::has_sql_placeholders;
|
|||
use literals::{
|
||||
arg0_kind_and_interpolation, call_ident_of, def_use, detect_go_replace_call_sanitizer,
|
||||
detect_rust_replace_chain_sanitizer, extract_arg_callees, extract_arg_string_literals,
|
||||
extract_arg_uses, extract_const_keyword_arg, extract_const_string_arg,
|
||||
extract_destination_field_idents, extract_kwargs, extract_literal_rhs, find_call_node,
|
||||
find_call_node_deep, find_chained_inner_call, has_keyword_arg, has_only_literal_args,
|
||||
is_parameterized_query_call, java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
extract_arg_uses, extract_const_keyword_arg, extract_const_macro_arg, extract_const_string_arg,
|
||||
extract_destination_field_pairs, extract_destination_kwarg_pairs, extract_kwargs,
|
||||
extract_literal_rhs, extract_shell_array_payload_idents, find_call_node, find_call_node_deep,
|
||||
find_chained_inner_call, has_keyword_arg, has_only_literal_args, is_parameterized_query_call,
|
||||
java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
js_chain_outer_method_for_inner, ruby_chain_arg0_for_method, walk_chain_inner_call_args,
|
||||
};
|
||||
use params::{
|
||||
|
|
@ -312,6 +313,15 @@ pub struct CallMeta {
|
|||
/// [`Self::destination_uses`]).
|
||||
#[serde(default)]
|
||||
pub gate_filters: Vec<GateFilter>,
|
||||
/// True when this call expression is a constructor invocation
|
||||
/// (e.g. JS/TS `new Stripe(key)`, PHP `new PDO(...)`). The SSA Call
|
||||
/// transfer uses this to narrow the constructed value's caps: a wrapper
|
||||
/// object instance is structurally not a path string, format string,
|
||||
/// URL component, or JSON input, so out-of-process side-effect bits
|
||||
/// (FILE_IO, FMT_STRING, URL_ENCODE, JSON_PARSE) on the arguments
|
||||
/// must not survive into the constructed object.
|
||||
#[serde(default)]
|
||||
pub is_constructor: bool,
|
||||
}
|
||||
|
||||
/// One gate's contribution at a call site whose callee matches multiple
|
||||
|
|
@ -329,6 +339,15 @@ pub struct GateFilter {
|
|||
/// considers SSA values whose `var_name` matches one of `names` (object-
|
||||
/// literal destination fields lifted at CFG time). `None` ⇒ whole arg.
|
||||
pub destination_uses: Option<Vec<String>>,
|
||||
/// Parallel to [`Self::destination_uses`]: for each entry, the
|
||||
/// destination object-literal field name (e.g. `"body"`, `"headers"`,
|
||||
/// `"json"`) where the corresponding ident was bound. Empty when
|
||||
/// `destination_uses` is `None` or the gate had no
|
||||
/// `object_destination_fields` configured. Consumed by diag rendering
|
||||
/// to embed the destination field in `DATA_EXFIL` messages and SARIF
|
||||
/// `properties.data_exfil_field`.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub destination_fields: Vec<String>,
|
||||
}
|
||||
|
||||
/// Taint-classification and variable-flow metadata.
|
||||
|
|
@ -450,6 +469,13 @@ pub struct NodeInfo {
|
|||
/// up the field's declared `TypeKind`. Strictly additive, when
|
||||
/// `None`, the legacy copy-prop semantics apply.
|
||||
pub member_field: Option<String>,
|
||||
/// True when this assignment / declaration's RHS is a function or
|
||||
/// lambda literal (`obj.handler = (e) => {...}`, `let f = function(){}`).
|
||||
/// State analysis uses this to suppress resource-ownership transfer:
|
||||
/// storing a function reference into a property does not move the
|
||||
/// resources captured by the closure body, so the lifecycle of those
|
||||
/// captures must remain unchanged on the assignment node.
|
||||
pub rhs_is_function_literal: bool,
|
||||
}
|
||||
|
||||
impl NodeInfo {
|
||||
|
|
@ -1564,6 +1590,92 @@ pub(super) fn push_node<'a>(
|
|||
let extra = analysis_rules.map(|r| r.extra_labels.as_slice());
|
||||
let mut labels = classify_all(lang, &text, extra);
|
||||
|
||||
// Rust chain-text classification. The default `text` for a Rust
|
||||
// CallMethod is `{root_receiver}.{method}`, where `root_receiver`
|
||||
// is the leftmost identifier after walking through every nested
|
||||
// call/method receiver. That convention loses the intermediate
|
||||
// chain methods, so a body-binding chain like
|
||||
// `Client::post(url).body(payload).send()` reduces to
|
||||
// `Client::post.send` and rules keyed on `body.send` /
|
||||
// `RequestBuilder.body` cannot fire.
|
||||
//
|
||||
// Reclassify against the call-AST's source text (with paren groups
|
||||
// stripped) so suffix matchers covering chain shapes
|
||||
// (`body.send`, `body_string`, `Request::builder.body`, ...) attach.
|
||||
// Strictly additive: we union new labels with the existing ones,
|
||||
// never override. Limited to Rust to avoid disturbing the other
|
||||
// languages' chain conventions.
|
||||
if lang == "rust" {
|
||||
if let Some(cn) = find_call_node(ast, lang) {
|
||||
if let Some(chain_raw) = text_of(cn, code) {
|
||||
// Multi-line Rust chains (`Client::new()\n .post(url)\n
|
||||
// .body(p)\n .send()`) preserve interior whitespace in
|
||||
// the source slice, which would prevent suffix matchers
|
||||
// like `body.send` from firing. Strip whitespace before
|
||||
// normalizing paren groups, mirroring the same trick
|
||||
// used by `find_chained_inner_call` for JS/TS chains.
|
||||
let chain_compact: String =
|
||||
chain_raw.chars().filter(|c| !c.is_whitespace()).collect();
|
||||
let chain_text = crate::labels::normalize_chained_call_for_classify(&chain_compact);
|
||||
if chain_text != text {
|
||||
let chain_labels = classify_all(lang, &chain_text, extra);
|
||||
for l in chain_labels {
|
||||
if !labels.contains(&l) {
|
||||
labels.push(l);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also try classification against the chain with
|
||||
// trailing identity methods peeled. Rust chains often
|
||||
// end in `.unwrap()` / `.expect("...")` / `.await` /
|
||||
// `.clone()` etc., which obscure the body-bind verb
|
||||
// for suffix matchers. E.g. hyper's
|
||||
// `Request::builder().method(..).uri(..).body(p).unwrap()`
|
||||
// peels to `...body`, allowing a simpler `body` /
|
||||
// `Request::builder.body` matcher to fire.
|
||||
let peeled = crate::ssa::type_facts::peel_identity_suffix(&chain_text);
|
||||
if peeled != chain_text && peeled != text {
|
||||
let peeled_labels = classify_all(lang, &peeled, extra);
|
||||
for l in peeled_labels {
|
||||
if !labels.contains(&l) {
|
||||
labels.push(l);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pattern synthesis: the hyper request-builder chain
|
||||
// (`hyper::Request::builder().method(..).uri(..).body(p)`)
|
||||
// can interleave `.method`, `.uri`, `.header`, `.version`
|
||||
// etc. between `Request::builder` and the body-bind step.
|
||||
// Suffix matchers can't span those, so synthesise a
|
||||
// DATA_EXFIL sink whenever the chain begins with
|
||||
// `Request::builder` and ends in a body-binding verb.
|
||||
// Strictly additive: no labels are removed, only added,
|
||||
// and the synthesis only fires when an explicit Sink
|
||||
// hasn't already attached.
|
||||
let chain_for_synth = if peeled != chain_text {
|
||||
&peeled
|
||||
} else {
|
||||
&chain_text
|
||||
};
|
||||
if !labels
|
||||
.iter()
|
||||
.any(|l| matches!(l, DataLabel::Sink(c) if c.contains(crate::labels::Cap::DATA_EXFIL)))
|
||||
&& (chain_for_synth.contains("Request::builder.")
|
||||
|| chain_for_synth.contains("hyper::Request::builder."))
|
||||
{
|
||||
let last_seg =
|
||||
chain_for_synth.rsplit('.').next().unwrap_or(chain_for_synth);
|
||||
if matches!(
|
||||
last_seg,
|
||||
"body" | "body_mut" | "body_string" | "body_json" | "body_bytes"
|
||||
) {
|
||||
labels.push(DataLabel::Sink(crate::labels::Cap::DATA_EXFIL));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the outermost call didn't classify, try inner/nested calls.
|
||||
// E.g. `str(eval(expr))`, `str` is not a sink, but `eval` is.
|
||||
// When the callee is overridden, save the original for container ops
|
||||
|
|
@ -1727,7 +1839,23 @@ pub(super) fn push_node<'a>(
|
|||
let mut sink_payload_args: Option<Vec<usize>> = None;
|
||||
let mut destination_uses: Option<Vec<String>> = None;
|
||||
let mut gate_filters: Vec<GateFilter> = Vec::new();
|
||||
if labels.is_empty() {
|
||||
// Gates run when no flat `Sink` label is already present, OR when a
|
||||
// matching gate restricts the payload-arg set on top of an existing flat
|
||||
// sink. Source / Sanitizer labels are orthogonal — a callee like
|
||||
// Python's `requests.post` is a `Source` for its response object AND a
|
||||
// gated `Sink` for its URL/body argument positions; both should attach.
|
||||
//
|
||||
// Payload-arg refinement: when a flat sink matches a callee that ALSO
|
||||
// has a gate entry restricting `payload_args`, the gate's `payload_args`
|
||||
// are propagated to `sink_payload_args` so only those positions are
|
||||
// taint-checked. Example: `execSync(cmd, { env: process.env })` matches
|
||||
// the bare `execSync` flat `Sink(SHELL_ESCAPE)` AND the gate `=execSync`
|
||||
// with `payload_args: &[0]`; without the refinement, the flat rule's
|
||||
// implicit "all args" would flag `process.env` flowing into the options
|
||||
// object's `env` field. The gate's labels themselves are deduped so a
|
||||
// single capability never double-attributes.
|
||||
let has_sink_label = labels.iter().any(|l| matches!(l, DataLabel::Sink(_)));
|
||||
{
|
||||
let gate_call = call_ast.or_else(|| find_call_node_deep(ast, lang, 4));
|
||||
if let Some(cn) = gate_call {
|
||||
let gate_callee_text = if call_ast.is_some() {
|
||||
|
|
@ -1746,7 +1874,22 @@ pub(super) fn push_node<'a>(
|
|||
let matches = classify_gated_sink(
|
||||
lang,
|
||||
&gate_callee_text,
|
||||
|idx| extract_const_string_arg(cn, idx, code),
|
||||
|idx| {
|
||||
extract_const_string_arg(cn, idx, code).or_else(|| {
|
||||
// C/C++ preprocessor macros and PHP `define`d constants
|
||||
// surface as identifier nodes, not string literals.
|
||||
// Falling back to the macro-arg extractor for those
|
||||
// languages lets gates like `curl_easy_setopt` /
|
||||
// `curl_setopt` activate on a `CURLOPT_POSTFIELDS`
|
||||
// ident match instead of firing conservatively on
|
||||
// every positional arg.
|
||||
if matches!(lang, "c" | "cpp" | "c++" | "php") {
|
||||
extract_const_macro_arg(cn, idx, code)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
},
|
||||
|kw| extract_const_keyword_arg(cn, kw, code),
|
||||
|kw| has_keyword_arg(cn, kw, code),
|
||||
);
|
||||
|
|
@ -1758,11 +1901,23 @@ pub(super) fn push_node<'a>(
|
|||
// * a `GateFilter` carrying that gate's specific
|
||||
// `(label_caps, payload_args, destination_uses)` so
|
||||
// the SSA sink scan can attribute taint per-cap.
|
||||
//
|
||||
// When a flat sink already matches, gate labels are deduped
|
||||
// so the same capability isn't attributed twice (once flat,
|
||||
// once gated). Their `payload_args` still flow into
|
||||
// `sink_payload_args` so the gate's arg-position restriction
|
||||
// applies on top of the flat sink.
|
||||
let mut union_payload: Vec<usize> = Vec::new();
|
||||
for gm in &matches {
|
||||
labels.push(gm.label);
|
||||
if has_sink_label {
|
||||
if !labels.contains(&gm.label) {
|
||||
labels.push(gm.label);
|
||||
}
|
||||
} else {
|
||||
labels.push(gm.label);
|
||||
}
|
||||
|
||||
let payload_vec: Vec<usize> =
|
||||
let mut payload_vec: Vec<usize> =
|
||||
if gm.payload_args == crate::labels::ALL_ARGS_PAYLOAD {
|
||||
// Dynamic-activation sentinel: every positional arg is
|
||||
// conservatively a payload. Expand using the actual
|
||||
|
|
@ -1780,19 +1935,57 @@ pub(super) fn push_node<'a>(
|
|||
// checks to identifiers under those fields. Non-object
|
||||
// arg forms return `None` from the extractor and the gate
|
||||
// falls back to whole-arg positional filtering.
|
||||
//
|
||||
// The pair form preserves which object-literal field each
|
||||
// ident was bound to (e.g. `body` vs `headers` vs `json`)
|
||||
// so diag rendering can attribute `DATA_EXFIL` findings to
|
||||
// a specific destination field.
|
||||
let mut dest_uses: Option<Vec<String>> = None;
|
||||
let mut dest_fields: Vec<String> = Vec::new();
|
||||
if !gm.object_destination_fields.is_empty() {
|
||||
let mut all_pairs: Vec<(String, String)> = Vec::new();
|
||||
let mut had_object_match = false;
|
||||
for &pos in gm.payload_args {
|
||||
if let Some(names) = extract_destination_field_idents(
|
||||
if let Some(pairs) = extract_destination_field_pairs(
|
||||
cn,
|
||||
pos,
|
||||
gm.object_destination_fields,
|
||||
code,
|
||||
) {
|
||||
dest_uses = Some(names);
|
||||
all_pairs.extend(pairs);
|
||||
had_object_match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Direct kwargs: languages where destination-bearing
|
||||
// fields are passed as `keyword_argument` siblings of
|
||||
// the positional args (Python `data=`, Ruby kwargs).
|
||||
// SSA lowering folds kwarg idents into the implicit
|
||||
// args group at index `arity`, so we expand
|
||||
// `payload_vec` to include that position; the
|
||||
// `destination_filter` then narrows to the kwarg
|
||||
// ident's `var_name`.
|
||||
let kwarg_pairs =
|
||||
extract_destination_kwarg_pairs(cn, gm.object_destination_fields, code);
|
||||
if !kwarg_pairs.is_empty() {
|
||||
let arity = extract_arg_uses(cn, code).len();
|
||||
if !payload_vec.contains(&arity) {
|
||||
payload_vec.push(arity);
|
||||
}
|
||||
for pair in kwarg_pairs {
|
||||
if !all_pairs.iter().any(|(_, v)| v == &pair.1) {
|
||||
all_pairs.push(pair);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if had_object_match || !all_pairs.is_empty() {
|
||||
let (fields, vars): (Vec<String>, Vec<String>) =
|
||||
all_pairs.into_iter().unzip();
|
||||
dest_uses = Some(vars);
|
||||
dest_fields = fields;
|
||||
}
|
||||
}
|
||||
|
||||
let label_caps = match gm.label {
|
||||
|
|
@ -1809,6 +2002,7 @@ pub(super) fn push_node<'a>(
|
|||
label_caps,
|
||||
payload_args: payload_vec,
|
||||
destination_uses: dest_uses,
|
||||
destination_fields: dest_fields,
|
||||
});
|
||||
}
|
||||
if !union_payload.is_empty() {
|
||||
|
|
@ -1826,6 +2020,65 @@ pub(super) fn push_node<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// ── Inline shell-array sink synthesis ────────────────────────────────
|
||||
//
|
||||
// Recognise `[<shell>, "-c", <payload>]` (and `cmd /c <payload>`)
|
||||
// appearing as an argument to *any* call. The shell-array shape itself
|
||||
// is the gate, regardless of callee, so this fires through user-defined
|
||||
// wrappers like `execInContainer(id, ["bash", "-c", `echo ${tainted}`])`
|
||||
// without needing per-wrapper summary annotations. Only fires for JS/TS
|
||||
// because the array-literal grammar (`array` node) and shell-form usage
|
||||
// are JS/TS conventions; other languages use different shapes for
|
||||
// shell-exec wrappers.
|
||||
//
|
||||
// The inner array also covers Dockerode's
|
||||
// `container.exec({Cmd: [shell, "-c", payload]})`: the helper looks
|
||||
// inside object-literal args for shell-array values under any field.
|
||||
//
|
||||
// Existing FP carve-outs are preserved. `["ls", "-la"]` doesn't match
|
||||
// (element 0 is not a known shell). `untaintedArrayVariable` doesn't
|
||||
// match (variable, not literal). `execSync(cmd, { env: process.env })`
|
||||
// doesn't match (string + object args, no shell-array literal). When
|
||||
// the payload elements are constant strings the helper returns no
|
||||
// match, so a literal `["bash", "-c", "ls -la"]` doesn't fire either.
|
||||
if matches!(lang, "javascript" | "js" | "typescript" | "ts") {
|
||||
if let Some(cn) = call_ast.or_else(|| find_call_node_deep(ast, lang, 4)) {
|
||||
let shell_matches = extract_shell_array_payload_idents(cn, code);
|
||||
if !shell_matches.is_empty() {
|
||||
let shell_label = DataLabel::Sink(Cap::SHELL_ESCAPE);
|
||||
let already_has_shell_sink = labels.iter().any(|l| match l {
|
||||
DataLabel::Sink(c) => c.contains(Cap::SHELL_ESCAPE),
|
||||
_ => false,
|
||||
});
|
||||
if !already_has_shell_sink {
|
||||
labels.push(shell_label);
|
||||
}
|
||||
|
||||
let mut union_payload: Vec<usize> = sink_payload_args.clone().unwrap_or_default();
|
||||
for sm in shell_matches {
|
||||
if !union_payload.contains(&sm.arg_position) {
|
||||
union_payload.push(sm.arg_position);
|
||||
}
|
||||
gate_filters.push(GateFilter {
|
||||
label_caps: Cap::SHELL_ESCAPE,
|
||||
payload_args: vec![sm.arg_position],
|
||||
destination_uses: Some(sm.payload_idents),
|
||||
destination_fields: Vec::new(),
|
||||
});
|
||||
}
|
||||
if !union_payload.is_empty() {
|
||||
sink_payload_args = Some(union_payload);
|
||||
}
|
||||
// Legacy single-gate path: when this is the only gate filter,
|
||||
// populate the top-level destination_uses too so the SSA
|
||||
// fast-path stays consistent with the multi-gate behaviour.
|
||||
if gate_filters.len() == 1 {
|
||||
destination_uses = gate_filters[0].destination_uses.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pattern-based sanitizer synthesis: recognise a Rust
|
||||
// `param.replace(LIT, LIT)[.replace(LIT, LIT)]*` chain that provably strips
|
||||
// path-traversal or HTML metacharacters. The CFG collapses the whole
|
||||
|
|
@ -2296,6 +2549,20 @@ pub(super) fn push_node<'a>(
|
|||
// just bloat every labeled Call node.
|
||||
let callee_span = inner_callee_span.or(inner_text_span).filter(|s| *s != span);
|
||||
|
||||
// Constructor detection: a `new X(...)` call carries different cap
|
||||
// semantics than a plain function call. The SSA Call transfer uses
|
||||
// this flag to narrow the constructed value's caps so out-of-process
|
||||
// side-effect bits (FILE_IO, FMT_STRING, URL_ENCODE, JSON_PARSE) on
|
||||
// the arguments don't survive into a wrapper-object instance.
|
||||
// Recognised forms:
|
||||
// * JS/TS/C++ `new_expression`
|
||||
// * Java `object_creation_expression`
|
||||
// * PHP `object_creation_expression`
|
||||
let is_constructor = ast.kind() == "new_expression"
|
||||
|| ast.kind() == "object_creation_expression"
|
||||
|| call_ast
|
||||
.is_some_and(|cn| matches!(cn.kind(), "new_expression" | "object_creation_expression"));
|
||||
|
||||
let idx = g.add_node(NodeInfo {
|
||||
kind,
|
||||
call: CallMeta {
|
||||
|
|
@ -2311,6 +2578,7 @@ pub(super) fn push_node<'a>(
|
|||
arg_string_literals,
|
||||
destination_uses,
|
||||
gate_filters,
|
||||
is_constructor,
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels,
|
||||
|
|
@ -2339,6 +2607,7 @@ pub(super) fn push_node<'a>(
|
|||
is_eq_with_const: detect_eq_with_const(ast, lang),
|
||||
is_numeric_length_access: detect_numeric_length_access(ast, lang, code),
|
||||
member_field: detect_member_field_assignment(ast, code),
|
||||
rhs_is_function_literal: rhs_is_function_literal(ast, lang),
|
||||
});
|
||||
|
||||
debug!(
|
||||
|
|
@ -2404,7 +2673,10 @@ fn rhs_is_function_literal(ast: Node, lang: &str) -> bool {
|
|||
if candidate.is_none() {
|
||||
// Walk one level into declarations whose direct child is the
|
||||
// declarator (variable_declaration → variable_declarator →
|
||||
// value).
|
||||
// value), or expression-statement wrappers whose direct child is
|
||||
// an assignment_expression / assignment with a `right` field
|
||||
// (JS `expression_statement > assignment_expression`, Python
|
||||
// `expression_statement > assignment`).
|
||||
let mut cursor = ast.walk();
|
||||
for c in ast.children(&mut cursor) {
|
||||
if matches!(
|
||||
|
|
@ -2417,6 +2689,11 @@ fn rhs_is_function_literal(ast: Node, lang: &str) -> bool {
|
|||
if candidate.is_some() {
|
||||
break;
|
||||
}
|
||||
} else if matches!(lookup(lang, c.kind()), Kind::Assignment) {
|
||||
candidate = c.child_by_field_name("right");
|
||||
if candidate.is_some() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4417,7 +4694,23 @@ fn apply_promisify_labels(
|
|||
let Some(alias) = aliases.get(&callee) else {
|
||||
continue;
|
||||
};
|
||||
let wrapped_labels = classify_all(lang, &alias.wrapped, extra);
|
||||
// Inherit both flat and gated labels from the wrapped callee.
|
||||
// Gated sinks (e.g. `child_process.exec`) carry the same
|
||||
// capability semantics as flat sinks, just with arg-position
|
||||
// filtering at the call site; the promisify alias should
|
||||
// surface the wrapped function's sink class regardless of
|
||||
// which arm originally classified it.
|
||||
let mut wrapped_labels: Vec<crate::labels::DataLabel> =
|
||||
classify_all(lang, &alias.wrapped, extra)
|
||||
.into_iter()
|
||||
.collect();
|
||||
for gm in
|
||||
classify_gated_sink(lang, &alias.wrapped, |_| None, |_| None, |_| false).iter()
|
||||
{
|
||||
if !wrapped_labels.contains(&gm.label) {
|
||||
wrapped_labels.push(gm.label);
|
||||
}
|
||||
}
|
||||
if wrapped_labels.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue