mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-15 20:05:13 +02:00
Merge branch 'master' into dynamic
This commit is contained in:
commit
7e0e19a7f0
356 changed files with 14144 additions and 1317 deletions
48
src/ast.rs
48
src/ast.rs
|
|
@ -377,7 +377,7 @@ fn build_taint_diag(
|
|||
// Resolved sink capability bits, used by deduplication to distinguish
|
||||
// sinks with different cap types on the same source line (e.g.
|
||||
// `sink_sql(x); sink_shell(x);`).
|
||||
let sink_caps_bits: u16 = cfg_graph[finding.sink]
|
||||
let sink_caps_bits: u32 = cfg_graph[finding.sink]
|
||||
.taint
|
||||
.labels
|
||||
.iter()
|
||||
|
|
@ -385,7 +385,7 @@ fn build_taint_diag(
|
|||
crate::labels::DataLabel::Sink(c) => Some(c.bits()),
|
||||
_ => None,
|
||||
})
|
||||
.fold(0u16, |acc, b| acc | b);
|
||||
.fold(0u32, |acc, b| acc | b);
|
||||
|
||||
// Cap-specific rule-id routing.
|
||||
//
|
||||
|
|
@ -508,6 +508,14 @@ fn build_taint_diag(
|
|||
|| (finding.source_kind.sensitivity() >= crate::labels::Sensitivity::Sensitive
|
||||
&& (flow_has_body_bind || source_is_credential_bearing)));
|
||||
|
||||
// Cap-specific rule routing. Auth-as-taint and data-exfil keep their
|
||||
// pre-existing branches so the routing rules they encode (auth-finding
|
||||
// namespace alignment; body-bind / source-sensitivity gate) stay
|
||||
// exactly as before. New cap classes (LDAP / XPath / Header / Open
|
||||
// redirect / SSTI / XXE / Prototype pollution) route through
|
||||
// `cap_rule_meta()` so the canonical rule ids in the registry are the
|
||||
// single source of truth. Legacy generic taint findings continue to
|
||||
// emit `taint-unsanitised-flow`.
|
||||
let diag_id = if effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID) {
|
||||
"rs.auth.missing_ownership_check.taint".to_string()
|
||||
} else if is_data_exfil_rule {
|
||||
|
|
@ -516,6 +524,25 @@ fn build_taint_diag(
|
|||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
)
|
||||
} else if let Some(meta) = [
|
||||
crate::labels::Cap::LDAP_INJECTION,
|
||||
crate::labels::Cap::XPATH_INJECTION,
|
||||
crate::labels::Cap::HEADER_INJECTION,
|
||||
crate::labels::Cap::OPEN_REDIRECT,
|
||||
crate::labels::Cap::SSTI,
|
||||
crate::labels::Cap::XXE,
|
||||
crate::labels::Cap::PROTOTYPE_POLLUTION,
|
||||
]
|
||||
.iter()
|
||||
.find(|c| effective_caps.contains(**c))
|
||||
.and_then(|c| crate::labels::cap_rule_meta(*c))
|
||||
{
|
||||
format!(
|
||||
"{} (source {}:{})",
|
||||
meta.rule_id,
|
||||
source_point.row + 1,
|
||||
source_point.column + 1
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"taint-unsanitised-flow (source {}:{})",
|
||||
|
|
@ -576,6 +603,23 @@ fn build_taint_diag(
|
|||
}
|
||||
_ => crate::patterns::Severity::Medium,
|
||||
}
|
||||
} else if let Some(meta) = [
|
||||
crate::labels::Cap::LDAP_INJECTION,
|
||||
crate::labels::Cap::XPATH_INJECTION,
|
||||
crate::labels::Cap::HEADER_INJECTION,
|
||||
crate::labels::Cap::OPEN_REDIRECT,
|
||||
crate::labels::Cap::SSTI,
|
||||
crate::labels::Cap::XXE,
|
||||
crate::labels::Cap::PROTOTYPE_POLLUTION,
|
||||
]
|
||||
.iter()
|
||||
.find(|c| effective_caps.contains(**c))
|
||||
.and_then(|c| crate::labels::cap_rule_meta(*c))
|
||||
{
|
||||
// New cap classes draw severity from the rule registry so a single
|
||||
// edit to `CAP_RULE_REGISTRY` cascades through SARIF, the dashboard,
|
||||
// and the integration suite without per-language source-kind nudges.
|
||||
meta.severity
|
||||
} else {
|
||||
severity_for_source_kind(finding.source_kind)
|
||||
};
|
||||
|
|
|
|||
|
|
@ -206,8 +206,8 @@ pub fn run_auth_analysis_with_model(
|
|||
// (when provided) for cross-file helpers that live in other files.
|
||||
apply_helper_lifting(&mut model, lang, file_path, scan_root, global_summaries);
|
||||
|
||||
// Phase 1 caller-scope IPA: propagate route-handler-level auth
|
||||
// checks DOWN to callee helper units within the same file. See
|
||||
// Caller-scope IPA: propagate route-handler-level auth checks DOWN
|
||||
// to callee helper units within the same file. See
|
||||
// [`apply_caller_scope_propagation`] for the propagation rule.
|
||||
apply_caller_scope_propagation(&mut model);
|
||||
|
||||
|
|
@ -547,8 +547,8 @@ fn apply_helper_lifting(
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 1 caller-scope IPA: propagate route-handler-level auth checks
|
||||
/// DOWN to callee helper units within the same file.
|
||||
/// Caller-scope IPA: propagate route-handler-level auth checks DOWN to
|
||||
/// callee helper units within the same file.
|
||||
///
|
||||
/// `apply_helper_lifting` walks UPWARD: a helper that internally
|
||||
/// proves ownership / membership / etc. has its summary lifted onto
|
||||
|
|
|
|||
|
|
@ -1190,6 +1190,7 @@ fn clone_preserves_all_sub_structs() {
|
|||
destination_uses: None,
|
||||
gate_filters: Vec::new(),
|
||||
is_constructor: false,
|
||||
produces_null_proto: false,
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels: {
|
||||
|
|
@ -1841,9 +1842,12 @@ def outer(cmd):
|
|||
assert_eq!(kwargs[1].0, "check");
|
||||
}
|
||||
|
||||
/// Languages without keyword-argument grammar should leave `kwargs` empty.
|
||||
/// JS object-literal positional args lift their `pair` children into
|
||||
/// `kwargs` so consumers like xml_config's `processEntities` /
|
||||
/// `resolve_entities` opt-in detector can read them without re-walking
|
||||
/// the tree-sitter AST.
|
||||
#[test]
|
||||
fn call_node_kwargs_empty_for_javascript() {
|
||||
fn call_node_kwargs_lifts_javascript_object_literal_pairs() {
|
||||
let src = br"
|
||||
function outer(cmd) {
|
||||
child_process.exec(cmd, { shell: true });
|
||||
|
|
@ -1861,9 +1865,12 @@ fn call_node_kwargs_empty_for_javascript() {
|
|||
.is_some_and(|c| c.ends_with("exec"))
|
||||
})
|
||||
.expect("child_process.exec call node should exist");
|
||||
let kwargs = &call_node.call.kwargs;
|
||||
assert!(
|
||||
call_node.call.kwargs.is_empty(),
|
||||
"JS object-literal arg is not a keyword_argument — kwargs should stay empty"
|
||||
kwargs
|
||||
.iter()
|
||||
.any(|(k, vs)| k == "shell" && vs.iter().any(|v| v == "true")),
|
||||
"JS object-literal `{{ shell: true }}` should surface as kwarg, got {kwargs:?}"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
//! Strictly additive: classes whose fields cannot be classified produce
|
||||
//! a `DtoFields` with an empty `fields` map, the caller must decide
|
||||
//! whether to use that as a "Dto with no inferred fields" or fall back
|
||||
//! to the pre-Phase-6 Object/Unknown classification.
|
||||
//! to the generic Object/Unknown classification.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,16 @@ pub(crate) fn root_receiver_text(n: Node, lang: &str, code: &[u8]) -> Option<Str
|
|||
None => text_of(n, code),
|
||||
}
|
||||
}
|
||||
// PHP `variable_name` text carries a leading `$` (`$smarty`, `$twig`).
|
||||
// Strip it so chain text built downstream (`{recv}.{method}`) presents
|
||||
// a `.`-only delimiter sequence — required by the suffix-matcher
|
||||
// boundary rule, which only accepts `.`/`:` as chain separators.
|
||||
// Without this strip, gate matchers like `Smarty.fetch` /
|
||||
// `Environment.createTemplate` never fire on idiomatic
|
||||
// `$smarty->fetch(...)` / `$twig->createTemplate(...)` shapes.
|
||||
_ if lang == "php" && n.kind() == "variable_name" => {
|
||||
text_of(n, code).map(|s| s.trim_start_matches('$').to_string())
|
||||
}
|
||||
_ => text_of(n, code),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -195,6 +195,56 @@ pub(super) fn extract_destination_kwarg_pairs(
|
|||
|
||||
/// Returns `true` when `call_node` is a literal `Object.create(null)` call.
///
/// The match is deliberately strict:
/// * `call_node` must be a `call_expression` whose callee text is exactly
///   `Object.create` (no aliasing through intermediate variables);
/// * the call must carry exactly one named argument;
/// * that argument, after peeling up to four layers of parentheses /
///   `await` / TS type-assertion wrappers, must be the `null` literal.
///
/// The caller is responsible for restricting use to JS/TS sources.
pub(super) fn is_object_create_null_call(call_node: Node, code: &[u8]) -> bool {
    if call_node.kind() != "call_expression" {
        return false;
    }

    // Callee must be spelled exactly `Object.create`; a missing or
    // unreadable function field yields an empty string and is rejected.
    let callee = call_node
        .child_by_field_name("function")
        .and_then(|f| text_of(f, code))
        .unwrap_or_default();
    if callee != "Object.create" {
        return false;
    }

    let Some(args) = call_node.child_by_field_name("arguments") else {
        return false;
    };

    // Exactly one named argument: `Object.create(null, props)` and
    // `Object.create()` are both rejected.
    let mut cursor = args.walk();
    let mut named = args.named_children(&mut cursor);
    let (Some(first), None) = (named.next(), named.next()) else {
        return false;
    };

    // Unwrap parens / await / TS type-assertions. The fixed bound keeps the
    // walk cheap on pathological nesting; deeper wrapping is simply not
    // recognised.
    let mut arg = first;
    for _ in 0..4 {
        let inner = match arg.kind() {
            "parenthesized_expression" | "as_expression" | "type_assertion" => arg.named_child(0),
            "await_expression" => arg.child_by_field_name("argument"),
            _ => None,
        };
        let Some(inner) = inner else {
            break;
        };
        arg = inner;
    }

    // Accept either a dedicated `null` node kind or any node whose source
    // text is literally `null`.
    arg.kind() == "null" || text_of(arg, code).as_deref() == Some("null")
}

/// Extract the string-literal content at argument position `index` (0-based).
/// Returns `None` if the argument is not a string literal or the index is out of range.
|
||||
|
||||
pub(super) fn extract_const_string_arg(
|
||||
call_node: Node,
|
||||
index: usize,
|
||||
|
|
@ -222,6 +272,37 @@ pub(super) fn extract_const_string_arg(
|
|||
None
|
||||
}
|
||||
}
|
||||
// Boolean literals — JS/TS `true`/`false` are their own node kinds; some
|
||||
// grammars wrap them as identifiers carrying the keyword text. Returned
|
||||
// verbatim so `dangerous_values` matching can detect deep-flag forms
|
||||
// like `extend(true, target, src)`.
|
||||
"true" | "false" => Some(arg.kind().to_string()),
|
||||
// PHP double-quoted strings parse as `encapsed_string` whose body is
|
||||
// a sequence of `string_content` / `escape_sequence` / interpolation
|
||||
// nodes. Treat the string as constant only when every child is a
|
||||
// pure-literal segment (no `variable_name` / `subscript_expression`
|
||||
// interpolations); the returned value is the concatenation of the
|
||||
// literal segments verbatim.
|
||||
"encapsed_string" => {
|
||||
let mut c = arg.walk();
|
||||
let mut buf = String::new();
|
||||
for ch in arg.named_children(&mut c) {
|
||||
match ch.kind() {
|
||||
"string_content" => {
|
||||
if let Some(s) = text_of(ch, code) {
|
||||
buf.push_str(&s);
|
||||
}
|
||||
}
|
||||
"escape_sequence" => {
|
||||
if let Some(s) = text_of(ch, code) {
|
||||
buf.push_str(&s);
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(buf)
|
||||
}
|
||||
"template_string" => {
|
||||
// Only treat as constant if no interpolation (no template_substitution children)
|
||||
let mut c = arg.walk();
|
||||
|
|
@ -238,6 +319,44 @@ pub(super) fn extract_const_string_arg(
|
|||
None
|
||||
}
|
||||
}
|
||||
// Concat-style binary expression with a leading string literal, e.g.
|
||||
// PHP `"Location: " . $url`, JS/TS `"Location: " + url`. Returns the
|
||||
// left-most literal so prefix-driven gates (`dangerous_prefixes`) can
|
||||
// activate on partially-dynamic concatenations; falls through to
|
||||
// `None` when the leading segment is not a string literal so
|
||||
// exact-`dangerous_values` matching keeps its strict semantics.
|
||||
"binary_expression" => {
|
||||
let left = arg.child_by_field_name("left")?;
|
||||
match left.kind() {
|
||||
"string"
|
||||
| "string_literal"
|
||||
| "interpreted_string_literal"
|
||||
| "raw_string_literal" => {
|
||||
let raw = text_of(left, code)?;
|
||||
if raw.len() >= 2 {
|
||||
Some(raw[1..raw.len() - 1].to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
"encapsed_string" => {
|
||||
let mut c = left.walk();
|
||||
let mut buf = String::new();
|
||||
for ch in left.named_children(&mut c) {
|
||||
match ch.kind() {
|
||||
"string_content" | "escape_sequence" => {
|
||||
if let Some(s) = text_of(ch, code) {
|
||||
buf.push_str(&s);
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(buf)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -271,6 +390,27 @@ pub(super) fn extract_const_macro_arg(
|
|||
"identifier" | "name" | "qualified_name" | "scoped_identifier" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
}
|
||||
// Ruby bare constant (`NOENT`) — leaf form.
|
||||
"constant" => text_of(arg, code).map(|s| s.to_string()),
|
||||
// Ruby scope-qualified constant (`Nokogiri::XML::ParseOptions::NOENT`).
|
||||
// Return only the rightmost `name` segment so the gate's
|
||||
// `dangerous_values` list can stay identifier-bare instead of
|
||||
// enumerating every possible namespacing. Falls back to the full
|
||||
// text if the `name` field is missing for any reason.
|
||||
"scope_resolution" => arg
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code))
|
||||
.map(|s| s.to_string())
|
||||
.or_else(|| text_of(arg, code).map(|s| s.to_string())),
|
||||
// Integer literals at the activation arg position. PHP / C / C++
|
||||
// commonly use plain `0` to opt into the safe-default option set
|
||||
// (e.g. `simplexml_load_string($xml, "SimpleXMLElement", 0)`). The
|
||||
// gate's `dangerous_values` list is identifier-only, so returning
|
||||
// the literal text lets the comparison fail against `LIBXML_NOENT`
|
||||
// and suppresses the conservative-fire branch.
|
||||
"integer" | "integer_literal" | "number_literal" | "decimal_integer_literal" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -728,35 +868,72 @@ pub(super) fn find_chained_inner_call<'a>(
|
|||
return Some((function, inner_text));
|
||||
}
|
||||
// The function/method field for a chained call is a member_expression
|
||||
// (JS/TS) or attribute (Python) etc.; its `object` field is the
|
||||
// receiver expression. Only proceed when that receiver is itself a
|
||||
// call.
|
||||
let object = function.child_by_field_name("object")?;
|
||||
// (JS/TS), attribute (Python), or field_expression (Rust); its
|
||||
// receiver is the `object` field (JS/TS/Python) or `value` field
|
||||
// (Rust). Only proceed when that receiver is itself a call.
|
||||
let object = function
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| function.child_by_field_name("value"))?;
|
||||
if !matches!(lookup(lang, object.kind()), Kind::CallFn | Kind::CallMethod) {
|
||||
return None;
|
||||
}
|
||||
// Recurse: the inner call may itself be chained
|
||||
// (`axios.get(u).then(h).catch(h)`, innermost is `axios.get`).
|
||||
if let Some(inner) = find_chained_inner_call(object, lang, code) {
|
||||
return Some(inner);
|
||||
}
|
||||
// `object` is the innermost call_expression in the chain. Extract
|
||||
// its callee identifier the same way `first_call_ident_with_span`
|
||||
// does for a CallFn (member_expression text → "http.get").
|
||||
let inner_func = object
|
||||
// Decide whether `object` is itself a chained method call (its
|
||||
// function/method field is a member-style expression). When yes,
|
||||
// recurse one more level so deeper chains resolve to their innermost
|
||||
// method (e.g. `axios.get(u).then(h).catch(h)` → `axios.get`).
|
||||
// When no — the receiver is a plain function/constructor call like
|
||||
// Rust's `HttpResponse::Found()` — descending one more level would
|
||||
// strand us on the non-method leaf whose text would not match any
|
||||
// gate matcher. Stop here and return the current `outer` level,
|
||||
// which IS the innermost method call.
|
||||
let object_function = object
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| object.child_by_field_name("method"))
|
||||
.or_else(|| object.child_by_field_name("name"))?;
|
||||
// Multi-line dotted member expressions (`http\n .get`) include
|
||||
// formatting whitespace in the source-text slice. The labels map
|
||||
// keys are literal `"http.get"` etc., strip whitespace so the
|
||||
// chained-call inner-gate rebinding fires for both single-line and
|
||||
// multi-line chain styles. Also strips `\r` for CRLF sources.
|
||||
// Motivated by upstream Parse Server CVE-2025-64430 which uses the
|
||||
// multi-line `http\n .get(uri, ...)\n .on(...)` form.
|
||||
let raw = text_of(inner_func, code)?;
|
||||
.or_else(|| object.child_by_field_name("method"));
|
||||
let object_is_chained_method = object_function
|
||||
.map(|f| {
|
||||
matches!(
|
||||
f.kind(),
|
||||
"member_expression"
|
||||
| "attribute"
|
||||
| "field_expression"
|
||||
| "scoped_identifier"
|
||||
| "scope_resolution"
|
||||
) && f
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| f.child_by_field_name("value"))
|
||||
.is_some()
|
||||
})
|
||||
.unwrap_or(false);
|
||||
if object_is_chained_method {
|
||||
// Recurse: the inner call may itself be chained.
|
||||
if let Some(inner) = find_chained_inner_call(object, lang, code) {
|
||||
return Some(inner);
|
||||
}
|
||||
// `object` is the innermost call_expression in the chain. Extract
|
||||
// its callee identifier the same way `first_call_ident_with_span`
|
||||
// does for a CallFn (member_expression text → "http.get").
|
||||
let inner_func = object
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| object.child_by_field_name("method"))
|
||||
.or_else(|| object.child_by_field_name("name"))?;
|
||||
// Multi-line dotted member expressions (`http\n .get`) include
|
||||
// formatting whitespace in the source-text slice. The labels map
|
||||
// keys are literal `"http.get"` etc., strip whitespace so the
|
||||
// chained-call inner-gate rebinding fires for both single-line and
|
||||
// multi-line chain styles. Also strips `\r` for CRLF sources.
|
||||
// Motivated by upstream Parse Server CVE-2025-64430 which uses the
|
||||
// multi-line `http\n .get(uri, ...)\n .on(...)` form.
|
||||
let raw = text_of(inner_func, code)?;
|
||||
let inner_text: String = raw.chars().filter(|c| !c.is_whitespace()).collect();
|
||||
return Some((object, inner_text));
|
||||
}
|
||||
// Receiver is a non-chained call (Rust constructor `Foo::new()` /
|
||||
// `HttpResponse::Found()`, JS bare `f()`). Outer level IS the
|
||||
// innermost method call — return its own function text so gate
|
||||
// matching sees the method name.
|
||||
let raw = text_of(function, code)?;
|
||||
let inner_text: String = raw.chars().filter(|c| !c.is_whitespace()).collect();
|
||||
Some((object, inner_text))
|
||||
Some((outer, inner_text))
|
||||
}
|
||||
|
||||
/// Recursively walk the receiver chain of `outer` (a CallFn / CallMethod
|
||||
|
|
@ -1389,6 +1566,47 @@ pub(super) fn extract_kwargs(call_node: Node, code: &[u8]) -> Vec<(String, Vec<S
|
|||
let mut cursor = args_node.walk();
|
||||
for child in args_node.named_children(&mut cursor) {
|
||||
let kind = child.kind();
|
||||
// JS/TS object-literal positional arg: `f(x, { a: true, b: 'str' })`.
|
||||
// The pairs inside the object are not tree-sitter
|
||||
// `keyword_argument` nodes (those are Python/Ruby), but
|
||||
// downstream consumers (xml_config's
|
||||
// `lookup_kwargs(inst.cfg_node)` JS branch checking
|
||||
// `processEntities`) expect these fields in the kwargs vector.
|
||||
// Lift each `pair` (and `shorthand_property_identifier`) into
|
||||
// the kwargs list using the property name as kwarg name and the
|
||||
// raw text of the value expression as the single value.
|
||||
// Boolean / numeric / string / identifier values all surface as
|
||||
// their textual form, which is what xml_config's kwarg-value
|
||||
// matchers (e.g. `v == "true"`) compare against.
|
||||
if kind == "object" {
|
||||
let mut oc = child.walk();
|
||||
for pair in child.named_children(&mut oc) {
|
||||
let pk = pair.kind();
|
||||
if pk == "pair" {
|
||||
let Some(kn) = pair.child_by_field_name("key") else {
|
||||
continue;
|
||||
};
|
||||
let Some(vn) = pair.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
let Some(raw_name) = text_of(kn, code) else {
|
||||
continue;
|
||||
};
|
||||
let name = raw_name
|
||||
.trim_start_matches(['"', '\''])
|
||||
.trim_end_matches(['"', '\''])
|
||||
.to_string();
|
||||
if let Some(val_text) = text_of(vn, code) {
|
||||
out.push((name, vec![val_text.to_string()]));
|
||||
}
|
||||
} else if pk == "shorthand_property_identifier" {
|
||||
if let Some(name) = text_of(pair, code) {
|
||||
out.push((name.to_string(), vec![name.to_string()]));
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if kind != "keyword_argument" && kind != "named_argument" {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1413,6 +1631,32 @@ pub(super) fn extract_kwargs(call_node: Node, code: &[u8]) -> Vec<(String, Vec<S
|
|||
collect_idents_with_paths(vn, code, &mut idents, &mut paths);
|
||||
let mut combined = paths;
|
||||
combined.extend(idents);
|
||||
// Boolean / numeric literal kwarg values (Python `True`/`False`,
|
||||
// Ruby `true`/`false`/integer/float, JS `true`/`false`/number)
|
||||
// do not surface through `collect_idents_with_paths` — the value
|
||||
// node's kind is `true`/`false`/`integer`/`float`/`number`, not
|
||||
// an identifier kind. Capture the raw text so consumers like
|
||||
// `xml_config::classify_call` (which checks
|
||||
// `values.iter().any(|v| v == "True" || v == "true")` for the
|
||||
// lxml `resolve_entities=True` opt-in) can match.
|
||||
if combined.is_empty() {
|
||||
if matches!(
|
||||
vn.kind(),
|
||||
"true"
|
||||
| "false"
|
||||
| "integer"
|
||||
| "float"
|
||||
| "number"
|
||||
| "string"
|
||||
| "string_literal"
|
||||
| "true_constant"
|
||||
| "false_constant"
|
||||
) {
|
||||
if let Some(txt) = text_of(vn, code) {
|
||||
combined.push(txt.trim_matches(['"', '\'']).to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
out.push((name, combined));
|
||||
}
|
||||
out
|
||||
|
|
@ -1718,6 +1962,29 @@ pub(super) fn extract_arg_string_literals(call_node: Node, code: &[u8]) -> Vec<O
|
|||
let raw = text_of(target, code);
|
||||
raw.and_then(|s| strip_literal_quotes(&s, target, code))
|
||||
}
|
||||
// Boolean / null / numeric literal tokens — capture verbatim so
|
||||
// downstream pattern-aware analysis (e.g. the XXE config-fact
|
||||
// pass that needs to read the boolean polarity arg of
|
||||
// `setFeature(NAME, true)`) can recover the literal text without
|
||||
// re-walking the AST. Existing string-only consumers (URL
|
||||
// prefix matching, etc.) are unaffected: a "true" / "false"
|
||||
// token never satisfies their matching predicates.
|
||||
"true"
|
||||
| "false"
|
||||
| "null"
|
||||
| "null_literal"
|
||||
| "nil"
|
||||
| "nil_literal"
|
||||
| "none"
|
||||
| "boolean_literal"
|
||||
| "true_literal"
|
||||
| "false_literal"
|
||||
| "decimal_integer_literal"
|
||||
| "integer_literal"
|
||||
| "integer"
|
||||
| "number"
|
||||
| "number_literal"
|
||||
| "decimal_literal" => text_of(target, code).map(|s| s.to_string()),
|
||||
_ => None,
|
||||
};
|
||||
result.push(literal);
|
||||
|
|
|
|||
625
src/cfg/mod.rs
625
src/cfg/mod.rs
|
|
@ -70,8 +70,8 @@ use literals::{
|
|||
extract_destination_field_pairs, extract_destination_kwarg_pairs, extract_kwargs,
|
||||
extract_literal_rhs, extract_object_arg_property, extract_shell_array_payload_idents,
|
||||
find_call_node, find_call_node_deep, find_chained_inner_call, has_keyword_arg,
|
||||
has_object_arg_property, has_only_literal_args, is_parameterized_query_call,
|
||||
java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
has_object_arg_property, has_only_literal_args, is_object_create_null_call,
|
||||
is_parameterized_query_call, java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
js_chain_outer_method_for_inner, ruby_chain_arg0_for_method, walk_chain_inner_call_args,
|
||||
};
|
||||
use params::{
|
||||
|
|
@ -359,6 +359,14 @@ pub struct CallMeta {
|
|||
/// must not survive into the constructed object.
|
||||
#[serde(default)]
|
||||
pub is_constructor: bool,
|
||||
/// True when this call is `Object.create(null)` (or alias). The returned
|
||||
/// value has no prototype chain. Consumed by TypeFacts to tag the
|
||||
/// SsaValue with [`crate::ssa::type_facts::TypeKind::NullPrototypeObject`]
|
||||
/// so PROTOTYPE_POLLUTION suppression can fire flow-sensitively at the
|
||||
/// synthetic `__index_set__` sink. Set during CFG node construction so
|
||||
/// SSA does not need to re-walk the AST.
|
||||
#[serde(default)]
|
||||
pub produces_null_proto: bool,
|
||||
}
|
||||
|
||||
/// One gate's contribution at a call site whose callee matches multiple
|
||||
|
|
@ -601,8 +609,7 @@ pub struct BodyMeta {
|
|||
/// decorators / annotations / static type text at CFG construction
|
||||
/// time. Same length as `params`; positions with no recoverable
|
||||
/// type info are `None`. Strictly additive, when every entry is
|
||||
/// `None`, downstream behaviour is identical to the pre-Phase-1
|
||||
/// engine.
|
||||
/// `None`, downstream behaviour is identical to the type-unaware path.
|
||||
pub param_types: Vec<Option<crate::ssa::type_facts::TypeKind>>,
|
||||
/// Per-parameter destructured-binding sibling names. Same length
|
||||
/// as `params`; entry `i` lists field names bound by the same
|
||||
|
|
@ -1811,6 +1818,31 @@ pub(super) fn push_node<'a>(
|
|||
labels.push(l);
|
||||
}
|
||||
}
|
||||
// Subscript-set form: `response.headers["X-Foo"] = bar`
|
||||
// (Ruby `element_reference`, JS/TS `subscript_expression`,
|
||||
// Python `subscript`). The LHS has no `property` field, so
|
||||
// walk into the subscript's `object` and try classifying its
|
||||
// member-expression text (e.g. `response.headers`). This
|
||||
// lets header-injection sinks fire on the bare bracket form
|
||||
// alongside the `set_header` / `headers_mut.insert` method
|
||||
// shapes already covered above.
|
||||
if labels.is_empty()
|
||||
&& matches!(
|
||||
lhs.kind(),
|
||||
"subscript_expression" | "subscript" | "element_reference"
|
||||
)
|
||||
{
|
||||
let obj = lhs
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| lhs.child_by_field_name("value"))
|
||||
.or_else(|| lhs.child(0));
|
||||
if let Some(obj_node) = obj
|
||||
&& let Some(obj_text) = member_expr_text(obj_node, code)
|
||||
&& let Some(l) = classify(lang, &obj_text, extra)
|
||||
{
|
||||
labels.push(l);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1933,18 +1965,45 @@ pub(super) fn push_node<'a>(
|
|||
{
|
||||
let gate_call = call_ast.or_else(|| find_call_node_deep(ast, lang, 4));
|
||||
if let Some(cn) = gate_call {
|
||||
let gate_callee_text = if call_ast.is_some() {
|
||||
// Derive the gate's callee text from the call's
|
||||
// `function`/`method`/`name` field, falling back to `text`.
|
||||
//
|
||||
// The default is `text`, which by this point reflects the
|
||||
// qualified callee for method calls (`Velocity.evaluate`,
|
||||
// `$smarty->fetch`) reconstructed in the `Kind::CallMethod`
|
||||
// arm. When `first_member_label` rewrites `text` to a member
|
||||
// Source like `req.body` (because the wrapper carries one as
|
||||
// an argument), the rewrite is correct for source attribution
|
||||
// but defeats gate matching against a bare callee
|
||||
// (`setValue(target, req.body, …)` would gate-match
|
||||
// `req.body` instead of `setValue`).
|
||||
//
|
||||
// Detect that case structurally: a Source label is present AND
|
||||
// the call's function-field text differs from `text`. The
|
||||
// function field carries the actual callee identifier; when it
|
||||
// disagrees with `text`, `text` was clobbered by a member-source
|
||||
// override and the function field is the right gate target.
|
||||
// Whitespace is stripped to mirror `find_chained_inner_call`
|
||||
// so multi-line chains (`http\n .get(...)`) still match flat
|
||||
// gate matchers like `http.get`.
|
||||
let function_field_text: Option<String> = cn
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| cn.child_by_field_name("method"))
|
||||
.or_else(|| cn.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code))
|
||||
.map(|t| t.chars().filter(|c| !c.is_whitespace()).collect::<String>());
|
||||
let has_source_label = labels
|
||||
.iter()
|
||||
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)));
|
||||
let gate_callee_text = if let Some(ff) = function_field_text.as_deref()
|
||||
&& has_source_label
|
||||
&& ff != text.as_str()
|
||||
{
|
||||
ff.to_string()
|
||||
} else if call_ast.is_some() {
|
||||
text.clone()
|
||||
} else {
|
||||
// Inner call reached via wrapper, use the call-expression's
|
||||
// function name directly. Falls back to `text` so non-call-
|
||||
// expression kinds (method calls, Ruby `call` nodes, macros)
|
||||
// still have a usable callee string.
|
||||
cn.child_by_field_name("function")
|
||||
.or_else(|| cn.child_by_field_name("method"))
|
||||
.or_else(|| cn.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code))
|
||||
.unwrap_or_else(|| text.clone())
|
||||
function_field_text.unwrap_or_else(|| text.clone())
|
||||
};
|
||||
let matches = classify_gated_sink(
|
||||
lang,
|
||||
|
|
@ -1953,12 +2012,15 @@ pub(super) fn push_node<'a>(
|
|||
extract_const_string_arg(cn, idx, code).or_else(|| {
|
||||
// C/C++ preprocessor macros and PHP `define`d constants
|
||||
// surface as identifier nodes, not string literals.
|
||||
// Falling back to the macro-arg extractor for those
|
||||
// languages lets gates like `curl_easy_setopt` /
|
||||
// `curl_setopt` activate on a `CURLOPT_POSTFIELDS`
|
||||
// ident match instead of firing conservatively on
|
||||
// every positional arg.
|
||||
if matches!(lang, "c" | "cpp" | "c++" | "php") {
|
||||
// Ruby option constants (e.g.
|
||||
// `Nokogiri::XML::ParseOptions::NOENT`) surface as
|
||||
// `scope_resolution` / `constant` nodes. Falling back
|
||||
// to the macro-arg extractor for those languages lets
|
||||
// gates like `curl_easy_setopt` / `curl_setopt` /
|
||||
// `Nokogiri::XML` activate on a bare-leaf identifier
|
||||
// match instead of firing conservatively on every
|
||||
// positional arg.
|
||||
if matches!(lang, "c" | "cpp" | "c++" | "php" | "ruby" | "rb") {
|
||||
extract_const_macro_arg(cn, idx, code)
|
||||
} else {
|
||||
None
|
||||
|
|
@ -2656,6 +2718,13 @@ pub(super) fn push_node<'a>(
|
|||
|| call_ast
|
||||
.is_some_and(|cn| matches!(cn.kind(), "new_expression" | "object_creation_expression"));
|
||||
|
||||
// Detect `Object.create(null)` so TypeFacts can tag the returned
|
||||
// SsaValue with `NullPrototypeObject` for flow-sensitive
|
||||
// prototype-pollution suppression. Restricted to JS/TS where
|
||||
// `Object.create` is the idiomatic null-prototype constructor.
|
||||
let produces_null_proto = matches!(lang, "javascript" | "typescript")
|
||||
&& call_ast.is_some_and(|cn| is_object_create_null_call(cn, code));
|
||||
|
||||
let idx = g.add_node(NodeInfo {
|
||||
kind,
|
||||
call: CallMeta {
|
||||
|
|
@ -2672,6 +2741,7 @@ pub(super) fn push_node<'a>(
|
|||
destination_uses,
|
||||
gate_filters,
|
||||
is_constructor,
|
||||
produces_null_proto,
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels,
|
||||
|
|
@ -2860,6 +2930,31 @@ fn try_lower_subscript_write(
|
|||
*call_ordinal += 1;
|
||||
let mut uses_all: Vec<String> = vec![arr_text.clone(), idx_text.clone()];
|
||||
uses_all.extend(rhs_uses.iter().cloned());
|
||||
|
||||
// Prototype pollution sink classification on the synthetic
|
||||
// `__index_set__` node for JS/TS. Tainted *key* in `obj[key] = val`
|
||||
// is the pollution channel (a `__proto__` / `constructor` literal flowing
|
||||
// through `key` mutates `Object.prototype` globally), so the gate's
|
||||
// payload arg list is `[0]` (the key only — the value at index 1 is
|
||||
// benign on its own). Sanitizer recognition is structural (no taint
|
||||
// engine plumbing) and runs before label attachment, so suppressed
|
||||
// shapes never enter the SSA sink scan:
|
||||
// * constant string key whose literal value is not in the dangerous
|
||||
// set (`__proto__` / `constructor` / `prototype`),
|
||||
// * (not decided here) a receiver assigned `Object.create(null)` has
// no prototype chain to pollute; that case is suppressed
// flow-sensitively by the SSA taint engine instead,
|
||||
// * the assignment is dominated by an `if` whose condition rejects
|
||||
// dangerous keys with an early `return` / `throw` / `break`, or
|
||||
// that allowlists the key against safe constants on its true arm.
|
||||
let mut pp_labels: smallvec::SmallVec<[DataLabel; 2]> = smallvec::SmallVec::new();
|
||||
let mut pp_payload_args: Option<Vec<usize>> = None;
|
||||
if matches!(lang, "javascript" | "typescript" | "js" | "ts")
|
||||
&& !pp_should_suppress_index_set(assign_ast, subscript_node, &arr_text, &idx_text, code)
|
||||
{
|
||||
pp_labels.push(DataLabel::Sink(Cap::PROTOTYPE_POLLUTION));
|
||||
pp_payload_args = Some(vec![0]);
|
||||
}
|
||||
|
||||
let n = g.add_node(NodeInfo {
|
||||
kind: StmtKind::Call,
|
||||
call: CallMeta {
|
||||
|
|
@ -2867,9 +2962,11 @@ fn try_lower_subscript_write(
|
|||
receiver: Some(arr_text.clone()),
|
||||
arg_uses: vec![vec![idx_text.clone()], rhs_uses.clone()],
|
||||
call_ordinal: ord,
|
||||
sink_payload_args: pp_payload_args,
|
||||
..Default::default()
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels: pp_labels,
|
||||
uses: uses_all,
|
||||
..Default::default()
|
||||
},
|
||||
|
|
@ -2883,6 +2980,477 @@ fn try_lower_subscript_write(
|
|||
Some(n)
|
||||
}
|
||||
|
||||
/// Spring MVC controller-return open-redirect recogniser. Detects the
|
||||
/// shape `return "redirect:" + tainted` (Java string concatenation) and
|
||||
/// emits a synthetic `__spring_redirect__` Call sink with
|
||||
/// `Sink(OPEN_REDIRECT)` so the existing taint pipeline propagates the
|
||||
/// concatenated suffix through the OPEN_REDIRECT cap. The synthetic
|
||||
/// node sequences between `preds` and the eventual Return node.
|
||||
///
|
||||
/// Returns `Some(synthetic_idx)` when matched, otherwise `None`.
|
||||
/// Java only — Spring's `redirect:` view-name convention has no
|
||||
/// counterpart in the other supported languages, and matching the
|
||||
/// literal across non-Spring code would over-fire.
|
||||
fn try_lower_spring_redirect_return(
|
||||
ast: Node,
|
||||
preds: &[NodeIndex],
|
||||
g: &mut Cfg,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
enclosing_func: Option<&str>,
|
||||
call_ordinal: &mut u32,
|
||||
) -> Option<NodeIndex> {
|
||||
if lang != "java" {
|
||||
return None;
|
||||
}
|
||||
// `return EXPR ;` — find the returned expression. tree-sitter-java
|
||||
// wraps the value in a `return_statement` whose first named child
|
||||
// is the expression.
|
||||
let expr = ast.named_child(0)?;
|
||||
// Strip parentheses.
|
||||
let mut cur = expr;
|
||||
while cur.kind() == "parenthesized_expression" {
|
||||
cur = cur.named_child(0)?;
|
||||
}
|
||||
if cur.kind() != "binary_expression" {
|
||||
return None;
|
||||
}
|
||||
let op = cur.child_by_field_name("operator")?;
|
||||
let op_text = text_of(op, code)?;
|
||||
if op_text != "+" {
|
||||
return None;
|
||||
}
|
||||
// Walk leftmost descent through left-associated `+` chains so that
|
||||
// `"redirect:" + a + b` still matches (the AST nests as
|
||||
// `(("redirect:" + a) + b)`).
|
||||
let mut leftmost = cur;
|
||||
loop {
|
||||
let left = leftmost.child_by_field_name("left")?;
|
||||
let mut left_inner = left;
|
||||
while left_inner.kind() == "parenthesized_expression" {
|
||||
left_inner = left_inner.named_child(0)?;
|
||||
}
|
||||
if left_inner.kind() == "binary_expression" {
|
||||
let op_l = left_inner.child_by_field_name("operator")?;
|
||||
if text_of(op_l, code).as_deref() == Some("+") {
|
||||
leftmost = left_inner;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// `left_inner` is the leftmost atom — must be a string literal
|
||||
// whose constant value starts with `redirect:`.
|
||||
if !matches!(left_inner.kind(), "string_literal" | "string") {
|
||||
return None;
|
||||
}
|
||||
let lit = text_of(left_inner, code)?;
|
||||
if lit.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
let inner = &lit[1..lit.len() - 1];
|
||||
if !inner.starts_with("redirect:") {
|
||||
return None;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Collect identifiers referenced anywhere in the original concat
|
||||
// expression — the tainted URL piece is one of them. Receiver-style
|
||||
// method calls (`view.toString()`) are intentionally captured via
|
||||
// the bare identifier; precision improvements are deferred to the
|
||||
// SSA / abstract-string layer.
|
||||
let mut concat_uses: Vec<String> = Vec::new();
|
||||
collect_idents(cur, code, &mut concat_uses);
|
||||
if concat_uses.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let span = (ast.start_byte(), ast.end_byte());
|
||||
let ord = *call_ordinal;
|
||||
*call_ordinal += 1;
|
||||
|
||||
let mut labels: smallvec::SmallVec<[DataLabel; 2]> = smallvec::SmallVec::new();
|
||||
labels.push(DataLabel::Sink(Cap::OPEN_REDIRECT));
|
||||
|
||||
let n = g.add_node(NodeInfo {
|
||||
kind: StmtKind::Call,
|
||||
call: CallMeta {
|
||||
callee: Some("__spring_redirect__".to_string()),
|
||||
arg_uses: vec![concat_uses.clone()],
|
||||
call_ordinal: ord,
|
||||
sink_payload_args: Some(vec![0]),
|
||||
..Default::default()
|
||||
},
|
||||
taint: TaintMeta {
|
||||
labels,
|
||||
uses: concat_uses,
|
||||
..Default::default()
|
||||
},
|
||||
ast: AstMeta {
|
||||
span,
|
||||
enclosing_func: enclosing_func.map(|s| s.to_string()),
|
||||
},
|
||||
..Default::default()
|
||||
});
|
||||
connect_all(g, preds, n, EdgeKind::Seq);
|
||||
Some(n)
|
||||
}
|
||||
|
||||
/// Prototype-pollution suppression decisions for the synthetic
|
||||
/// `__index_set__` node emitted by `try_lower_subscript_write`.
|
||||
///
|
||||
/// Returns `true` when the assignment is provably safe and the
|
||||
/// `Cap::PROTOTYPE_POLLUTION` sink label should be elided. The three
|
||||
/// CFG-layer recognised shapes are flow-insensitive AST patterns:
|
||||
///
|
||||
/// 1. Constant string key whose value is not one of the dangerous
|
||||
/// keys (`__proto__`, `constructor`, `prototype`). A literal-keyed
|
||||
/// write cannot pollute even if the value is tainted.
|
||||
/// 2. Reject pattern `if (idx === "__proto__" || idx === "constructor"
|
||||
/// || idx === "prototype") <return/throw/break>` enclosing the
|
||||
/// assignment. The dangerous-key path terminates before reaching
|
||||
/// the synthesised store.
|
||||
/// 3. Allowlist pattern `if (idx === "name" || idx === "id") { obj[idx]
|
||||
/// = v }`. The assignment only executes when `idx` is one of a
|
||||
/// small set of known-safe constants.
|
||||
///
|
||||
/// The null-prototype receiver suppression (`Object.create(null)`) is
|
||||
/// handled flow-sensitively in the SSA taint engine via
|
||||
/// `TypeKind::NullPrototypeObject`, since AST scans cannot honour
|
||||
/// branch-local re-bindings or phi joins.
|
||||
///
|
||||
/// Conservative: any unrecognised shape returns `false` so the sink
|
||||
/// label is attached and the SSA layer decides on taint reachability.
|
||||
fn pp_should_suppress_index_set(
|
||||
assign_ast: Node,
|
||||
subscript_node: Node,
|
||||
_arr_text: &str,
|
||||
idx_text: &str,
|
||||
code: &[u8],
|
||||
) -> bool {
|
||||
// 1. Constant-key fold.
|
||||
if let Some(idx_node) = subscript_node
|
||||
.child_by_field_name("index")
|
||||
.or_else(|| subscript_node.child_by_field_name("subscript"))
|
||||
.or_else(|| {
|
||||
let mut cur = subscript_node.walk();
|
||||
subscript_node.named_children(&mut cur).nth(1)
|
||||
})
|
||||
{
|
||||
if let Some(literal) = pp_string_literal_value(idx_node, code) {
|
||||
return !pp_is_dangerous_proto_key(&literal);
|
||||
}
|
||||
}
|
||||
|
||||
// 2 + 3. Dominator-style guard ancestors (reject + allowlist).
|
||||
if pp_is_guarded_by_proto_check(assign_ast, idx_text, code) {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Reports whether `s` is exactly one of the property names treated as
/// prototype-pollution keys: `__proto__`, `constructor` or `prototype`.
/// The comparison is case-sensitive and performs no trimming.
fn pp_is_dangerous_proto_key(s: &str) -> bool {
    const DANGEROUS_KEYS: [&str; 3] = ["__proto__", "constructor", "prototype"];
    DANGEROUS_KEYS.contains(&s)
}
|
||||
|
||||
/// Extract the value of a JS/TS string literal node, stripping the
|
||||
/// outer quote bytes (single, double, or backtick). Returns `None`
|
||||
/// for non-literal nodes, template literals containing interpolation,
|
||||
/// or anything that doesn't resemble a single-segment string.
|
||||
fn pp_string_literal_value(n: Node, code: &[u8]) -> Option<String> {
|
||||
let kind = n.kind();
|
||||
if !matches!(kind, "string" | "string_literal" | "template_string") {
|
||||
return None;
|
||||
}
|
||||
let raw = std::str::from_utf8(&code[n.start_byte()..n.end_byte()]).ok()?;
|
||||
if raw.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
let bytes = raw.as_bytes();
|
||||
let first = bytes[0];
|
||||
let last = bytes[bytes.len() - 1];
|
||||
if !matches!(first, b'"' | b'\'' | b'`') || first != last {
|
||||
return None;
|
||||
}
|
||||
let inner = &raw[1..raw.len() - 1];
|
||||
// Reject template literals carrying `${...}` interpolation — we
|
||||
// can't fold those to a single concrete value.
|
||||
if first == b'`' && inner.contains("${") {
|
||||
return None;
|
||||
}
|
||||
Some(inner.to_string())
|
||||
}
|
||||
|
||||
/// Walk up from the assignment node looking for two structural guard
|
||||
/// shapes:
|
||||
///
|
||||
/// * **Reject pattern** — a *previous sibling* `if_statement` in any
|
||||
/// enclosing block whose condition is `idx === DANGEROUS [|| …]` and
|
||||
/// whose consequence terminates control flow (`return` / `throw` /
|
||||
/// `break` / `continue`). The dangerous-key path never reaches the
|
||||
/// subsequent assignment.
|
||||
/// * **Allowlist pattern** — an *ancestor* `if_statement` whose
|
||||
/// condition is `idx === SAFE [|| …]` and through whose consequence
|
||||
/// the descendant flows. Only the safe-key arm reaches the
|
||||
/// assignment.
|
||||
///
|
||||
/// Both shapes must compare against the same key variable as the
|
||||
/// synthetic `__index_set__` node. Stops at the enclosing function so
|
||||
/// guards in an outer scope around a closure passed elsewhere don't
|
||||
/// accidentally suppress inner assignments.
|
||||
fn pp_is_guarded_by_proto_check(from: Node, idx_text: &str, code: &[u8]) -> bool {
|
||||
let mut cur = from;
|
||||
while let Some(parent) = cur.parent() {
|
||||
match parent.kind() {
|
||||
"function_declaration"
|
||||
| "function"
|
||||
| "function_expression"
|
||||
| "arrow_function"
|
||||
| "method_definition"
|
||||
| "generator_function_declaration"
|
||||
| "program"
|
||||
| "source_file" => return false,
|
||||
"if_statement" => {
|
||||
if let Some(cond) = parent.child_by_field_name("condition") {
|
||||
let consequence = parent.child_by_field_name("consequence");
|
||||
if let Some(verdict) =
|
||||
pp_classify_proto_guard(cond, consequence, cur, idx_text, code)
|
||||
{
|
||||
return verdict;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Reject pattern: scan previous siblings in the parent block
|
||||
// for `if (idx === DANGEROUS [|| …]) { return; }` shapes that
|
||||
// dominate the assignment via early-return.
|
||||
let mut sibling_cursor = parent.walk();
|
||||
for sibling in parent.named_children(&mut sibling_cursor) {
|
||||
if sibling.start_byte() >= cur.start_byte() {
|
||||
break;
|
||||
}
|
||||
if sibling.kind() != "if_statement" {
|
||||
continue;
|
||||
}
|
||||
if pp_is_reject_pattern(sibling, idx_text, code) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
cur = parent;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// True when `if_node` is `if (idx === DANGEROUS [|| idx === DANGEROUS]
|
||||
/// …) { return; / throw …; / break; }` shaped — every disjunct
|
||||
/// compares the named key variable to a dangerous prototype key, and
|
||||
/// the consequence terminates control flow.
|
||||
fn pp_is_reject_pattern(if_node: Node, idx_text: &str, code: &[u8]) -> bool {
|
||||
let Some(cond) = if_node.child_by_field_name("condition") else {
|
||||
return false;
|
||||
};
|
||||
let consequence = if_node.child_by_field_name("consequence");
|
||||
let clauses = pp_split_or_clauses(cond);
|
||||
if clauses.is_empty() {
|
||||
return false;
|
||||
}
|
||||
for clause in &clauses {
|
||||
let Some((var, lit)) = pp_extract_eq_compare(*clause, code) else {
|
||||
return false;
|
||||
};
|
||||
if var != idx_text || !pp_is_dangerous_proto_key(&lit) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
consequence.map(pp_block_terminates).unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Decide whether an enclosing `if` clause around an `__index_set__`
|
||||
/// statement constitutes a prototype-pollution guard.
|
||||
///
|
||||
/// `cond` is the if's condition expression, `consequence` is the
|
||||
/// optional consequence block, and `descendant` is the node on the
|
||||
/// path from the if-statement down to the assignment (used to
|
||||
/// distinguish "assignment lives inside the consequence" from
|
||||
/// "assignment lives after the if"). `idx_text` is the textual key
|
||||
/// variable used by the synthetic `__index_set__`.
|
||||
///
|
||||
/// Returns `Some(true)` to suppress, `Some(false)` to keep the gate
|
||||
/// (e.g. an unrelated guard), and `None` when the if-statement is
|
||||
/// not a recognised guard so the walker continues outward.
|
||||
fn pp_classify_proto_guard(
|
||||
cond: Node,
|
||||
consequence: Option<Node>,
|
||||
descendant: Node,
|
||||
idx_text: &str,
|
||||
code: &[u8],
|
||||
) -> Option<bool> {
|
||||
let cond_clauses = pp_split_or_clauses(cond);
|
||||
if cond_clauses.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut all_against_idx = true;
|
||||
let mut all_dangerous = true;
|
||||
let mut all_safe = true;
|
||||
for clause in &cond_clauses {
|
||||
let (var, lit) = pp_extract_eq_compare(*clause, code)?;
|
||||
if var != idx_text {
|
||||
all_against_idx = false;
|
||||
break;
|
||||
}
|
||||
let dangerous = pp_is_dangerous_proto_key(&lit);
|
||||
if dangerous {
|
||||
all_safe = false;
|
||||
} else {
|
||||
all_dangerous = false;
|
||||
}
|
||||
}
|
||||
if !all_against_idx {
|
||||
return None;
|
||||
}
|
||||
|
||||
let consequence_contains_descendant = consequence
|
||||
.map(|c| pp_subtree_contains(c, descendant))
|
||||
.unwrap_or(false);
|
||||
|
||||
// Allowlist pattern: every clause is `idx === SAFE` and the
|
||||
// assignment lives inside the consequence (true arm).
|
||||
if all_safe && consequence_contains_descendant {
|
||||
return Some(true);
|
||||
}
|
||||
|
||||
// Reject pattern: every clause is `idx === DANGEROUS` and the
|
||||
// consequence terminates control flow before reaching the
|
||||
// assignment. Only suppress when the assignment is *outside* the
|
||||
// consequence (i.e., follows the if).
|
||||
if all_dangerous
|
||||
&& !consequence_contains_descendant
|
||||
&& consequence.map(pp_block_terminates).unwrap_or(false)
|
||||
{
|
||||
return Some(true);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// True when `descendant` is identical to or transitively a child of
|
||||
/// `root`. Identity is checked via byte-range equality because
|
||||
/// tree-sitter `Node` doesn't implement `Eq` directly.
|
||||
fn pp_subtree_contains(root: Node, descendant: Node) -> bool {
|
||||
let dr = (descendant.start_byte(), descendant.end_byte());
|
||||
let rr = (root.start_byte(), root.end_byte());
|
||||
dr.0 >= rr.0 && dr.1 <= rr.1
|
||||
}
|
||||
|
||||
/// True when `block` (typically an `if` consequence) terminates
|
||||
/// control flow on every path: the last meaningful statement is a
|
||||
/// return / throw / break / continue. Conservative — falls back to
|
||||
/// `false` for empty blocks or anything non-trivial.
|
||||
fn pp_block_terminates(block: Node) -> bool {
|
||||
// Bare statement consequence (no braces): the if's consequence is
|
||||
// the terminator itself.
|
||||
if pp_is_terminator(block) {
|
||||
return true;
|
||||
}
|
||||
if !matches!(block.kind(), "statement_block" | "block") {
|
||||
return false;
|
||||
}
|
||||
let mut cursor = block.walk();
|
||||
let last_stmt = block.named_children(&mut cursor).last();
|
||||
match last_stmt {
|
||||
Some(s) => pp_is_terminator(s),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// True when `n` is a control-flow-ending statement: return / throw /
|
||||
/// break / continue.
|
||||
fn pp_is_terminator(n: Node) -> bool {
|
||||
matches!(
|
||||
n.kind(),
|
||||
"return_statement" | "throw_statement" | "break_statement" | "continue_statement"
|
||||
)
|
||||
}
|
||||
|
||||
/// Split an expression by top-level `||` operators. Returns the
|
||||
/// individual disjunct sub-expressions. Single (non-OR) expressions
|
||||
/// yield a one-element vector. Walks `binary_expression` nodes whose
|
||||
/// `operator` field is `||` and recurses into both sides.
|
||||
fn pp_split_or_clauses<'a>(expr: Node<'a>) -> Vec<Node<'a>> {
|
||||
let mut out = Vec::new();
|
||||
pp_collect_or_clauses(expr, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
fn pp_collect_or_clauses<'a>(expr: Node<'a>, out: &mut Vec<Node<'a>>) {
|
||||
let stripped = pp_unwrap_paren(expr);
|
||||
if matches!(stripped.kind(), "binary_expression") {
|
||||
let op = stripped
|
||||
.child_by_field_name("operator")
|
||||
.map(|o| o.kind())
|
||||
.unwrap_or("");
|
||||
if op == "||" {
|
||||
if let Some(l) = stripped.child_by_field_name("left") {
|
||||
pp_collect_or_clauses(l, out);
|
||||
}
|
||||
if let Some(r) = stripped.child_by_field_name("right") {
|
||||
pp_collect_or_clauses(r, out);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
out.push(stripped);
|
||||
}
|
||||
|
||||
/// Peels nested `parenthesized_expression` wrappers off `n` and returns
/// the innermost node reached. A parenthesized node with no named child
/// is returned as-is.
fn pp_unwrap_paren(n: Node) -> Node {
    let mut node = n;
    loop {
        if node.kind() != "parenthesized_expression" {
            return node;
        }
        match node.named_child(0) {
            Some(inner) => node = inner,
            None => return node,
        }
    }
}
|
||||
|
||||
/// Extract `(var_text, literal_value)` from an equality comparison
|
||||
/// `var === "literal"` / `var == "literal"` (and reversed forms).
|
||||
/// Returns `None` for any other shape.
|
||||
fn pp_extract_eq_compare(expr: Node, code: &[u8]) -> Option<(String, String)> {
|
||||
let stripped = pp_unwrap_paren(expr);
|
||||
if !matches!(stripped.kind(), "binary_expression") {
|
||||
return None;
|
||||
}
|
||||
let op = stripped
|
||||
.child_by_field_name("operator")
|
||||
.map(|o| o.kind())
|
||||
.unwrap_or("");
|
||||
if !matches!(op, "===" | "==") {
|
||||
return None;
|
||||
}
|
||||
let left = stripped.child_by_field_name("left")?;
|
||||
let right = stripped.child_by_field_name("right")?;
|
||||
let left = pp_unwrap_paren(left);
|
||||
let right = pp_unwrap_paren(right);
|
||||
if let (Some(lv), Some(rs)) = (text_of(left, code), pp_string_literal_value(right, code)) {
|
||||
if matches!(left.kind(), "identifier" | "shorthand_property_identifier") {
|
||||
return Some((lv, rs));
|
||||
}
|
||||
}
|
||||
if let (Some(rv), Some(ls)) = (text_of(right, code), pp_string_literal_value(left, code)) {
|
||||
if matches!(right.kind(), "identifier" | "shorthand_property_identifier") {
|
||||
return Some((rv, ls));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Step 1 (`pre_emit_arg_source_nodes`): scan the AST, create Source nodes,
|
||||
/// wire them to `preds`, and return (effective_preds, synth_bindings,
|
||||
/// uses_only_synth_names).
|
||||
|
|
@ -3682,6 +4250,21 @@ pub(super) fn build_sub<'a>(
|
|||
|
||||
Vec::new()
|
||||
} else {
|
||||
// Spring MVC `return "redirect:" + url` open-redirect
|
||||
// synthetic-sink emission. When matched the synthetic
|
||||
// call sequences between `preds` and the Return node.
|
||||
let mut effective_preds: Vec<NodeIndex> = preds.to_vec();
|
||||
if let Some(synth) = try_lower_spring_redirect_return(
|
||||
ast,
|
||||
&effective_preds,
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
) {
|
||||
effective_preds = vec![synth];
|
||||
}
|
||||
let ret = push_node(
|
||||
g,
|
||||
StmtKind::Return,
|
||||
|
|
@ -3692,7 +4275,7 @@ pub(super) fn build_sub<'a>(
|
|||
0,
|
||||
analysis_rules,
|
||||
);
|
||||
connect_all(g, preds, ret, EdgeKind::Seq);
|
||||
connect_all(g, &effective_preds, ret, EdgeKind::Seq);
|
||||
Vec::new() // terminates this path
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ use tree_sitter::Node;
|
|||
/// of `build_cfg`. Returns the [`TypeKind::Dto`] carrying the
|
||||
/// per-field type map when the class is declared in the same file;
|
||||
/// returns `None` otherwise so callers can fall through to the
|
||||
/// pre-Phase-6 behaviour (Object / Unknown).
|
||||
/// generic Object / Unknown classification.
|
||||
fn lookup_dto_class(class_name: &str) -> Option<TypeKind> {
|
||||
DTO_CLASSES.with(|cell| cell.borrow().get(class_name).cloned().map(TypeKind::Dto))
|
||||
}
|
||||
|
|
@ -27,7 +27,7 @@ fn lookup_dto_class(class_name: &str) -> Option<TypeKind> {
|
|||
/// for the JS/TS object-pattern formal `({ a, b, c })`, the entry is
|
||||
/// `("a", None, ["b", "c"])`. Strictly additive: when the param is
|
||||
/// not a destructured pattern (or the language has no destructure
|
||||
/// concept), behaviour is identical to the pre-Phase-5 names-only path.
|
||||
/// concept), behaviour is identical to the names-only path.
|
||||
///
|
||||
/// Closes the residual gap behind CVE-2026-25544 (PayloadCMS Drizzle
|
||||
/// SQL injection): a per-parameter taint probe that seeds only the
|
||||
|
|
|
|||
37
src/cli.rs
37
src/cli.rs
|
|
@ -49,6 +49,7 @@ impl Commands {
|
|||
match self {
|
||||
Commands::Scan { explain_engine, .. } => *explain_engine,
|
||||
Commands::List { .. } => true,
|
||||
Commands::Rules { .. } => true,
|
||||
Commands::Config { action } => {
|
||||
matches!(action, ConfigAction::Show { .. } | ConfigAction::Path)
|
||||
}
|
||||
|
|
@ -459,6 +460,12 @@ pub enum Commands {
|
|||
action: ConfigAction,
|
||||
},
|
||||
|
||||
/// Browse the built-in rule registry (cap classes + per-language label rules)
|
||||
Rules {
|
||||
#[command(subcommand)]
|
||||
action: RulesAction,
|
||||
},
|
||||
|
||||
/// Start the local web UI for browsing scan results
|
||||
Serve {
|
||||
/// Path to scan root (defaults to current directory)
|
||||
|
|
@ -525,6 +532,36 @@ pub enum ConfigAction {
|
|||
},
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
pub enum RulesAction {
|
||||
/// List built-in rules
|
||||
List {
|
||||
/// Filter by language slug (e.g. javascript, java, python). Cap-class
|
||||
/// entries (`language = "all"`) are always shown unless `--no-class`
|
||||
/// is set.
|
||||
#[arg(long)]
|
||||
lang: Option<String>,
|
||||
|
||||
/// Filter by rule kind (`class`, `source`, `sink`, `sanitizer`).
|
||||
#[arg(long)]
|
||||
kind: Option<String>,
|
||||
|
||||
/// Show only the cap-class registry entries (one per vulnerability
|
||||
/// class), suppressing per-language label rules.
|
||||
#[arg(long, conflicts_with = "no_class")]
|
||||
class_only: bool,
|
||||
|
||||
/// Suppress cap-class registry entries (show only per-language label
|
||||
/// rules and gated sinks).
|
||||
#[arg(long)]
|
||||
no_class: bool,
|
||||
|
||||
/// Emit JSON instead of the human-readable table.
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
pub enum IndexAction {
|
||||
/// Build or update index for current project
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ pub mod clean;
|
|||
pub mod config;
|
||||
pub mod index;
|
||||
pub mod list;
|
||||
pub mod rules;
|
||||
pub mod scan;
|
||||
#[cfg(feature = "serve")]
|
||||
pub mod serve;
|
||||
|
|
@ -352,6 +353,9 @@ pub fn handle_command(
|
|||
}
|
||||
}
|
||||
}
|
||||
Commands::Rules { action } => {
|
||||
self::rules::handle(action, config)?;
|
||||
}
|
||||
Commands::Serve {
|
||||
path,
|
||||
port,
|
||||
|
|
|
|||
248
src/commands/rules.rs
Normal file
248
src/commands/rules.rs
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
//! `nyx rules` subcommand.
|
||||
//!
|
||||
//! Surfaces the rule registry from the terminal so users can enumerate
|
||||
//! the same content that the dashboard's `/api/rules` endpoint and the
|
||||
//! browser's Rules page show. The output composes built-in cap-class
|
||||
//! entries (one per `Cap` with a canonical rule id), per-language label
|
||||
//! rules (sink/source/sanitizer), gated sinks, and any custom rules
|
||||
//! defined in the user's config.
|
||||
|
||||
use crate::cli::RulesAction;
|
||||
use crate::errors::NyxResult;
|
||||
use crate::labels::{self, RuleInfo};
|
||||
use crate::utils::config::{Config, RuleKind};
|
||||
use console::style;
|
||||
|
||||
pub fn handle(action: RulesAction, config: &Config) -> NyxResult<()> {
|
||||
match action {
|
||||
RulesAction::List {
|
||||
lang,
|
||||
kind,
|
||||
class_only,
|
||||
no_class,
|
||||
json: as_json,
|
||||
} => list(
|
||||
config,
|
||||
lang.as_deref(),
|
||||
kind.as_deref(),
|
||||
class_only,
|
||||
no_class,
|
||||
as_json,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn list(
|
||||
config: &Config,
|
||||
lang_filter: Option<&str>,
|
||||
kind_filter: Option<&str>,
|
||||
class_only: bool,
|
||||
no_class: bool,
|
||||
as_json: bool,
|
||||
) -> NyxResult<()> {
|
||||
let mut rules = labels::enumerate_builtin_rules();
|
||||
|
||||
// Apply disabled-rules overlay so the CLI matches the dashboard view.
|
||||
for rule in &mut rules {
|
||||
if config.analysis.disabled_rules.contains(&rule.id) {
|
||||
rule.enabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Append custom rules from config. Mirrors the projection in
|
||||
// `src/server/routes/rules.rs::build_rule_list`.
|
||||
for (cfg_lang, lang_cfg) in &config.analysis.languages {
|
||||
let canonical = labels::canonical_lang(cfg_lang);
|
||||
for cr in &lang_cfg.rules {
|
||||
let kind_str = match cr.kind {
|
||||
RuleKind::Source => "source",
|
||||
RuleKind::Sanitizer => "sanitizer",
|
||||
RuleKind::Sink => "sink",
|
||||
};
|
||||
let id = labels::custom_rule_id(canonical, kind_str, &cr.matchers);
|
||||
let first = cr.matchers.first().map(|s| s.as_str()).unwrap_or("?");
|
||||
let title = format!("{} (custom {})", first, kind_str);
|
||||
let cap = cr.cap.to_cap();
|
||||
let enabled = !config.analysis.disabled_rules.contains(&id);
|
||||
rules.push(RuleInfo {
|
||||
id,
|
||||
title,
|
||||
language: canonical.to_string(),
|
||||
kind: kind_str.to_string(),
|
||||
cap: labels::cap_to_name(cap).to_string(),
|
||||
cap_bits: cap.bits(),
|
||||
matchers: cr.matchers.clone(),
|
||||
case_sensitive: cr.case_sensitive,
|
||||
is_custom: true,
|
||||
is_gated: false,
|
||||
is_class: false,
|
||||
emission_active: true,
|
||||
enabled,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Filter.
|
||||
let lang_filter_canonical = lang_filter.map(labels::canonical_lang);
|
||||
rules.retain(|r| {
|
||||
if class_only && !r.is_class {
|
||||
return false;
|
||||
}
|
||||
if no_class && r.is_class {
|
||||
return false;
|
||||
}
|
||||
if let Some(want) = lang_filter_canonical {
|
||||
// Cap-class entries (`language == "all"`) are language-agnostic;
|
||||
// surface them alongside any language filter unless explicitly
|
||||
// suppressed via `--no-class`.
|
||||
if r.language != want && r.language != "all" {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if let Some(want) = kind_filter
|
||||
&& !r.kind.eq_ignore_ascii_case(want)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
if as_json {
|
||||
let body = serde_json::to_string_pretty(&rules)
|
||||
.map_err(|e| crate::errors::NyxError::Msg(format!("rules JSON serialise: {e}")))?;
|
||||
println!("{body}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if rules.is_empty() {
|
||||
println!("{}", style("(no rules match the supplied filters)").dim());
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Header.
|
||||
println!(
|
||||
"{}",
|
||||
style("Rules (built-in registry, per-language labels, and custom rules from config)")
|
||||
.bold()
|
||||
);
|
||||
println!();
|
||||
|
||||
// Cap-class section first, distinct from per-language entries.
|
||||
let class_rules: Vec<&RuleInfo> = rules.iter().filter(|r| r.is_class).collect();
|
||||
if !class_rules.is_empty() {
|
||||
println!(" {}", style("Vulnerability classes").cyan().bold());
|
||||
for r in &class_rules {
|
||||
print_class_row(r);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
let builtin_label_rules: Vec<&RuleInfo> = rules
|
||||
.iter()
|
||||
.filter(|r| !r.is_class && !r.is_custom)
|
||||
.collect();
|
||||
if !builtin_label_rules.is_empty() {
|
||||
println!(" {}", style("Built-in label rules").cyan().bold());
|
||||
for r in &builtin_label_rules {
|
||||
print_label_row(r);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
let custom_rules: Vec<&RuleInfo> = rules.iter().filter(|r| r.is_custom).collect();
|
||||
if !custom_rules.is_empty() {
|
||||
println!(" {}", style("Custom rules (from config)").cyan().bold());
|
||||
for r in &custom_rules {
|
||||
print_label_row(r);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
style(format!(
|
||||
"{} class · {} built-in label · {} custom · {} total",
|
||||
class_rules.len(),
|
||||
builtin_label_rules.len(),
|
||||
custom_rules.len(),
|
||||
rules.len()
|
||||
))
|
||||
.dim()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_class_row(r: &RuleInfo) {
|
||||
let status = if r.enabled {
|
||||
style("on ").green().to_string()
|
||||
} else {
|
||||
style("off").red().dim().to_string()
|
||||
};
|
||||
// Forward-declared classes (registered but not yet wired through
|
||||
// `ast.rs::diag_for_finding`) carry a tag so users don't expect
|
||||
// findings under the class id; live findings still surface under
|
||||
// the legacy `taint-unsanitised-flow` rule id.
|
||||
let tag = if r.emission_active {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" {}", style("(forward-declared)").yellow())
|
||||
};
|
||||
println!(
|
||||
" {} {:<32} {} {}{}",
|
||||
status,
|
||||
style(&r.id).white().bold(),
|
||||
style(format!("[{}]", r.cap)).dim(),
|
||||
style(&r.title).dim(),
|
||||
tag,
|
||||
);
|
||||
}
|
||||
|
||||
fn print_label_row(r: &RuleInfo) {
|
||||
let status = if r.enabled {
|
||||
style("on ").green().to_string()
|
||||
} else {
|
||||
style("off").red().dim().to_string()
|
||||
};
|
||||
let tag = if r.is_custom {
|
||||
style(" custom").yellow().to_string()
|
||||
} else if r.is_gated {
|
||||
style(" gated").magenta().to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let matchers = if r.matchers.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
let joined = r.matchers.join(", ");
|
||||
format!(" — {joined}")
|
||||
};
|
||||
println!(
|
||||
" {} {:<10} {:<10} {:<14}{}{}",
|
||||
status,
|
||||
style(&r.language).cyan(),
|
||||
style(&r.kind).white(),
|
||||
style(&r.cap).dim(),
|
||||
tag,
|
||||
style(matchers).dim(),
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::config::Config;

    /// Smoke test: `list` must succeed against the default config for a
    /// representative set of flag combinations.
    #[test]
    fn list_runs_without_panic_default_config() {
        let cfg = Config::default();

        // (lang, kind, class_only, no_class, as_json)
        let cases: [(Option<&str>, Option<&str>, bool, bool, bool); 4] = [
            // Plain list, no filters.
            (None, None, false, false, false),
            // Class-only.
            (None, None, true, false, false),
            // JSON output.
            (None, None, false, false, true),
            // Lang + kind filters.
            (Some("javascript"), Some("sink"), false, true, false),
        ];
        for (lang, kind, class_only, no_class, as_json) in cases {
            list(&cfg, lang, kind, class_only, no_class, as_json).unwrap();
        }
    }
}
|
||||
|
|
@ -544,14 +544,14 @@ pub(crate) fn deduplicate_taint_flows(diags: &mut Vec<Diag>) {
|
|||
id.starts_with(TAINT_BASE)
|
||||
}
|
||||
|
||||
fn sink_cap_bits(d: &Diag) -> u16 {
|
||||
fn sink_cap_bits(d: &Diag) -> u32 {
|
||||
d.evidence.as_ref().map(|e| e.sink_caps).unwrap_or(0)
|
||||
}
|
||||
|
||||
// Group candidates by (path, line, severity, sink_cap_bits). Only
|
||||
// `taint-unsanitised-flow` rule IDs participate; findings with other
|
||||
// bases (e.g. `js.code_exec.eval`) are left untouched per guardrails.
|
||||
let mut groups: HashMap<(String, usize, Severity, u16), Vec<usize>> = HashMap::new();
|
||||
let mut groups: HashMap<(String, usize, Severity, u32), Vec<usize>> = HashMap::new();
|
||||
for (i, d) in diags.iter().enumerate() {
|
||||
if is_taint_flow(&d.id) {
|
||||
groups
|
||||
|
|
@ -690,8 +690,8 @@ pub const SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX: &str = "scc_unconverged:cross-
|
|||
/// file set. Semantics match [`diff_cap_snapshots`], a key that
|
||||
/// appears or disappears counts as changed.
|
||||
fn changed_cap_keys_of(
|
||||
before: &HashMap<crate::symbol::FuncKey, (u16, u16, u16, Vec<usize>)>,
|
||||
after: &HashMap<crate::symbol::FuncKey, (u16, u16, u16, Vec<usize>)>,
|
||||
before: &HashMap<crate::symbol::FuncKey, (u32, u32, u32, Vec<usize>)>,
|
||||
after: &HashMap<crate::symbol::FuncKey, (u32, u32, u32, Vec<usize>)>,
|
||||
) -> HashSet<crate::symbol::FuncKey> {
|
||||
let mut changed = HashSet::new();
|
||||
for (k, v_after) in after {
|
||||
|
|
@ -971,10 +971,10 @@ fn run_topo_batches(
|
|||
// with a 64-iter budget; the classifier only needs the tail.
|
||||
let mut delta_trajectory: smallvec::SmallVec<[u32; 4]> = smallvec::SmallVec::new();
|
||||
|
||||
// Phase-B worklist: files to re-analyse in this iteration.
|
||||
// SCC fixpoint worklist: files to re-analyse in this iteration.
|
||||
// Initialised to the full batch so iteration 0 behaves like
|
||||
// the pre-Phase-B implementation; subsequent iterations
|
||||
// prune to files containing a caller of a changed summary.
|
||||
// the unconditional re-analysis; subsequent iterations prune
|
||||
// to files containing a caller of a changed summary.
|
||||
//
|
||||
// Storing `PathBuf` clones (matching how the rest of the
|
||||
// SCC loop identifies files) so membership tests are cheap
|
||||
|
|
|
|||
|
|
@ -113,22 +113,22 @@ impl ConstValue {
|
|||
|
||||
// ── TypeSet ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// Bitset over [`TypeKind`] variants (12 bits used of u16).
|
||||
/// Bitset over [`TypeKind`] variants (19 bits used of u32).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct TypeSet(u16);
|
||||
pub struct TypeSet(u32);
|
||||
|
||||
impl TypeSet {
|
||||
/// All 12 type bits set, no type constraint (Top).
|
||||
pub const TOP: Self = Self(0x0FFF);
|
||||
/// All 19 type bits set, no type constraint (Top).
|
||||
pub const TOP: Self = Self(0x0007_FFFF);
|
||||
/// No type bits, unsatisfiable (Bottom).
|
||||
pub const BOTTOM: Self = Self(0);
|
||||
|
||||
pub fn singleton(kind: &TypeKind) -> Self {
|
||||
Self(1u16 << type_kind_index(kind))
|
||||
Self(1u32 << type_kind_index(kind))
|
||||
}
|
||||
|
||||
pub fn contains(&self, kind: &TypeKind) -> bool {
|
||||
self.0 & (1u16 << type_kind_index(kind)) != 0
|
||||
self.0 & (1u32 << type_kind_index(kind)) != 0
|
||||
}
|
||||
|
||||
/// Meet (intersection): refine type knowledge.
|
||||
|
|
@ -156,7 +156,7 @@ impl TypeSet {
|
|||
|
||||
/// Check if this set contains exactly one type matching the given kind.
|
||||
pub fn is_singleton_of(&self, kind: &TypeKind) -> bool {
|
||||
self.0 != 0 && self.0 == (1u16 << type_kind_index(kind))
|
||||
self.0 != 0 && self.0 == (1u32 << type_kind_index(kind))
|
||||
}
|
||||
|
||||
/// Return the TypeKind if this is a singleton set (exactly one type).
|
||||
|
|
@ -186,12 +186,21 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
|
|||
TypeKind::LocalCollection => 12,
|
||||
TypeKind::RequestBuilder => 13,
|
||||
TypeKind::JpaCriteriaQuery => 14,
|
||||
TypeKind::LdapClient => 15,
|
||||
TypeKind::XPathClient => 16,
|
||||
TypeKind::XmlParser => 17,
|
||||
TypeKind::Template => 18,
|
||||
// the analysis DTO types carry per-field structural info that the
|
||||
// bitset domain can't represent. Collapse to Unknown so callers
|
||||
// still see "any type possible" rather than crashing on an
|
||||
// unhandled variant. Same-file/cross-file Dto-aware paths read
|
||||
// the structured TypeKind directly, not via this index.
|
||||
TypeKind::Dto(_) => 6,
|
||||
// NullPrototypeObject is a JS-only sub-kind of Object used for
|
||||
// flow-sensitive prototype-pollution suppression. The bitset
|
||||
// domain has no dedicated slot, share the Object index so
|
||||
// singleton recovery still maps to a meaningful TypeKind.
|
||||
TypeKind::NullPrototypeObject => 3,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -212,6 +221,10 @@ fn type_kind_from_index(idx: u32) -> Option<TypeKind> {
|
|||
12 => Some(TypeKind::LocalCollection),
|
||||
13 => Some(TypeKind::RequestBuilder),
|
||||
14 => Some(TypeKind::JpaCriteriaQuery),
|
||||
15 => Some(TypeKind::LdapClient),
|
||||
16 => Some(TypeKind::XPathClient),
|
||||
17 => Some(TypeKind::XmlParser),
|
||||
18 => Some(TypeKind::Template),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -801,7 +814,7 @@ pub struct PathEnv {
|
|||
/// Per-key meet count for widening decisions.
|
||||
meet_counts: SmallVec<[(SsaValue, u8); 8]>,
|
||||
/// Refinement counter (bounded per block).
|
||||
refine_count: u16,
|
||||
refine_count: u32,
|
||||
}
|
||||
|
||||
impl PathEnv {
|
||||
|
|
@ -837,7 +850,7 @@ impl PathEnv {
|
|||
if self.unsat {
|
||||
return;
|
||||
}
|
||||
if self.refine_count >= MAX_REFINE_PER_BLOCK as u16 {
|
||||
if self.refine_count >= MAX_REFINE_PER_BLOCK as u32 {
|
||||
return; // bounded
|
||||
}
|
||||
let canonical = self.uf.find_immutable(v);
|
||||
|
|
@ -860,7 +873,7 @@ impl PathEnv {
|
|||
// but `refine_single` is also invoked directly from `assume_eq`,
|
||||
// `assume_neq`, and a few internal sites. Large generated inputs
|
||||
// (thousands of short statements on one line) can drive millions
|
||||
// of calls and overflow a plain u16 `refine_count`. Saturate to
|
||||
// of calls, which overflowed the former u16 `refine_count`. Saturate to
|
||||
// stay within bounds, the refinement pipeline is already
|
||||
// idempotent past the cap, so saturation is semantically a no-op.
|
||||
self.refine_count = self.refine_count.saturating_add(1);
|
||||
|
|
|
|||
|
|
@ -250,6 +250,31 @@ pub fn class_name_to_type_kind(name: &str) -> Option<TypeKind> {
|
|||
// Java I/O supertypes (enables hierarchy fallback for subtypes)
|
||||
| "InputStream" | "OutputStream" | "Reader" | "Writer" | "PrintWriter"
|
||||
| "BufferedInputStream" | "BufferedOutputStream" => Some(TypeKind::FileHandle),
|
||||
// JNDI / Spring LDAP directory-service types. Field- and method-typed
|
||||
// declarations (`DirContext ctx = ...`, `LdapTemplate ldapTemplate;`)
|
||||
// attach this fact to the receiver SSA value so type-qualified
|
||||
// resolution rewrites `ctx.search(...)` → `LdapClient.search`.
|
||||
"DirContext" | "LdapContext" | "InitialDirContext" | "InitialLdapContext"
|
||||
| "LdapTemplate" => Some(TypeKind::LdapClient),
|
||||
// JAXP XML parser instances. Field/local declarations like
|
||||
// `DocumentBuilder builder = factory.newDocumentBuilder();` route
|
||||
// through this map so the receiver SSA value carries
|
||||
// `TypeKind::XmlParser` and the type-qualified
|
||||
// `XmlParser.parse` rule fires on `builder.parse(...)`.
|
||||
"DocumentBuilder" | "SAXParser" | "XMLReader" | "SAXBuilder" => {
|
||||
Some(TypeKind::XmlParser)
|
||||
}
|
||||
// JAXP XPath instances. `XPath xpath = factory.newXPath();`
|
||||
// routes through this map so the receiver carries
|
||||
// `TypeKind::XPathClient`, enabling the type-qualified
|
||||
// `XPathClient.evaluate` resolution and the resolver-binding
|
||||
// suppression sidecar.
|
||||
"XPath" | "XPathExpression" => Some(TypeKind::XPathClient),
|
||||
// Apache FreeMarker `Template` declared receiver type. Routes
|
||||
// `Template tpl = ...; tpl.process(model, out)` through
|
||||
// type-qualified resolution to `Template.process`, the SSTI
|
||||
// sink defined in `labels/java.rs`.
|
||||
"Template" => Some(TypeKind::Template),
|
||||
// Python qualified type names.
|
||||
// Only covers raw lowered names from isinstance(). The lowering in lower.rs
|
||||
// extracts the literal type text: isinstance(x, requests.Session) produces
|
||||
|
|
|
|||
|
|
@ -225,7 +225,17 @@ pub mod index {
|
|||
/// * `"3"`, `ssa_function_bodies.body` changed from JSON TEXT to
|
||||
/// bincode BLOB. Old JSON payloads cannot be deserialised by the
|
||||
/// new engine, so they are silently rebuilt on open.
|
||||
pub const SCHEMA_VERSION: &str = "3";
|
||||
/// * `"4"`, `Cap` widened from u16 to u32 to accommodate cap bits
|
||||
/// ≥ 14 (LDAP_INJECTION, XPATH_INJECTION, HEADER_INJECTION,
|
||||
/// OPEN_REDIRECT, SSTI, XXE, PROTOTYPE_POLLUTION). The `Cap`
|
||||
/// deserialiser accepts both u16- and u32-width JSON values, so
|
||||
/// pre-bump caches load without crashing, but the cached
|
||||
/// `source_caps` / `sanitizer_caps` / `sink_caps` blobs were
|
||||
/// produced before any of these caps could appear and would
|
||||
/// underreport rules that emit them. Bumping forces a rescan so
|
||||
/// newly-emitted gates and sinks land in the cache with the wider
|
||||
/// footprint.
|
||||
pub const SCHEMA_VERSION: &str = "4";
|
||||
|
||||
// TODO: ADD CLEANS FOR EACH TABLE BASED ON PROJECT WHICH RUNS ON CLEAN
|
||||
// TODO: ADD DROP AND GIVE A CLI PARAMETER FOR DROP
|
||||
|
|
@ -2899,6 +2909,8 @@ fn make_test_callee_body(
|
|||
type_facts: crate::ssa::type_facts::TypeFactResult {
|
||||
facts: std::collections::HashMap::new(),
|
||||
},
|
||||
xml_parser_config: crate::ssa::xml_config::XmlParserConfigResult::default(),
|
||||
xpath_config: crate::ssa::xpath_config::XPathConfigResult::default(),
|
||||
alias_result: crate::ssa::alias::BaseAliasResult::empty(),
|
||||
points_to: crate::ssa::heap::PointsToResult::empty(),
|
||||
module_aliases: std::collections::HashMap::new(),
|
||||
|
|
@ -3765,7 +3777,7 @@ fn metadata_table_survives_clear() {
|
|||
/// receiver sentinel (`u32::MAX`), the container-element marker
|
||||
/// (`<elem>`), and the `overflow` flag across serialise → store →
|
||||
/// load → deserialise. This is the strict-additive contract for
|
||||
/// pre-Phase-5 blobs (default-empty deserialises cleanly) and the
|
||||
/// older blobs without field_points_to (default-empty deserialises cleanly) and the
|
||||
/// completeness check for the W3 cross-call resolver.
|
||||
#[test]
|
||||
fn ssa_summaries_round_trip_preserves_field_points_to() {
|
||||
|
|
@ -3840,15 +3852,15 @@ fn ssa_summaries_round_trip_preserves_field_points_to() {
|
|||
assert!(!sum.field_points_to.overflow);
|
||||
}
|
||||
|
||||
/// Pre-Phase-5 blob compatibility: a summary serialised without
|
||||
/// Older blob compatibility: a summary serialised without
|
||||
/// `field_points_to` deserialises with the empty default, no
|
||||
/// migration needed because the field is `#[serde(default)]`.
|
||||
#[test]
|
||||
fn ssa_summaries_pre_phase5_blob_decodes_with_empty_field_points_to() {
|
||||
fn ssa_summaries_legacy_blob_decodes_with_empty_field_points_to() {
|
||||
use crate::summary::ssa_summary::SsaFuncSummary;
|
||||
|
||||
// Hand-craft JSON without the `field_points_to` key.
|
||||
let pre_phase5_json = r#"{
|
||||
let legacy_json = r#"{
|
||||
"param_to_return": [],
|
||||
"param_to_sink": [],
|
||||
"source_caps": 0,
|
||||
|
|
@ -3865,7 +3877,7 @@ fn ssa_summaries_pre_phase5_blob_decodes_with_empty_field_points_to() {
|
|||
"return_path_facts": [],
|
||||
"typed_call_receivers": []
|
||||
}"#;
|
||||
let sum: SsaFuncSummary = serde_json::from_str(pre_phase5_json).unwrap();
|
||||
let sum: SsaFuncSummary = serde_json::from_str(legacy_json).unwrap();
|
||||
assert!(
|
||||
sum.field_points_to.is_empty(),
|
||||
"missing field_points_to must default to empty",
|
||||
|
|
|
|||
|
|
@ -217,15 +217,15 @@ pub struct Evidence {
|
|||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub symbolic: Option<SymbolicVerdict>,
|
||||
|
||||
/// Resolved sink capability bits (u16 from `Cap::bits()`).
|
||||
/// Resolved sink capability bits (u32 from `Cap::bits()`).
|
||||
///
|
||||
/// Used by deduplication to distinguish findings that share a
|
||||
/// `(path, line, severity)` key but target different sinks (e.g.
|
||||
/// `sink_sql(x); sink_shell(x);` on the same line). 0 when the sink
|
||||
/// caps could not be resolved at the CFG node (e.g. pure summary
|
||||
/// resolution where the caller's sink node carries no label).
|
||||
#[serde(default, skip_serializing_if = "is_zero_u16")]
|
||||
pub sink_caps: u16,
|
||||
#[serde(default, skip_serializing_if = "is_zero_cap_bits")]
|
||||
pub sink_caps: u32,
|
||||
|
||||
/// Engine provenance notes attached to this finding (e.g. "worklist
|
||||
/// iteration budget was hit before convergence"), propagated from
|
||||
|
|
@ -243,7 +243,7 @@ pub struct Evidence {
|
|||
pub data_exfil_field: Option<String>,
|
||||
}
|
||||
|
||||
fn is_zero_u16(v: &u16) -> bool {
|
||||
fn is_zero_cap_bits(v: &u32) -> bool {
|
||||
*v == 0
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -177,7 +177,10 @@ pub fn render_welcome() -> String {
|
|||
out.push('\n');
|
||||
|
||||
for line in LOGO {
|
||||
out.push_str(&format!(" {}\n", style(line).color256(141).bold()));
|
||||
out.push_str(&format!(
|
||||
" {}\n",
|
||||
style(line).true_color(114, 243, 215).bold()
|
||||
));
|
||||
}
|
||||
|
||||
out.push_str(&format!(
|
||||
|
|
|
|||
|
|
@ -67,6 +67,30 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── LDAP injection sinks ───
|
||||
//
|
||||
// OpenLDAP / libldap surface: `ldap_search_s(ld, base, scope, filter, ...)`
|
||||
// and the extended synchronous variant `ldap_search_ext_s(ld, base, scope, filter,
|
||||
// attrs, attrsonly, serverctrls, clientctrls, timeout, sizelimit, *res)`.
|
||||
// The filter argument (position 3) is the LDAP-injection vector. No
|
||||
// standard libldap escape helper exists in the C surface; sanitisation is
|
||||
// typically caller-implemented (`sanitize_*` covers the developer-named
|
||||
// case via the existing prefix rule above).
|
||||
LabelRule {
|
||||
matchers: &["ldap_search_s", "ldap_search_ext_s"],
|
||||
label: DataLabel::Sink(Cap::LDAP_INJECTION),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── XPath injection sinks ───
|
||||
//
|
||||
// libxml2 evaluation entry points: `xmlXPathEvalExpression(expr, ctx)`,
|
||||
// `xmlXPathEval(expr, ctx)`, `xmlXPathCompile(expr)`. The expression
|
||||
// string is arg 0 and is the canonical XPath-injection vector.
|
||||
LabelRule {
|
||||
matchers: &["xmlXPathEvalExpression", "xmlXPathEval", "xmlXPathCompile"],
|
||||
label: DataLabel::Sink(Cap::XPATH_INJECTION),
|
||||
case_sensitive: false,
|
||||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for C.
|
||||
|
|
|
|||
|
|
@ -89,6 +89,24 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::SSRF),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── LDAP injection sinks ───
|
||||
//
|
||||
// OpenLDAP / libldap C interface (also used from C++ wrappers): the filter
|
||||
// argument carries attacker-controlled data unless explicitly escaped.
|
||||
LabelRule {
|
||||
matchers: &["ldap_search_s", "ldap_search_ext_s"],
|
||||
label: DataLabel::Sink(Cap::LDAP_INJECTION),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── XPath injection sinks ───
|
||||
//
|
||||
// libxml2 (the dominant C++ XML parser surface): `xmlXPathEvalExpression`,
|
||||
// `xmlXPathEval`, `xmlXPathCompile` accept the expression string as arg 0.
|
||||
LabelRule {
|
||||
matchers: &["xmlXPathEvalExpression", "xmlXPathEval", "xmlXPathCompile"],
|
||||
label: DataLabel::Sink(Cap::XPATH_INJECTION),
|
||||
case_sensitive: false,
|
||||
},
|
||||
];
|
||||
|
||||
/// Gated sinks for C++.
|
||||
|
|
|
|||
|
|
@ -148,6 +148,97 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::CRYPTO),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── LDAP injection sinks ───
|
||||
//
|
||||
// go-ldap (`github.com/go-ldap/ldap/v3`): `conn, _ := ldap.DialURL(url);
|
||||
// req := ldap.NewSearchRequest(base, scope, deref, sizeLimit, timeLimit,
|
||||
// typesOnly, filter, attrs, controls)`. The filter argument (position 6)
|
||||
// is the LDAP-injection vector; passing the request to `conn.Search(req)`
|
||||
// executes the filter. Type-qualified resolution rewrites `conn.Search`
|
||||
// → `LdapClient.Search` when the receiver was returned by
|
||||
// `ldap.DialURL` / `ldap.Dial` / `ldap.DialTLS` (see
|
||||
// [`crate::ssa::type_facts::constructor_type`]). We also tag
|
||||
// `ldap.NewSearchRequest` directly so taint reaching the filter argument
|
||||
// surfaces at the construction call (matches the typical FP-free shape
|
||||
// where the request is built once and passed straight to `Search`).
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"LdapClient.Search",
|
||||
"LdapClient.SearchWithPaging",
|
||||
"ldap.NewSearchRequest",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::LDAP_INJECTION),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─── LDAP-filter sanitizer ───
|
||||
//
|
||||
// go-ldap exposes `ldap.EscapeFilter(s string) string` (RFC 4515 metachar
|
||||
// escaping). Treat any call as clearing the LDAP_INJECTION cap.
|
||||
LabelRule {
|
||||
matchers: &["ldap.EscapeFilter"],
|
||||
label: DataLabel::Sanitizer(Cap::LDAP_INJECTION),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─── Header / CRLF injection sinks ───
|
||||
//
|
||||
// `net/http` `ResponseWriter.Header()` returns a `Header` map; calls to
|
||||
// `Set(name, val)` / `Add(name, val)` write a single header value.
|
||||
// After paren-group stripping the chain text becomes
|
||||
// `w.Header.Set` / `w.Header.Add`, so suffix matchers on `Header.Set` /
|
||||
// `Header.Add` cover both the bound-receiver form (`w.Header().Set(...)`)
|
||||
// and the documentation-style class-qualified form (`Header.Set`).
|
||||
// Tainted strings without `\r\n` stripping enable response splitting.
|
||||
LabelRule {
|
||||
matchers: &["Header.Set", "Header.Add"],
|
||||
label: DataLabel::Sink(Cap::HEADER_INJECTION),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─── Header / CRLF sanitizers ───
|
||||
//
|
||||
// Project-local `stripCRLF` / `escapeHeader` helpers that strip `\r` and
|
||||
// `\n` from a value before it is written to a response header.
|
||||
LabelRule {
|
||||
matchers: &["stripCRLF", "stripCrlf", "escapeHeader", "sanitizeHeader"],
|
||||
label: DataLabel::Sanitizer(Cap::HEADER_INJECTION),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── Open redirect sinks ───
|
||||
//
|
||||
// `net/http` `http.Redirect(w, r, url, code)` writes a `Location` header
|
||||
// and a 3xx status from the supplied URL. Without an allowlist check,
|
||||
// a tainted `url` is the canonical Go open-redirect vector.
|
||||
LabelRule {
|
||||
matchers: &["http.Redirect"],
|
||||
label: DataLabel::Sink(Cap::OPEN_REDIRECT),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"validateRedirectUrl",
|
||||
"isSafeRedirect",
|
||||
"stripScheme",
|
||||
"ensureRelativeUrl",
|
||||
"assertRelativePath",
|
||||
"isRelativeUrl",
|
||||
],
|
||||
label: DataLabel::Sanitizer(Cap::OPEN_REDIRECT),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── SSTI sinks ───
|
||||
//
|
||||
// `text/template` and `html/template` parse a template source string via
|
||||
// `template.New(name).Parse(src)`. After paren-group stripping the chain
|
||||
// text becomes `template.New.Parse`, so the suffix matcher catches both
|
||||
// packages (`text/template`, `html/template`) regardless of import alias.
|
||||
// `template.ParseFiles` / `ParseGlob` take file paths (path-traversal,
|
||||
// not SSTI) and are intentionally excluded. `html/template`'s auto-
|
||||
// escaping applies during `Execute`, not `Parse`, so a tainted source
|
||||
// string still yields SSTI.
|
||||
LabelRule {
|
||||
matchers: &["template.New.Parse"],
|
||||
label: DataLabel::Sink(Cap::SSTI),
|
||||
case_sensitive: false,
|
||||
},
|
||||
];
|
||||
|
||||
/// Argument-role-aware Go sinks. Two classes coexist on the outbound HTTP
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
use crate::labels::{Cap, DataLabel, Kind, LabelRule, ParamConfig, RuntimeLabelRule};
|
||||
use crate::labels::{
|
||||
Cap, DataLabel, GateActivation, Kind, LabelRule, ParamConfig, RuntimeLabelRule, SinkGate,
|
||||
};
|
||||
use crate::utils::project::{DetectedFramework, FrameworkContext};
|
||||
use phf::{Map, phf_map};
|
||||
|
||||
|
|
@ -265,6 +267,223 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::CODE_EXEC),
|
||||
case_sensitive: false,
|
||||
},
|
||||
// ─── LDAP injection sinks ───
|
||||
//
|
||||
// JNDI / Spring LDAP search APIs accept an attacker-influenceable filter
|
||||
// expression as the second positional argument (`DirContext.search(name,
|
||||
// filter, controls)` / `LdapTemplate.search(base, filter, mapper)`). Without
|
||||
// RFC 4515 escaping the filter can be rewritten to bypass authentication or
|
||||
// exfiltrate directory entries. Type-qualified resolution rewrites
|
||||
// `ctx.search(...)` → `LdapClient.search` when the receiver carries a
|
||||
// `TypeKind::LdapClient` fact (set by `class_name_to_type_kind` for the
|
||||
// declared types `DirContext`, `InitialDirContext`, `LdapContext`,
|
||||
// `LdapTemplate`, or by `constructor_type` for `new InitialDirContext(...)`
|
||||
// / `new InitialLdapContext(...)`). Direct flat matchers cover the
|
||||
// documentation-style class-qualified call forms that bypass receiver
|
||||
// typing.
|
||||
LabelRule {
|
||||
matchers: &[
|
||||
"LdapClient.search",
|
||||
"LdapClient.searchByEntity",
|
||||
"LdapClient.searchForObject",
|
||||
"LdapClient.searchForContext",
|
||||
"DirContext.search",
|
||||
"LdapTemplate.search",
|
||||
"LdapTemplate.searchByEntity",
|
||||
"LdapTemplate.searchForObject",
|
||||
"LdapTemplate.searchForContext",
|
||||
"ctx.search",
|
||||
],
|
||||
label: DataLabel::Sink(Cap::LDAP_INJECTION),
|
||||
case_sensitive: true,
|
||||
},
|
||||
// ─── LDAP-filter sanitizers ───
|
||||
//
|
||||
// Spring LDAP's `LdapEncoder.filterEncode(s)` applies RFC 4515 escaping to
|
||||
// metacharacters (`\`, `*`, `(`, `)`, ` | ||||