feat(ssa): optimize branch condition handling via constant folding, enhance precision for taint analysis, and expand OWASP Benchmark support

This commit is contained in:
elipeter 2026-06-02 13:41:45 -05:00
parent ec76c9e08f
commit 9c99f6c6a9
22 changed files with 1020 additions and 17 deletions

View file

@ -3997,3 +3997,94 @@ function outer(obj, x, y) {
let (mline, _) = method_site.span.expect("method span populated");
assert_eq!(mline, 4, "obj.method(x) on line 4");
}
// ─────────────────────────────────────────────────────────────────
// Constant-branch fold: CondArith capture + evaluation
// ─────────────────────────────────────────────────────────────────
/// Soundness contract for `CondArith::eval` / `eval_bool`.
///
/// Both OWASP-Benchmark arithmetic guard shapes have to fold to a definite
/// boolean using truncating integer division, and every undefined operation
/// or unresolved variable has to produce `None` rather than a wrong fold.
#[test]
fn cond_arith_eval_is_sound() {
    use crate::cfg::{BinOp, CondArith, CondVal};

    // Small tree-building helpers.
    fn lit(n: i64) -> Box<CondArith> {
        Box::new(CondArith::Lit(n))
    }
    fn var(name: &str) -> Box<CondArith> {
        Box::new(CondArith::Var(name.to_string()))
    }
    fn bin(op: BinOp, l: Box<CondArith>, r: Box<CondArith>) -> Box<CondArith> {
        Box::new(CondArith::Bin(op, l, r))
    }
    // Resolver that knows only `num`, bound to `value`.
    fn num_is(value: i64) -> impl Fn(&str) -> Option<i64> {
        move |name: &str| if name == "num" { Some(value) } else { None }
    }
    // Resolver that knows nothing.
    fn unresolved(_: &str) -> Option<i64> {
        None
    }

    let r86 = num_is(86);
    let r196 = num_is(196);

    // Shape 1: (7*42) - num > 200 with num = 86 → 208 > 200 → true.
    let minuend = bin(BinOp::Mul, lit(7), lit(42));
    let difference = bin(BinOp::Sub, minuend, var("num"));
    let shape1 = CondArith::Bin(BinOp::Gt, difference, lit(200));
    assert_eq!(shape1.eval_bool(&r86), Some(true));

    // Shape 2: (500/42) + num > 200 with num = 196 → 11 + 196 = 207 → true.
    let quotient = bin(BinOp::Div, lit(500), lit(42));
    let sum = bin(BinOp::Add, quotient, var("num"));
    let shape2 = CondArith::Bin(BinOp::Gt, sum, lit(200));
    assert_eq!(shape2.eval_bool(&r196), Some(true));

    // Division truncates toward zero: 500/42 is 11, not ~11.9.
    let plain_div = CondArith::Bin(BinOp::Div, lit(500), lit(42));
    assert_eq!(plain_div.eval(&r86), Some(CondVal::Int(11)));

    // A variable the resolver cannot bind blocks the fold entirely.
    assert_eq!(shape1.eval_bool(&unresolved), None);

    // Division and modulo by zero are undefined → None.
    let div_zero = CondArith::Bin(BinOp::Div, lit(1), lit(0));
    let mod_zero = CondArith::Bin(BinOp::Mod, lit(1), lit(0));
    assert_eq!(div_zero.eval(&r86), None);
    assert_eq!(mod_zero.eval(&r86), None);

    // Overflowing arithmetic → None.
    let overflow = CondArith::Bin(BinOp::Mul, lit(i64::MAX), lit(2));
    assert_eq!(overflow.eval(&r86), None);

    // A bare integer is not a branch condition, so `eval_bool` refuses it.
    assert_eq!(CondArith::Lit(1).eval_bool(&r86), None);

    // A comparison result (Bool) used as an arithmetic operand → None.
    let cmp = bin(BinOp::Gt, lit(2), lit(1));
    let bool_plus_int = CondArith::Bin(BinOp::Add, cmp, lit(1));
    assert_eq!(bool_plus_int.eval(&r86), None);
}
/// A pure integer-arithmetic comparison in an `if` condition must be stored
/// as a `CondArith` on the `If` node, while a condition containing a call
/// (here `s.length() > 200`) must leave `cond_arith` as `None`.
#[test]
fn build_cond_arith_captures_pure_int_comparison() {
    let java = Language::from(tree_sitter_java::LANGUAGE);
    let source = br#"
class C {
void m(int num, String s) {
if ((7 * 42) - num > 200) { foo(); }
if (s.length() > 200) { bar(); }
}
}
"#;
    let (cfg, _entry) = parse_and_build(source, "java", java);

    // Gather every captured tree from the `If` nodes, in node order.
    let ifs = if_nodes(&cfg);
    let mut arith = Vec::new();
    for &node in ifs.iter() {
        if let Some(tree) = cfg[node].cond_arith.clone() {
            arith.push(tree);
        }
    }

    // Only the first condition is pure integer arithmetic; the second one
    // contains a call and must not have been captured.
    assert_eq!(
        arith.len(),
        1,
        "only the pure int comparison should yield a CondArith, got {arith:?}"
    );

    // With `num` bound to a constant the captured tree folds to a bool.
    let resolver = |name: &str| match name {
        "num" => Some(86),
        _ => None,
    };
    assert_eq!(arith[0].eval_bool(&resolver), Some(true));
}

View file

@ -1198,10 +1198,14 @@ pub(super) fn is_syntactic_literal(node: Node, code: &[u8]) -> bool {
| "string_content"
| "string_fragment" => !has_string_interpolation(node),
// Numbers
"integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number" => {
true
}
// Numbers. Java's grammar uses radix-tagged kinds
// (`decimal_integer_literal`, `hex_integer_literal`, …) rather than a
// bare `integer`, so `int num = 86;` would otherwise miss this arm and
// lower to `Const(None)` (Varying) instead of `Const("86")`.
"integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number"
| "decimal_integer_literal" | "hex_integer_literal" | "octal_integer_literal"
| "binary_integer_literal" | "decimal_floating_point_literal"
| "hex_floating_point_literal" => true,
// Booleans / null / nil / none
"true" | "false" | "null" | "nil" | "none" | "null_literal" | "boolean"

View file

@ -431,6 +431,129 @@ pub enum BinOp {
GtEq,
}
impl BinOp {
    /// Whether this operator is one of the six comparisons
    /// (`==`, `!=`, `<`, `<=`, `>`, `>=`), i.e. produces a boolean rather
    /// than an integer.
    pub fn is_comparison(self) -> bool {
        match self {
            BinOp::Eq
            | BinOp::NotEq
            | BinOp::Lt
            | BinOp::LtEq
            | BinOp::Gt
            | BinOp::GtEq => true,
            BinOp::Add
            | BinOp::Sub
            | BinOp::Mul
            | BinOp::Div
            | BinOp::Mod
            | BinOp::BitAnd
            | BinOp::BitOr
            | BinOp::BitXor
            | BinOp::LeftShift
            | BinOp::RightShift => false,
        }
    }
}
/// A branch condition captured as a pure integer-arithmetic + comparison
/// expression tree at CFG-build time (where the real tree-sitter AST is
/// available, so operator precedence and parentheses are correct by
/// construction — no text re-parsing downstream).
///
/// Built only when *every* leaf is an integer literal or a plain identifier
/// and *every* interior node is an arithmetic / comparison / bitwise operator,
/// a unary `-`, or a parenthesis. Any call, field access, string, container,
/// or compound-boolean (`&&` / `||`) subtree makes the builder return `None`
/// for the whole condition. Identifiers are stored by name and resolved to
/// their constant SSA value at fold time
/// ([`crate::ssa::const_prop::fold_constant_branches`]); the actual numeric
/// evaluation is shared in [`CondArith::eval`].
///
/// Parentheses have no variant of their own: the builder unwraps them, so
/// grouping is expressed purely by the nesting of `Neg` / `Bin` nodes.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum CondArith {
    /// Integer literal, already parsed to its numeric value.
    Lit(i64),
    /// Identifier, stored by name. `CondArith::eval` asks its resolver
    /// callback for the value; an unresolved name makes the whole
    /// evaluation return `None`.
    Var(String),
    /// Unary integer negation: `-x`. The operand must evaluate to an
    /// integer, not a boolean.
    Neg(Box<CondArith>),
    /// Binary arithmetic / bitwise / comparison: operator, left operand,
    /// right operand. Both operands must evaluate to integers.
    Bin(BinOp, Box<CondArith>, Box<CondArith>),
}
/// Result of folding a [`CondArith`] against a constant environment.
///
/// Keeping integers and booleans as separate variants lets
/// [`CondArith::eval`] reject a comparison result that is used as an
/// arithmetic operand instead of coercing it to 0/1.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CondVal {
    /// Value of a literal, a resolved variable, or an arithmetic / bitwise
    /// sub-expression.
    Int(i64),
    /// Value of a comparison.
    Bool(bool),
}
impl CondArith {
/// Evaluate against a variable→constant-integer resolver. Returns `None`
/// the moment anything is non-constant or an operation is undefined
/// (division/modulo by zero, arithmetic overflow, type mismatch), so a
/// caller can only ever prune on a *definite* result. All integer
/// arithmetic is checked; overflow yields `None` rather than a wrapped
/// value, which keeps the fold sound across the i32/i64 gap.
pub fn eval(&self, resolve: &impl Fn(&str) -> Option<i64>) -> Option<CondVal> {
match self {
CondArith::Lit(n) => Some(CondVal::Int(*n)),
CondArith::Var(name) => resolve(name).map(CondVal::Int),
CondArith::Neg(inner) => match inner.eval(resolve)? {
CondVal::Int(n) => n.checked_neg().map(CondVal::Int),
CondVal::Bool(_) => None,
},
CondArith::Bin(op, l, r) => {
let lhs = match l.eval(resolve)? {
CondVal::Int(n) => n,
CondVal::Bool(_) => return None,
};
let rhs = match r.eval(resolve)? {
CondVal::Int(n) => n,
CondVal::Bool(_) => return None,
};
let arith = |v: Option<i64>| v.map(CondVal::Int);
match op {
BinOp::Add => arith(lhs.checked_add(rhs)),
BinOp::Sub => arith(lhs.checked_sub(rhs)),
BinOp::Mul => arith(lhs.checked_mul(rhs)),
// Java/Rust integer division and modulo both truncate
// toward zero; `checked_*` rejects div-by-zero and
// i64::MIN / -1 overflow.
BinOp::Div => arith(lhs.checked_div(rhs)),
BinOp::Mod => arith(lhs.checked_rem(rhs)),
BinOp::BitAnd => arith(Some(lhs & rhs)),
BinOp::BitOr => arith(Some(lhs | rhs)),
BinOp::BitXor => arith(Some(lhs ^ rhs)),
BinOp::LeftShift => {
u32::try_from(rhs).ok().and_then(|s| lhs.checked_shl(s)).map(CondVal::Int)
}
BinOp::RightShift => {
u32::try_from(rhs).ok().and_then(|s| lhs.checked_shr(s)).map(CondVal::Int)
}
BinOp::Eq => Some(CondVal::Bool(lhs == rhs)),
BinOp::NotEq => Some(CondVal::Bool(lhs != rhs)),
BinOp::Lt => Some(CondVal::Bool(lhs < rhs)),
BinOp::LtEq => Some(CondVal::Bool(lhs <= rhs)),
BinOp::Gt => Some(CondVal::Bool(lhs > rhs)),
BinOp::GtEq => Some(CondVal::Bool(lhs >= rhs)),
}
}
}
}
/// Evaluate to a definite boolean, or `None`. The top-level node must be a
/// comparison (a bare integer is not a branch condition we fold).
pub fn eval_bool(&self, resolve: &impl Fn(&str) -> Option<i64>) -> Option<bool> {
match self.eval(resolve)? {
CondVal::Bool(b) => Some(b),
CondVal::Int(_) => None,
}
}
/// Collect every identifier name referenced by the tree.
pub fn collect_vars(&self, out: &mut Vec<String>) {
match self {
CondArith::Lit(_) => {}
CondArith::Var(name) => {
if !out.iter().any(|v| v == name) {
out.push(name.clone());
}
}
CondArith::Neg(inner) => inner.collect_vars(out),
CondArith::Bin(_, l, r) => {
l.collect_vars(out);
r.collect_vars(out);
}
}
}
}
/// Call-related metadata for CFG nodes.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct CallMeta {
@ -662,6 +785,17 @@ pub struct NodeInfo {
pub condition_vars: Vec<String>,
/// For If nodes: whether the condition has a leading negation (`!` / `not`).
pub condition_negated: bool,
/// For If / conditional (ternary) nodes: the condition as a pure
/// integer-arithmetic + comparison expression tree, when the whole
/// condition is built only from integer literals, identifiers, arithmetic
/// / comparison operators, and parentheses. `None` for any condition that
/// touches a call, field access, string, compound boolean (`&&`/`||`), or
/// any shape this evaluator cannot prove constant. Consumed by
/// [`crate::ssa::const_prop::fold_constant_branches`] to prune branches
/// whose condition folds to a definite boolean once its variables are
/// resolved to constants — closing the synthetic "dead branch keeps the
/// tainted phi operand alive" false positive without any text re-parsing.
pub cond_arith: Option<CondArith>,
/// True when this is a Call node whose argument list contains only
/// syntactic literal values (strings, numbers, booleans, null/nil,
/// arrays/lists/tuples of literals). Also true for zero-argument calls
@ -1065,7 +1199,7 @@ fn extract_condition_raw<'a>(
ast: Node<'a>,
lang: &str,
code: &'a [u8],
) -> (Option<String>, Vec<String>, bool) {
) -> (Option<String>, Vec<String>, bool, Option<CondArith>) {
// 1. Find the condition subtree.
let cond_node = ast.child_by_field_name("condition").or_else(|| {
// Rust `if_expression` uses positional children: the condition is
@ -1085,7 +1219,7 @@ fn extract_condition_raw<'a>(
});
let Some(cond) = cond_node else {
return (None, Vec::new(), false);
return (None, Vec::new(), false, None);
};
// 2. Detect leading negation (`!expr`, `not expr`, Ruby `unless`).
@ -1103,7 +1237,20 @@ fn extract_condition_raw<'a>(
let text = text_of(cond, code)
.map(|t| truncate_at_char_boundary(&t, MAX_CONDITION_TEXT_LEN).to_string());
(text, vars, negated)
// 5. Capture the pure integer-arithmetic + comparison tree (for constant
// branch folding). Built from the FULL condition node `cond` (not the
// negation-stripped `inner`) so the folded boolean matches the
// Branch terminator's `true_blk = cond-true` semantics directly. Ruby
// `unless` swaps the True/False edges in the CFG builder (lines
// ~5029), so the branch polarity would be inverted — skip it to stay
// sound (`unless` with a constant arithmetic guard is negligible).
let cond_arith = if ast.kind() == "unless" {
None
} else {
build_cond_arith(cond, lang, code, 0)
};
(text, vars, negated, cond_arith)
}
/// Detect leading negation and return the inner expression.
@ -1241,6 +1388,155 @@ fn extract_bin_op(ast: Node, lang: &str) -> Option<BinOp> {
None
}
/// Parse an integer literal node to its `i64` value, honouring hex / octal /
/// binary radix prefixes, a trailing `l` / `L` long suffix, and `_` digit
/// separators (`1_000`).
///
/// Returns `None` for node kinds that are not integer literals, for text that
/// is not a plain integer (floats, other suffixes), for values that overflow
/// `i64`, and for a leading-zero literal such as `017` unless the node kind is
/// `octal_integer_literal`. In that kind the digits after the leading `0` are
/// read as base 8. For every other kind the meaning of a leading zero depends
/// on the source language (octal in some, decimal in others), and this
/// function does not know the language, so it refuses rather than guess.
fn parse_int_literal(node: Node, code: &[u8]) -> Option<i64> {
    let kind = node.kind();
    let is_int = matches!(
        kind,
        "integer"
            | "integer_literal"
            | "int_literal"
            | "number"
            | "number_literal"
            | "decimal_integer_literal"
            | "hex_integer_literal"
            | "octal_integer_literal"
            | "binary_integer_literal"
    );
    if !is_int {
        return None;
    }

    // A byte range outside `code` yields `None` instead of panicking.
    let raw = std::str::from_utf8(code.get(node.byte_range())?).ok()?.trim();

    // Strip the long suffix and digit separators.
    let cleaned: String = raw
        .trim_end_matches(['l', 'L'])
        .chars()
        .filter(|c| *c != '_')
        .collect();

    // Explicit radix prefixes.
    // NOTE(review): a Java `int` hex/octal/binary literal above `i32::MAX`
    // (e.g. `0xFFFFFFFF`) denotes a negative value, but is returned here as
    // its positive magnitude — confirm whether that shape can reach a fold.
    const PREFIXES: [(&str, &str, u32); 3] =
        [("0x", "0X", 16), ("0o", "0O", 8), ("0b", "0B", 2)];
    for (lower, upper, radix) in PREFIXES {
        let digits = cleaned
            .strip_prefix(lower)
            .or_else(|| cleaned.strip_prefix(upper));
        if let Some(digits) = digits {
            return i64::from_str_radix(digits, radix).ok();
        }
    }

    // Leading zero followed by more digits: `017` is octal 15 in an
    // `octal_integer_literal`; reading it as decimal 17 would be a wrong
    // value. For other kinds the radix is language-dependent → no value.
    let has_leading_zero = cleaned.len() > 1
        && cleaned.starts_with('0')
        && cleaned.bytes().all(|b| b.is_ascii_digit());
    if has_leading_zero {
        return if kind == "octal_integer_literal" {
            i64::from_str_radix(&cleaned[1..], 8).ok()
        } else {
            None
        };
    }

    cleaned.parse::<i64>().ok()
}
/// Translate the operator token of a binary expression node into a
/// [`BinOp`].
///
/// The operator is taken to be the first anonymous (unnamed) child of
/// `node`; named children (the operands) are skipped. Returns `None` when
/// there is no anonymous child, or when its token is outside the arithmetic /
/// bitwise / comparison set (boolean `&&` / `||`, assignment, …) — which
/// makes the enclosing [`CondArith`] build bail.
fn binary_op_token(node: Node) -> Option<BinOp> {
    let mut walker = node.walk();
    let token = node
        .children(&mut walker)
        .find(|child| !child.is_named())?;

    let op = match token.kind() {
        // Arithmetic
        "+" => BinOp::Add,
        "-" => BinOp::Sub,
        "*" => BinOp::Mul,
        "/" => BinOp::Div,
        "%" => BinOp::Mod,
        // Bitwise
        "&" => BinOp::BitAnd,
        "|" => BinOp::BitOr,
        "^" => BinOp::BitXor,
        "<<" => BinOp::LeftShift,
        ">>" => BinOp::RightShift,
        // Comparison
        "==" | "===" => BinOp::Eq,
        "!=" | "!==" => BinOp::NotEq,
        "<" => BinOp::Lt,
        "<=" => BinOp::LtEq,
        ">" => BinOp::Gt,
        ">=" => BinOp::GtEq,
        _ => return None,
    };
    Some(op)
}
/// Build a [`CondArith`] tree from a condition AST subtree, or `None` if the
/// condition is not a pure integer-arithmetic + comparison expression. Uses
/// the real tree-sitter node so operator precedence and parentheses are
/// already encoded in the tree shape — no text parsing. Conservative by
/// construction: any unrecognised node kind (call, field access, string,
/// boolean `&&`/`||`, unary `!`) returns `None`, which disables folding for
/// that branch (never a wrong fold). Depth-bounded to guard against
/// pathological nesting.
fn build_cond_arith(node: Node, lang: &str, code: &[u8], depth: u32) -> Option<CondArith> {
if depth > 64 {
return None;
}
let kind = node.kind();
// Unwrap parentheses (transparent to value).
if matches!(kind, "parenthesized_expression" | "parenthesized" | "parenthesized_statement") {
let inner = node.named_child(0)?;
return build_cond_arith(inner, lang, code, depth + 1);
}
if let Some(n) = parse_int_literal(node, code) {
return Some(CondArith::Lit(n));
}
// Bare identifier (reject dotted paths / field access — those are not
// captured here; only a plain local whose const value we can resolve).
if matches!(kind, "identifier" | "simple_identifier") {
let name = text_of(node, code)?;
if !name.is_empty()
&& name.chars().all(|c| c.is_alphanumeric() || c == '_' || c == '$')
{
return Some(CondArith::Var(name));
}
return None;
}
// Unary `-` only (boolean `!` / `not` is intentionally unsupported: its
// operand would be a boolean, which `CondArith::eval` rejects, so folding
// a negated condition is left to the conservative `None` path).
if matches!(
kind,
"unary_expression" | "unary_operator" | "prefix_unary_expression" | "unary"
) {
let operand = node.named_child(0)?;
let mut cursor = node.walk();
let is_neg = node
.children(&mut cursor)
.any(|c| !c.is_named() && c.kind() == "-");
if is_neg {
return Some(CondArith::Neg(Box::new(build_cond_arith(
operand,
lang,
code,
depth + 1,
)?)));
}
return None;
}
// Binary arithmetic / comparison: exactly two operands + one operator.
if is_binary_expr_kind(kind, lang) {
if node.named_child_count() != 2 {
return None; // chained comparison (Python `a < b < c`) etc.
}
let op = binary_op_token(node)?;
let lhs = build_cond_arith(node.named_child(0)?, lang, code, depth + 1)?;
let rhs = build_cond_arith(node.named_child(1)?, lang, code, depth + 1)?;
return Some(CondArith::Bin(op, Box::new(lhs), Box::new(rhs)));
}
None
}
/// Find the RHS value node of an assignment-like AST node (variable declarator,
/// lexical declaration, assignment expression). Used by helpers that need to
/// inspect what an identifier is being initialized to.
@ -3231,11 +3527,11 @@ pub(super) fn push_node<'a>(
};
// Extract condition metadata for If nodes.
let (condition_text, condition_vars, condition_negated) =
let (condition_text, condition_vars, condition_negated, cond_arith) =
if matches!(lookup(lang, ast.kind()), Kind::If) {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false)
(None, Vec::new(), false, None)
};
// Extract per-argument identifiers for Call nodes.
@ -3512,6 +3808,7 @@ pub(super) fn push_node<'a>(
condition_text,
condition_vars,
condition_negated,
cond_arith,
all_args_literal,
catch_param: false,
arg_callees,

View file

@ -231,6 +231,13 @@ fn type_kind_index(kind: &TypeKind) -> u32 {
| TypeKind::GormDb
| TypeKind::SqlxDb
| TypeKind::HibernateSession => 3,
// ProcessBuilder participates only in the type-qualified callee
// resolver via `label_prefix()`; no dedicated bitset slot, share
// the Object index like the other receiver-only TypeKinds.
TypeKind::ProcessBuilder => 3,
// Runtime is likewise a type-qualified-resolver-only receiver kind
// (`Runtime.exec`); no dedicated bitset slot, share the Object index.
TypeKind::Runtime => 3,
}
}

View file

@ -275,6 +275,14 @@ pub fn class_name_to_type_kind(name: &str) -> Option<TypeKind> {
// type-qualified resolution to `Template.process`, the SSTI
// sink defined in `labels/java.rs`.
"Template" => Some(TypeKind::Template),
// `java.lang.Runtime` declared receiver type. Routes the
// split-receiver shape `Runtime r = Runtime.getRuntime(); ...
// r.exec(...)` through type-qualified resolution to
// `Runtime.exec` (the only `Runtime.*` rule, always SHELL_ESCAPE),
// complementing the `constructor_type` factory route for
// `Runtime.getRuntime()`. No benign `Runtime.exec` exists, so
// typing any `Runtime`-declared receiver carries no FP risk.
"Runtime" => Some(TypeKind::Runtime),
// Python qualified type names.
// Only covers raw lowered names from isinstance(). The lowering in lower.rs
// extracts the literal type text: isinstance(x, requests.Session) produces

View file

@ -124,6 +124,23 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
},
// `ProcessBuilder.command(argList)` — the dominant OWASP Benchmark
// command-injection shape builds an argument `List<String>`, attaches it
// via `pb.command(argList)`, then runs `pb.start()`. The argument list is
// a separate channel from the constructor, so the flat `ProcessBuilder`
// constructor sink above never sees the tainted args. This rule fires
// only via type-qualified resolution: the receiver `pb` must carry a
// `TypeKind::ProcessBuilder` fact (set by `constructor_type` for
// `new ProcessBuilder(...)`), so the resolver rewrites `pb.command(...)` →
// `ProcessBuilder.command`. Case-sensitive and receiver-typed to avoid
// colliding with the many unrelated `.command(...)` methods (CLI builders,
// JCommander, picocli, Swing actions). The payload is restricted to arg 0
// (the command list) via `type_qualified_sink_payload_args`.
LabelRule {
matchers: &["ProcessBuilder.command"],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: true,
},
LabelRule {
matchers: &["executeQuery", "executeUpdate"],
label: DataLabel::Sink(Cap::SQL_QUERY),

View file

@ -1496,7 +1496,11 @@ pub fn type_qualified_sink_payload_args(qualified_callee: &str) -> Option<&'stat
| "TypeOrmRepo.createQueryBuilder"
| "TypeOrmManager.query"
| "TypeOrmManager.createQueryBuilder"
| "MikroOrmEm.execute" => Some(&[0]),
| "MikroOrmEm.execute"
// `ProcessBuilder.command(argList)` — arg 0 is the command list;
// any later positional args are not part of the v1 shape. Restrict
// sink-taint scanning to arg 0 so receiver / unrelated args don't fire.
| "ProcessBuilder.command" => Some(&[0]),
_ => None,
}
}

View file

@ -1202,6 +1202,8 @@ fn type_kind_tag(k: &TypeKind) -> String {
TypeKind::GormDb => "GormDb".into(),
TypeKind::SqlxDb => "SqlxDb".into(),
TypeKind::HibernateSession => "HibernateSession".into(),
TypeKind::ProcessBuilder => "ProcessBuilder".into(),
TypeKind::Runtime => "Runtime".into(),
}
}

View file

@ -624,6 +624,192 @@ pub fn apply_const_prop(body: &mut SsaBody, result: &ConstPropResult) -> usize {
pruned
}
/// Pick the SSA value that stands for `var_name` at the branch in `block`.
///
/// Selection rule: among the definitions in `body.value_defs` whose
/// `var_name` matches, the highest-indexed one located in `block` wins; if
/// `block` holds none, the highest-indexed matching definition from any other
/// block is used. Returns `None` when no definition carries that name.
///
/// Per the original author's note this mirrors
/// `constraint::lower::resolve_single_var` and is duplicated locally to avoid
/// an `ssa → constraint` dependency cycle.
fn resolve_const_var(body: &SsaBody, var_name: &str, block: BlockId) -> Option<SsaValue> {
    // Highest-indexed matching definition, restricted to definitions inside
    // (`true`) or outside (`false`) the branch block.
    let latest_def = |inside_block: bool| {
        body.value_defs
            .iter()
            .enumerate()
            .filter(|(_, def)| def.var_name.as_deref() == Some(var_name))
            .filter(|(_, def)| (def.block == block) == inside_block)
            .map(|(idx, _)| SsaValue(idx as u32))
            .max_by_key(|value| value.0)
    };

    latest_def(true).or_else(|| latest_def(false))
}
/// Fold branch conditions that are pure integer-arithmetic comparisons over
/// constant operands, pruning the statically-dead edge.
///
/// Complements [`apply_const_prop`], which only folds a condition that lowers
/// to a single SSA boolean value. An arithmetic comparison condition such as
/// `(7*42) - num > 200` is **never** an SSA value — condition nodes lower to
/// `Nop` and the comparison is held structurally on the branch terminator — so
/// SCCP cannot reach it. This pass instead evaluates the
/// [`crate::cfg::CondArith`] tree captured at CFG-build time, resolving each
/// variable to its const-propagated integer.
///
/// Sound by construction:
/// * A branch is pruned only when its `CondArith` evaluates to a **definite**
/// boolean — every variable bound to a known integer constant and every
/// operation defined (no div-by-zero / overflow). `None`/`Varying` leaves
/// both edges intact.
/// * After the terminator is rewritten to `Goto(taken)` and the dead edge is
/// dropped (symmetrically, preserving pred/succ consistency), every phi
/// operand whose predecessor is no longer reachable from entry is removed.
/// That last step is what actually drops the dead-branch operand from a
/// merge phi like `bar = phi(then: "const", else: param)` — without it the
/// taint engine's phi fallback would still read the tainted `param` from
/// the joined entry state.
///
/// Returns the number of branches pruned.
pub fn fold_constant_branches(
body: &mut SsaBody,
cfg: &crate::cfg::Cfg,
const_values: &HashMap<SsaValue, ConstLattice>,
) -> usize {
use crate::ssa::ir::Terminator;
// 1. Collect definite fold decisions: (branch_block_idx, taken, untaken).
let mut prune_ops: Vec<(usize, BlockId, BlockId)> = Vec::new();
for (block_idx, block) in body.blocks.iter().enumerate() {
let Terminator::Branch {
cond,
true_blk,
false_blk,
..
} = &block.terminator
else {
continue;
};
// Degenerate `cond ? X : X` (both edges to one block): nothing to prune.
if true_blk == false_blk {
continue;
}
let Some(cond_info) = cfg.node_weight(*cond) else {
continue;
};
let Some(arith) = cond_info.cond_arith.as_ref() else {
continue;
};
let branch_block = block.id;
let resolve = |name: &str| -> Option<i64> {
let v = resolve_const_var(body, name, branch_block)?;
match const_values.get(&v) {
Some(ConstLattice::Int(n)) => Some(*n),
_ => None,
}
};
match arith.eval_bool(&resolve) {
Some(true) => prune_ops.push((block_idx, *true_blk, *false_blk)),
Some(false) => prune_ops.push((block_idx, *false_blk, *true_blk)),
None => {}
}
}
let pruned = prune_ops.len();
if pruned == 0 {
return 0;
}
// 2. Rewrite terminators + drop the dead edge (symmetrically).
for &(block_idx, taken, untaken) in &prune_ops {
let pred_id = body.blocks[block_idx].id;
body.blocks[block_idx].terminator = Terminator::Goto(taken);
body.blocks[block_idx].succs.retain(|s| *s != untaken);
let untaken_idx = untaken.0 as usize;
if untaken_idx < body.blocks.len() {
body.blocks[untaken_idx].preds.retain(|p| *p != pred_id);
}
}
// 3. Recompute reachability from entry over the (now-pruned) succ edges.
let n = body.blocks.len();
let mut reachable = vec![false; n];
let mut stack = vec![body.entry];
if (body.entry.0 as usize) < n {
reachable[body.entry.0 as usize] = true;
}
while let Some(b) = stack.pop() {
let bidx = b.0 as usize;
if bidx >= n {
continue;
}
// Clone succs to avoid borrow conflict with `reachable`.
let succs: SmallVec<[BlockId; 2]> = body.blocks[bidx].succs.clone();
for s in succs {
let sidx = s.0 as usize;
if sidx < n && !reachable[sidx] {
reachable[sidx] = true;
stack.push(s);
}
}
}
// 4. Reachable blocks: drop the now-dead predecessor. Removing the phi
// operand from the merge block is what stops the tainted dead-branch
// value feeding the phi; removing the pred keeps pred/succ symmetric
// with step 5's succ clearing. Operands from still-reachable
// predecessors are untouched, so no live flow is lost.
for (bidx, block) in body.blocks.iter_mut().enumerate() {
if !reachable[bidx] {
continue;
}
block.preds.retain(|p| {
let pidx = p.0 as usize;
pidx < n && reachable[pidx]
});
for phi in &mut block.phis {
if let SsaOp::Phi(operands) = &mut phi.op {
operands.retain(|(pred, _)| {
let pidx = pred.0 as usize;
pidx < n && reachable[pidx]
});
}
}
}
// 5. Unreachable blocks: neutralise them so the *later* optimiser passes
// (copy-prop, base-alias grouping, type-facts, points-to) and the taint
// transfer never observe their dead instructions. This is the
// load-bearing step for precision: a dead `else bar = param` would
// otherwise make copy-prop alias `bar`↔`param`, and
// `propagate_taint_to_aliases` would then poison the *surviving const*
// `bar` with `param`'s (still-reachable) taint — defeating the whole
// prune. Each instruction is rewritten to `Nop` (value + cfg_node
// preserved so `value_defs` coverage holds), the terminator to
// `Unreachable`, and the block is fully disconnected.
for (bidx, block) in body.blocks.iter_mut().enumerate() {
if reachable[bidx] {
continue;
}
for inst in block.phis.iter_mut().chain(block.body.iter_mut()) {
inst.op = SsaOp::Nop;
}
block.terminator = Terminator::Unreachable;
block.succs.clear();
block.preds.clear();
}
pruned
}
/// Collect module aliases from `require()` calls in the SSA body.
///
/// Detects patterns like `const http = require("http")` and propagates

View file

@ -101,7 +101,12 @@ pub fn optimize_ssa_with_param_types(
) -> OptimizeResult {
// 1. Constant propagation (SCCP)
let cp = const_prop::const_propagate(body);
let branches_pruned = const_prop::apply_const_prop(body, &cp);
let mut branches_pruned = const_prop::apply_const_prop(body, &cp);
// 1b. Fold pure integer-arithmetic comparison branch conditions that SCCP
// cannot reach (the comparison is held on the terminator, not an SSA
// value). Prunes statically-dead edges + their merge-phi operands so a
// dead `else bar = param` stops feeding a tainted operand into the phi.
branches_pruned += const_prop::fold_constant_branches(body, cfg, &cp.values);
// 2. Copy propagation
let (copies_eliminated, copy_map) = copy_prop::copy_propagate(body, cfg);

View file

@ -261,6 +261,33 @@ pub enum TypeKind {
/// arbitrary-receiver-name shape (`sess`, `hibernateSession`, etc.)
/// via type-qualified resolution.
HibernateSession,
/// A `java.lang.ProcessBuilder` instance produced by
/// `new ProcessBuilder(...)`. The dominant OWASP Benchmark
/// command-injection shape builds an argument `List<String>`, attaches
/// it via `pb.command(argList)`, then runs it with `pb.start()`. The
/// argument list is a separate channel from the constructor, so the
/// flat `ProcessBuilder` constructor sink never sees the tainted args.
/// Mapping the receiver to this TypeKind lets the type-qualified
/// resolver rewrite `pb.command(argList)` → `ProcessBuilder.command`
/// against the flat SHELL_ESCAPE rule in `labels/java.rs`, so tainted
/// list contents reaching the command builder are caught at the
/// `command(...)` call site.
ProcessBuilder,
/// A `java.lang.Runtime` instance produced by the static factory
/// `Runtime.getRuntime()`. The dominant OWASP Benchmark
/// command-injection shape splits the receiver across statements:
/// `Runtime r = Runtime.getRuntime(); ... r.exec(args, argsEnv)`. The
/// callee text at the sink is `r.exec`, which does not suffix-match the
/// flat `Runtime.exec` rule in `labels/java.rs` (the chained
/// `Runtime.getRuntime().exec(...)` form fires only because its callee
/// text literally contains `Runtime`). Mapping the receiver `r` to
/// this TypeKind lets the type-qualified resolver rewrite `r.exec(...)`
/// → `Runtime.exec` against the flat SHELL_ESCAPE rule, so tainted data
/// reaching the split-receiver exec is caught. No payload-arg
/// restriction: `Runtime.exec` overloads place the tainted data in
/// either the command (arg 0) or the environment array (arg 1), so the
/// default all-args sink scan must cover every position.
Runtime,
}
/// structural carrier for a recognised DTO type. Maps
@ -318,6 +345,8 @@ impl TypeKind {
Self::GormDb => Some("GormDb"),
Self::SqlxDb => Some("SqlxDb"),
Self::HibernateSession => Some("HibernateSession"),
Self::ProcessBuilder => Some("ProcessBuilder"),
Self::Runtime => Some("Runtime"),
_ => None,
}
}
@ -708,6 +737,18 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
"openSession" | "getCurrentSession" | "openStatelessSession" => {
Some(TypeKind::HibernateSession)
}
// `new ProcessBuilder(...)` — the receiver's `command(argList)`
// setter is a command-injection sink for the list contents.
// Type-qualified resolution rewrites `pb.command(...)` →
// `ProcessBuilder.command` against the flat SHELL_ESCAPE rule.
"ProcessBuilder" => Some(TypeKind::ProcessBuilder),
// `Runtime.getRuntime()` — the static factory returns the
// singleton `java.lang.Runtime`. Gating on `callee.contains
// ("Runtime")` keeps an unrelated `foo.getRuntime()` method from
// being mistyped. Type-qualified resolution rewrites the
// split-receiver `r.exec(...)` → `Runtime.exec` against the flat
// SHELL_ESCAPE rule.
"getRuntime" if callee.contains("Runtime") => Some(TypeKind::Runtime),
_ => None,
},
Lang::JavaScript | Lang::TypeScript => {

View file

@ -1929,7 +1929,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
for (func_name, func_entry) in &func_entries {
let formal_params = lookup_formal_params(local_summaries, func_name);
let func_ssa = match crate::ssa::lower_to_ssa_with_params(
let mut func_ssa = match crate::ssa::lower_to_ssa_with_params(
cfg,
*func_entry,
Some(func_name),
@ -1939,6 +1939,9 @@ pub(crate) fn extract_intra_file_ssa_summaries(
Ok(ssa) => ssa,
Err(_) => continue,
};
// Match the `_from_bodies` path: prune dead constant branches before
// the summary probe (see `prefold_dead_branches_for_summary`).
prefold_dead_branches_for_summary(&mut func_ssa, cfg);
// `formal_params` is authoritative even when it is empty. SSA lowering
// also emits Param ops for external captures; counting those as arity
@ -2019,6 +2022,22 @@ pub(crate) fn extract_intra_file_ssa_summaries(
/// name overloads with different arity, and anonymous bodies at distinct
/// source spans all get distinct keys.
#[allow(clippy::too_many_arguments)]
/// Prune definite-constant dead branches on a freshly-lowered body *before*
/// its interprocedural summary is extracted.
///
/// Summary extraction ([`ssa_transfer::extract_ssa_func_summary`]) runs on the
/// pre-optimisation SSA, so without this a helper whose body returns a constant
/// only because a dead `else x = param` branch is never taken would still emit
/// a `param → return` transform — re-tainting the caller's `bar =
/// helper(param)` and defeating the in-body branch fold. Only
/// [`crate::ssa::const_prop::fold_constant_branches`] is applied (no copy-prop /
/// DCE), so the change is limited to provably-dead arithmetic-comparison
/// branches; the body's value numbering is otherwise untouched.
fn prefold_dead_branches_for_summary(func_ssa: &mut crate::ssa::SsaBody, cfg: &crate::cfg::Cfg) {
let cp = crate::ssa::const_prop::const_propagate(func_ssa);
crate::ssa::const_prop::fold_constant_branches(func_ssa, cfg, &cp.values);
}
pub(crate) fn lower_all_functions_from_bodies(
file_cfg: &FileCfg,
lang: Lang,
@ -2108,6 +2127,9 @@ fn lower_all_functions_from_bodies_inner(
Err(_) => continue,
};
perf_lower_record(0, _t_lower.elapsed().as_micros());
// Prune dead constant branches before the summary probe so a helper's
// dead `else x = param` does not surface as a spurious param→return.
prefold_dead_branches_for_summary(&mut func_ssa, &body.graph);
let param_count = if !formal_params.is_empty() {
formal_params.len()

View file

@ -55,6 +55,19 @@ _CAP_BIT_TABLE = [
(1 << 20, "prototype_pollution"),
]
# Static lens (see --static).
#
# SHELL_ESCAPE (1<<2) is the command-injection sink cap for *every* language
# (`grep SHELL_ESCAPE src/labels/`: every Sink use is command-exec), while
# CODE_EXEC (1<<10) is the eval/code-exec variant, which is also cmdi.
# A normal `nyx scan` (no dynamic confirmation) reports a Java cmdi finding
# with SHELL_ESCAPE alone, because the SHELL_ESCAPE→CODE_EXEC remap that would
# bucket it as cmdi is gated on VerifyStatus::Confirmed
# (src/commands/scan.rs). With 0 confirmations the default table therefore
# leaves these in "other" and the cmdi cell reads 0/0/N.
#
# The static table adds SHELL_ESCAPE→cmdi as the LAST entry, i.e. at the
# lowest priority: a SHELL_ESCAPE-only finding buckets as cmdi, while a
# finding that also carries a higher-priority sink bit (e.g. FILE_IO) keeps
# its existing bucket. It is opt-in via --static so the default
# confirmed-recall bucketing stays byte-identical.
_CAP_BIT_TABLE_STATIC = [*_CAP_BIT_TABLE, (1 << 2, "cmdi")]  # SHELL_ESCAPE
# Substring → cap lookup for rule IDs. Order matters: most specific first.
_CAP_RULE_TABLE = [
("path_traversal", "path_traversal"),
@ -83,12 +96,13 @@ def load_json(path: str) -> object:
return json.load(f)
def cap_of(finding: dict) -> str:
def cap_of(finding: dict, static_lens: bool = False) -> str:
# 1. Prefer evidence.sink_caps bitmask — the engine's own classification.
ev = finding.get("evidence", {}) or {}
sink_caps = ev.get("sink_caps")
if isinstance(sink_caps, int) and sink_caps:
for bit, name in _CAP_BIT_TABLE:
table = _CAP_BIT_TABLE_STATIC if static_lens else _CAP_BIT_TABLE
for bit, name in table:
if sink_caps & bit:
return name
# 2. Fall back to rule id substring (e.g. py.cmdi.os_system, java.deser.readobject).
@ -383,6 +397,20 @@ def main() -> int:
default="",
help="path to a previous results JSON; fail on monotonic-improvement regression",
)
p.add_argument(
"--static",
action="store_true",
help=(
"static lens: bucket SHELL_ESCAPE (1<<2) findings as cmdi even when "
"they are unconfirmed. Java (and other) command-exec sinks carry "
"SHELL_ESCAPE and only get remapped to CODE_EXEC on dynamic Confirm; "
"without this flag, an env with 0 confirmations reads the cmdi cell "
"as 0/0/N regardless of static quality. SHELL_ESCAPE is the "
"command-injection sink cap for every language, so this is sound "
"globally; it is opt-in only so the default confirmed-recall "
"bucketing stays byte-identical."
),
)
args = p.parse_args()
lang_filter = {l.strip() for l in args.lang.split(",") if l.strip()}
@ -418,7 +446,7 @@ def main() -> int:
continue
f_path = f.get("path", "")
f_line = f.get("line", 0)
f_cap = cap_of(f)
f_cap = cap_of(f, static_lens=args.static)
for idx, entry in enumerate(not_vuln):
if idx in used:
continue
@ -455,7 +483,7 @@ def main() -> int:
)
for f in findings:
cap = cap_of(f)
cap = cap_of(f, static_lens=args.static)
lang = lang_of(f)
key = (cap, lang)
ev = f.get("evidence", {}) or {}
@ -501,7 +529,7 @@ def main() -> int:
for f in findings:
f_path = f.get("path", "")
f_line = f.get("line", 0)
f_cap = cap_of(f)
f_cap = cap_of(f, static_lens=args.static)
cap = f_cap
lang = lang_of(f)
cell_key = (cap, lang)

View file

@ -46,6 +46,8 @@ def write_json(path: Path, data: object) -> None:
# Cap bit positions cribbed from tabulate.py / src/labels/mod.rs.
SINK_BIT_SQL = 1 << 7 # SQL_QUERY
SINK_BIT_CMDI = 1 << 10 # CODE_EXEC
SINK_BIT_SHELL = 1 << 2 # SHELL_ESCAPE (Java/other command-exec sink)
SINK_BIT_FILE = 1 << 5 # FILE_IO (path_traversal)
def python_finding(cap_bit: int, path: str, line: int, status: str | None) -> dict:
@ -353,6 +355,91 @@ def test_lang_filter_scopes_findings_and_gt(tmp: Path) -> None:
assert all(lang != "javascript" for _cap, lang in cells), cells
def test_static_lens_buckets_shell_escape_as_cmdi(tmp: Path) -> None:
    # With no dynamic confirmations, a Java command-exec finding only carries
    # the SHELL_ESCAPE bit (1<<2). The default bit table files it under
    # "other", so the cmdi cell shows 0 TP / N FN no matter how good the
    # static result is. --static adds SHELL_ESCAPE→cmdi, which makes static
    # recall measurable without any dynamic confirmation.
    truth_path = tmp / "gt.json"
    truth_entry = {"path": "testcode/Cmd.java", "line": 0, "cap": "cmdi", "vuln": True}
    write_json(truth_path, [truth_entry])

    # Real Java taint findings use the id "taint-unsanitised-flow", which has
    # no cap substring; the rule-id fallback therefore yields "other" rather
    # than the cap a hand-crafted python_finding id would suggest.
    shell_only_finding = {
        "path": "/x/testcode/Cmd.java",
        "line": 10,
        "col": 0,
        "id": "taint-unsanitised-flow",
        "evidence": {
            "sink_caps": SINK_BIT_SHELL,
            "dynamic_verdict": {"status": "NotConfirmed"},
        },
    }
    scan_path = tmp / "scan.json"
    write_json(scan_path, {"findings": [shell_only_finding]})

    def tabulate_cells(out_name: str, *lens_flags: str) -> dict:
        # Run tabulate into a fresh append file and index the newest run's
        # cells by (cap, lang).
        out_path = tmp / out_name
        write_json(out_path, [])
        proc = run_tabulate(
            "--label", "owasp",
            "--scan", str(scan_path),
            "--ground-truth", str(truth_path),
            *lens_flags,
            "--append", str(out_path),
        )
        assert proc.returncode == 0, proc.stdout + proc.stderr
        latest_run = json.loads(out_path.read_text())[-1]
        return {(cell["cap"], cell["lang"]): cell for cell in latest_run["cells"]}

    cmdi_key = ("cmdi", "java")
    other_key = ("other", "java")

    # Default lens: the finding lands in "other", so the GT positive is a
    # pure cmdi FN (recall 0) — this is the measurement gap.
    default_cells = tabulate_cells("default.json")
    assert cmdi_key in default_cells and default_cells[cmdi_key]["tp"] == 0, default_cells
    assert default_cells[cmdi_key]["fn"] == 1, default_cells[cmdi_key]
    assert other_key in default_cells, f"SHELL_ESCAPE must bucket as other by default: {list(default_cells)}"

    # Static lens: the same finding lands in cmdi, so recall is measurable
    # (TP=1, FN=0) and nothing is left in "other".
    static_cells = tabulate_cells("static.json", "--static")
    cmdi = static_cells[cmdi_key]
    assert cmdi["tp"] == 1 and cmdi["fn"] == 0, cmdi
    assert other_key not in static_cells, f"static lens must reclaim the other-bucketed finding: {list(static_cells)}"
def test_static_lens_preserves_higher_priority_bits(tmp: Path) -> None:
    # SHELL_ESCAPE is appended to the static table at the lowest priority, so
    # a finding that carries BOTH FILE_IO and SHELL_ESCAPE has to stay in
    # path_traversal with or without --static: the static lens must never
    # pull a finding out of a non-cmdi cell.
    scan_path = tmp / "scan.json"
    both_bits = python_finding(SINK_BIT_FILE | SINK_BIT_SHELL, "B.java", 10, "NotConfirmed")
    write_json(scan_path, {"findings": [both_bits]})

    for extra in ([], ["--static"]):
        # len(extra) is 0 or 1, which gives each pass its own output file.
        out_path = tmp / f"out{len(extra)}.json"
        write_json(out_path, [])
        argv = [
            "--label", "x",
            "--scan", str(scan_path),
            "--inhouse",
            "--append", str(out_path),
            *extra,
        ]
        proc = run_tabulate(*argv)
        assert proc.returncode == 0, proc.stdout + proc.stderr
        latest_run = json.loads(out_path.read_text())[-1]
        caps = {cell["cap"] for cell in latest_run["cells"]}
        assert caps == {"path_traversal"}, f"flag={extra}: {caps}"
def test_budget_malformed_exits_3(tmp: Path) -> None:
bad = tmp / "bad.toml"
bad.write_text("[default]\nunsupported_rate = not_a_number\n")
@ -661,6 +748,8 @@ def main() -> int:
test_manual_triage_stamps_wrong_confirmed,
test_manual_triage_ignores_vuln_true_entries,
test_lang_filter_scopes_findings_and_gt,
test_static_lens_buckets_shell_escape_as_cmdi,
test_static_lens_preserves_higher_priority_bits,
test_budget_malformed_exits_3,
test_relative_gt_path_suffix_matches_absolute_finding,
test_unmatched_gt_positive_lands_in_lang_cell,

View file

@ -0,0 +1,19 @@
{
"description": "Dead-branch constant condition (OWASP Benchmark cmdi non-vulnerable shape). `(7*42) - num > 200` with num=86 is 208 > 200 — always true — so `bar` is the constant string and the `else bar = param` arm is statically dead. The constant-branch fold (src/ssa/const_prop.rs::fold_constant_branches) evaluates the captured CondArith tree, prunes the dead edge, and drops the tainted phi operand AND neutralises the dead block so copy-prop cannot alias `bar`<->`param`. Result: `r.exec(cmd + bar)` carries no taint. Asserts NO taint finding fires (strict_unexpected promotes any taint-unsanitised-flow to a hard failure).",
"tags": [
"taint",
"cmdi",
"servlet",
"runtime",
"dead-branch",
"const-fold",
"precision"
],
"modes": [
"full"
],
"strict_unexpected": [
"taint-unsanitised-flow"
],
"expected": []
}

View file

@ -0,0 +1,27 @@
import java.io.*;
import javax.servlet.http.*;
// Dead-branch constant condition (OWASP Benchmark cmdi non-vulnerable shape).
// The guard `(7*42) - num > 200` is `294 - 86 = 208 > 200`, i.e. ALWAYS true,
// so `bar` is provably the constant string and the tainted `else` arm
// (`bar = param`) is unreachable. The constant-branch fold
// (`fold_constant_branches`) must prune the dead edge and drop the tainted
// phi operand so `r.exec(cmd + bar)` carries no attacker data: NO finding.
public class DeadBranchConstSafe extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        String param = request.getHeader("vector"); // taint source: request header
        String bar;
        int num = 86;
        if ((7 * 42) - num > 200) { // 294 - 86 = 208 > 200: always true
            bar = "This_should_always_happen";
        } else {
            bar = param; // dead arm: the only place tainted data is assigned to bar
        }
        String cmd = "echo ";
        Runtime r = Runtime.getRuntime();
        Process p = r.exec(cmd + bar); // sink: bar is always the constant here, so no finding is expected
    }
}

View file

@ -0,0 +1,32 @@
{
"description": "Dead-branch constant condition with VULNERABLE polarity. `(500/42) + num > 200` is `11 + 196 = 207 > 200` (integer division) — always true — and the TRUE arm assigns the tainted `param`, so the reachable branch carries taint and only the `else bar = \"...\"` arm is dead. The constant-branch fold must prune the DEAD else edge while keeping the live `bar = param`, so the command-injection finding at `r.exec(cmd + bar)` MUST still fire. Zero-false-negative guard: it proves the fold never prunes the reachable (tainted) arm.",
"tags": [
"taint",
"cmdi",
"servlet",
"runtime",
"dead-branch",
"const-fold",
"no-false-negative"
],
"modes": [
"full"
],
"strict_unexpected": [
"taint-unsanitised-flow"
],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"severity": "HIGH",
"must_match": true,
"line_range": [
26,
26
],
"evidence_contains": [],
"notes": "request.getHeader (line 15) flows into bar on the always-taken true arm (line 21), then into r.exec at line 26. Exactly one finding survives.",
"max_count": 1
}
]
}

View file

@ -0,0 +1,28 @@
import java.io.*;
import javax.servlet.http.*;
// Dead-branch constant condition, VULNERABLE polarity (OWASP Benchmark cmdi
// vulnerable shape). The guard `(500/42) + num > 200` is `11 + 196 = 207 > 200`
// using integer division, i.e. ALWAYS true, and the TRUE arm assigns the tainted
// `param`. So the live branch carries taint and the `else bar = "never"` arm is
// dead. The constant-branch fold must prune the DEAD (else) edge and keep the
// reachable tainted `bar = param`, so `r.exec(cmd + bar)` MUST still fire. This
// is the zero-false-negative guard: the fold must never prune the live arm.
public class DeadBranchParamVuln extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        String param = request.getHeader("vector"); // taint source: request header
        String bar;
        int num = 196;
        if ((500 / 42) + num > 200) { // integer division: 11 + 196 = 207 > 200, always true
            bar = param; // live arm: tainted data is assigned to bar
        } else {
            bar = "This_should_never_happen"; // dead arm: the condition above is never false
        }
        String cmd = "echo ";
        Runtime r = Runtime.getRuntime();
        Process p = r.exec(cmd + bar); // sink: bar holds the tainted header, so the finding must fire
    }
}

View file

@ -0,0 +1,29 @@
{
"description": "HttpServletRequest parameter flows through a List into ProcessBuilder.command(argList) — command injection via the setter form (list attached separately from the constructor, then pb.start()). This is the dominant OWASP Benchmark cmdi shape; resolved via type-qualified ProcessBuilder.command sink on the typed receiver plus container-element taint on the argument list.",
"tags": [
"taint",
"cmdi",
"servlet",
"container"
],
"modes": [
"full"
],
"strict_unexpected": [
"taint-unsanitised-flow"
],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"severity": "HIGH",
"must_match": true,
"line_range": [
16,
16
],
"evidence_contains": [],
"notes": "request.getParameter (line 8) is concatenated into a list element (argList.add at line 13), the list is attached to ProcessBuilder via pb.command(argList) at line 16, and executed by pb.start() at line 17. The type-qualified ProcessBuilder.command sink fires at line 16 on the tainted container argument. Exactly one finding survives.",
"max_count": 1
}
]
}

View file

@ -0,0 +1,19 @@
import java.io.*;
import java.util.*;
import javax.servlet.http.*;
public class ProcessCommandHandler extends HttpServlet { // fixture: tainted list reaches ProcessBuilder.command
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        String param = request.getParameter("vector"); // taint source: request parameter
        List<String> argList = new ArrayList<String>();
        argList.add("sh");
        argList.add("-c");
        argList.add("echo " + param); // tainted element enters the list
        ProcessBuilder pb = new ProcessBuilder(); // no-arg constructor; the command is attached below
        pb.command(argList); // sink: the setter receives the tainted list
        pb.start(); // runs the command set on the line above
    }
}

View file

@ -0,0 +1,30 @@
{
"description": "HttpServletRequest header flows into a String[] env array passed to a split-receiver Runtime.exec — command injection via the `Runtime r = Runtime.getRuntime(); ... r.exec(cmd, argsEnv)` shape (the dominant remaining OWASP Benchmark cmdi form). The callee text at the sink is `r.exec`, which does not suffix-match the flat `Runtime.exec` rule; resolution depends on the receiver `r` carrying TypeKind::Runtime (from the `Runtime.getRuntime()` factory / the `Runtime` declared type) so the type-qualified resolver rewrites `r.exec` → `Runtime.exec`. Taint is in the env array (arg 1), so no payload-arg restriction may be applied.",
"tags": [
"taint",
"cmdi",
"servlet",
"runtime",
"split-receiver"
],
"modes": [
"full"
],
"strict_unexpected": [
"taint-unsanitised-flow"
],
"expected": [
{
"rule_id": "taint-unsanitised-flow",
"severity": "HIGH",
"must_match": true,
"line_range": [
16,
16
],
"evidence_contains": [],
"notes": "request.getHeader (line 7) flows into the env array element argsEnv (line 15), which is passed as arg 1 of r.exec at line 16. The receiver r is typed Runtime via Runtime.getRuntime() (line 13), so the type-qualified Runtime.exec sink fires at the split-receiver call. Exactly one finding survives.",
"max_count": 1
}
]
}

View file

@ -0,0 +1,18 @@
import java.io.*;
import javax.servlet.http.*;
public class RuntimeSplitReceiverHandler extends HttpServlet {
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        String param = request.getHeader("vector"); // taint source: request header
        // Split-receiver Runtime.exec: the receiver is bound to a local in
        // one statement, then exec is called on it in another. The OWASP
        // Benchmark cmdi shape places the tainted data in the environment
        // array (arg 1), not the command (arg 0).
        Runtime r = Runtime.getRuntime(); // receiver bound to the local `r`
        String[] args = { "/bin/sh", "-c", "echo nyx" }; // constant command (arg 0)
        String[] argsEnv = { "TAINT=" + param }; // tainted environment entry (arg 1)
        r.exec(args, argsEnv); // sink: exec is called on `r`, not on `Runtime` directly
    }
}