nyx/src/taint/path_state.rs
Eli Peter 1f2bfe76c1
docs: Enhance module documentation across various files for clarity a… (#62)
* docs: Enhance module documentation across various files for clarity and completeness

* fix: Remove unnecessary blank line in build.rs for cleaner code

* docs: Update documentation to improve clarity and consistency in code comments
2026-05-02 17:46:45 -04:00

1561 lines
57 KiB
Rust

#![allow(clippy::collapsible_if)]
// ─── PredicateKind ───────────────────────────────────────────────────────────
/// Classification of what an if-condition tests.
///
/// Determined by heuristic analysis of the raw condition text (see
/// [`classify_condition`]). Classification is conservative: prefer
/// [`Unknown`](PredicateKind::Unknown) over a wrong guess.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PredicateKind {
/// Null / absence test: `x.is_none()`, `x.is_some()`, `x == null`,
/// `x != nil`, `x is None`, `x is not None`.
NullCheck,
/// Emptiness test: `x.is_empty()`, `x.len() == 0`, `x.length !== 0`,
/// `x == ""`, `x == ''`.
EmptyCheck,
/// Error-state test: `x.is_err()`, `x.is_ok()`, `err != nil`, `err == nil`.
///
/// Takes priority over [`NullCheck`](PredicateKind::NullCheck) even though
/// the Go forms contain `!= nil` / `== nil`.
ErrorCheck,
/// Call to a validation/guard function: `validate(x)`, `is_safe(x)`,
/// `check_auth(req)`, or a `.test(…)` / `.match(…)` / `.matches(…)` call
/// on a receiver whose name contains `regex` or `pattern`.
ValidationCall,
/// Call to a sanitizer function: `sanitize(x)`, `escape(x)`,
/// `url_encode(x)`.
SanitizerCall,
/// Allowlist/membership check: `.includes(x)`, `x in ALLOWED`,
/// `in_array(x, ...)`, or a bracket lookup such as `allowed[x]`.
AllowlistCheck,
/// Type-check guard: `typeof x`, `isinstance(x, int)`, `is_numeric(x)`,
/// `x instanceof T`, or a per-character `.all(…)` validation using
/// `is_ascii_*` / `is_alphanumeric`.
TypeCheck,
/// Negative-validation of shell metacharacters:
/// `x.contains(";")`, `x.match(/[;|&]/)`, `";" in x`, etc.
///
/// The **true branch is the REJECT path** (early return / panic / throw)
/// and the **false branch is the validated path**. Use inverted polarity
/// when applying branch predicates.
ShellMetaValidated,
/// Bounded-length rejection: `x.len() > N` / `x.length < N` with N >= 2.
///
/// Commonly paired with `ShellMetaValidated` in OR-chain rejection
/// idioms (`if x.len() > MAX || x.contains(";") { reject }`). Counts as
/// a dominator guard for `cfg-unguarded-sink` purposes, but intentionally
/// does **not** mark variables as validated: the rejection direction is
/// ambiguous from the condition alone (a `.len() > 5 { sink(x) }`
/// gate is a precondition, not a rejection).
BoundedLength,
/// Comparison operators: `x == 5`, `x != y`, `x >= limit`,
/// `x > threshold`.
Comparison,
/// Generic boolean test that cannot be classified further
/// (e.g. `flag`, `a && b`, or empty text).
Unknown,
}
/// Single-character shell metacharacters that a rejection check commonly
/// guards against before constructing a shell command.
///
/// The list covers command separators and pipes (`;`, `|`, `&`), the
/// substitution introducers (backtick, `$`), redirections (`>`, `<`), and
/// the newline, carriage-return and NUL control characters.
///
/// Presence of any of these in user input is sufficient to enable shell
/// injection, so rejecting input that contains them is a real sanitizer.
/// `"foo"` or other non-metachar needles don't qualify: a rejection of
/// those is business logic, not security.
const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r", "\0"];
/// Decide whether `text` is a shell-metacharacter rejection idiom.
///
/// Three shapes are recognised, tried in this order:
/// 1. Method call with a string-literal needle: `x.contains("<M>")`
///    (Rust / Java / Go), `x.includes("<M>")` (JS / TS),
///    `x.include?("<M>")` (Ruby). Only the first occurrence of each method
///    name in `text` is inspected.
/// 2. Python membership with the literal on the left: `"<M>" in x`.
/// 3. A regex character class made up solely of metacharacters, e.g.
///    `x.match(/[;|&]/)` or `re.search(r"[;|&]", x)`.
///
/// `<M>` must be one of [`SHELL_METACHARS`]. Any other needle, or a needle
/// that cannot be extracted, yields `false` so the caller can fall through
/// to broader classification.
fn is_shell_metachar_rejection(text: &str) -> bool {
    let is_metachar = |needle: &str| SHELL_METACHARS.contains(&needle);

    // Shape 1: `.contains(…)` / `.includes(…)` / `.include?(…)`.
    let via_method_call = [".contains(", ".includes(", ".include?("]
        .iter()
        .filter_map(|method| {
            let args_start = text.find(*method)? + method.len();
            extract_first_string_arg(&text[args_start..])
        })
        .any(|needle| is_metachar(&needle));
    if via_method_call {
        return true;
    }

    // Shape 2: `"<M>" in x` (an identifier on the left is an allowlist
    // check and produces no needle).
    let via_python_in =
        extract_python_in_needle(text).map_or(false, |needle| is_metachar(&needle));

    // Shape 3: regex character class.
    via_python_in || is_metachar_regex_class(text)
}
/// Read the string literal that opens an argument list.
///
/// `after_open` is the text immediately following the `(` of a call. Leading
/// ASCII whitespace is skipped; the next byte must be `"` or `'`, otherwise
/// `None` is returned. The literal's contents are returned without the
/// surrounding quotes.
///
/// Backslash escapes are decoded: `\n`, `\r`, `\t` and `\0` map to the
/// corresponding control character, and any other escaped byte (including
/// `\"`, `\'`, `\\`) stands for itself. An unterminated literal, or decoded
/// bytes that are not valid UTF-8, also give `None`.
fn extract_first_string_arg(after_open: &str) -> Option<String> {
    let mut rest = after_open.bytes().skip_while(u8::is_ascii_whitespace);

    let delimiter = match rest.next()? {
        quote @ (b'"' | b'\'') => quote,
        _ => return None,
    };

    let mut decoded = Vec::new();
    while let Some(byte) = rest.next() {
        if byte == delimiter {
            return String::from_utf8(decoded).ok();
        }
        if byte != b'\\' {
            decoded.push(byte);
            continue;
        }
        // Escape sequence: consume the byte after the backslash. A trailing
        // backslash means the literal is never closed.
        match rest.next() {
            Some(b'n') => decoded.push(b'\n'),
            Some(b'r') => decoded.push(b'\r'),
            Some(b't') => decoded.push(b'\t'),
            Some(b'0') => decoded.push(b'\0'),
            Some(other) => decoded.push(other),
            None => return None,
        }
    }
    None
}
/// For Python `"<METACHAR>" in x` (needle on the left side of ` in `), return
/// the decoded needle.
///
/// Returns `None` when:
/// - `text` contains no ` in ` operator;
/// - the left operand is not a single string literal (e.g. `x in ALLOWED`,
///   which is an allowlist check, not a rejection);
/// - the literal is unterminated or followed by further text.
///
/// A single leading negation, written either as `!` or as the Python keyword
/// `not `, is stripped before the literal is inspected, so `not ";" in x`
/// yields `";"`. Backslash escapes are decoded the same way as for the
/// method-call form (`\n`, `\r`, `\t`, `\0`; any other escaped character
/// stands for itself), so `"\n" in x` yields an actual newline.
fn extract_python_in_needle(text: &str) -> Option<String> {
    let pos = text.find(" in ")?;
    let left = text[..pos].trim();

    // Strip leading `!` / `not ` for rejection contexts.
    let left = left.strip_prefix('!').unwrap_or(left).trim_start();
    let left = left.strip_prefix("not ").unwrap_or(left).trim_start();

    let mut chars = left.chars();
    let quote = chars.next().filter(|c| matches!(c, '"' | '\''))?;

    let mut needle = String::new();
    while let Some(c) = chars.next() {
        if c == '\\' {
            // A dangling backslash means the literal is malformed.
            needle.push(match chars.next()? {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                '0' => '\0',
                other => other,
            });
        } else if c == quote {
            // The closing quote must end the operand; anything after it
            // (e.g. `"a" + "b"`) means this is not a plain literal.
            return chars.next().is_none().then_some(needle);
        } else {
            needle.push(c);
        }
    }
    None
}
/// Report whether `text` contains a bracketed character class whose members
/// are all shell metacharacters, such as `[;|&]` or ``[;&`$]``.
///
/// The scan pairs each `[` with the next `]`; when a class is rejected the
/// search resumes after that `]`. Empty classes never match. Escaped
/// members written with a backslash (e.g. `[\n]`) are not understood, so
/// such classes conservatively return `false`.
fn is_metachar_regex_class(text: &str) -> bool {
    let only_metachars = |class: &str| {
        !class.is_empty()
            && class
                .chars()
                .all(|c| SHELL_METACHARS.iter().any(|m| m.starts_with(c)))
    };

    let mut cursor = text;
    while let Some((_, after_open)) = cursor.split_once('[') {
        match after_open.split_once(']') {
            Some((class, remainder)) => {
                if only_metachars(class) {
                    return true;
                }
                cursor = remainder;
            }
            // Unclosed `[`: nothing further can form a class.
            None => break,
        }
    }
    false
}
/// Check whether `text` looks like a bounded-length rejection:
/// `x.len() > N`, `x.len() < N`, `x.length >= N`, etc. where `N` is an
/// integer literal >= 2. Excludes `> 0` / `>= 1` / `< 1`, those are
/// non-empty checks, which are not length-bound validations.
fn is_bounded_length_check(lower: &str) -> bool {
const PROBES: &[&str] = &[
".len()", ".length", // JS/TS/Java `.length` property (no parens)
];
for probe in PROBES {
let mut rest = lower;
while let Some(pos) = rest.find(probe) {
let after = &rest[pos + probe.len()..];
// Skip the optional `()` that `.length` never has but `.len` does.
let after = after.trim_start();
let after = after.strip_prefix("()").unwrap_or(after);
let after = after.trim_start();
for op in [">=", "<=", ">", "<"] {
if let Some(tail) = after.strip_prefix(op) {
let tail = tail.trim_start();
if let Some(n) = parse_leading_uint(tail) {
if n >= 2 {
return true;
}
}
break;
}
}
rest = &rest[pos + probe.len()..];
}
}
false
}
/// Read the run of decimal digits at the start of `s` as a `u64`.
///
/// Returns `None` when `s` does not begin with a digit, or when the digit
/// run does not fit in a `u64`. Characters after the digit run are ignored.
fn parse_leading_uint(s: &str) -> Option<u64> {
    s.chars()
        .map_while(|c| c.to_digit(10))
        // The accumulator stays `None` until the first digit is seen, which
        // distinguishes "no digits" from a parsed value of zero.
        .try_fold(None::<u64>, |acc, digit| {
            acc.unwrap_or(0)
                .checked_mul(10)?
                .checked_add(u64::from(digit))
                .map(Some)
        })
        .flatten()
}
/// Classify a raw condition text into a [`PredicateKind`].
///
/// Matching is substring-based on an ASCII-lowercased copy of `text`; the
/// first rule that matches wins. Rules are tried in this order:
///
/// 1. Empty text → [`Unknown`](PredicateKind::Unknown).
/// 2. Error checks (`is_err`, `err != nil`, …). These come before null
///    checks because `err != nil` also contains `!= nil`.
/// 3. Null checks.
/// 4. Empty checks.
/// 5. Shell-metachar rejection, evaluated on the original-case text and
///    before allowlist checks so `x.contains(";")` is not mistaken for a
///    membership test.
/// 6. Allowlist / membership checks.
/// 7. Type-check guards, including per-character `.all(…)` validations.
/// 8. Bounded-length comparisons.
/// 9. Call-based kinds: `ValidationCall` / `SanitizerCall` require a `(` in
///    the text **and** a matching callee name, which keeps comparisons such
///    as `x_valid == true` from being misclassified.
/// 10. Comparison operators.
///
/// Anything else is [`Unknown`](PredicateKind::Unknown); the classifier
/// prefers that over a false positive.
pub fn classify_condition(text: &str) -> PredicateKind {
    if text.is_empty() {
        return PredicateKind::Unknown;
    }

    let lower = text.to_ascii_lowercase();
    let has_any = |needles: &[&str]| needles.iter().any(|needle| lower.contains(*needle));

    // ── Error checks ─────────────────────────────────────────────────────
    if has_any(&[
        "is_err",
        "is_ok",
        "err != nil",
        "err == nil",
        "error != nil",
        "error == nil",
    ]) {
        return PredicateKind::ErrorCheck;
    }

    // ── Null checks ──────────────────────────────────────────────────────
    if has_any(&[
        "is_none",
        "is_some",
        "== none",
        "!= none",
        "is none",
        "is not none",
        "== null",
        "!= null",
        "=== null",
        "!== null",
        "== nil",
        "!= nil",
    ]) {
        return PredicateKind::NullCheck;
    }

    // ── Empty checks ─────────────────────────────────────────────────────
    if has_any(&[
        "is_empty",
        ".len() == 0",
        ".len() != 0",
        ".length == 0",
        ".length === 0",
        ".length != 0",
        ".length !== 0",
        "== \"\"",
        "== ''",
    ]) {
        return PredicateKind::EmptyCheck;
    }

    // ── Shell-metachar negative validation ───────────────────────────────
    // Runs on the raw text: the metacharacters themselves have no case, and
    // this must precede the allowlist rule below.
    if is_shell_metachar_rejection(text) {
        return PredicateKind::ShellMetaValidated;
    }

    // ── Allowlist / membership checks ────────────────────────────────────
    // A bracket with no call syntax is treated as a map/array lookup.
    let bracket_lookup = lower.contains('[') && !lower.contains('(');
    if bracket_lookup
        || has_any(&[
            ".includes(",
            ".include?(",
            ".contains(",
            ".indexof(",
            ".has(",
            "in_array(",
            " in ",
        ])
    {
        return PredicateKind::AllowlistCheck;
    }

    // ── Type-check guards ────────────────────────────────────────────────
    // Per-character validation such as `.chars().all(|c| c.is_ascii_*())`
    // is a strong validation signal and is treated like a type check.
    let per_char_validation =
        lower.contains(".all(") && has_any(&["is_ascii_", "is_alphanumeric", "is_numeric("]);
    if per_char_validation
        || has_any(&[
            "typeof ",
            "isinstance(",
            " instanceof ",
            ".matches(",
            "is_numeric(",
            "is_int(",
            "is_string(",
            "is_float(",
            "ctype_",
            ".is_a?(",
            ".kind_of?(",
        ])
    {
        return PredicateKind::TypeCheck;
    }

    // ── Bounded-length rejection ─────────────────────────────────────────
    // Kept as its own kind because the rejection direction cannot be told
    // from the condition alone.
    if is_bounded_length_check(&lower) {
        return PredicateKind::BoundedLength;
    }

    // ── Call-based kinds (require `(` to be present) ─────────────────────
    if lower.contains('(') {
        // Peel leading wrappers so `(!validate(x))` and `not validate(x)`
        // still expose their callee.
        let unwrapped = lower.trim_start_matches(['(', '!', ' ', '\t']);
        let unwrapped = unwrapped.strip_prefix("not ").unwrap_or(unwrapped).trim();

        // Everything before the first `(` is the (possibly qualified) callee.
        let callee = unwrapped
            .split_once('(')
            .map_or(unwrapped, |(head, _)| head);
        // Bare name: the segment after the last `.` or `:`.
        let name = callee
            .rfind(['.', ':'])
            .map_or(callee, |sep| &callee[sep + 1..])
            .trim();

        let looks_like_validator = ["valid", "check", "verify"]
            .iter()
            .any(|marker| name.contains(*marker))
            || ["is_safe", "is_authorized", "is_authenticated"]
                .iter()
                .any(|prefix| name.starts_with(*prefix));
        if looks_like_validator {
            return PredicateKind::ValidationCall;
        }

        // `<re>.test(v)` / `<re>.match(v)` / `<re>.matches(v)` count as
        // validation only when the receiver name carries a regex/pattern
        // marker, so unrelated `.test(…)` methods are not pulled in.
        if matches!(name, "test" | "match" | "matches") {
            if let Some(dot) = callee.rfind('.') {
                // `callee` is a slice of `lower`, so it is already lowercase.
                let receiver = &callee[..dot];
                if receiver.contains("regex") || receiver.contains("pattern") {
                    return PredicateKind::ValidationCall;
                }
            }
        }

        if ["sanitiz", "escape", "encode"]
            .iter()
            .any(|marker| name.contains(*marker))
        {
            return PredicateKind::SanitizerCall;
        }
    }

    // ── Comparison operators ─────────────────────────────────────────────
    if has_any(&["==", "!=", ">=", "<=", " > ", " < "]) {
        return PredicateKind::Comparison;
    }

    PredicateKind::Unknown
}
/// Classify a condition and, where possible, name the variable it guards.
///
/// The kind comes from [`classify_condition`]; the target is extracted by a
/// kind-specific helper:
/// - `ValidationCall` / `SanitizerCall`: first argument or method receiver
///   (`validate(x, ...)` → `"x"`, `x.validate(...)` → `"x"`).
/// - `AllowlistCheck`, `TypeCheck`, `Comparison`: their dedicated extractors.
/// - `ShellMetaValidated`: the receiver of `.contains(…)` / `.includes(…)`,
///   via the validation extractor.
/// - All other kinds carry no target.
///
/// When a validation/sanitizer call has more than one argument and no
/// target can be extracted, it is unclear which argument is being checked.
/// Returning the kind with no target would let callers treat every variable
/// in the condition as validated, so the result is downgraded to
/// `(Unknown, None)` instead. Calls with at most one argument keep
/// `(kind, None)`.
pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<String>) {
    let kind = classify_condition(text);

    let target = match kind {
        PredicateKind::ValidationCall | PredicateKind::SanitizerCall => {
            match extract_validation_target(text) {
                Some(name) => Some(name),
                // Opaque multi-argument validator: assume it validated nothing.
                None if matches!(count_call_args(text), Some(n) if n > 1) => {
                    return (PredicateKind::Unknown, None);
                }
                None => None,
            }
        }
        PredicateKind::AllowlistCheck => extract_allowlist_target(text),
        PredicateKind::TypeCheck => extract_type_check_target(text),
        PredicateKind::ShellMetaValidated => extract_validation_target(text),
        // `x === '/login'`, `null != obj`: the identifier side is the target
        // when exactly one side is a literal.
        PredicateKind::Comparison => extract_comparison_target(text),
        PredicateKind::NullCheck
        | PredicateKind::EmptyCheck
        | PredicateKind::ErrorCheck
        | PredicateKind::BoundedLength
        | PredicateKind::Unknown => None,
    };

    (kind, target)
}
/// Pick the identifier operand of an equality/inequality comparison in which
/// exactly one operand is a scalar literal.
///
/// The operators `===`, `!==`, `==`, `!=` are tried in that order and the
/// first one present in `text` splits it into a left and right operand.
///
/// Examples:
/// - `x === '/login'` → `Some("x")`
/// - `x !== 5` → `Some("x")`
/// - `null != obj` → `Some("obj")`
/// - `x === y` → `None` (neither side is a literal)
/// - `'a' == 'b'` → `None` (both sides are literals)
/// - `f(x) == 3` → `None` (non-literal side is not identifier-shaped)
///
/// Identifier shape is decided by `is_identifier`, which accepts dotted
/// paths, so `obj.field == 3` yields `Some("obj.field")`.
///
/// Best-effort text analysis, kept conservative to avoid false validation.
fn extract_comparison_target(text: &str) -> Option<String> {
    let expr = text.trim();

    // Longer operators first so `===` is not split as `==` plus a stray `=`.
    let (lhs, rhs) = ["===", "!==", "==", "!="]
        .iter()
        .find_map(|op| expr.split_once(*op))?;
    let (lhs, rhs) = (lhs.trim(), rhs.trim());

    let lhs_literal = is_comparison_literal(lhs);
    let rhs_literal = is_comparison_literal(rhs);

    if rhs_literal && !lhs_literal && is_identifier(lhs) {
        Some(lhs.to_string())
    } else if lhs_literal && !rhs_literal && is_identifier(rhs) {
        Some(rhs.to_string())
    } else {
        None
    }
}
/// Decide whether `s` (after trimming) is a scalar literal for the purposes
/// of comparison-target extraction.
///
/// Accepted forms:
/// - text that starts and ends with the same quote character (`"`, `'` or
///   a backtick) and is at least two bytes long;
/// - the keywords `null`, `undefined`, `nil`, `None`, `true`, `false`
///   (case-sensitive);
/// - a number: an optional single `+`/`-` sign, then a leading ASCII digit,
///   with every remaining character a digit, `.` or `_`.
fn is_comparison_literal(s: &str) -> bool {
    let token = s.trim();

    // Quoted string: the slice pattern requires two or more bytes.
    if matches!(
        token.as_bytes(),
        [open @ (b'"' | b'\'' | b'`'), .., close] if open == close
    ) {
        return true;
    }

    if matches!(
        token,
        "null" | "undefined" | "nil" | "None" | "true" | "false"
    ) {
        return true;
    }

    // Numeric literal; an empty or sign-only token fails the digit test.
    let unsigned = token.strip_prefix(['-', '+']).unwrap_or(token);
    unsigned.starts_with(|c: char| c.is_ascii_digit())
        && unsigned
            .chars()
            .all(|c| c.is_ascii_digit() || c == '.' || c == '_')
}
/// Count positional arguments of the first call in a condition text.
///
/// Returns `None` when the text does not look like a call (no `(` after
/// leading wrappers are removed). Returns `Some(0)` for a call with an empty
/// argument list. Respects paren/bracket/brace nesting so `f(g(a, b), c)`
/// counts as 2 top-level args.
///
/// Leading wrappers (`(`, `!`, whitespace, and a Python `not ` keyword) are
/// stripped the same way the target extractors strip them, so
/// `(!validate(f(a), b))` is counted as the two-argument call it is. The
/// scan stops at the paren that closes the call, so trailing wrapper parens
/// or further sub-expressions (`validate(a) && other(b, c)`) do not affect
/// the count. Commas inside `"…"` / `'…'` literals are ignored. If the call
/// is never closed, everything up to the end of the text is counted.
///
/// Best-effort, operates on source text, not an AST. Used by
/// `classify_condition_with_target` to distinguish single-arg vs multi-arg
/// validator calls when target extraction fails.
fn count_call_args(text: &str) -> Option<usize> {
    let trimmed = text.trim().trim_start_matches(['(', '!', ' ', '\t']);
    let trimmed = trimmed.strip_prefix("not ").unwrap_or(trimmed).trim();
    let open = trimmed.find('(')?;

    let bytes = trimmed.as_bytes();
    let mut depth: usize = 1; // we start just inside the call's `(`
    let mut commas: usize = 0;
    let mut saw_content = false;
    let mut i = open + 1;
    while i < bytes.len() {
        match bytes[i] {
            b'(' | b'[' | b'{' => {
                depth += 1;
                saw_content = true;
            }
            b')' | b']' | b'}' => {
                depth -= 1;
                if depth == 0 {
                    // Matching close paren of the call: stop counting.
                    break;
                }
            }
            b',' if depth == 1 => commas += 1,
            quote @ (b'"' | b'\'') => {
                // Skip the literal so its punctuation is not interpreted.
                saw_content = true;
                i += 1;
                while i < bytes.len() && bytes[i] != quote {
                    if bytes[i] == b'\\' {
                        i += 1;
                    }
                    i += 1;
                }
            }
            other if !other.is_ascii_whitespace() => saw_content = true,
            _ => {}
        }
        i += 1;
    }

    if saw_content || commas > 0 {
        Some(commas + 1)
    } else {
        Some(0)
    }
}
/// Return the first top-level argument of a call.
///
/// `args_part` is the text immediately after the call's opening paren. The
/// scan tracks paren/bracket/brace nesting and steps over `"…"` / `'…'`
/// literals (honouring backslash escapes), so nested calls and punctuation
/// inside strings do not end the argument early. The result is the trimmed
/// text up to the first top-level `,`, or up to the paren that closes the
/// call when there is no such comma.
///
/// Returns `None` when the call's closing paren is never found.
///
/// Because the scan stops at the matching close paren, trailing wrapper
/// parens as in `(!ALLOWED.includes(cmd))` do not leak into the argument.
fn first_call_arg(args_part: &str) -> Option<&str> {
    let mut nesting: usize = 1; // already inside the call's `(`
    let mut comma_at: Option<usize> = None;
    let mut close_at: Option<usize> = None;
    let mut open_quote: Option<u8> = None;
    let mut escaped = false;

    for (idx, byte) in args_part.bytes().enumerate() {
        // Inside a string literal only the closing quote matters.
        if let Some(quote) = open_quote {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == quote {
                open_quote = None;
            }
            continue;
        }

        match byte {
            b'"' | b'\'' => open_quote = Some(byte),
            b'(' | b'[' | b'{' => nesting += 1,
            b')' | b']' | b'}' => {
                nesting -= 1;
                if nesting == 0 {
                    close_at = Some(idx);
                    break;
                }
            }
            b',' if nesting == 1 => {
                // Only the first top-level comma delimits the argument.
                comma_at.get_or_insert(idx);
            }
            _ => {}
        }
    }

    let close_at = close_at?;
    Some(args_part[..comma_at.unwrap_or(close_at)].trim())
}
/// Extract the validated variable from a condition text.
///
/// Handles two patterns:
/// - Function call: `validate(x, ...)` → `"x"`
/// - Method call: `x.validate(...)` → `"x"`
fn extract_validation_target(text: &str) -> Option<String> {
let trimmed = text.trim();
// Strip leading wrappers (parens, `!`, `not `) so idiomatic forms like
// `(!validate(x))` (TS/JS) and `not validate(x)` (Python) are reachable.
let trimmed = trimmed.trim_start_matches(['(', '!', ' ', '\t']);
let trimmed = trimmed.strip_prefix("not ").unwrap_or(trimmed).trim();
// Find the first `(` which separates callee from args
let paren_pos = trimmed.find('(')?;
let callee_part = &trimmed[..paren_pos];
let args_part = &trimmed[paren_pos + 1..];
// Check for method call pattern: `x.method(...)` or `x.method_name(...)`
if let Some(dot_pos) = callee_part.rfind('.') {
let receiver = callee_part[..dot_pos].trim();
let method = callee_part[dot_pos + 1..].trim().to_ascii_lowercase();
// Regex-allowlist `<re>.test(value)` / `<re>.match(value)` / `<re>.matches(value)`:
// the validated target is the call's first argument, not the regex
// receiver. Without this special case, branch narrowing would mark
// the regex itself as validated and leave the user input alone.
if matches!(method.as_str(), "test" | "match" | "matches")
&& let Some(first_arg) = first_call_arg(args_part)
{
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
if !receiver.is_empty() && is_identifier(receiver) {
return Some(receiver.to_string());
}
}
// Function call pattern: `func(x, ...)`, extract first argument with
// balanced-paren scan so trailing wrapper parens (`(validate(x))`) do
// not corrupt the argument substring.
let first_arg = first_call_arg(args_part)?;
// Strip reference operators (e.g. `&x` → `x`)
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
if !first_arg.is_empty() && is_identifier(first_arg) {
Some(first_arg.to_string())
} else {
None
}
}
/// Name the variable being tested by an allowlist/membership check.
///
/// Tried in order; the first identifier-shaped candidate wins:
/// 1. First argument of `.includes(`, `.include?(`, `.contains(`,
///    `.indexof(`, `.has(` or `in_array(` (matched case-insensitively), with
///    a leading PHP `$` removed: `ALLOWED.includes(cmd)` → `cmd`,
///    `in_array($cmd, $allowed)` → `cmd`.
/// 2. Left operand of ` not in ` / ` in `, with a leading `!` and `$`
///    removed: `cmd not in ALLOWED` → `cmd`.
/// 3. Content of the first `[` … `]` pair, with a leading `$` removed:
///    `allowed[cmd]` → `cmd`.
fn extract_allowlist_target(text: &str) -> Option<String> {
    let expr = text.trim();
    // ASCII lowercasing keeps byte offsets aligned with `expr`.
    let lower = expr.to_ascii_lowercase();
    let as_identifier =
        |candidate: &str| is_identifier(candidate).then(|| candidate.to_string());

    // 1. Call-style membership tests; `in_array(` is handled exactly like the
    //    method forms and is tried after them.
    const CALL_MARKERS: [&str; 6] = [
        ".includes(",
        ".include?(",
        ".contains(",
        ".indexof(",
        ".has(",
        "in_array(",
    ];
    let from_call = CALL_MARKERS.iter().find_map(|marker| {
        let args_start = lower.find(*marker)? + marker.len();
        let arg = first_call_arg(&expr[args_start..])?;
        as_identifier(arg.strip_prefix('$').unwrap_or(arg))
    });
    if from_call.is_some() {
        return from_call;
    }

    // 2. `in` operator: prefer the ` not in ` split so `not` is not taken as
    //    part of the operand.
    let operator_at = lower.find(" not in ").or_else(|| lower.find(" in "));
    if let Some(pos) = operator_at {
        let operand = expr[..pos].trim();
        let operand = operand.strip_prefix('!').unwrap_or(operand).trim();
        let operand = operand.strip_prefix('$').unwrap_or(operand);
        if let Some(name) = as_identifier(operand) {
            return Some(name);
        }
    }

    // 3. Map/array lookup such as Go's `allowed[cmd]`.
    let open = expr.find('[')?;
    let close = expr.find(']')?;
    if close <= open + 1 {
        return None;
    }
    let key = expr[open + 1..close].trim();
    as_identifier(key.strip_prefix('$').unwrap_or(key))
}
/// Extract the target variable from a type-check guard.
///
/// Handles:
/// - `typeof input !== 'number'` → `input` (word after `typeof`)
/// - `isinstance(user_id, int)` → `user_id` (first arg)
/// - `input.matches("\\d+")` → `input` (receiver)
/// - `is_numeric($id)` → `id` (first arg, strip `$`)
fn extract_type_check_target(text: &str) -> Option<String> {
let trimmed = text.trim();
let lower = trimmed.to_ascii_lowercase();
// typeof: `typeof input !== 'number'`
if let Some(pos) = lower.find("typeof ") {
let after = &trimmed[pos + "typeof ".len()..];
// The target is the next identifier-like word
let target: String = after
.chars()
.take_while(|c| c.is_alphanumeric() || *c == '_')
.collect();
if !target.is_empty() {
return Some(target);
}
}
// isinstance(user_id, int) → user_id
if let Some(pos) = lower.find("isinstance(") {
let args_start = pos + "isinstance(".len();
let args_part = &trimmed[args_start..];
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
let first_arg = inner.split(',').next()?.trim();
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
// Java/TS instanceof: "x instanceof String" → "x"
if let Some(pos) = lower.find(" instanceof ") {
let var_part = trimmed[..pos].trim();
if !var_part.is_empty() && is_identifier(var_part) {
return Some(var_part.to_string());
}
}
// .matches("...") → receiver
if let Some(pos) = lower.find(".matches(") {
let receiver = trimmed[..pos].trim();
let receiver = receiver.strip_prefix('!').unwrap_or(receiver).trim();
if !receiver.is_empty() && is_identifier(receiver) {
return Some(receiver.to_string());
}
}
// PHP type checks: is_numeric($id), is_int($x), is_string($x), is_float($x)
for func in &["is_numeric(", "is_int(", "is_string(", "is_float("] {
if let Some(pos) = lower.find(func) {
let args_start = pos + func.len();
let args_part = &trimmed[args_start..];
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
let first_arg = inner.split(',').next()?.trim();
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
}
// Ruby type checks: user_id.is_a?(Integer), x.kind_of?(String) → receiver
for method in &[".is_a?(", ".kind_of?("] {
if let Some(pos) = lower.find(method) {
let receiver = trimmed[..pos].trim();
let receiver = receiver.strip_prefix('!').unwrap_or(receiver).trim();
if !receiver.is_empty() && is_identifier(receiver) {
return Some(receiver.to_string());
}
}
}
// ctype_ functions: ctype_digit($x)
if let Some(pos) = lower.find("ctype_") {
// Find the `(` after ctype_xxx
if let Some(paren_pos) = trimmed[pos..].find('(') {
let args_start = pos + paren_pos + 1;
let args_part = &trimmed[args_start..];
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
let first_arg = inner.split(',').next()?.trim();
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
}
None
}
/// Report whether `s` has the shape of a simple identifier or dotted path.
///
/// Every character must be alphanumeric, `_` or `.`, the string must be
/// non-empty, and it must not begin with an ASCII digit.
fn is_identifier(s: &str) -> bool {
    let allowed = |c: char| c.is_alphanumeric() || c == '_' || c == '.';

    let mut chars = s.chars();
    match chars.next() {
        None => false,
        Some(first) if first.is_ascii_digit() => false,
        Some(first) => allowed(first) && chars.all(allowed),
    }
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
// ── classify_condition ────────────────────────────────────────────────
#[test]
fn classify_empty_is_unknown() {
assert_eq!(classify_condition(""), PredicateKind::Unknown);
}
#[test]
fn classify_null_checks() {
assert_eq!(classify_condition("x.is_none()"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x == null"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x != nil"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x is None"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x === null"), PredicateKind::NullCheck);
}
#[test]
fn classify_error_checks() {
assert_eq!(classify_condition("x.is_err()"), PredicateKind::ErrorCheck);
assert_eq!(classify_condition("err != nil"), PredicateKind::ErrorCheck);
assert_eq!(classify_condition("x.is_ok()"), PredicateKind::ErrorCheck);
}
#[test]
fn classify_empty_checks() {
assert_eq!(
classify_condition("x.is_empty()"),
PredicateKind::EmptyCheck
);
assert_eq!(
classify_condition("x.len() == 0"),
PredicateKind::EmptyCheck
);
assert_eq!(
classify_condition("x.length === 0"),
PredicateKind::EmptyCheck
);
}
#[test]
fn classify_validation_call() {
assert_eq!(
classify_condition("validate(x)"),
PredicateKind::ValidationCall
);
assert_eq!(
classify_condition("is_safe(input)"),
PredicateKind::ValidationCall
);
assert_eq!(
classify_condition("check_auth(req)"),
PredicateKind::ValidationCall
);
assert_eq!(
classify_condition("input.verify(sig)"),
PredicateKind::ValidationCall
);
}
#[test]
fn classify_validation_requires_paren() {
// `x_valid == true` should NOT be ValidationCall, no `(` call syntax.
assert_eq!(
classify_condition("x_valid == true"),
PredicateKind::Comparison
);
assert_eq!(
classify_condition("is_valid && ready"),
PredicateKind::Unknown
);
}
#[test]
fn classify_sanitizer_call() {
assert_eq!(
classify_condition("sanitize(x)"),
PredicateKind::SanitizerCall
);
assert_eq!(
classify_condition("html_escape(s)"),
PredicateKind::SanitizerCall
);
assert_eq!(
classify_condition("url_encode(path)"),
PredicateKind::SanitizerCall
);
}
#[test]
fn classify_comparison() {
assert_eq!(classify_condition("x == 5"), PredicateKind::Comparison);
assert_eq!(classify_condition("x != y"), PredicateKind::Comparison);
assert_eq!(classify_condition("a >= b"), PredicateKind::Comparison);
}
#[test]
fn classify_unknown_fallback() {
assert_eq!(classify_condition("flag"), PredicateKind::Unknown);
assert_eq!(classify_condition("a && b"), PredicateKind::Unknown);
}
// ── classify_condition_with_target ──────────────────────────────────
#[test]
fn target_function_call_first_arg() {
let (kind, target) = classify_condition_with_target("validate(x, config)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("x"));
}
#[test]
fn target_method_call_receiver() {
let (kind, target) = classify_condition_with_target("x.isValid()");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("x"));
}
#[test]
fn target_sanitizer_first_arg() {
let (kind, target) = classify_condition_with_target("sanitize(input)");
assert_eq!(kind, PredicateKind::SanitizerCall);
assert_eq!(target.as_deref(), Some("input"));
}
/// A leading `!` and a `&` borrow on the argument do not hide the target.
#[test]
fn target_negated_validation() {
    let (kind, target) = classify_condition_with_target("!validate(&x)");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::ValidationCall, Some("x"))
    );
}
/// `<REGEX>.test(value)` must classify as `ValidationCall`, with the call
/// argument (not the regex receiver) as the validated target.
///
/// Pinned because method calls default to the receiver-as-target
/// heuristic. Motivated by Payload CVE-2026-25544
/// (`if (!SAFE_STRING_REGEX.test(value)) throw …;`).
#[test]
fn target_regex_test_first_arg() {
    let condition = "!SAFE_STRING_REGEX.test(value)";
    let outcome = classify_condition_with_target(condition);
    assert_eq!(outcome.0, PredicateKind::ValidationCall);
    assert_eq!(outcome.1.as_deref(), Some("value"));
}
/// A receiver named like a pattern (`ALLOWED_PATTERN`) is also treated as a
/// regex: `.test(s)` validates the argument `s`.
#[test]
fn target_regex_test_pattern_receiver() {
    let (kind, target) = classify_condition_with_target("ALLOWED_PATTERN.test(s)");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::ValidationCall, Some("s"))
    );
}
/// `.test(...)` on a receiver whose name carries no regex/pattern marker
/// must NOT count as a validator: `obj.test(x)` is a test runner, not a
/// regex match.
#[test]
fn target_test_non_regex_receiver_is_not_validation() {
    assert_eq!(
        classify_condition("obj.test(value)"),
        PredicateKind::Unknown
    );
}
/// In `identifier == literal`, the identifier side is the target.
#[test]
fn target_comparison_extracts_identifier_side() {
    let outcome = classify_condition_with_target("x == 5");
    assert_eq!(outcome.0, PredicateKind::Comparison);
    assert_eq!(outcome.1.as_deref(), Some("x"));
}
/// Strict equality (`===`) against a quoted string still yields the
/// identifier as the target.
#[test]
fn target_comparison_strict_equality_with_string() {
    let (kind, target) = classify_condition_with_target("x === '/login'");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::Comparison, Some("x"))
    );
}
/// With the literal on the left (`null != obj`), the right-hand identifier
/// is the target.
#[test]
fn target_comparison_literal_on_left() {
    let outcome = classify_condition_with_target("null != obj");
    assert_eq!(outcome.0, PredicateKind::Comparison);
    assert_eq!(outcome.1.as_deref(), Some("obj"));
}
/// When both operands are identifiers, no single target is reported.
#[test]
fn target_comparison_both_identifiers_returns_none() {
    let (kind, target) = classify_condition_with_target("x === y");
    assert_eq!(kind, PredicateKind::Comparison);
    assert!(target.is_none());
}
/// When both operands are literals, there is no identifier to target.
#[test]
fn target_comparison_both_literals_returns_none() {
    let (kind, target) = classify_condition_with_target("'a' == 'b'");
    assert_eq!(kind, PredicateKind::Comparison);
    assert!(target.is_none());
}
/// `check_auth(req)` is a validation call whose target is `req`.
#[test]
fn target_check_auth_first_arg() {
    let outcome = classify_condition_with_target("check_auth(req)");
    assert_eq!(outcome.0, PredicateKind::ValidationCall);
    assert_eq!(outcome.1.as_deref(), Some("req"));
}
/// A validator method that takes arguments still targets its receiver.
#[test]
fn target_method_with_args() {
    let (kind, target) = classify_condition_with_target("input.verify(sig)");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::ValidationCall, Some("input"))
    );
}
/// A multi-argument validator with an unextractable target degrades to
/// `Unknown`.
#[test]
fn target_multi_arg_fallback_opaque_expr_is_unknown() {
    // The first argument `x + 1` is an expression, not an identifier, so no
    // target can be extracted. Because the call has several arguments, the
    // kind drops to Unknown instead of letting upstream validate every
    // variable in the condition.
    let condition = "validate(x + 1, y)";
    let (kind, target) = classify_condition_with_target(condition);
    assert_eq!(kind, PredicateKind::Unknown);
    assert!(target.is_none());
}
/// A single-argument validator with an unextractable target keeps its kind.
#[test]
fn target_single_arg_fallback_preserves_kind() {
    // With only one argument there is only one variable in scope, so there
    // is no over-validation risk; the original kind is kept so the
    // predicate-summary bit can still be set.
    let condition = "validate(x + 1)";
    let (kind, target) = classify_condition_with_target(condition);
    assert_eq!(kind, PredicateKind::ValidationCall);
    assert!(target.is_none());
}
/// `count_call_args` counts top-level arguments only and returns `None`
/// for text that is not a call.
#[test]
fn count_call_args_basic() {
    let cases = [
        ("f(a, b, c)", Some(3)),
        ("f(a)", Some(1)),
        ("f()", Some(0)),
        // The nested call `g(x, y)` counts as a single argument.
        ("f(g(x, y), z)", Some(2)),
        ("not_a_call", None),
    ];
    for (text, expected) in cases {
        assert_eq!(super::count_call_args(text), expected, "text: {text:?}");
    }
}
// ── AllowlistCheck classification ─────────────────────────────────
/// JS-style `.includes(x)` membership is an allowlist check.
#[test]
fn classify_allowlist_includes() {
    let kind = classify_condition("ALLOWED.includes(cmd)");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// PHP-style `in_array($x, $list)` is an allowlist check.
#[test]
fn classify_allowlist_in_array() {
    let kind = classify_condition("in_array($cmd, $allowed)");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// Python-style `x not in LIST` is an allowlist check.
#[test]
fn classify_allowlist_python_not_in() {
    let kind = classify_condition("cmd not in ALLOWED");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// Python-style `x in LIST` is an allowlist check.
#[test]
fn classify_allowlist_python_in() {
    let kind = classify_condition("cmd in ALLOWED");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// An index lookup such as `allowed[cmd]` is an allowlist check.
#[test]
fn classify_allowlist_map_lookup() {
    let kind = classify_condition("allowed[cmd]");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// `.contains(identifier)` on a collection is an allowlist check.
#[test]
fn classify_allowlist_contains() {
    let kind = classify_condition("whitelist.contains(value)");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// Set-style `.has(key)` membership is an allowlist check.
#[test]
fn classify_allowlist_has() {
    let kind = classify_condition("allowedSet.has(key)");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// Target extraction must survive the outer parentheses around a negated
/// `.includes(...)` condition.
#[test]
fn extract_allowlist_target_negated_paren_wrapper() {
    // Tree-sitter hands over the if-condition with its surrounding parens:
    // `(!ALLOWED.includes(cmd))`. A naive `strip_suffix(')')` left `cmd)`,
    // which `is_identifier` rejected because of the trailing `)`, and the
    // structural guard used for `cfg-unguarded-sink` suppression was lost.
    // The balanced-paren scan has to yield `Some("cmd")`.
    let condition = "(!ALLOWED.includes(cmd))";
    let outcome = classify_condition_with_target(condition);
    assert_eq!(outcome.0, PredicateKind::AllowlistCheck);
    assert_eq!(outcome.1.as_deref(), Some("cmd"));
}
/// Same paren-wrapped, negated shape using `.contains(...)`.
#[test]
fn extract_allowlist_target_java_contains_paren_wrapper() {
    let (kind, target) = classify_condition_with_target("(!ALLOWED.contains(cmd))");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::AllowlistCheck, Some("cmd"))
    );
}
/// Same paren-wrapped, negated shape using `in_array(...)`; the `$` sigil
/// is not part of the reported target.
#[test]
fn extract_allowlist_target_in_array_paren_wrapper() {
    let outcome = classify_condition_with_target("(!in_array($cmd, $allowed))");
    assert_eq!(outcome.0, PredicateKind::AllowlistCheck);
    assert_eq!(outcome.1.as_deref(), Some("cmd"));
}
// ── TypeCheck classification ──────────────────────────────────────
/// A JS `typeof` comparison is a type check, not a plain comparison.
#[test]
fn classify_type_check_typeof() {
    let kind = classify_condition("typeof input !== 'number'");
    assert_eq!(kind, PredicateKind::TypeCheck);
}
/// Python `isinstance(x, T)` is a type check.
#[test]
fn classify_type_check_isinstance() {
    let kind = classify_condition("isinstance(user_id, int)");
    assert_eq!(kind, PredicateKind::TypeCheck);
}
/// `.matches(<pattern>)` with a digit pattern is a type check.
#[test]
fn classify_type_check_matches() {
    let condition = "input.matches(\"\\\\d+\")";
    assert_eq!(classify_condition(condition), PredicateKind::TypeCheck);
}
/// PHP `is_numeric($x)` is a type check.
#[test]
fn classify_type_check_is_numeric() {
    let kind = classify_condition("is_numeric($id)");
    assert_eq!(kind, PredicateKind::TypeCheck);
}
/// PHP `is_int($x)` is a type check.
#[test]
fn classify_type_check_is_int() {
    let kind = classify_condition("is_int($x)");
    assert_eq!(kind, PredicateKind::TypeCheck);
}
/// PHP `ctype_digit($x)` is a type check.
#[test]
fn classify_type_check_ctype() {
    let kind = classify_condition("ctype_digit($x)");
    assert_eq!(kind, PredicateKind::TypeCheck);
}
// ── Allowlist target extraction ───────────────────────────────────
/// For `LIST.includes(x)` the target is the argument, not the list.
#[test]
fn target_allowlist_includes() {
    let outcome = classify_condition_with_target("ALLOWED.includes(cmd)");
    assert_eq!(outcome.0, PredicateKind::AllowlistCheck);
    assert_eq!(outcome.1.as_deref(), Some("cmd"));
}
/// For `in_array($x, $list)` the target is the first argument, without the
/// `$` sigil.
#[test]
fn target_allowlist_in_array() {
    let (kind, target) = classify_condition_with_target("in_array($cmd, $allowed)");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::AllowlistCheck, Some("cmd"))
    );
}
/// For `x in LIST` the target is the left-hand operand.
#[test]
fn target_allowlist_python_in() {
    let outcome = classify_condition_with_target("cmd in ALLOWED");
    assert_eq!(outcome.0, PredicateKind::AllowlistCheck);
    assert_eq!(outcome.1.as_deref(), Some("cmd"));
}
/// For `x not in LIST` the target is still the left-hand operand.
#[test]
fn target_allowlist_python_not_in() {
    let (kind, target) = classify_condition_with_target("cmd not in ALLOWED");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::AllowlistCheck, Some("cmd"))
    );
}
/// For `map[key]` the target is the key inside the brackets.
#[test]
fn target_allowlist_map_lookup() {
    let outcome = classify_condition_with_target("allowed[cmd]");
    assert_eq!(outcome.0, PredicateKind::AllowlistCheck);
    assert_eq!(outcome.1.as_deref(), Some("cmd"));
}
// ── TypeCheck target extraction ───────────────────────────────────
/// For `typeof x !== '...'` the target is the operand of `typeof`.
#[test]
fn target_type_check_typeof() {
    let (kind, target) = classify_condition_with_target("typeof input !== 'number'");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::TypeCheck, Some("input"))
    );
}
/// For `isinstance(x, T)` the target is the first argument.
#[test]
fn target_type_check_isinstance() {
    let outcome = classify_condition_with_target("isinstance(user_id, int)");
    assert_eq!(outcome.0, PredicateKind::TypeCheck);
    assert_eq!(outcome.1.as_deref(), Some("user_id"));
}
/// For `x.matches(<pattern>)` the target is the receiver.
#[test]
fn target_type_check_matches() {
    let condition = "input.matches(\"\\\\d+\")";
    let (kind, target) = classify_condition_with_target(condition);
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::TypeCheck, Some("input"))
    );
}
/// For `is_numeric($x)` the target is the argument without its `$` sigil.
#[test]
fn target_type_check_is_numeric() {
    let outcome = classify_condition_with_target("is_numeric($id)");
    assert_eq!(outcome.0, PredicateKind::TypeCheck);
    assert_eq!(outcome.1.as_deref(), Some("id"));
}
/// For `ctype_digit($x)` the target is the argument without its `$` sigil.
#[test]
fn target_type_check_ctype() {
    let (kind, target) = classify_condition_with_target("ctype_digit($x)");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::TypeCheck, Some("x"))
    );
}
/// Ruby `x.is_a?(T)` is a type check.
#[test]
fn classify_type_check_is_a() {
    let kind = classify_condition("user_id.is_a?(Integer)");
    assert_eq!(kind, PredicateKind::TypeCheck);
}
/// For `x.is_a?(T)` the target is the receiver.
#[test]
fn target_type_check_is_a() {
    let outcome = classify_condition_with_target("user_id.is_a?(Integer)");
    assert_eq!(outcome.0, PredicateKind::TypeCheck);
    assert_eq!(outcome.1.as_deref(), Some("user_id"));
}
/// Ruby `LIST.include?(x)` is an allowlist check.
#[test]
fn classify_allowlist_include_question() {
    let kind = classify_condition("ALLOWED.include?(cmd)");
    assert_eq!(kind, PredicateKind::AllowlistCheck);
}
/// For `LIST.include?(x)` the target is the argument.
#[test]
fn target_allowlist_include_question() {
    let (kind, target) = classify_condition_with_target("ALLOWED.include?(cmd)");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::AllowlistCheck, Some("cmd"))
    );
}
// ── instanceof classification and target ─────────────────────────────
/// `x instanceof T` is a type check.
#[test]
fn classify_instanceof_is_type_check() {
    let kind = classify_condition("x instanceof String");
    assert_eq!(kind, PredicateKind::TypeCheck);
}
/// For `x instanceof String` the target is the left-hand operand `x`.
#[test]
fn target_instanceof_x_string() {
    let outcome = classify_condition_with_target("x instanceof String");
    assert_eq!(outcome.0, PredicateKind::TypeCheck);
    assert_eq!(outcome.1.as_deref(), Some("x"));
}
/// For `obj instanceof Integer` the target is the left-hand operand `obj`.
#[test]
fn target_instanceof_obj_integer() {
    let (kind, target) = classify_condition_with_target("obj instanceof Integer");
    assert_eq!(
        (kind, target.as_deref()),
        (PredicateKind::TypeCheck, Some("obj"))
    );
}
// ── ShellMetaValidated classification ─────────────────────────────────
/// `.contains("<metachar>")` with a double-quoted shell metacharacter
/// classifies as `ShellMetaValidated`.
#[test]
fn classify_shell_metachar_contains_rust() {
    let conditions = [
        "input.contains(\";\")",
        "cmd.contains(\"|\")",
        "s.contains(\"&\")",
        "s.contains(\"`\")",
        "s.contains(\"$\")",
    ];
    for condition in conditions {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::ShellMetaValidated,
            "condition: {condition:?}"
        );
    }
}
/// JS `.includes(<metachar>)` classifies as `ShellMetaValidated` with
/// either quote style.
#[test]
fn classify_shell_metachar_includes_js() {
    let single_quoted = classify_condition("input.includes(';')");
    let double_quoted = classify_condition("cmd.includes(\"|\")");
    assert_eq!(single_quoted, PredicateKind::ShellMetaValidated);
    assert_eq!(double_quoted, PredicateKind::ShellMetaValidated);
}
/// Ruby `.include?("<metachar>")` classifies as `ShellMetaValidated`.
#[test]
fn classify_shell_metachar_include_question_ruby() {
    let kind = classify_condition("cmd.include?(\";\")");
    assert_eq!(kind, PredicateKind::ShellMetaValidated);
}
/// Python `"<metachar>" in x` (quoted metachar on the left) classifies as
/// `ShellMetaValidated` with either quote style.
#[test]
fn classify_shell_metachar_python_in() {
    let double_quoted = classify_condition("\";\" in cmd");
    let single_quoted = classify_condition("'|' in cmd");
    assert_eq!(double_quoted, PredicateKind::ShellMetaValidated);
    assert_eq!(single_quoted, PredicateKind::ShellMetaValidated);
}
/// A regex character class of shell metacharacters classifies as
/// `ShellMetaValidated`, both as a JS regex literal and as a string
/// pattern passed to `re.search`.
#[test]
fn classify_shell_metachar_regex_class() {
    let js_literal = classify_condition("cmd.match(/[;|&]/)");
    let python_search = classify_condition("re.search(\"[;|&]\", cmd)");
    assert_eq!(js_literal, PredicateKind::ShellMetaValidated);
    assert_eq!(python_search, PredicateKind::ShellMetaValidated);
}
/// `.contains("<ordinary text>")` is not a shell-metachar rejection.
#[test]
fn classify_non_metachar_contains_stays_allowlist() {
    // None of these needles is a shell metacharacter, so the condition must
    // not be credited as ShellMetaValidated; it keeps the pre-existing
    // AllowlistCheck classification.
    let conditions = [
        "input.contains(\"foo\")",
        "path.contains(\"..\")",
        "name.contains(\"admin\")",
    ];
    for condition in conditions {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::AllowlistCheck,
            "condition: {condition:?}"
        );
    }
}
/// Identifier-on-the-left membership tests stay `AllowlistCheck`.
#[test]
fn classify_allowlist_membership_unaffected() {
    // ShellMeta is triggered only by a quoted metachar on the LEFT of
    // ` in `; with an identifier there, `in` / `not in` remain allowlist
    // checks.
    let positive = classify_condition("cmd in ALLOWED");
    let negative = classify_condition("cmd not in ALLOWED");
    assert_eq!(positive, PredicateKind::AllowlistCheck);
    assert_eq!(negative, PredicateKind::AllowlistCheck);
}
/// For a shell-metachar `.contains(...)` check the target is the receiver.
#[test]
fn target_shell_metachar_receiver() {
    let outcome = classify_condition_with_target("input.contains(\";\")");
    assert_eq!(outcome.0, PredicateKind::ShellMetaValidated);
    assert_eq!(outcome.1.as_deref(), Some("input"));
}
// ── BoundedLength classification ──────────────────────────────────────
/// `.len()` compared against a bound well above one classifies as
/// `BoundedLength`, for `>`, `>=` and `<`.
#[test]
fn classify_bounded_length_rust_len() {
    let conditions = ["input.len() > 100", "s.len() >= 256", "s.len() < 4096"];
    for condition in conditions {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::BoundedLength,
            "condition: {condition:?}"
        );
    }
}
/// The JS property form `.length > N` also classifies as `BoundedLength`.
#[test]
fn classify_bounded_length_js_length() {
    let kind = classify_condition("input.length > 100");
    assert_eq!(kind, PredicateKind::BoundedLength);
}
/// Non-empty checks on `.len()` are plain comparisons.
#[test]
fn classify_non_empty_len_stays_comparison() {
    // `.len() > 0` and `.len() >= 1` only test for non-emptiness. They are
    // NOT bounded-length validations and must fall through to Comparison.
    let greater_than_zero = classify_condition("input.len() > 0");
    let at_least_one = classify_condition("s.len() >= 1");
    assert_eq!(greater_than_zero, PredicateKind::Comparison);
    assert_eq!(at_least_one, PredicateKind::Comparison);
}
// ── Helper sanity ─────────────────────────────────────────────────────
/// `is_shell_metachar_rejection` accepts `.contains("<c>")` for each of the
/// listed shell metacharacters.
#[test]
fn shell_metachar_rejection_detects_common_chars() {
    let metachars = [";", "|", "&", "`", "$", ">", "<"];
    for meta in metachars {
        let condition = format!("x.contains(\"{meta}\")");
        let detected = is_shell_metachar_rejection(&condition);
        assert!(
            detected,
            "should detect metachar {meta:?} in {condition:?}"
        );
    }
}
/// Ordinary needles (`foo`, `admin`, `..`) are not shell-metachar rejections.
#[test]
fn shell_metachar_rejection_rejects_non_metachar() {
    let conditions = [
        "x.contains(\"foo\")",
        "x.contains(\"admin\")",
        "x.contains(\"..\")",
    ];
    for condition in conditions {
        assert!(
            !is_shell_metachar_rejection(condition),
            "condition: {condition:?}"
        );
    }
}
/// An escaped newline needle (`"\n"` in the condition text) is accepted as a
/// shell-metachar rejection.
#[test]
fn shell_metachar_rejection_handles_escapes() {
    let condition = "x.contains(\"\\n\")";
    assert!(is_shell_metachar_rejection(condition));
}
/// Bounds of 0 or 1 are emptiness tests, not bounded-length checks.
#[test]
fn bounded_length_rejects_zero_and_one() {
    let conditions = ["x.len() > 0", "x.len() >= 1", "x.len() < 1"];
    for condition in conditions {
        assert!(
            !is_bounded_length_check(condition),
            "condition: {condition:?}"
        );
    }
}
/// A bound of 2 is already accepted, as is `<=` with a larger bound.
#[test]
fn bounded_length_accepts_small_bounds() {
    let conditions = ["x.len() > 2", "x.len() <= 256"];
    for condition in conditions {
        assert!(
            is_bounded_length_check(condition),
            "condition: {condition:?}"
        );
    }
}
}