mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
Performance and precision pass (#64)
This commit is contained in:
parent
c7c5e0f3a1
commit
fb698d2c27
97 changed files with 9932 additions and 517 deletions
|
|
@ -211,6 +211,41 @@ fn is_bounded_length_check(lower: &str) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
/// Normalise an identifier to its lowercase snake_case form so that
/// camelCase / PascalCase / SCREAMING variants line up against snake-cased
/// prefix lists (`is_safe`, `is_authorized`, `is_authenticated`).
///
/// An underscore is inserted before an ASCII uppercase letter when it sits on
/// a case boundary:
/// - lowercase/digit → uppercase (`isSafe` → `is_safe`)
/// - uppercase → uppercase-then-lowercase, i.e. the last capital of an
///   acronym that starts the next word (`HTTPClient` → `http_client`)
///
/// Every character is then ASCII-lowercased; non-ASCII characters pass
/// through untouched and never count as a case boundary. Inputs already in
/// snake_case round-trip unchanged (`is_safe` → `is_safe`), and the empty
/// string maps to the empty string.
///
/// Used by `classify_condition` so a sanitiser predicate authored in any
/// of the dominant identifier conventions classifies the same.
pub(crate) fn to_snake_lower(s: &str) -> String {
    // Byte length is a lower bound on the output size; the slack covers a
    // few inserted underscores without a reallocation.
    let mut out = String::with_capacity(s.len() + 4);
    let mut prev: Option<char> = None;
    let mut rest = s.chars().peekable();

    while let Some(c) = rest.next() {
        // A separator is only ever needed in front of an uppercase letter
        // that has a predecessor (never at the very start of the input).
        let needs_separator = c.is_ascii_uppercase()
            && prev.is_some_and(|p| {
                // `fooBar` / `foo1Bar`: a new word starts right after a
                // lowercase letter or digit.
                let between_camel = p.is_ascii_lowercase() || p.is_ascii_digit();
                // `HTTPClient`: inside a run of capitals, only the capital
                // that is followed by a lowercase letter begins a new word.
                let acronym_end = p.is_ascii_uppercase()
                    && rest.peek().is_some_and(|n| n.is_ascii_lowercase());
                between_camel || acronym_end
            });
        if needs_separator {
            // The previous output char is a letter or digit here, so this
            // can never produce a doubled underscore.
            out.push('_');
        }
        out.push(c.to_ascii_lowercase());
        prev = Some(c);
    }
    out
}
|
||||
|
||||
/// Parse a leading non-negative integer literal (decimal only).
|
||||
fn parse_leading_uint(s: &str) -> Option<u64> {
|
||||
let mut n: u64 = 0;
|
||||
|
|
@ -384,13 +419,35 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
.unwrap_or(callee_part)
|
||||
.trim();
|
||||
|
||||
// Derive a snake-cased form from the **original** text so that
|
||||
// camelCase identifiers (`isSafeRemoteUrl`, `isAuthorized`,
|
||||
// `isValidUUID`) classify against the snake-cased prefix list
|
||||
// (`is_safe`, `is_authorized`, `is_authenticated`) the same as
|
||||
// `is_safe_remote_url` would. Required to recognise CVE-2026-33486
|
||||
// (roadiz/documents `isSafeRemoteUrl` SSRF sanitiser) as a
|
||||
// ValidationCall on the patched fixture. Mirrors the trim/strip
|
||||
// pipeline above on case-preserved text so the snake form lines up
|
||||
// with `bare`.
|
||||
let orig_trimmed = text.trim_start_matches(['(', '!', ' ', '\t']);
|
||||
let orig_trimmed = orig_trimmed
|
||||
.strip_prefix("not ")
|
||||
.unwrap_or(orig_trimmed)
|
||||
.trim();
|
||||
let orig_callee_part = orig_trimmed.split('(').next().unwrap_or("");
|
||||
let orig_bare = orig_callee_part
|
||||
.rsplit(['.', ':'])
|
||||
.next()
|
||||
.unwrap_or(orig_callee_part)
|
||||
.trim();
|
||||
let bare_snake = to_snake_lower(orig_bare);
|
||||
|
||||
// Validation
|
||||
if bare.contains("valid")
|
||||
|| bare.contains("check")
|
||||
|| bare.contains("verify")
|
||||
|| bare.starts_with("is_safe")
|
||||
|| bare.starts_with("is_authorized")
|
||||
|| bare.starts_with("is_authenticated")
|
||||
|| bare_snake.starts_with("is_safe")
|
||||
|| bare_snake.starts_with("is_authorized")
|
||||
|| bare_snake.starts_with("is_authenticated")
|
||||
{
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
|
|
@ -734,8 +791,12 @@ fn extract_validation_target(text: &str) -> Option<String> {
|
|||
// not corrupt the argument substring.
|
||||
let first_arg = first_call_arg(args_part)?;
|
||||
|
||||
// Strip reference operators (e.g. `&x` → `x`)
|
||||
// Strip reference operators (e.g. `&x` → `x`) and PHP variable sigil
|
||||
// (`$url` → `url`) so the extracted target lines up with the var-name
|
||||
// form used in branch-narrowing. Mirrors the `$` strip already done by
|
||||
// `extract_allowlist_target` for `in_array($cmd, $allowed)`.
|
||||
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
|
||||
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
|
||||
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
Some(first_arg.to_string())
|
||||
|
|
@ -991,6 +1052,63 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn classify_camelcase_safety_validators_are_validation_call() {
    // Each condition below must classify as a validation call even though
    // the callee is not written in snake_case.
    let conditions = [
        // Real-CVE shape: roadiz/documents `isSafeRemoteUrl($url)`
        // (CVE-2026-33486). Without snake-case normalisation the lowercased
        // callee `issaferemoteurl` would not match the `is_safe` prefix.
        "self::isSafeRemoteUrl($url)",
        "isAuthorized(user)",
        "isAuthenticated(req)",
        // Acronym handling: `isValidUUID` → `is_valid_uuid` → contains "valid".
        "isValidUUID(id)",
        // Snake-case round-trips unchanged.
        "is_safe_remote_url(x)",
    ];

    for condition in conditions {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::ValidationCall,
            "condition: {condition}"
        );
    }
}
|
||||
|
||||
#[test]
fn extract_validation_target_strips_php_dollar_sigil() {
    // A PHP argument such as `$url` must come back without its sigil so the
    // extracted target lines up with the var-name form used in branch
    // narrowing. Required for the CVE-2026-33486 patched fixture to silence
    // on `fopen($url, 'r')`.
    let cases = [
        ("self::isSafeRemoteUrl($url)", "url"),
        ("validate($input)", "input"),
    ];

    for (condition, expected) in cases {
        assert_eq!(
            extract_validation_target(condition).as_deref(),
            Some(expected),
            "condition: {condition}"
        );
    }
}
|
||||
|
||||
#[test]
fn to_snake_lower_handles_common_variants() {
    // (input, expected) pairs, one per identifier convention that
    // `to_snake_lower` documents.
    let cases = [
        // camelCase and PascalCase.
        ("isSafeRemoteUrl", "is_safe_remote_url"),
        ("IsSafe", "is_safe"),
        // Acronym runs: trailing and leading.
        ("isValidUUID", "is_valid_uuid"),
        ("HTTPClient", "http_client"),
        // SCREAMING_SNAKE_CASE only needs lowercasing; the existing
        // underscores must not be doubled.
        ("IS_SAFE", "is_safe"),
        // A digit followed by an uppercase letter is a word boundary.
        ("sha256Hash", "sha256_hash"),
        // Already-normalised and degenerate inputs round-trip unchanged.
        ("is_safe", "is_safe"),
        ("validate", "validate"),
        ("", ""),
    ];

    for (input, expected) in cases {
        assert_eq!(to_snake_lower(input), expected, "input: {input:?}");
    }
}
|
||||
|
||||
#[test]
|
||||
fn classify_validation_requires_paren() {
|
||||
// `x_valid == true` should NOT be ValidationCall, no `(` call syntax.
|
||||
|
|
|
|||
|
|
@ -1523,6 +1523,121 @@ fn apply_input_validator_branch_narrowing(
|
|||
}
|
||||
}
|
||||
|
||||
/// JS/TS Array-method validator-callback narrowing.
|
||||
///
|
||||
/// `arr.filter(isSafeIdentifier)`, `arr.find(isValidId)`, and the
|
||||
/// `findLast` variant are gating array methods whose return value is
|
||||
/// composed of elements that passed the callback. When the callback
|
||||
/// argument resolves to a name `classify_input_validator_callee` tags
|
||||
/// as `BooleanTrueIsValid` (`isValid…`, `isSafe…`, `hasValid…` and
|
||||
/// snake-case variants), every element of the result satisfies the
|
||||
/// validator, so the call's downstream sinks see the same flow as
|
||||
/// validated taint.
|
||||
///
|
||||
/// The companion `if (isValidX(x)) use(x)` narrowing already exists in
|
||||
/// [`apply_input_validator_branch_narrowing`]; this is the same idea
|
||||
/// lifted to the call site for filter/find chains so taint stops at
|
||||
/// the gate rather than leaking through subsequent
|
||||
/// `Array[index]`/template/sink reads.
|
||||
///
|
||||
/// Strict-additive: if the callback's name does not match the
|
||||
/// validator pattern (anonymous arrow, opaque identifier, etc.), the
|
||||
/// helper is a no-op and the existing default propagation runs
|
||||
/// unchanged.
|
||||
///
|
||||
/// Motivated by CVE-2026-42353 (i18next-http-middleware path
|
||||
/// traversal): the patched fix is `languages.filter(utils.isSafeIdentifier)`
|
||||
/// before forwarding `languages` into the backend connector, and the
|
||||
/// dual deferred TS-side gap CVE-2026-25544 (Payload sqli).
|
||||
fn try_array_method_validator_callback_narrowing(
|
||||
inst: &SsaInst,
|
||||
info: &NodeInfo,
|
||||
callee: &str,
|
||||
args: &[SmallVec<[SsaValue; 2]>],
|
||||
return_bits: &mut Cap,
|
||||
return_origins: &mut SmallVec<[TaintOrigin; 2]>,
|
||||
state: &mut SsaTaintState,
|
||||
transfer: &SsaTaintTransfer,
|
||||
ssa: &SsaBody,
|
||||
) -> bool {
|
||||
if !matches!(transfer.lang, Lang::JavaScript | Lang::TypeScript) {
|
||||
return false;
|
||||
}
|
||||
// Method-call shape: callee text contains a `.` and the trailing
|
||||
// segment is one of the gating array methods. `findIndex` /
|
||||
// `every` / `some` return scalar shapes (index, boolean) rather
|
||||
// than a filtered collection so they are excluded — element-level
|
||||
// validation does not apply to a numeric/boolean result.
|
||||
let dot = match callee.rfind('.') {
|
||||
Some(p) => p,
|
||||
None => return false,
|
||||
};
|
||||
let method = &callee[dot + 1..];
|
||||
if !matches!(method, "filter" | "find" | "findLast") {
|
||||
return false;
|
||||
}
|
||||
// The first positional argument's callable name. Two channels:
|
||||
// 1. `info.arg_callees` — populated by `extract_arg_callees`
|
||||
// (`call_ident_of` walks call shapes inside the arg). Catches
|
||||
// `arr.filter(cb())` and dotted-callback shapes where the
|
||||
// tree-sitter node kind reaches `Kind::CallFn` or
|
||||
// `Kind::CallMethod`.
|
||||
// 2. SSA `value_defs[v].var_name` for the arg's first SSA value
|
||||
// — covers the bare-identifier shape (`arr.filter(cb)`)
|
||||
// where the AST node is a plain identifier and
|
||||
// `extract_arg_callees` pushes `None` because there is no
|
||||
// call to recurse into. This is the shape every patched
|
||||
// CVE fix uses, so it is the dominant source of validator
|
||||
// callbacks in real code.
|
||||
let arg0 = match args.first() {
|
||||
Some(a) => a,
|
||||
None => return false,
|
||||
};
|
||||
let cb_from_arg_callees = info.arg_callees.first().and_then(|s| s.as_deref());
|
||||
let cb_from_ssa = arg0.iter().find_map(|&v| {
|
||||
ssa.value_defs
|
||||
.get(v.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
});
|
||||
let cb_name = match cb_from_arg_callees.or(cb_from_ssa) {
|
||||
Some(n) => n,
|
||||
None => return false,
|
||||
};
|
||||
if crate::ssa::type_facts::classify_input_validator_callee(cb_name)
|
||||
!= Some(InputValidatorPolarity::BooleanTrueIsValid)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Strip every cap from the return value: the returned array (or
|
||||
// single found element) is composed exclusively of elements the
|
||||
// recognised validator approved. `Cap::all()` is the conservative
|
||||
// ceiling because the validator's body is opaque to this layer; a
|
||||
// future extension could narrow caps by inspecting the body's
|
||||
// rejection patterns.
|
||||
*return_bits = Cap::empty();
|
||||
return_origins.clear();
|
||||
|
||||
// Mark the result's var_name as validated, mirroring the
|
||||
// [`apply_input_validator_branch_narrowing`] insertion. Useful
|
||||
// for direct same-name reads of the rebound array (`arr =
|
||||
// arr.filter(p)` then `arr.length`) but does not propagate
|
||||
// through Assigns to differently-named bindings (`const lng =
|
||||
// arr[0]`); the `return_bits` strip above is what gates those
|
||||
// downstream flows.
|
||||
if let Some(name) = ssa
|
||||
.value_defs
|
||||
.get(inst.value.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
{
|
||||
if let Some(sym) = transfer.interner.get(name) {
|
||||
state.validated_must.insert(sym);
|
||||
state.validated_may.insert(sym);
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Find the latest reaching SSA definition for `var_name` at the end of
|
||||
/// `block`. Mirrors `crate::constraint::lower::resolve_single_var` but
|
||||
/// avoids the cross-module privacy leak: callers in this module need it
|
||||
|
|
@ -4081,6 +4196,24 @@ pub(super) fn transfer_inst(
|
|||
}
|
||||
}
|
||||
|
||||
// Receiver-side validator strip. Some method-call validators
|
||||
// raise on failure rather than transforming a return value,
|
||||
// so the canonical `Sanitizer` mechanism (which clears the
|
||||
// return) is the wrong shape. After the call returns, the
|
||||
// *receiver* (and any args carrying the same equivalence
|
||||
// class) is proven to satisfy the validated property. Strip
|
||||
// the registered cap from receiver+args here so that
|
||||
// `path.relative_to(base)` clears `Cap::FILE_IO` from
|
||||
// `path` for downstream uses. Motivated by CVE-2024-23334
|
||||
// (aiohttp StaticResource symlink-bypass): the patched code
|
||||
// calls `filepath.relative_to(self._directory)` inside a
|
||||
// try/except and serves `filepath` afterwards.
|
||||
if let Some(cap) =
|
||||
crate::labels::lookup_receiver_validator(transfer.lang.as_str(), callee)
|
||||
{
|
||||
strip_cap_from_call_args(args, receiver, state, cap);
|
||||
}
|
||||
|
||||
// Alias-aware sanitization: propagate through must-aliased field paths
|
||||
if !sanitizer_bits.is_empty() {
|
||||
if let Some(aliases) = transfer.base_aliases {
|
||||
|
|
@ -4444,6 +4577,28 @@ pub(super) fn transfer_inst(
|
|||
}
|
||||
}
|
||||
|
||||
// JS/TS array-method validator-callback narrowing. When a
|
||||
// call shape matches `<arr>.filter(<recognised-validator>)`
|
||||
// (or `find` / `findLast`), strip the caps that flowed into
|
||||
// `return_bits` from the receiver — the result holds only
|
||||
// elements the validator approved. Strict-additive: the
|
||||
// helper is a no-op when the callback name does not match
|
||||
// the BooleanTrueIsValid bucket, leaving the default
|
||||
// propagation result unchanged. See
|
||||
// [`try_array_method_validator_callback_narrowing`] for the
|
||||
// motivating CVE pair.
|
||||
try_array_method_validator_callback_narrowing(
|
||||
inst,
|
||||
info,
|
||||
callee,
|
||||
args,
|
||||
&mut return_bits,
|
||||
&mut return_origins,
|
||||
state,
|
||||
transfer,
|
||||
ssa,
|
||||
);
|
||||
|
||||
// Constructor cap narrowing: a `new X(...)` call returns an object
|
||||
// instance, not a string. Caps that name a string-shaped sink
|
||||
// pattern (path argument, format string, URL component, JSON
|
||||
|
|
|
|||
|
|
@ -6779,3 +6779,83 @@ const handler = (req, res) => {
|
|||
"expected taint flow via double-call chain rebinding; got 0 findings",
|
||||
);
|
||||
}
|
||||
|
||||
/// CVE-2026-42353 (i18next-http-middleware): the patched code passes a
/// tainted array through `arr.filter(isSafeIdentifier)` before using it.
/// `try_array_method_validator_callback_narrowing` recognises the
/// `<arr>.filter(<recognised-validator>)` shape on JS/TS and removes the
/// receiver-derived caps from the call result, so the downstream
/// `arr[0]` → template-literal → `fs.readFileSync` chain must not be
/// reported. The callback here is a bare identifier, which is the
/// dominant patched shape: `extract_arg_callees` yields `None` for plain
/// identifiers, so the helper has to fall back to the SSA value's
/// `var_name` channel.
#[test]
fn cve_2026_42353_filter_isvalid_callback_strips_taint() {
    let js_fixture = br#"
const fs = require('fs');
function isSafeIdentifier(v) {
return typeof v === 'string' && v.indexOf('..') === -1 && v.indexOf('/') === -1;
}
function handler(req, res) {
let languages = req.query.lng ? req.query.lng.split(' ') : [];
languages = languages.filter(isSafeIdentifier);
const lng = languages[0];
const filename = `/locales/${lng}.json`;
fs.readFileSync(filename);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js_fixture, "javascript", grammar);

    // Analyse the file against its own summaries.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    assert!(
        findings.is_empty(),
        "expected no taint flow when filtered through isSafeIdentifier; got {} findings",
        findings.len(),
    );
}
|
||||
|
||||
/// Negative counterpart of the array-method validator-callback gate: the
/// same handler, but the `filter` callback (`pickFirst`) is not a
/// recognised validator, so the path-traversal flow must still be
/// reported end-to-end. Pins the precision claim that the strip depends
/// on the callback's identity and is not a blanket kill on every
/// `<arr>.filter` call.
#[test]
fn cve_2026_42353_filter_without_validator_callback_preserves_taint() {
    let js_fixture = br#"
const fs = require('fs');
function pickFirst(v) { return true; }
function handler(req, res) {
let languages = req.query.lng ? req.query.lng.split(' ') : [];
languages = languages.filter(pickFirst);
const lng = languages[0];
const filename = `/locales/${lng}.json`;
fs.readFileSync(filename);
}
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let parsed = parse_lang(js_fixture, "javascript", grammar);

    // Analyse the file against its own summaries.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    assert!(
        !findings.is_empty(),
        "expected taint flow via filter(pickFirst) — pickFirst is not a recognised validator and must not strip taint; got 0 findings",
    );
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue