From d8f88d97bb9bd7682e7efccabad36b0417bda65f Mon Sep 17 00:00:00 2001 From: pitboss Date: Mon, 18 May 2026 08:02:10 -0500 Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2010:=20Track=20J.8=20+=20Tra?= =?UTF-8?q?ck=20L.8=20=E2=80=94=20`PROTOTYPE=5FPOLLUTION`=20corpus=20+=20J?= =?UTF-8?q?S/TS=20prototype=20chain=20hook?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dynamic/corpus.rs | 4 +- .../corpus/prototype_pollution/javascript.rs | 64 +++ src/dynamic/corpus/prototype_pollution/mod.rs | 20 + .../corpus/prototype_pollution/typescript.rs | 50 +++ src/dynamic/corpus/registry.rs | 67 ++- src/dynamic/framework/adapters/mod.rs | 6 + .../framework/adapters/pp_json_deep_assign.rs | 156 +++++++ .../framework/adapters/pp_lodash_merge.rs | 145 +++++++ .../framework/adapters/pp_object_assign.rs | 136 ++++++ src/dynamic/framework/mod.rs | 31 +- src/dynamic/framework/registry.rs | 9 +- src/dynamic/lang/js_shared.rs | 139 +++++++ src/dynamic/oracle.rs | 114 +++++- src/dynamic/probe.rs | 24 ++ src/dynamic/telemetry.rs | 2 +- .../prototype_pollution/javascript/benign.js | 22 + .../prototype_pollution/javascript/vuln.js | 20 + .../prototype_pollution/typescript/benign.ts | 17 + .../prototype_pollution/typescript/vuln.ts | 16 + tests/prototype_pollution_corpus.rs | 386 ++++++++++++++++++ 20 files changed, 1406 insertions(+), 22 deletions(-) create mode 100644 src/dynamic/corpus/prototype_pollution/javascript.rs create mode 100644 src/dynamic/corpus/prototype_pollution/mod.rs create mode 100644 src/dynamic/corpus/prototype_pollution/typescript.rs create mode 100644 src/dynamic/framework/adapters/pp_json_deep_assign.rs create mode 100644 src/dynamic/framework/adapters/pp_lodash_merge.rs create mode 100644 src/dynamic/framework/adapters/pp_object_assign.rs create mode 100644 tests/dynamic_fixtures/prototype_pollution/javascript/benign.js create mode 100644 tests/dynamic_fixtures/prototype_pollution/javascript/vuln.js create mode 
100644 tests/dynamic_fixtures/prototype_pollution/typescript/benign.ts create mode 100644 tests/dynamic_fixtures/prototype_pollution/typescript/vuln.ts create mode 100644 tests/prototype_pollution_corpus.rs diff --git a/src/dynamic/corpus.rs b/src/dynamic/corpus.rs index 33c78f61..1663649c 100644 --- a/src/dynamic/corpus.rs +++ b/src/dynamic/corpus.rs @@ -54,6 +54,7 @@ mod header_injection; mod ldap; mod open_redirect; mod path_trav; +mod prototype_pollution; mod sqli; mod ssrf; mod ssti; @@ -96,7 +97,8 @@ pub use crate::dynamic::oracle::Oracle; /// | 11 | 2026-05-17 | Phase 07 / Track J.5: `XPATH_INJECTION` cap lit for Java / Python / PHP / JS; `ProbeKind::Xpath`; `LdapResultCountGreaterThan` renamed to `QueryResultCountGreaterThan` (shared by LDAP + XPath); `xpath_corpus.xml` staged in workdir | /// | 12 | 2026-05-18 | Phase 08 / Track J.6: `HEADER_INJECTION` cap lit for Java / Python / PHP / Ruby / JS / Go / Rust; `ProbeKind::HeaderEmit` + `ProbePredicate::HeaderInjected`; per-lang `setHeader` shims | /// | 13 | 2026-05-18 | Phase 09 / Track J.7: `OPEN_REDIRECT` cap lit for Java / Python / PHP / Ruby / JS / Go / Rust; `ProbeKind::Redirect` + `ProbePredicate::RedirectHostNotIn`; per-lang `sendRedirect` / `redirect()` shims | -pub const CORPUS_VERSION: u32 = 13; +/// | 14 | 2026-05-18 | Phase 10 / Track J.8: `PROTOTYPE_POLLUTION` cap lit for JS / TS; `ProbeKind::PrototypePollution` + `ProbePredicate::PrototypeCanaryTouched`; Node harness installs `Proxy`-style canary trap on `Object.prototype.__nyx_canary` | +pub const CORPUS_VERSION: u32 = 14; /// Where a payload originated. #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/dynamic/corpus/prototype_pollution/javascript.rs b/src/dynamic/corpus/prototype_pollution/javascript.rs new file mode 100644 index 00000000..1654275e --- /dev/null +++ b/src/dynamic/corpus/prototype_pollution/javascript.rs @@ -0,0 +1,64 @@ +//! JavaScript `Cap::PROTOTYPE_POLLUTION` payloads. +//! +//! 
Vuln payload: a JSON literal whose top-level key is `__proto__`. +//! When the host's deep-merge / `Object.assign` / JSON-parse-then- +//! deep-assign sink walks the key into the merge target, the +//! `__proto__` reference reaches `Object.prototype` and writes +//! `__nyx_canary`. The harness's instrumented setter trap records a +//! `ProbeKind::PrototypePollution { property: "__nyx_canary", … }` +//! probe; the predicate +//! [`crate::dynamic::oracle::ProbePredicate::PrototypeCanaryTouched`] +//! fires on the captured canary write. +//! +//! Benign control: a JSON literal whose top-level key is the regular +//! property `data`. Naive deep-merge walks `target.data` rather than +//! `target.__proto__`, so the prototype chain stays untouched and the +//! predicate stays clear. The matching fixture +//! (`tests/dynamic_fixtures/prototype_pollution/javascript/benign.js`) +//! additionally demonstrates the canonical mitigation pattern of +//! using `Object.create(null)` as the merge target — static analysis +//! of the fixture surfaces the prototype-less target while the +//! runtime differential is driven by the absence of a `__proto__` key +//! in the payload bytes. 
+ +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +const CANARY: &str = "__nyx_canary"; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: br#"{"__proto__":{"__nyx_canary":"pwned"}}"#, + label: "prototype-pollution-js-proto-key", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { canary: CANARY }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 14, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/prototype_pollution/javascript/vuln.js"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::PrototypeCanaryTouched { canary: CANARY }], + benign_control: Some(PayloadRef { + label: "prototype-pollution-js-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: br#"{"data":{"__nyx_canary":"pwned"}}"#, + label: "prototype-pollution-js-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { canary: CANARY }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 14, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/prototype_pollution/javascript/benign.js"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/prototype_pollution/mod.rs b/src/dynamic/corpus/prototype_pollution/mod.rs new file mode 100644 index 00000000..f73479ad --- /dev/null +++ b/src/dynamic/corpus/prototype_pollution/mod.rs @@ -0,0 +1,20 @@ +//! Prototype-pollution (`Cap::PROTOTYPE_POLLUTION`) per-language +//! payload slices. +//! +//! Phase 10 (Track J.8) carves the JavaScript / TypeScript prototype- +//! pollution gadget against three sink families: `lodash.merge`, +//! `Object.assign` with tainted RHS, and `JSON.parse`-then-deep-assign. +//! 
Every vuln payload binds a JSON literal whose top-level key is +//! `__proto__`; the harness's instrumented deep-merge walks the key +//! into `Object.prototype` and a `Proxy`-style setter trap on +//! `Object.prototype.__nyx_canary` records a +//! [`crate::dynamic::probe::ProbeKind::PrototypePollution`] probe. The +//! paired benign control sends a JSON literal whose top-level key is +//! the regular property `data`, leaving the prototype chain +//! untouched. The +//! [`crate::dynamic::oracle::ProbePredicate::PrototypeCanaryTouched`] +//! predicate fires only on probes whose `property` equals the canary +//! name (`__nyx_canary`). + +pub mod javascript; +pub mod typescript; diff --git a/src/dynamic/corpus/prototype_pollution/typescript.rs b/src/dynamic/corpus/prototype_pollution/typescript.rs new file mode 100644 index 00000000..599345e1 --- /dev/null +++ b/src/dynamic/corpus/prototype_pollution/typescript.rs @@ -0,0 +1,50 @@ +//! TypeScript `Cap::PROTOTYPE_POLLUTION` payloads. +//! +//! Mirrors [`super::javascript`] — the runtime is Node.js in both +//! cases, so the payload shape and oracle predicate are identical. +//! The per-language slice exists so the lang-aware corpus resolver +//! pairs TS vuln payloads against TS benign controls without crossing +//! the JS slice (and so the fixture paths point at the TS-specific +//! fixtures the static-analysis side consumes). 
+ +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +const CANARY: &str = "__nyx_canary"; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: br#"{"__proto__":{"__nyx_canary":"pwned"}}"#, + label: "prototype-pollution-ts-proto-key", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { canary: CANARY }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 14, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/prototype_pollution/typescript/vuln.ts"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::PrototypeCanaryTouched { canary: CANARY }], + benign_control: Some(PayloadRef { + label: "prototype-pollution-ts-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: br#"{"data":{"__nyx_canary":"pwned"}}"#, + label: "prototype-pollution-ts-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { canary: CANARY }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 14, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/prototype_pollution/typescript/benign.ts"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/registry.rs b/src/dynamic/corpus/registry.rs index fad2736e..45e8ed1b 100644 --- a/src/dynamic/corpus/registry.rs +++ b/src/dynamic/corpus/registry.rs @@ -24,8 +24,8 @@ use std::collections::HashMap; use std::sync::OnceLock; use super::{ - cmdi, deserialize, fmt_string, header_injection, ldap, open_redirect, path_trav, sqli, ssrf, - ssti, xpath, xss, xxe, + cmdi, deserialize, fmt_string, header_injection, ldap, open_redirect, path_trav, + prototype_pollution, sqli, ssrf, ssti, xpath, xss, xxe, }; use super::{CapCorpus, 
CuratedPayload, Oracle}; use crate::dynamic::oracle::ProbePredicate; @@ -42,8 +42,7 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits() | Cap::JSON_PARSE.bits() | Cap::CRYPTO.bits() | Cap::UNAUTHORIZED_ID.bits() - | Cap::DATA_EXFIL.bits() - | Cap::PROTOTYPE_POLLUTION.bits(); + | Cap::DATA_EXFIL.bits(); /// Flat `(Cap, Lang, slice)` table. A single cap can carry per-language /// variants — that's the whole reason this layer exists. @@ -89,6 +88,16 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[ (Cap::OPEN_REDIRECT, Lang::JavaScript, open_redirect::js::PAYLOADS), (Cap::OPEN_REDIRECT, Lang::Go, open_redirect::go::PAYLOADS), (Cap::OPEN_REDIRECT, Lang::Rust, open_redirect::rust::PAYLOADS), + ( + Cap::PROTOTYPE_POLLUTION, + Lang::JavaScript, + prototype_pollution::javascript::PAYLOADS, + ), + ( + Cap::PROTOTYPE_POLLUTION, + Lang::TypeScript, + prototype_pollution::typescript::PAYLOADS, + ), ]; /// Reserved for per-cap oracle defaults. Empty in Phase 02; populated by @@ -302,6 +311,7 @@ mod tests { assert!(!payloads_for(Cap::XPATH_INJECTION).is_empty()); assert!(!payloads_for(Cap::HEADER_INJECTION).is_empty()); assert!(!payloads_for(Cap::OPEN_REDIRECT).is_empty()); + assert!(!payloads_for(Cap::PROTOTYPE_POLLUTION).is_empty()); } #[test] @@ -314,7 +324,6 @@ mod tests { Cap::CRYPTO, Cap::UNAUTHORIZED_ID, Cap::DATA_EXFIL, - Cap::PROTOTYPE_POLLUTION, ]; for cap in unsupported { assert!( @@ -349,6 +358,7 @@ mod tests { Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, + Cap::PROTOTYPE_POLLUTION, ] { let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign); assert!(has_vuln, "{cap:?} must have at least one vuln payload"); @@ -402,6 +412,7 @@ mod tests { Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, + Cap::PROTOTYPE_POLLUTION, ]; for cap in caps { for p in payloads_for(cap) { @@ -430,6 +441,7 @@ mod tests { Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, + Cap::PROTOTYPE_POLLUTION, ]; for cap in 
caps { for p in payloads_for(cap) { @@ -545,6 +557,7 @@ mod tests { Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, + Cap::PROTOTYPE_POLLUTION, ]; for cap in caps { for p in payloads_for(cap).iter().filter(|p| p.is_benign) { @@ -849,6 +862,50 @@ mod tests { } } + #[test] + fn prototype_pollution_has_per_lang_slices_for_phase_10() { + // Phase 10 (Track J.8) acceptance: PROTOTYPE_POLLUTION + // registers payloads in JavaScript / TypeScript and the + // lang-aware lookup never returns empty for either. + for lang in [Lang::JavaScript, Lang::TypeScript] { + assert!( + !payloads_for_lang(Cap::PROTOTYPE_POLLUTION, lang).is_empty(), + "PROTOTYPE_POLLUTION must have at least one payload for {lang:?}", + ); + } + // Other langs not covered. + for lang in [ + Lang::Rust, + Lang::C, + Lang::Cpp, + Lang::Go, + Lang::Java, + Lang::Php, + Lang::Python, + Lang::Ruby, + ] { + assert!( + payloads_for_lang(Cap::PROTOTYPE_POLLUTION, lang).is_empty(), + "PROTOTYPE_POLLUTION has unexpected payloads for {lang:?}", + ); + } + } + + #[test] + fn prototype_pollution_payloads_pair_benign_controls_per_lang() { + for lang in [Lang::JavaScript, Lang::TypeScript] { + let slice = payloads_for_lang(Cap::PROTOTYPE_POLLUTION, lang); + let vuln = slice + .iter() + .find(|p| !p.is_benign) + .expect("each lang must have a PROTOTYPE_POLLUTION vuln payload"); + let resolved = + super::resolve_benign_control_lang(vuln, Cap::PROTOTYPE_POLLUTION, lang) + .expect("lang-aware benign control must resolve"); + assert!(resolved.is_benign); + } + } + #[test] fn deserialize_payloads_pair_benign_controls_per_lang() { // The lang-aware resolver must find the paired benign control diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index 6a1c5a8b..4fee76c7 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -26,6 +26,9 @@ pub mod ldap_python; pub mod ldap_spring; pub mod php_twig; pub mod php_unserialize; +pub mod 
pp_json_deep_assign; +pub mod pp_lodash_merge; +pub mod pp_object_assign; pub mod python_jinja2; pub mod python_pickle; pub mod redirect_go; @@ -62,6 +65,9 @@ pub use ldap_python::LdapPythonAdapter; pub use ldap_spring::LdapSpringAdapter; pub use php_twig::PhpTwigAdapter; pub use php_unserialize::PhpUnserializeAdapter; +pub use pp_json_deep_assign::{PpJsonDeepAssignJsAdapter, PpJsonDeepAssignTsAdapter}; +pub use pp_lodash_merge::{PpLodashMergeJsAdapter, PpLodashMergeTsAdapter}; +pub use pp_object_assign::{PpObjectAssignJsAdapter, PpObjectAssignTsAdapter}; pub use python_jinja2::PythonJinja2Adapter; pub use python_pickle::PythonPickleAdapter; pub use redirect_go::RedirectGoAdapter; diff --git a/src/dynamic/framework/adapters/pp_json_deep_assign.rs b/src/dynamic/framework/adapters/pp_json_deep_assign.rs new file mode 100644 index 00000000..bd184d3a --- /dev/null +++ b/src/dynamic/framework/adapters/pp_json_deep_assign.rs @@ -0,0 +1,156 @@ +//! JavaScript / TypeScript [`super::super::FrameworkAdapter`] matching +//! the `JSON.parse`-followed-by-deep-assign prototype-pollution +//! gadget: the host parses an attacker-controlled JSON string and +//! then walks the resulting object into a vanilla target through a +//! hand-rolled recursive merge. +//! +//! Phase 10 (Track J.8). Fires when the function body invokes +//! `JSON.parse` and the surrounding source carries a recursive merge +//! helper (literal `function merge`, `function deepAssign`, +//! `function extend`, etc.) — the static-side signal that an +//! attacker-controlled JSON tree can reach `Object.prototype`. 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +fn callee_is_json_parse(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "parse") +} + +fn source_has_deep_merge_helper(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"function deepMerge", + b"function deepAssign", + b"function extend", + b"function merge", + b"function setByPath", + b"deepMerge =", + b"deepAssign =", + b"JSON.parse", + ]; + let mut json_parse = false; + let mut deep_merge = false; + for n in NEEDLES { + if file_bytes.windows(n.len()).any(|w| w == *n) { + if *n == b"JSON.parse" { + json_parse = true; + } else { + deep_merge = true; + } + } + } + json_parse && deep_merge +} + +fn build_binding(adapter_name: &'static str) -> FrameworkBinding { + FrameworkBinding { + adapter: adapter_name.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + } +} + +pub struct PpJsonDeepAssignJsAdapter; + +const JS_ADAPTER_NAME: &str = "pp-json-deep-assign-js"; + +impl FrameworkAdapter for PpJsonDeepAssignJsAdapter { + fn name(&self) -> &'static str { + JS_ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::JavaScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_json_parse); + let matches_source = source_has_deep_merge_helper(file_bytes); + if matches_call && matches_source { + Some(build_binding(JS_ADAPTER_NAME)) + } else { + None + } + } +} + +pub struct PpJsonDeepAssignTsAdapter; + +const TS_ADAPTER_NAME: &str = "pp-json-deep-assign-ts"; + +impl FrameworkAdapter for PpJsonDeepAssignTsAdapter { + fn name(&self) -> &'static str { + TS_ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::TypeScript + } + 
+ fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_json_parse); + let matches_source = source_has_deep_merge_helper(file_bytes); + if matches_call && matches_source { + Some(build_binding(TS_ADAPTER_NAME)) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_js(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_json_parse_with_deep_merge() { + let src: &[u8] = b"function deepMerge(t, s) { for (const k of Object.keys(s)) t[k] = s[k]; return t; }\n\ + function run(payload) { return deepMerge({}, JSON.parse(payload)); }\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("JSON.parse")], + ..Default::default() + }; + assert!(PpJsonDeepAssignJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_json_parse_without_merge() { + let src: &[u8] = b"function run(payload) { return JSON.parse(payload); }\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("JSON.parse")], + ..Default::default() + }; + assert!(PpJsonDeepAssignJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/pp_lodash_merge.rs b/src/dynamic/framework/adapters/pp_lodash_merge.rs new file mode 100644 index 00000000..68197b17 --- /dev/null +++ b/src/dynamic/framework/adapters/pp_lodash_merge.rs @@ -0,0 +1,145 @@ +//! JavaScript / TypeScript [`super::super::FrameworkAdapter`] matching +//! `lodash.merge` (and the equivalent `lodash.defaultsDeep`, +//! 
`lodash.set`) prototype-pollution sinks. +//! +//! Phase 10 (Track J.8). Fires when the function body invokes one of +//! the canonical lodash deep-merge entry points and the surrounding +//! source imports lodash. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +fn callee_is_lodash_merge(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "merge" | "mergeWith" | "defaultsDeep" | "set" | "setWith") +} + +fn source_imports_lodash(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"require('lodash')", + b"require(\"lodash\")", + b"require('lodash.merge')", + b"require(\"lodash.merge\")", + b"from 'lodash'", + b"from \"lodash\"", + b"from 'lodash/merge'", + b"from \"lodash/merge\"", + b"_.merge", + b"_.defaultsDeep", + b"_.set", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +fn build_binding(adapter_name: &'static str) -> FrameworkBinding { + FrameworkBinding { + adapter: adapter_name.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + } +} + +pub struct PpLodashMergeJsAdapter; + +const JS_ADAPTER_NAME: &str = "pp-lodash-merge-js"; + +impl FrameworkAdapter for PpLodashMergeJsAdapter { + fn name(&self) -> &'static str { + JS_ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::JavaScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_lodash_merge); + let matches_source = source_imports_lodash(file_bytes); + if matches_call && matches_source { + Some(build_binding(JS_ADAPTER_NAME)) + } else { + None + } + } +} + +pub struct PpLodashMergeTsAdapter; + +const TS_ADAPTER_NAME: &str = "pp-lodash-merge-ts"; + +impl 
FrameworkAdapter for PpLodashMergeTsAdapter { + fn name(&self) -> &'static str { + TS_ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::TypeScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_lodash_merge); + let matches_source = source_imports_lodash(file_bytes); + if matches_call && matches_source { + Some(build_binding(TS_ADAPTER_NAME)) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_js(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_lodash_merge_call() { + let src: &[u8] = b"const _ = require('lodash');\n\ + function run(payload) { return _.merge({}, payload); }\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("merge")], + ..Default::default() + }; + assert!(PpLodashMergeJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_function_without_lodash_import() { + let src: &[u8] = b"function add(a, b) { return a + b; }\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(PpLodashMergeJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/pp_object_assign.rs b/src/dynamic/framework/adapters/pp_object_assign.rs new file mode 100644 index 00000000..d986a856 --- /dev/null +++ b/src/dynamic/framework/adapters/pp_object_assign.rs @@ -0,0 +1,136 @@ +//! JavaScript / TypeScript [`super::super::FrameworkAdapter`] matching +//! `Object.assign` invocations with attacker-controlled RHS — the +//! 
shallowest prototype-pollution gadget. Detection is by callee name +//! (`Object.assign` / `Object.create`, qualified or bare) plus a file-level +//! substring check; spread (`{ ...src }`) fires only if summarised as such. +//! +//! Phase 10 (Track J.8). + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +fn callee_is_object_assign(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "assign" | "create") + && (name == "Object.assign" || name == "Object.create" || name == "assign" || name == "create") +} + +fn source_uses_object_assign(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"Object.assign", + b"Object.create", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +fn build_binding(adapter_name: &'static str) -> FrameworkBinding { + FrameworkBinding { + adapter: adapter_name.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + } +} + +pub struct PpObjectAssignJsAdapter; + +const JS_ADAPTER_NAME: &str = "pp-object-assign-js"; + +impl FrameworkAdapter for PpObjectAssignJsAdapter { + fn name(&self) -> &'static str { + JS_ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::JavaScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_object_assign); + let matches_source = source_uses_object_assign(file_bytes); + if matches_call && matches_source { + Some(build_binding(JS_ADAPTER_NAME)) + } else { + None + } + } +} + +pub struct PpObjectAssignTsAdapter; + +const TS_ADAPTER_NAME: &str = "pp-object-assign-ts"; + +impl FrameworkAdapter for PpObjectAssignTsAdapter { + fn name(&self) -> &'static str { + TS_ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::TypeScript + } + + fn detect( + 
&self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_object_assign); + let matches_source = source_uses_object_assign(file_bytes); + if matches_call && matches_source { + Some(build_binding(TS_ADAPTER_NAME)) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_js(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_object_assign_call() { + let src: &[u8] = b"function run(payload) { return Object.assign({}, payload); }\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("Object.assign")], + ..Default::default() + }; + assert!(PpObjectAssignJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_unrelated_assign() { + let src: &[u8] = b"function add(a, b) { return a + b; }\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(PpObjectAssignJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/mod.rs b/src/dynamic/framework/mod.rs index dcbe3158..7b10704c 100644 --- a/src/dynamic/framework/mod.rs +++ b/src/dynamic/framework/mod.rs @@ -214,14 +214,14 @@ mod tests { } #[test] - fn registry_baseline_after_phase_09() { - // Phase 09 (Track J.7) adds the open-redirect adapter for - // every language carrying the OPEN_REDIRECT corpus: Java / - // Python / PHP / Ruby / JavaScript / Go / Rust. Java / - // Python / PHP each grow from 6 → 7; Ruby from 4 → 5; - // JavaScript from 3 → 4; Go from 2 → 3; Rust from 1 → 2. - // C / Cpp / TypeScript still carry the Phase-01 empty - // baseline. 
+ fn registry_baseline_after_phase_10() { + // Phase 10 (Track J.8) adds three prototype-pollution + // adapters (`pp-lodash-merge`, `pp-object-assign`, + // `pp-json-deep-assign`) to both the JavaScript and + // TypeScript slices. Java / Python / PHP each still carry + // the J.1..J.7 adapters (7 entries); Ruby still has 5; Go + // still has 3; Rust still has 2. JavaScript grows from 4 → + // 7; TypeScript grows from 0 → 3. C / Cpp stay empty. for lang in [Lang::Java, Lang::Python, Lang::Php] { let registered = registry::adapters_for(lang); assert_eq!( @@ -246,12 +246,21 @@ mod tests { let js_registered = registry::adapters_for(Lang::JavaScript); assert_eq!( js_registered.len(), - 4, - "JavaScript must have J.2 + J.5 + J.6 + J.7 adapters", + 7, + "JavaScript must have J.2 + J.5 + J.6 + J.7 + J.8(×3) adapters", ); for adapter in js_registered { assert_eq!(adapter.lang(), Lang::JavaScript); } + let ts_registered = registry::adapters_for(Lang::TypeScript); + assert_eq!( + ts_registered.len(), + 3, + "TypeScript must have the J.8(×3) prototype-pollution adapters", + ); + for adapter in ts_registered { + assert_eq!(adapter.lang(), Lang::TypeScript); + } let go_registered = registry::adapters_for(Lang::Go); assert_eq!( go_registered.len(), @@ -270,7 +279,7 @@ mod tests { for adapter in rust_registered { assert_eq!(adapter.lang(), Lang::Rust); } - for lang in [Lang::C, Lang::Cpp, Lang::TypeScript] { + for lang in [Lang::C, Lang::Cpp] { assert!( registry::adapters_for(lang).is_empty(), "{:?} should still have zero adapters before its Track-L phase", diff --git a/src/dynamic/framework/registry.rs b/src/dynamic/framework/registry.rs index fbaf7a56..2a970278 100644 --- a/src/dynamic/framework/registry.rs +++ b/src/dynamic/framework/registry.rs @@ -89,10 +89,17 @@ static RUBY: &[&dyn FrameworkAdapter] = &[ &super::adapters::RubyMarshalAdapter, &super::adapters::XxeRubyAdapter, ]; -static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[]; +static TYPESCRIPT: &[&dyn FrameworkAdapter] 
= &[ + &super::adapters::PpJsonDeepAssignTsAdapter, + &super::adapters::PpLodashMergeTsAdapter, + &super::adapters::PpObjectAssignTsAdapter, +]; static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[ &super::adapters::HeaderJsAdapter, &super::adapters::JsHandlebarsAdapter, + &super::adapters::PpJsonDeepAssignJsAdapter, + &super::adapters::PpLodashMergeJsAdapter, + &super::adapters::PpObjectAssignJsAdapter, &super::adapters::RedirectJsAdapter, &super::adapters::XpathJsAdapter, ]; diff --git a/src/dynamic/lang/js_shared.rs b/src/dynamic/lang/js_shared.rs index 0af145e7..e0fec72d 100644 --- a/src/dynamic/lang/js_shared.rs +++ b/src/dynamic/lang/js_shared.rs @@ -465,6 +465,18 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result HarnessSource { + let shim = probe_shim(); + let body = format!( + r#"// Nyx dynamic harness — PROTOTYPE_POLLUTION canary trap (Phase 10 / Track J.8). +{shim} + +const NYX_PP_CANARY = '__nyx_canary'; + +function nyxPrototypePollutionProbe(value) {{ + const p = process.env.NYX_PROBE_PATH; + if (!p) return; + const rec = {{ + sink_callee: '__nyx_pp_canary_set', + args: [ + {{ kind: 'String', value: NYX_PP_CANARY }}, + {{ kind: 'String', value: String(value) }}, + ], + captured_at_ns: Number(process.hrtime.bigint()), + payload_id: process.env.NYX_PAYLOAD_ID || '', + kind: {{ + kind: 'PrototypePollution', + property: NYX_PP_CANARY, + value: String(value), + }}, + witness: __nyx_witness('__nyx_pp_canary_set', [NYX_PP_CANARY, value]), + }}; + try {{ + require('fs').appendFileSync(p, JSON.stringify(rec) + '\n'); + }} catch (e) {{ + // best-effort + }} +}} + +(function installPrototypeCanary() {{ + // Proxy-style setter trap on Object.prototype.__nyx_canary. A + // real `new Proxy(Object.prototype, ...)` cannot replace + // Object.prototype itself, so the trap is modelled as an + // accessor property routed through the same observation hook the + // ProbeKind::PrototypePollution probe expects. 
+ // + // The setter receiver (`this`) is the actual write target after + // prototype-chain resolution. Only a write that *landed on + // Object.prototype itself* is true prototype pollution; a write + // to a child object's `__nyx_canary` would also reach this setter + // via prototype lookup but does not pollute the shared prototype, + // so we ignore it. Without this guard a benign deep-merge of + // `{{data: {{__nyx_canary: ...}}}}` into a plain `{{}}` target + // would falsely fire the probe. + let _canaryStorage; + Object.defineProperty(Object.prototype, NYX_PP_CANARY, {{ + configurable: true, + enumerable: false, + set: function (v) {{ + _canaryStorage = v; + if (this === Object.prototype) {{ + nyxPrototypePollutionProbe(v); + }} + }}, + get: function () {{ + return _canaryStorage; + }}, + }}); +}})(); + +function nyxDeepMerge(target, source) {{ + if (source === null || typeof source !== 'object') return target; + for (const key of Object.keys(source)) {{ + const sv = source[key]; + if (sv !== null && typeof sv === 'object') {{ + if (target[key] === null || typeof target[key] !== 'object') {{ + target[key] = {{}}; + }} + nyxDeepMerge(target[key], sv); + }} else {{ + target[key] = sv; + }} + }} + return target; +}} + +const payload = process.env.NYX_PAYLOAD || ''; +let parsed; +try {{ + parsed = JSON.parse(payload); +}} catch (e) {{ + parsed = {{}}; +}} +const target = {{}}; +try {{ + nyxDeepMerge(target, parsed); +}} catch (e) {{ + // Naive merge may throw on weird inputs; the canary observation + // already wrote any probe before the throw. +}} +console.log('__NYX_SINK_HIT__'); +console.log(JSON.stringify({{ + canary_present: Object.prototype.hasOwnProperty(NYX_PP_CANARY), +}})); +"# + ); + HarnessSource { + source: body, + filename: "harness.js".to_owned(), + command: vec!["node".to_owned(), "harness.js".to_owned()], + extra_files: Vec::new(), + entry_subpath: None, + } +} + /// Phase 26 — Node chain-step harness (shared between JS + TS emitters). 
/// /// Splices the Node probe shim ([`probe_shim`]) in front of a minimal diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index 986931e5..fe709077 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -288,6 +288,33 @@ pub enum ProbePredicate { /// from this slice. allowlist: &'static [&'static str], }, + /// Phase 10 (Track J.8): prototype-pollution canary predicate. + /// + /// Fires when at least one drained probe carries + /// [`ProbeKind::PrototypePollution`] whose `property` matches + /// `canary` (defaults to `"__nyx_canary"`). The Node harness + /// installs a `Proxy`-style setter trap on + /// `Object.prototype.__nyx_canary`; any deep-merge / `Object.assign` + /// / `JSON.parse`-then-deep-assign sink that walks an + /// attacker-controlled `__proto__` key into the prototype chain + /// trips the trap and writes a `PrototypePollution` probe. A + /// benign payload whose object literal has no `__proto__` key, or + /// whose target is constructed via `Object.create(null)`, never + /// reaches the canary so the predicate stays clear. + /// + /// Cross-cutting in the same sense as + /// [`Self::DeserializeGadgetInvoked`] / + /// [`Self::XxeEntityExpanded`] / + /// [`Self::HeaderInjected`] / + /// [`Self::RedirectHostNotIn`] — evaluated across every drained + /// probe rather than against a single record. + PrototypeCanaryTouched { + /// Canary property name the harness installed on + /// `Object.prototype` (typically `"__nyx_canary"`). Compared + /// case-sensitively against + /// [`ProbeKind::PrototypePollution::property`]. + canary: &'static str, + }, /// Phase 06 (Track J.4) / Phase 07 (Track J.5): result-count /// predicate shared by LDAP-filter and XPath-expression injection. /// @@ -482,6 +509,21 @@ pub fn oracle_fired_with_stubs( if !redirect_ok { return false; } + // Phase 10 (Track J.8): prototype-pollution canary + // cross-cutting predicates. 
Each + // `PrototypeCanaryTouched { canary }` consults the + // captured probe channel for a + // [`ProbeKind::PrototypePollution`] record whose + // `property` matches the canary name. + let canary_ok = cross.iter().all(|p| match p { + ProbePredicate::PrototypeCanaryTouched { canary } => { + probes_satisfy_prototype_canary(probes, canary) + } + _ => true, + }); + if !canary_ok { + return false; + } // Phase 04 (Track J.2): SSTI render-equality cross-cutting // predicates. Each `TemplateEvalEqual { expected }` consults // the captured stdout body — see [`stdout_template_equals`]. @@ -515,7 +557,8 @@ pub fn oracle_fired_with_stubs( | ProbeKind::Ldap { .. } | ProbeKind::Xpath { .. } | ProbeKind::HeaderEmit { .. } - | ProbeKind::Redirect { .. } => false, + | ProbeKind::Redirect { .. } + | ProbeKind::PrototypePollution { .. } => false, }), Oracle::OutputContains(needle) => { let nb = needle.as_bytes(); @@ -544,6 +587,7 @@ fn is_cross_cutting(pred: &ProbePredicate) -> bool { | ProbePredicate::QueryResultCountGreaterThan { .. } | ProbePredicate::HeaderInjected { .. } | ProbePredicate::RedirectHostNotIn { .. } + | ProbePredicate::PrototypeCanaryTouched { .. } ) } @@ -576,6 +620,10 @@ fn cross_cutting_satisfied(pred: &ProbePredicate, stub_events: &[StubEvent]) -> // rather than stub events; evaluated separately in // [`probes_satisfy_redirect_off_origin`] below. ProbePredicate::RedirectHostNotIn { .. } => true, + // PrototypeCanaryTouched is cross-cutting against the *probe + // log* rather than stub events; evaluated separately in + // [`probes_satisfy_prototype_canary`] below. + ProbePredicate::PrototypeCanaryTouched { .. } => true, _ => true, } } @@ -685,6 +733,17 @@ fn probes_satisfy_redirect_off_origin(probes: &[SinkProbe], allowlist: &[&str]) }) } +/// True when at least one drained probe is a +/// [`ProbeKind::PrototypePollution`] record whose `property` matches +/// `canary`. Powers +/// [`ProbePredicate::PrototypeCanaryTouched`] (Phase 10 — Track J.8). 
+fn probes_satisfy_prototype_canary(probes: &[SinkProbe], canary: &str) -> bool { + probes.iter().any(|p| match &p.kind { + ProbeKind::PrototypePollution { property, .. } => property == canary, + _ => false, + }) +} + /// Returns `true` when `location` redirects to a host that is neither /// `request_host` nor any entry of `allowlist`. Crate-visible so the /// in-crate predicate above and the colocated tests can share one @@ -791,7 +850,8 @@ fn probe_satisfies_one(probe: &SinkProbe, pred: &ProbePredicate) -> bool { | ProbePredicate::XxeEntityExpanded { .. } | ProbePredicate::QueryResultCountGreaterThan { .. } | ProbePredicate::HeaderInjected { .. } - | ProbePredicate::RedirectHostNotIn { .. } => true, + | ProbePredicate::RedirectHostNotIn { .. } + | ProbePredicate::PrototypeCanaryTouched { .. } => true, } } @@ -819,7 +879,8 @@ pub fn probe_crash_signal(probe: &SinkProbe) -> Option { | ProbeKind::Ldap { .. } | ProbeKind::Xpath { .. } | ProbeKind::HeaderEmit { .. } - | ProbeKind::Redirect { .. } => None, + | ProbeKind::Redirect { .. } + | ProbeKind::PrototypePollution { .. 
} => None, } } @@ -1181,6 +1242,53 @@ mod tests { )); } + fn prototype_pollution_probe(property: &str, value: &str) -> SinkProbe { + SinkProbe { + sink_callee: "__nyx_pp_canary_set".into(), + args: vec![], + captured_at_ns: 1, + payload_id: "phase10".into(), + kind: ProbeKind::PrototypePollution { + property: property.into(), + value: value.into(), + }, + witness: ProbeWitness::empty(), + } + } + + #[test] + fn prototype_canary_touched_fires_on_matching_property() { + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { + canary: "__nyx_canary", + }], + }; + let probes = vec![prototype_pollution_probe("__nyx_canary", "pwned")]; + assert!(oracle_fired(&oracle, &outcome(), &probes)); + } + + #[test] + fn prototype_canary_touched_ignores_mismatched_property() { + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { + canary: "__nyx_canary", + }], + }; + let probes = vec![prototype_pollution_probe("__other__", "x")]; + assert!(!oracle_fired(&oracle, &outcome(), &probes)); + } + + #[test] + fn prototype_canary_touched_clears_when_no_pp_probe() { + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { + canary: "__nyx_canary", + }], + }; + let probes = vec![probe("noop", vec![])]; + assert!(!oracle_fired(&oracle, &outcome(), &probes)); + } + #[test] fn sink_crash_without_probes_does_not_fire_even_on_process_crash() { let mut o = outcome(); diff --git a/src/dynamic/probe.rs b/src/dynamic/probe.rs index 393485f9..a974bc53 100644 --- a/src/dynamic/probe.rs +++ b/src/dynamic/probe.rs @@ -236,6 +236,30 @@ pub enum ProbeKind { /// would otherwise resolve off-origin. request_host: String, }, + /// Phase 10 (Track J.8) prototype-pollution observation. 
Stamped + /// by the Node.js harness shim's canary-trap accessor installed on + /// `Object.prototype.__nyx_canary` (a `Proxy`-style setter trap): + /// when a deep-merge / `Object.assign` / `JSON.parse`-then-assign + /// sink walks an attacker-controlled `__proto__` key into + /// `Object.prototype`, the setter records the polluted value via + /// this probe kind. The + /// [`crate::dynamic::oracle::ProbePredicate::PrototypeCanaryTouched`] + /// predicate fires when any such probe lands on the channel. A + /// benign payload whose object literal has no `__proto__` key, or + /// whose target is constructed via `Object.create(null)`, leaves + /// the prototype chain untouched and emits no + /// `PrototypePollution` probe. + PrototypePollution { + /// Property name the host attempted to set on + /// `Object.prototype` — always `"__nyx_canary"` for Phase 10 + /// but parametrised so future per-sink canaries reuse the + /// kind without proliferating variants. + property: String, + /// Stringified value the host attempted to bind. Echoed + /// verbatim so repro tooling can pin the exact payload bytes + /// that traversed the chain. + value: String, + }, } impl Default for ProbeKind { diff --git a/src/dynamic/telemetry.rs b/src/dynamic/telemetry.rs index 453d5490..e8851a4c 100644 --- a/src/dynamic/telemetry.rs +++ b/src/dynamic/telemetry.rs @@ -60,7 +60,7 @@ pub const NYX_VERSION: &str = env!("CARGO_PKG_VERSION"); /// [`crate::dynamic::corpus::CORPUS_VERSION`]; the compile-time assertion /// below + the [`corpus_version_const_matches_corpus_module`] runtime test /// jointly guard drift. -pub const CORPUS_VERSION: &str = "13"; +pub const CORPUS_VERSION: &str = "14"; /// Compile-time guard that pins [`CORPUS_VERSION`] (this module) to the /// textual form of [`crate::dynamic::corpus::CORPUS_VERSION`]. 
Bumping the diff --git a/tests/dynamic_fixtures/prototype_pollution/javascript/benign.js b/tests/dynamic_fixtures/prototype_pollution/javascript/benign.js new file mode 100644 index 00000000..a1fbfb70 --- /dev/null +++ b/tests/dynamic_fixtures/prototype_pollution/javascript/benign.js @@ -0,0 +1,22 @@ +// Phase 10 (Track J.8) — JavaScript PROTOTYPE_POLLUTION benign +// control fixture. +// +// The handler parses an attacker-controlled JSON string and walks +// it into a target constructed via `Object.create(null)`. Because +// the target has no prototype chain, even a payload whose top-level +// key is `__proto__` cannot reach `Object.prototype`. The harness's +// canary trap stays clear and no `PrototypePollution` probe is +// emitted. +const _ = require('lodash'); + +function deepMerge(target, source) { + return _.merge(target, source); +} + +function run(payload) { + const parsed = JSON.parse(payload); + const target = Object.create(null); + return deepMerge(target, parsed); +} + +module.exports = { run }; diff --git a/tests/dynamic_fixtures/prototype_pollution/javascript/vuln.js b/tests/dynamic_fixtures/prototype_pollution/javascript/vuln.js new file mode 100644 index 00000000..62c3ebc5 --- /dev/null +++ b/tests/dynamic_fixtures/prototype_pollution/javascript/vuln.js @@ -0,0 +1,20 @@ +// Phase 10 (Track J.8) — JavaScript PROTOTYPE_POLLUTION vuln fixture. +// +// The handler parses an attacker-controlled JSON string and passes +// the parsed object into `lodash.merge` against a vanilla `{}` +// target. When the payload's top-level key is `__proto__`, the +// merge walks the key into `Object.prototype` and the harness's +// canary trap records a `ProbeKind::PrototypePollution` probe. 
+const _ = require('lodash'); + +function deepMerge(target, source) { + return _.merge(target, source); +} + +function run(payload) { + const parsed = JSON.parse(payload); + const target = {}; + return deepMerge(target, parsed); +} + +module.exports = { run }; diff --git a/tests/dynamic_fixtures/prototype_pollution/typescript/benign.ts b/tests/dynamic_fixtures/prototype_pollution/typescript/benign.ts new file mode 100644 index 00000000..dcd3dae0 --- /dev/null +++ b/tests/dynamic_fixtures/prototype_pollution/typescript/benign.ts @@ -0,0 +1,17 @@ +// Phase 10 (Track J.8) — TypeScript PROTOTYPE_POLLUTION benign +// control fixture. +// +// Uses `Object.create(null)` as the merge target so even a payload +// whose top-level key is `__proto__` cannot reach +// `Object.prototype`. +import * as _ from 'lodash'; + +export function deepMerge(target: any, source: any): any { + return (_ as any).merge(target, source); +} + +export function run(payload: string): any { + const parsed = JSON.parse(payload); + const target: any = Object.create(null); + return deepMerge(target, parsed); +} diff --git a/tests/dynamic_fixtures/prototype_pollution/typescript/vuln.ts b/tests/dynamic_fixtures/prototype_pollution/typescript/vuln.ts new file mode 100644 index 00000000..533ff3c8 --- /dev/null +++ b/tests/dynamic_fixtures/prototype_pollution/typescript/vuln.ts @@ -0,0 +1,16 @@ +// Phase 10 (Track J.8) — TypeScript PROTOTYPE_POLLUTION vuln fixture. +// +// Same shape as the JS sibling: parse the attacker-controlled JSON +// string, deep-merge it into a vanilla `{}` target, get prototype +// pollution when the payload carries a `__proto__` key. 
+import * as _ from 'lodash'; + +export function deepMerge(target: any, source: any): any { + return (_ as any).merge(target, source); +} + +export function run(payload: string): any { + const parsed = JSON.parse(payload); + const target: any = {}; + return deepMerge(target, parsed); +} diff --git a/tests/prototype_pollution_corpus.rs b/tests/prototype_pollution_corpus.rs new file mode 100644 index 00000000..edaa4ba0 --- /dev/null +++ b/tests/prototype_pollution_corpus.rs @@ -0,0 +1,386 @@ +//! Phase 10 (Track J.8) — PROTOTYPE_POLLUTION corpus acceptance. +//! +//! Asserts the new cap end-to-end: corpus slices register per-language +//! vuln/benign pairs for JavaScript and TypeScript, the lang-aware +//! resolver pairs them inside the correct slice, the JS-shared harness +//! emitter splices in the canary trap + deep-merge sink + sink-hit +//! sentinel, the framework adapters fire on the canonical sink +//! constructions (`lodash.merge`, `Object.assign`, `JSON.parse` + +//! deep-merge helper), and the `PrototypeCanaryTouched` predicate fires +//! only when a `PrototypePollution` probe lands on the channel. +//! +//! `cargo nextest run --features dynamic --test prototype_pollution_corpus`. 
+ +#![cfg(feature = "dynamic")] + +mod common; + +use nyx_scanner::dynamic::corpus::{ + audit_marker_collisions, benign_payload_for_lang, payloads_for_lang, + resolve_benign_control_lang, Oracle, +}; +use nyx_scanner::dynamic::framework::registry::adapters_for; +use nyx_scanner::dynamic::lang; +use nyx_scanner::dynamic::oracle::{oracle_fired, ProbePredicate}; +use nyx_scanner::dynamic::probe::{ProbeKind, ProbeWitness, SinkProbe}; +use nyx_scanner::dynamic::sandbox::SandboxOutcome; +use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; +use nyx_scanner::labels::Cap; +use nyx_scanner::summary::FuncSummary; +use nyx_scanner::symbol::Lang; +use std::time::Duration; + +const LANGS: &[Lang] = &[Lang::JavaScript, Lang::TypeScript]; + +fn make_spec(lang: Lang, entry_file: &str, entry_name: &str) -> HarnessSpec { + HarnessSpec { + finding_id: "phase10test0001".into(), + entry_file: entry_file.into(), + entry_name: entry_name.into(), + entry_kind: EntryKind::Function, + lang, + toolchain_id: "phase10".into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: Cap::PROTOTYPE_POLLUTION, + constraint_hints: vec![], + sink_file: entry_file.into(), + sink_line: 1, + spec_hash: "phase10test0001".into(), + derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + } +} + +#[test] +fn corpus_registers_prototype_pollution_for_js_and_ts() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::PROTOTYPE_POLLUTION, *lang); + assert!( + !slice.is_empty(), + "PROTOTYPE_POLLUTION has no payloads for {lang:?}" + ); + let has_vuln = slice.iter().any(|p| !p.is_benign); + let has_benign = slice.iter().any(|p| p.is_benign); + assert!(has_vuln, "{lang:?} PROTOTYPE_POLLUTION missing vuln payload"); + assert!( + has_benign, + "{lang:?} PROTOTYPE_POLLUTION missing benign control" + ); + } +} + +#[test] +fn prototype_pollution_unsupported_for_other_langs() { + for lang in [ + Lang::Rust, + Lang::C, + 
Lang::Cpp, + Lang::Java, + Lang::Go, + Lang::Php, + Lang::Python, + Lang::Ruby, + ] { + assert!( + payloads_for_lang(Cap::PROTOTYPE_POLLUTION, lang).is_empty(), + "unexpected PROTOTYPE_POLLUTION payloads for {lang:?}", + ); + } +} + +#[test] +fn benign_control_resolves_within_lang_slice() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::PROTOTYPE_POLLUTION, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + let resolved = resolve_benign_control_lang(vuln, Cap::PROTOTYPE_POLLUTION, *lang) + .expect("paired control"); + assert!(resolved.is_benign); + let direct = benign_payload_for_lang(Cap::PROTOTYPE_POLLUTION, *lang).unwrap(); + assert_eq!(direct.label, resolved.label); + } +} + +#[test] +fn payload_oracle_carries_prototype_canary_predicate() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::PROTOTYPE_POLLUTION, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + match &vuln.oracle { + Oracle::SinkProbe { predicates } => { + assert!( + predicates.iter().any(|p| matches!( + p, + ProbePredicate::PrototypeCanaryTouched { .. 
} + )), + "{lang:?} vuln payload missing PrototypeCanaryTouched predicate", + ); + } + other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"), + } + } +} + +#[test] +fn vuln_payload_bytes_carry_proto_key_benign_bytes_do_not() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::PROTOTYPE_POLLUTION, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + let benign = slice.iter().find(|p| p.is_benign).unwrap(); + let vuln_text = std::str::from_utf8(vuln.bytes).unwrap(); + let benign_text = std::str::from_utf8(benign.bytes).unwrap(); + assert!( + vuln_text.contains("__proto__"), + "{lang:?} vuln payload must carry the __proto__ pollution key", + ); + assert!( + !benign_text.contains("__proto__"), + "{lang:?} benign control must not carry __proto__", + ); + } +} + +#[test] +fn marker_collisions_clean_with_phase_10_additions() { + assert!(audit_marker_collisions().is_empty()); +} + +#[test] +fn probe_kind_prototype_pollution_serdes() { + let original = ProbeKind::PrototypePollution { + property: "__nyx_canary".into(), + value: "pwned".into(), + }; + let json = serde_json::to_string(&original).unwrap(); + assert!(json.contains("PrototypePollution")); + assert!(json.contains("property")); + assert!(json.contains("__nyx_canary")); + let parsed: ProbeKind = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed, original); +} + +#[test] +fn prototype_canary_predicate_fires_on_polluted_probe() { + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { + canary: "__nyx_canary", + }], + }; + let probes = vec![SinkProbe { + sink_callee: "__nyx_pp_canary_set".into(), + args: vec![], + captured_at_ns: 1, + payload_id: "phase10".into(), + kind: ProbeKind::PrototypePollution { + property: "__nyx_canary".into(), + value: "pwned".into(), + }, + witness: ProbeWitness::empty(), + }]; + let outcome = SandboxOutcome { + exit_code: Some(0), + stdout: vec![], + stderr: vec![], + timed_out: false, + 
oob_callback_seen: false, + sink_hit: true, + duration: Duration::from_millis(1), + hardening_outcome: None, + }; + assert!(oracle_fired(&oracle, &outcome, &probes)); +} + +#[test] +fn prototype_canary_predicate_clears_when_no_pp_probe() { + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::PrototypeCanaryTouched { + canary: "__nyx_canary", + }], + }; + let probes = vec![SinkProbe { + sink_callee: "noop".into(), + args: vec![], + captured_at_ns: 1, + payload_id: "phase10".into(), + kind: ProbeKind::Normal, + witness: ProbeWitness::empty(), + }]; + let outcome = SandboxOutcome { + exit_code: Some(0), + stdout: vec![], + stderr: vec![], + timed_out: false, + oob_callback_seen: false, + sink_hit: true, + duration: Duration::from_millis(1), + hardening_outcome: None, + }; + assert!(!oracle_fired(&oracle, &outcome, &probes)); +} + +#[test] +fn lang_emitter_dispatches_to_prototype_pollution_harness() { + for (lang, entry_file, entry_name) in [ + ( + Lang::JavaScript, + "tests/dynamic_fixtures/prototype_pollution/javascript/vuln.js", + "run", + ), + ( + Lang::TypeScript, + "tests/dynamic_fixtures/prototype_pollution/typescript/vuln.ts", + "run", + ), + ] { + let spec = make_spec(lang, entry_file, entry_name); + let harness = + lang::emit(&spec).unwrap_or_else(|e| panic!("emit failed for {lang:?}: {e:?}")); + assert!( + harness.source.contains("PrototypePollution"), + "{lang:?} prototype-pollution harness must carry the PrototypePollution probe kind", + ); + assert!( + harness.source.contains("__nyx_canary"), + "{lang:?} harness must reference the canary property name", + ); + assert!( + harness.source.contains("Object.defineProperty(Object.prototype"), + "{lang:?} harness must install the canary trap on Object.prototype", + ); + assert!( + harness.source.contains("nyxDeepMerge"), + "{lang:?} harness must inline the deep-merge sink", + ); + assert!( + harness.source.contains("__NYX_SINK_HIT__"), + "{lang:?} harness must emit the sink-hit sentinel", + ); + } 
+} + +#[test] +fn framework_adapters_detect_prototype_pollution_sinks() { + // lodash.merge fixture: vuln + benign both fire the + // `pp-lodash-merge-js` / `pp-lodash-merge-ts` adapter because + // they call `_.merge` and import lodash. Phase 10 lodash adapter + // does not differentiate the target type — that differentiation + // lives at the dynamic differential level. + for (lang, fixture, sink_callee) in [ + ( + Lang::JavaScript, + "tests/dynamic_fixtures/prototype_pollution/javascript/vuln.js", + "merge", + ), + ( + Lang::TypeScript, + "tests/dynamic_fixtures/prototype_pollution/typescript/vuln.ts", + "merge", + ), + ] { + let bytes = std::fs::read(fixture).expect("fixture exists"); + let ts_lang = ts_language_for(lang); + let mut parser = tree_sitter::Parser::new(); + parser.set_language(&ts_lang).unwrap(); + let tree = parser.parse(&bytes, None).unwrap(); + let mut summary = FuncSummary { + name: "deepMerge".into(), + file_path: fixture.to_owned(), + lang: slug(lang).into(), + ..Default::default() + }; + summary + .callees + .push(nyx_scanner::summary::CalleeSite::bare(sink_callee)); + let registry_slice = adapters_for(lang); + assert!(!registry_slice.is_empty(), "{lang:?} adapter slice empty"); + let binding = nyx_scanner::dynamic::framework::detect_binding( + &summary, + tree.root_node(), + &bytes, + lang, + ); + let b = binding.unwrap_or_else(|| { + panic!("{lang:?} adapter must detect the prototype-pollution fixture") + }); + assert_eq!(b.kind, EntryKind::Function); + assert!(b.adapter.starts_with("pp-")); + } +} + +#[test] +fn object_assign_adapter_fires_on_direct_object_assign() { + let src = b"function run(payload) { return Object.assign({}, payload); }\n"; + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter::Language::from( + tree_sitter_javascript::LANGUAGE, + )) + .unwrap(); + let tree = parser.parse(src.as_slice(), None).unwrap(); + let mut summary = FuncSummary { + name: "run".into(), + file_path: 
"object_assign.js".into(), + lang: "javascript".into(), + ..Default::default() + }; + summary + .callees + .push(nyx_scanner::summary::CalleeSite::bare("Object.assign")); + let binding = nyx_scanner::dynamic::framework::detect_binding( + &summary, + tree.root_node(), + src.as_slice(), + Lang::JavaScript, + ); + let b = binding.expect("Object.assign adapter must fire"); + assert!(b.adapter.starts_with("pp-")); +} + +#[test] +fn json_deep_assign_adapter_fires_on_json_parse_plus_deep_merge() { + let src = b"function deepMerge(t, s) { for (const k of Object.keys(s)) t[k] = s[k]; }\n\ + function run(payload) { return deepMerge({}, JSON.parse(payload)); }\n"; + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter::Language::from( + tree_sitter_javascript::LANGUAGE, + )) + .unwrap(); + let tree = parser.parse(src.as_slice(), None).unwrap(); + let mut summary = FuncSummary { + name: "run".into(), + file_path: "json_parse.js".into(), + lang: "javascript".into(), + ..Default::default() + }; + summary + .callees + .push(nyx_scanner::summary::CalleeSite::bare("JSON.parse")); + let binding = nyx_scanner::dynamic::framework::detect_binding( + &summary, + tree.root_node(), + src.as_slice(), + Lang::JavaScript, + ); + let b = binding.expect("JSON.parse + deep-merge adapter must fire"); + assert!(b.adapter.starts_with("pp-")); +} + +fn ts_language_for(lang: Lang) -> tree_sitter::Language { + match lang { + Lang::JavaScript => tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE), + Lang::TypeScript => { + tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT) + } + other => panic!("unsupported test lang {other:?}"), + } +} + +fn slug(lang: Lang) -> &'static str { + match lang { + Lang::JavaScript => "javascript", + Lang::TypeScript => "typescript", + _ => "other", + } +}