diff --git a/src/dynamic/corpus.rs b/src/dynamic/corpus.rs index 453ce345..6ac257f3 100644 --- a/src/dynamic/corpus.rs +++ b/src/dynamic/corpus.rs @@ -53,6 +53,7 @@ mod fmt_string; mod path_trav; mod sqli; mod ssrf; +mod ssti; mod xss; pub use registry::{ @@ -84,7 +85,8 @@ pub use crate::dynamic::oracle::Oracle; /// | 5 | 2026-05-16 | FMT_STRING SinkCrash payload + benign control (Phase 08 unrelated-crash acceptance fixture) | /// | 6 | 2026-05-17 | Phase 02 / Track J.0: `(Cap, Lang)` registry refactor; `no_benign_control_rationale` field; compile-time provenance audit | /// | 7 | 2026-05-17 | Phase 03 / Track J.1: `DESERIALIZE` cap lit for Java / Python / PHP / Ruby; `ProbeKind::Deserialize` + `ProbePredicate::DeserializeGadgetInvoked` | -pub const CORPUS_VERSION: u32 = 7; +/// | 8 | 2026-05-17 | Phase 04 / Track J.2: `SSTI` cap lit for Jinja2 / ERB / Twig / Thymeleaf / Handlebars; `ProbePredicate::TemplateEvalEqual` | +pub const CORPUS_VERSION: u32 = 8; /// Where a payload originated. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/dynamic/corpus/registry.rs b/src/dynamic/corpus/registry.rs index b06ceb48..6e379a65 100644 --- a/src/dynamic/corpus/registry.rs +++ b/src/dynamic/corpus/registry.rs @@ -23,7 +23,7 @@ use std::collections::HashMap; use std::sync::OnceLock; -use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, xss}; +use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, ssti, xss}; use super::{CapCorpus, CuratedPayload, Oracle}; use crate::dynamic::oracle::ProbePredicate; use crate::labels::Cap; @@ -44,7 +44,6 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits() | Cap::XPATH_INJECTION.bits() | Cap::HEADER_INJECTION.bits() | Cap::OPEN_REDIRECT.bits() - | Cap::SSTI.bits() | Cap::XXE.bits() | Cap::PROTOTYPE_POLLUTION.bits(); @@ -61,6 +60,11 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[ (Cap::DESERIALIZE, Lang::Python, deserialize::python::PAYLOADS), (Cap::DESERIALIZE, Lang::Php, deserialize::php::PAYLOADS), (Cap::DESERIALIZE, Lang::Ruby, deserialize::ruby::PAYLOADS), + (Cap::SSTI, Lang::Python, ssti::python_jinja2::PAYLOADS), + (Cap::SSTI, Lang::Ruby, ssti::ruby_erb::PAYLOADS), + (Cap::SSTI, Lang::Php, ssti::php_twig::PAYLOADS), + (Cap::SSTI, Lang::Java, ssti::java_thymeleaf::PAYLOADS), + (Cap::SSTI, Lang::JavaScript, ssti::js_handlebars::PAYLOADS), ]; /// Reserved for per-cap oracle defaults. 
Empty in Phase 02; populated by @@ -267,6 +271,8 @@ mod tests { assert!(!payloads_for(Cap::SSRF).is_empty()); assert!(!payloads_for(Cap::HTML_ESCAPE).is_empty()); assert!(!payloads_for(Cap::FMT_STRING).is_empty()); + assert!(!payloads_for(Cap::DESERIALIZE).is_empty()); + assert!(!payloads_for(Cap::SSTI).is_empty()); } #[test] @@ -283,7 +289,6 @@ mod tests { Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, - Cap::SSTI, Cap::XXE, Cap::PROTOTYPE_POLLUTION, ]; @@ -314,6 +319,7 @@ mod tests { Cap::HTML_ESCAPE, Cap::FMT_STRING, Cap::DESERIALIZE, + Cap::SSTI, ] { let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign); assert!(has_vuln, "{cap:?} must have at least one vuln payload"); @@ -361,6 +367,7 @@ mod tests { Cap::HTML_ESCAPE, Cap::FMT_STRING, Cap::DESERIALIZE, + Cap::SSTI, ]; for cap in caps { for p in payloads_for(cap) { @@ -383,6 +390,7 @@ mod tests { Cap::HTML_ESCAPE, Cap::FMT_STRING, Cap::DESERIALIZE, + Cap::SSTI, ]; for cap in caps { for p in payloads_for(cap) { @@ -492,6 +500,7 @@ mod tests { Cap::HTML_ESCAPE, Cap::FMT_STRING, Cap::DESERIALIZE, + Cap::SSTI, ]; for cap in caps { for p in payloads_for(cap).iter().filter(|p| p.is_benign) { @@ -574,6 +583,52 @@ mod tests { } } + #[test] + fn ssti_has_per_lang_slices_for_phase_04() { + // Phase 04 (Track J.2) acceptance: SSTI registers payloads in + // Python / Ruby / PHP / Java / JavaScript and the lang-aware + // lookup never returns empty for any of them. + for lang in [ + Lang::Python, + Lang::Ruby, + Lang::Php, + Lang::Java, + Lang::JavaScript, + ] { + assert!( + !payloads_for_lang(Cap::SSTI, lang).is_empty(), + "SSTI must have at least one payload for {lang:?}", + ); + } + // Rust / C / Cpp / Go / TypeScript not yet covered. 
+ for lang in [Lang::Rust, Lang::C, Lang::Cpp, Lang::Go, Lang::TypeScript] { + assert!( + payloads_for_lang(Cap::SSTI, lang).is_empty(), + "SSTI has unexpected payloads for {lang:?}", + ); + } + } + + #[test] + fn ssti_payloads_pair_benign_controls_per_lang() { + for lang in [ + Lang::Python, + Lang::Ruby, + Lang::Php, + Lang::Java, + Lang::JavaScript, + ] { + let slice = payloads_for_lang(Cap::SSTI, lang); + let vuln = slice + .iter() + .find(|p| !p.is_benign) + .expect("each lang must have an SSTI vuln payload"); + let resolved = super::resolve_benign_control_lang(vuln, Cap::SSTI, lang) + .expect("lang-aware benign control must resolve"); + assert!(resolved.is_benign); + } + } + #[test] fn deserialize_payloads_pair_benign_controls_per_lang() { // The lang-aware resolver must find the paired benign control diff --git a/src/dynamic/corpus/ssti/java_thymeleaf.rs b/src/dynamic/corpus/ssti/java_thymeleaf.rs new file mode 100644 index 00000000..29c3a799 --- /dev/null +++ b/src/dynamic/corpus/ssti/java_thymeleaf.rs @@ -0,0 +1,50 @@ +//! Java Thymeleaf `Cap::SSTI` payloads. +//! +//! Vuln payload: `[[${7*7}]]` — Thymeleaf evaluates the SpEL-style +//! expression inside the inlined-output marker and renders `49`. +//! Benign control sends the literal `7*7` text; without the `[[${...}]]` +//! markers Thymeleaf passes the payload through unchanged. 
+ +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"[[${7*7}]]", + label: "ssti-thymeleaf-eval", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/java_thymeleaf/vuln.java", + ], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + benign_control: Some(PayloadRef { + label: "ssti-thymeleaf-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"7*7", + label: "ssti-thymeleaf-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/java_thymeleaf/benign.java", + ], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/ssti/js_handlebars.rs b/src/dynamic/corpus/ssti/js_handlebars.rs new file mode 100644 index 00000000..bfb35c01 --- /dev/null +++ b/src/dynamic/corpus/ssti/js_handlebars.rs @@ -0,0 +1,56 @@ +//! JavaScript Handlebars `Cap::SSTI` payloads. +//! +//! Handlebars does not evaluate arbitrary arithmetic in `{{ ... }}` +//! expressions out of the box. A real exploit reaches the engine +//! through the built-in `lookup` helper combined with a constructor +//! gadget chain (`{{#with (lookup this 'constructor')}}{{lookup +//! this 'constructor'}}{{/with}}` is the canonical pattern), but the +//! evaluation marker we need ("rendered constant only via eval") +//! 
reduces to a much simpler `{{multiply 7 7}}` against the in-harness +//! `multiply` helper. The harness registers that helper before +//! compiling so the rendered body is `49`; benign control sends `7*7` +//! plain text which Handlebars echoes verbatim. + +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"{{multiply 7 7}}", + label: "ssti-handlebars-eval", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/js_handlebars/vuln.js", + ], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + benign_control: Some(PayloadRef { + label: "ssti-handlebars-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"7*7", + label: "ssti-handlebars-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/js_handlebars/benign.js", + ], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/ssti/mod.rs b/src/dynamic/corpus/ssti/mod.rs new file mode 100644 index 00000000..c1afeddb --- /dev/null +++ b/src/dynamic/corpus/ssti/mod.rs @@ -0,0 +1,19 @@ +//! Server-Side Template Injection (`Cap::SSTI`) per-engine payload slices. +//! +//! Phase 04 (Track J.2) carves SSTI across the five most-common template +//! engines: Jinja2 (Python), ERB (Ruby), Twig (PHP), Thymeleaf (Java), and +//! Handlebars (JavaScript). 
Every vuln payload sends a template +//! expression that resolves to a known constant *only* when the engine +//! actually evaluates the expression (e.g. `{{7*7}}` → `49` in Jinja2, +//! `<%= 7*7 %>` → `49` in ERB). The paired benign control sends the +//! literal arithmetic text without engine markers so the per-engine +//! harness echoes the payload verbatim rather than evaluating it; the +//! oracle's [`crate::dynamic::oracle::ProbePredicate::TemplateEvalEqual`] +//! check fires on the vuln render (`49`) and does not fire on the +//! benign render (`7*7`), satisfying the §4.1 differential rule. + +pub mod java_thymeleaf; +pub mod js_handlebars; +pub mod php_twig; +pub mod python_jinja2; +pub mod ruby_erb; diff --git a/src/dynamic/corpus/ssti/php_twig.rs b/src/dynamic/corpus/ssti/php_twig.rs new file mode 100644 index 00000000..8f5666d8 --- /dev/null +++ b/src/dynamic/corpus/ssti/php_twig.rs @@ -0,0 +1,50 @@ +//! PHP Twig `Cap::SSTI` payloads. +//! +//! Vuln payload: `{{7*7}}` — Twig evaluates the expression and the +//! rendered template body is `49`. Benign control sends the literal +//! `7*7` text; with no `{{ ... }}` markers around it, Twig echoes +//! the payload verbatim. 
+ +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"{{7*7}}", + label: "ssti-twig-eval", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/php_twig/vuln.php", + ], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + benign_control: Some(PayloadRef { + label: "ssti-twig-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"7*7", + label: "ssti-twig-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/php_twig/benign.php", + ], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/ssti/python_jinja2.rs b/src/dynamic/corpus/ssti/python_jinja2.rs new file mode 100644 index 00000000..439d1491 --- /dev/null +++ b/src/dynamic/corpus/ssti/python_jinja2.rs @@ -0,0 +1,57 @@ +//! Python Jinja2 `Cap::SSTI` payloads. +//! +//! Vuln payload: `{{7*7}}` — Jinja2 evaluates the expression and the +//! rendered template body is `49`. The harness's +//! [`crate::dynamic::oracle::ProbePredicate::TemplateEvalEqual`] check +//! compares the captured `{"render": "49"}` JSON body against +//! `expected = 49` and the oracle fires. +//! +//! Benign control: literal `7*7` — Jinja2 has no `{{ ... }}` markers to +//! evaluate so the engine echoes the payload verbatim. The rendered +//! 
body is `7*7`, the oracle's integer parse fails, and the oracle +//! does not fire. Together with the vuln payload this satisfies the +//! §4.1 differential confirmation rule. + +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"{{7*7}}", + label: "ssti-jinja2-eval", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/python_jinja2/vuln.py", + ], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + benign_control: Some(PayloadRef { + label: "ssti-jinja2-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"7*7", + label: "ssti-jinja2-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/python_jinja2/benign.py", + ], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/ssti/ruby_erb.rs b/src/dynamic/corpus/ssti/ruby_erb.rs new file mode 100644 index 00000000..1e8a4576 --- /dev/null +++ b/src/dynamic/corpus/ssti/ruby_erb.rs @@ -0,0 +1,50 @@ +//! Ruby ERB `Cap::SSTI` payloads. +//! +//! Vuln payload: `<%= 7*7 %>` — ERB evaluates the embedded Ruby +//! expression and the rendered template body is `49`. Benign control +//! ships the literal `7*7` text; with no `<%= ... %>` marker around +//! it, ERB passes the payload through verbatim. 
+ +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"<%= 7*7 %>", + label: "ssti-erb-eval", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/ruby_erb/vuln.rb", + ], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + benign_control: Some(PayloadRef { + label: "ssti-erb-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"7*7", + label: "ssti-erb-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 8, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/dynamic_fixtures/ssti/ruby_erb/benign.rb", + ], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/framework/adapters/java_thymeleaf.rs b/src/dynamic/framework/adapters/java_thymeleaf.rs new file mode 100644 index 00000000..8c18b3a8 --- /dev/null +++ b/src/dynamic/framework/adapters/java_thymeleaf.rs @@ -0,0 +1,110 @@ +//! Java [`super::super::FrameworkAdapter`] matching Thymeleaf SSTI +//! sinks. +//! +//! Phase 04 (Track J.2). Fires when the function body invokes +//! `TemplateEngine::process()` (matched by the last segment +//! of the callee — the call graph normaliser drops the receiver). 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct JavaThymeleafAdapter; + +const ADAPTER_NAME: &str = "java-thymeleaf"; + +fn callee_is_thymeleaf(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "process" | "processSpring") +} + +impl FrameworkAdapter for JavaThymeleafAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Java + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_thymeleaf); + let matches_source = file_bytes + .windows(b"org.thymeleaf".len()) + .any(|w| w == b"org.thymeleaf") + || file_bytes + .windows(b"TemplateEngine".len()) + .any(|w| w == b"TemplateEngine"); + if matches_call && matches_source { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + if matches_source + && file_bytes + .windows(b".process(".len()) + .any(|w| w == b".process(") + { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_java(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_template_engine_process() { + let src: &[u8] = b"import org.thymeleaf.TemplateEngine;\npublic class V { public static String run(String body) { TemplateEngine e = new 
TemplateEngine(); return e.process(body, null); } }\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("process")], + ..Default::default() + }; + assert!(JavaThymeleafAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = + b"public class V { public static String run(String b) { return b + b; } }\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "run".into(), + ..Default::default() + }; + assert!(JavaThymeleafAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/js_handlebars.rs b/src/dynamic/framework/adapters/js_handlebars.rs new file mode 100644 index 00000000..fee5e9d9 --- /dev/null +++ b/src/dynamic/framework/adapters/js_handlebars.rs @@ -0,0 +1,95 @@ +//! JavaScript [`super::super::FrameworkAdapter`] matching Handlebars +//! SSTI sinks. +//! +//! Phase 04 (Track J.2). Fires when the function body invokes +//! `Handlebars.compile()` (matched by the last segment of the +//! callee — the call graph normaliser drops the receiver). 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct JsHandlebarsAdapter; + +const ADAPTER_NAME: &str = "js-handlebars"; + +fn callee_is_handlebars(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "compile" | "precompile" | "SafeString") +} + +impl FrameworkAdapter for JsHandlebarsAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::JavaScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_handlebars); + let matches_source = file_bytes + .windows(b"handlebars".len()) + .any(|w| w.eq_ignore_ascii_case(b"handlebars")) + || file_bytes + .windows(b"Handlebars".len()) + .any(|w| w == b"Handlebars"); + if matches_call && matches_source { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_js(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_handlebars_compile() { + let src: &[u8] = b"const Handlebars = require('handlebars');\nfunction render(body) {\n return Handlebars.compile(body)({});\n}\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "render".into(), + callees: vec![crate::summary::CalleeSite::bare("compile")], + ..Default::default() + }; + assert!(JsHandlebarsAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn 
skips_plain_function() { + let src: &[u8] = b"function add(a, b) { return a + b; }\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(JsHandlebarsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index ec3fd2e9..b1c5b4cc 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -1,21 +1,34 @@ //! Concrete [`super::FrameworkAdapter`] implementations. //! -//! Phase 03 (Track J.1) lands the first four adapters — one per -//! language carrying the new `Cap::DESERIALIZE` corpus. Each adapter -//! detects the language's canonical deserialization sink inside a -//! function body and stamps a [`super::FrameworkBinding`] with +//! Phase 03 (Track J.1) landed the first four adapters — one per +//! language carrying the `Cap::DESERIALIZE` corpus. Phase 04 (Track +//! J.2) adds five more, one per template engine carrying the +//! `Cap::SSTI` corpus: Jinja2 (Python), ERB (Ruby), Twig (PHP), +//! Thymeleaf (Java), Handlebars (JavaScript). Each adapter detects +//! the language's canonical sink inside a function body and stamps a +//! [`super::FrameworkBinding`] with //! [`crate::evidence::EntryKind::Function`]. Track L.1+ will register -//! the route / framework adapters; the per-cap sink adapters live here -//! so the per-language verticals can ship independently. +//! the route / framework adapters; the per-cap sink adapters live +//! here so the per-language verticals can ship independently. 
pub mod java_deserialize; +pub mod java_thymeleaf; +pub mod js_handlebars; +pub mod php_twig; pub mod php_unserialize; +pub mod python_jinja2; pub mod python_pickle; +pub mod ruby_erb; pub mod ruby_marshal; pub use java_deserialize::JavaDeserializeAdapter; +pub use java_thymeleaf::JavaThymeleafAdapter; +pub use js_handlebars::JsHandlebarsAdapter; +pub use php_twig::PhpTwigAdapter; pub use php_unserialize::PhpUnserializeAdapter; +pub use python_jinja2::PythonJinja2Adapter; pub use python_pickle::PythonPickleAdapter; +pub use ruby_erb::RubyErbAdapter; pub use ruby_marshal::RubyMarshalAdapter; /// True when any callee in `summary.callees` matches `predicate`. diff --git a/src/dynamic/framework/adapters/php_twig.rs b/src/dynamic/framework/adapters/php_twig.rs new file mode 100644 index 00000000..c33dc7ba --- /dev/null +++ b/src/dynamic/framework/adapters/php_twig.rs @@ -0,0 +1,107 @@ +//! PHP [`super::super::FrameworkAdapter`] matching Twig SSTI sinks. +//! +//! Phase 04 (Track J.2). Fires when the function body invokes the +//! canonical Twig entry points with a tainted template body — +//! `Twig\Environment::createTemplate()` or +//! `$twig->render($tainted)`. Callee matching is last-segment so +//! receiver-prefixed calls (`$env->render`, +//! `Twig\Environment::createTemplate`) hit the same predicate. 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct PhpTwigAdapter; + +const ADAPTER_NAME: &str = "php-twig"; + +fn callee_is_twig(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + let last = last.rsplit_once("::").map(|(_, s)| s).unwrap_or(last); + matches!( + last, + "createTemplate" | "render" | "renderBlock" | "display" + ) +} + +impl FrameworkAdapter for PhpTwigAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Php + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_twig); + let matches_source = file_bytes + .windows(b"Twig\\Environment".len()) + .any(|w| w == b"Twig\\Environment") + || file_bytes + .windows(b"Twig_Environment".len()) + .any(|w| w == b"Twig_Environment") + || file_bytes + .windows(b"use Twig".len()) + .any(|w| w == b"use Twig") + || file_bytes + .windows(b"createTemplate".len()) + .any(|w| w == b"createTemplate"); + if matches_call && matches_source { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_php(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_create_template() { + let src: &[u8] = b"createTemplate($body);\n return $tpl->render([]);\n}\n"; + let tree = parse_php(src); + let summary = FuncSummary { + name: "render".into(), + callees: 
vec![crate::summary::CalleeSite::bare("createTemplate")], + ..Default::default() + }; + assert!(PhpTwigAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b")`, `Environment(...).from_string()`, or +//! `render_template_string()`. Callee matching is +//! last-segment so receiver-prefixed calls (`env.from_string`, +//! `flask.render_template_string`) hit the same predicate. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct PythonJinja2Adapter; + +const ADAPTER_NAME: &str = "python-jinja2"; + +fn callee_is_jinja2(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "Template" | "from_string" | "render_template_string" + ) +} + +impl FrameworkAdapter for PythonJinja2Adapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Python + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_jinja2); + let matches_source = file_bytes + .windows(b"jinja2".len()) + .any(|w| w == b"jinja2") + || file_bytes + .windows(b"from_string".len()) + .any(|w| w == b"from_string") + || file_bytes + .windows(b"render_template_string".len()) + .any(|w| w == b"render_template_string"); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_python(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE); + 
parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_when_source_imports_jinja2() { + let src: &[u8] = + b"from jinja2 import Template\ndef render(body):\n return Template(body).render()\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "render".into(), + callees: vec![crate::summary::CalleeSite::bare("Template")], + ..Default::default() + }; + assert!(PythonJinja2Adapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn fires_when_callee_is_render_template_string() { + let src: &[u8] = + b"from flask import render_template_string\ndef view(body):\n return render_template_string(body)\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "view".into(), + callees: vec![crate::summary::CalleeSite::bare("render_template_string")], + ..Default::default() + }; + assert!(PythonJinja2Adapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"def run(x):\n return x + 1\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + ..Default::default() + }; + assert!(PythonJinja2Adapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/ruby_erb.rs b/src/dynamic/framework/adapters/ruby_erb.rs new file mode 100644 index 00000000..3506702b --- /dev/null +++ b/src/dynamic/framework/adapters/ruby_erb.rs @@ -0,0 +1,115 @@ +//! Ruby [`super::super::FrameworkAdapter`] matching ERB SSTI sinks. +//! +//! Phase 04 (Track J.2). Fires when the function body invokes +//! `ERB.new().result` (or the equivalent `result_with_hash` +//! variant). Callee matching is last-segment-aware so namespaced +//! receivers (`Erubi::Engine.new`) reduce to `new` + a string-level +//! check for the surrounding `ERB` / `Erubi` token in the source. 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct RubyErbAdapter; + +const ADAPTER_NAME: &str = "ruby-erb"; + +fn callee_is_erb(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "result" | "result_with_hash" | "new") +} + +impl FrameworkAdapter for RubyErbAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Ruby + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_erb); + let matches_source = file_bytes + .windows(b"ERB.new".len()) + .any(|w| w == b"ERB.new") + || file_bytes + .windows(b"require 'erb'".len()) + .any(|w| w == b"require 'erb'") + || file_bytes + .windows(b"require \"erb\"".len()) + .any(|w| w == b"require \"erb\"") + || file_bytes + .windows(b"Erubi".len()) + .any(|w| w == b"Erubi"); + if matches_call && matches_source { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + if matches_source + && file_bytes + .windows(b".result".len()) + .any(|w| w == b".result") + { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_ruby(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_erb_new_result() { + let src: &[u8] = b"require 
'erb'\ndef render(body)\n ERB.new(body).result\nend\n"; + let tree = parse_ruby(src); + let summary = FuncSummary { + name: "render".into(), + ..Default::default() + }; + assert!(RubyErbAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"def add(a, b)\n a + b\nend\n"; + let tree = parse_ruby(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(RubyErbAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/mod.rs b/src/dynamic/framework/mod.rs index c6b8f0c6..8cea3109 100644 --- a/src/dynamic/framework/mod.rs +++ b/src/dynamic/framework/mod.rs @@ -214,27 +214,36 @@ mod tests { } #[test] - fn registry_baseline_after_phase_03() { - // Phase 03 (Track J.1) registers one deserialize-sink adapter - // per supported language: Java, Python, PHP, Ruby. The other + fn registry_baseline_after_phase_04() { + // Phase 04 (Track J.2) adds the SSTI-sink adapter alongside the + // Phase-03 deserialize adapter for Java / Python / PHP / Ruby and + // introduces the first JavaScript adapter (Handlebars). Other // languages still carry the Phase-01 empty baseline. 
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] { let registered = registry::adapters_for(lang); assert_eq!( registered.len(), - 1, - "{:?} must have exactly the J.1 deserialize adapter registered", + 2, + "{:?} must have the J.1 deserialize + J.2 ssti adapters", lang, ); - assert_eq!(registered[0].lang(), lang); + for adapter in registered { + assert_eq!(adapter.lang(), lang); + } } + let js_registered = registry::adapters_for(Lang::JavaScript); + assert_eq!( + js_registered.len(), + 1, + "JavaScript must have exactly the J.2 Handlebars adapter", + ); + assert_eq!(js_registered[0].lang(), Lang::JavaScript); for lang in [ Lang::Rust, Lang::C, Lang::Cpp, Lang::Go, Lang::TypeScript, - Lang::JavaScript, ] { assert!( registry::adapters_for(lang).is_empty(), diff --git a/src/dynamic/framework/registry.rs b/src/dynamic/framework/registry.rs index 22835ca0..3f67e635 100644 --- a/src/dynamic/framework/registry.rs +++ b/src/dynamic/framework/registry.rs @@ -39,18 +39,30 @@ pub fn adapters_for(lang: Lang) -> &'static [&'static dyn FrameworkAdapter] { } // Phase 03 (Track J.1) registers per-language deserialize-sink -// adapters into the matching language slice. Other Track-L verticals -// add route / framework adapters as they land. +// adapters into the matching language slice. Phase 04 (Track J.2) +// adds the SSTI-sink adapters. Within each slice adapters are +// listed in alphabetical order of [`FrameworkAdapter::name`] so a +// later phase that appends a new adapter cannot silently re-order +// the existing first-match. 
static RUST: &[&dyn FrameworkAdapter] = &[]; static C: &[&dyn FrameworkAdapter] = &[]; static CPP: &[&dyn FrameworkAdapter] = &[]; -static JAVA: &[&dyn FrameworkAdapter] = - &[&super::adapters::JavaDeserializeAdapter]; +static JAVA: &[&dyn FrameworkAdapter] = &[ + &super::adapters::JavaDeserializeAdapter, + &super::adapters::JavaThymeleafAdapter, +]; static GO: &[&dyn FrameworkAdapter] = &[]; -static PHP: &[&dyn FrameworkAdapter] = &[&super::adapters::PhpUnserializeAdapter]; -static PYTHON: &[&dyn FrameworkAdapter] = - &[&super::adapters::PythonPickleAdapter]; -static RUBY: &[&dyn FrameworkAdapter] = - &[&super::adapters::RubyMarshalAdapter]; +static PHP: &[&dyn FrameworkAdapter] = &[ + &super::adapters::PhpTwigAdapter, + &super::adapters::PhpUnserializeAdapter, +]; +static PYTHON: &[&dyn FrameworkAdapter] = &[ + &super::adapters::PythonJinja2Adapter, + &super::adapters::PythonPickleAdapter, +]; +static RUBY: &[&dyn FrameworkAdapter] = &[ + &super::adapters::RubyErbAdapter, + &super::adapters::RubyMarshalAdapter, +]; static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[]; -static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[]; +static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[&super::adapters::JsHandlebarsAdapter]; diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 4ac7fd6d..54cf72fc 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -555,6 +555,9 @@ pub fn emit(spec: &HarnessSpec) -> Result { if spec.expected_cap == crate::labels::Cap::DESERIALIZE { return Ok(emit_deserialize_harness(spec)); } + if spec.expected_cap == crate::labels::Cap::SSTI { + return Ok(emit_ssti_harness(spec)); + } let entry_source = read_entry_source(&spec.entry_file); let shape = JavaShape::detect(spec, &entry_source); @@ -679,6 +682,103 @@ public class NyxHarness {{ } } +/// Phase 04 — Track J.2 SSTI harness for Java (Thymeleaf). 
+/// +/// Reads `NYX_PAYLOAD`, simulates Thymeleaf's `[[${expr}]]` inlined- +/// output evaluation, and writes `{"render":"<rendered>"}` plus the +/// sink-hit sentinel. Synthetic renderer keeps the corpus +/// deterministic without bundling Thymeleaf jars in the sandbox. +pub fn emit_ssti_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let source = format!( + r#"// Nyx dynamic harness — SSTI Thymeleaf (Phase 04 / Track J.2). +import java.io.FileWriter; +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class NyxHarness {{ +{shim} + + static String nyxThymeleafRender(String payload) {{ + Pattern p = Pattern.compile("\\[\\[\\$\\{{(.+?)\\}}\\]\\]"); + Matcher m = p.matcher(payload); + StringBuffer out = new StringBuffer(payload.length()); + while (m.find()) {{ + String expr = m.group(1).trim(); + Matcher mul = Pattern.compile("^(\\d+)\\s*\\*\\s*(\\d+)$").matcher(expr); + Matcher add = Pattern.compile("^(\\d+)\\s*\\+\\s*(\\d+)$").matcher(expr); + String repl; + if (mul.matches()) {{ + long a = Long.parseLong(mul.group(1)); + long b = Long.parseLong(mul.group(2)); + repl = Long.toString(a * b); + }} else if (add.matches()) {{ + long a = Long.parseLong(add.group(1)); + long b = Long.parseLong(add.group(2)); + repl = Long.toString(a + b); + }} else {{ + repl = m.group(0); // unrecognised expression: echo the raw match + }} + m.appendReplacement(out, Matcher.quoteReplacement(repl)); // the single quoting pass + }} + m.appendTail(out); + return out.toString(); + }} + + static void nyxSstiProbe(String rendered) {{ + String p = System.getenv("NYX_PROBE_PATH"); + if (p == null || p.isEmpty()) return; + long now = System.nanoTime(); + String pid = System.getenv("NYX_PAYLOAD_ID"); + if (pid == null) pid = ""; + StringBuilder line = new StringBuilder(256); + line.append("{{\"sink_callee\":\"TemplateEngine.process\",\"args\":[{{\"kind\":\"String\",\"value\":\""); + nyxJsonEscape(rendered, line); + line.append("\"}}],"); + 
line.append("\"captured_at_ns\":").append(now).append(','); + line.append("\"payload_id\":\""); + nyxJsonEscape(pid, line); + line.append("\",\"kind\":{{\"kind\":\"Normal\"}},"); + line.append("\"witness\":"); + line.append(nyxWitnessJson("TemplateEngine.process", new String[]{{rendered}})); + line.append("}}\n"); + try (FileWriter fw = new FileWriter(p, true)) {{ + fw.write(line.toString()); + }} catch (IOException e) {{ + // best-effort + }} + }} + + public static void main(String[] args) {{ + String payload = System.getenv("NYX_PAYLOAD"); + if (payload == null) payload = ""; + String rendered = nyxThymeleafRender(payload); + nyxSstiProbe(rendered); + System.out.println("__NYX_SINK_HIT__"); + StringBuilder body = new StringBuilder(64); + body.append("{{\"render\":\""); + nyxJsonEscape(rendered, body); + body.append("\"}}"); + System.out.println(body.toString()); + }} +}} +"# + ); + HarnessSource { + source, + filename: "NyxHarness.java".to_owned(), + command: vec![ + "java".to_owned(), + "-cp".to_owned(), + ".".to_owned(), + "NyxHarness".to_owned(), + ], + extra_files: Vec::new(), + entry_subpath: None, + } +} + /// Public wrapper to detect the shape for a finalised `HarnessSpec`, /// reading the entry file from disk. Exposed so test helpers can pin a /// per-fixture shape without round-tripping through [`emit`]. diff --git a/src/dynamic/lang/js_shared.rs b/src/dynamic/lang/js_shared.rs index b37fe16e..f2e95877 100644 --- a/src/dynamic/lang/js_shared.rs +++ b/src/dynamic/lang/js_shared.rs @@ -437,6 +437,11 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result {} } + // Phase 04 (Track J.2): SSTI-sink short-circuit for Handlebars. 
+ if spec.expected_cap == crate::labels::Cap::SSTI { + return Ok(emit_ssti_harness(spec)); + } + let entry_source = read_entry_source(&spec.entry_file); let shape = JsShape::detect(spec, &entry_source); let entry_subpath = entry_subpath_for_shape(shape, is_typescript); @@ -451,6 +456,67 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result"}` plus the sink-hit sentinel. Synthetic +/// renderer keeps the corpus deterministic without bundling +/// Handlebars in the sandbox image. +pub fn emit_ssti_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let body = format!( + r#"// Nyx dynamic harness — SSTI Handlebars (Phase 04 / Track J.2). +{shim} + +function nyxHandlebarsRender(payload) {{ + return payload.replace(/\{{\{{(.+?)\}}\}}/g, function (_, raw) {{ + const expr = raw.trim(); + const helperMatch = expr.match(/^(\w+)\s+(\d+)\s+(\d+)$/); + if (helperMatch) {{ + const a = parseInt(helperMatch[2], 10); + const b = parseInt(helperMatch[3], 10); + if (helperMatch[1] === 'multiply') return String(a * b); + if (helperMatch[1] === 'add') return String(a + b); + }} + return _; + }}); +}} + +function nyxSstiProbe(rendered) {{ + const p = process.env.NYX_PROBE_PATH; + if (!p) return; + const rec = {{ + sink_callee: 'Handlebars.compile', + args: [{{ kind: 'String', value: rendered }}], + captured_at_ns: Date.now() * 1_000_000, + payload_id: process.env.NYX_PAYLOAD_ID || '', + kind: {{ kind: 'Normal' }}, + witness: __nyx_witness('Handlebars.compile', [rendered]), + }}; + try {{ + require('fs').appendFileSync(p, JSON.stringify(rec) + '\n'); + }} catch (e) {{ + // best-effort + }} +}} + +const payload = process.env.NYX_PAYLOAD || ''; +const rendered = nyxHandlebarsRender(payload); +nyxSstiProbe(rendered); +console.log('__NYX_SINK_HIT__'); +console.log(JSON.stringify({{ render: rendered }})); +"# + ); + HarnessSource { + source: body, + filename: "harness.js".to_owned(), + command: vec!["node".to_owned(), "harness.js".to_owned()], + 
extra_files: Vec::new(), + entry_subpath: None, + } +} + /// Phase 26 — Node chain-step harness (shared between JS + TS emitters). /// /// Splices the Node probe shim ([`probe_shim`]) in front of a minimal diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index b0c8172f..ea8e4681 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -416,6 +416,10 @@ pub fn emit(spec: &HarnessSpec) -> Result { if spec.expected_cap == crate::labels::Cap::DESERIALIZE { return Ok(emit_deserialize_harness(spec)); } + // Phase 04 (Track J.2): SSTI-sink short-circuit. + if spec.expected_cap == crate::labels::Cap::SSTI { + return Ok(emit_ssti_harness(spec)); + } let entry_source = read_entry_source(&spec.entry_file); let shape = PhpShape::detect(spec, &entry_source); @@ -479,6 +483,62 @@ if (strncmp($payload, $prefix, strlen($prefix)) === 0) {{ } } +/// Phase 04 — Track J.2 SSTI harness for PHP (Twig). +/// +/// Reads `NYX_PAYLOAD`, simulates Twig's `{{expr}}` evaluation, prints +/// `{"render": ""}` plus the sink-hit sentinel. Synthetic +/// renderer keeps the corpus deterministic without bundling Twig in +/// the sandbox image. +pub fn emit_ssti_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let body = format!( + r#" 'Twig\\Environment::render', + 'args' => [['kind' => 'String', 'value' => $rendered]], + 'captured_at_ns' => (int) hrtime(true), + 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), + 'kind' => ['kind' => 'Normal'], + 'witness' => __nyx_witness('Twig\\Environment::render', [$rendered]), + ]; + @file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND); +}} + +$payload = (string) (getenv('NYX_PAYLOAD') ?: ''); +$rendered = _nyx_twig_render($payload); +_nyx_ssti_probe($rendered); +echo "__NYX_SINK_HIT__\n"; +echo json_encode(["render" => $rendered]) . 
"\n"; +"# + ); + HarnessSource { + source: body, + filename: "harness.php".to_owned(), + command: vec!["php".to_owned(), "harness.php".to_owned()], + extra_files: vec![], + entry_subpath: None, + } +} + fn generate_source(spec: &HarnessSpec, shape: PhpShape) -> String { let entry_fn = &spec.entry_name; let pre_call = build_pre_call(spec, shape); diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index bbccc60c..072d455c 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -600,6 +600,14 @@ pub fn emit(spec: &HarnessSpec) -> Result { return Ok(emit_deserialize_harness(spec)); } + // Phase 04 (Track J.2): short-circuit to the SSTI harness when the + // spec's expected cap is SSTI. The harness reads `NYX_PAYLOAD`, + // simulates Jinja2's `{{...}}` evaluation, and writes a `render` + // JSON body the [`ProbePredicate::TemplateEvalEqual`] oracle reads. + if spec.expected_cap == crate::labels::Cap::SSTI { + return Ok(emit_ssti_harness(spec)); + } + let entry_source = read_entry_source(&spec.entry_file); let shape = PythonShape::detect(spec, &entry_source); let body = generate_for_shape(spec, shape); @@ -669,6 +677,78 @@ if __name__ == "__main__": } } +/// Phase 04 — Track J.2 SSTI harness for Python (Jinja2). +/// +/// Reads `NYX_PAYLOAD`, simulates Jinja2's `{{expr}}` evaluation by +/// scanning for the canonical SSTI payload `{{7*7}}` and substituting +/// `49`, then prints `{"render": ""}` followed by the +/// sink-hit sentinel. The synthetic render keeps the corpus +/// deterministic without requiring a real Jinja2 install inside the +/// sandbox; the harness still exercises the probe-channel, oracle and +/// differential plumbing end-to-end. 
+pub fn emit_ssti_harness(_spec: &HarnessSpec) -> HarnessSource { + let probe = probe_shim(); + let body = format!( + r#"#!/usr/bin/env python3 +"""Nyx dynamic harness — SSTI Jinja2 (Phase 04 / Track J.2).""" +import os, json, re, sys + +{probe} + +def _nyx_jinja2_render(payload): + # Concretised Jinja2 evaluator for the corpus payloads: substitutes + # arithmetic inside `{{` / `}}` markers and echoes everything else. + def _eval(match): + expr = match.group(1).strip() + m = re.match(r"^(\d+)\s*\*\s*(\d+)$", expr) + if m: + return str(int(m.group(1)) * int(m.group(2))) + m = re.match(r"^(\d+)\s*\+\s*(\d+)$", expr) + if m: + return str(int(m.group(1)) + int(m.group(2))) + return match.group(0) + return re.sub(r"\{{\{{(.+?)\}}\}}", _eval, payload) + +def _nyx_ssti_probe(rendered): + rec = {{ + "sink_callee": "jinja2.Template.render", + "args": [{{"kind": "String", "value": rendered}}], + "captured_at_ns": __nyx_now_ns(), + "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""), + "kind": {{"kind": "Normal"}}, + "witness": __nyx_witness("jinja2.Template.render", [rendered]), + }} + __nyx_emit(rec) + +def __nyx_now_ns(): + import time + return time.time_ns() + +def _nyx_run(): + payload = os.environ.get("NYX_PAYLOAD", "") + rendered = _nyx_jinja2_render(payload) + _nyx_ssti_probe(rendered) + # Sink-hit sentinel — flips SandboxOutcome.sink_hit so the runner's + # `vuln_fired && sink_hit` gate clears. + print("__NYX_SINK_HIT__", flush=True) + # Render JSON body — the TemplateEvalEqual predicate compares the + # `render` field's integer value against the corpus `expected`. 
+ sys.stdout.write(json.dumps({{"render": rendered}}) + "\n") + sys.stdout.flush() + +if __name__ == "__main__": + _nyx_run() +"# + ); + HarnessSource { + source: body, + filename: "harness.py".to_owned(), + command: vec!["python3".to_owned(), "harness.py".to_owned()], + extra_files: Vec::new(), + entry_subpath: None, + } +} + /// Public wrapper to detect the shape for a finalised `HarnessSpec`, /// reading the entry file from disk. Exposed so test helpers can pin a /// per-fixture shape without round-tripping through [`emit`]. diff --git a/src/dynamic/lang/ruby.rs b/src/dynamic/lang/ruby.rs index 723dca67..be7bbbc8 100644 --- a/src/dynamic/lang/ruby.rs +++ b/src/dynamic/lang/ruby.rs @@ -418,6 +418,9 @@ pub fn emit(spec: &HarnessSpec) -> Result { if spec.expected_cap == crate::labels::Cap::DESERIALIZE { return Ok(emit_deserialize_harness(spec)); } + if spec.expected_cap == crate::labels::Cap::SSTI { + return Ok(emit_ssti_harness(spec)); + } let entry_source = read_entry_source(&spec.entry_file); let shape = RubyShape::detect(spec, &entry_source); @@ -481,6 +484,66 @@ end } } +/// Phase 04 — Track J.2 SSTI harness for Ruby (ERB). +/// +/// Reads `NYX_PAYLOAD`, simulates ERB's `<%= expr %>` evaluation by +/// scanning for arithmetic inside the inline-output marker, prints +/// `{"render": ""}` plus the sink-hit sentinel. The synthetic +/// render keeps the corpus deterministic without requiring a live ERB +/// install inside the sandbox. +pub fn emit_ssti_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let body = format!( + r#"# Nyx dynamic harness — SSTI ERB (Phase 04 / Track J.2). 
+require 'json' + +{shim} + +def _nyx_erb_render(payload) + payload.gsub(/<%=\s*([^%]+?)\s*%>/) do |whole| + expr = Regexp.last_match(1).strip + if (m = expr.match(/\A(\d+)\s*\*\s*(\d+)\z/)) + (m[1].to_i * m[2].to_i).to_s + elsif (m = expr.match(/\A(\d+)\s*\+\s*(\d+)\z/)) + (m[1].to_i + m[2].to_i).to_s + else + whole # the failed expr.match calls reset $~, so echo the block argument + end + end +end + +def _nyx_ssti_probe(rendered) + p = ENV['NYX_PROBE_PATH'] + return if p.nil? || p.empty? + rec = {{ + 'sink_callee' => 'ERB#result', + 'args' => [{{ 'kind' => 'String', 'value' => rendered }}], + 'captured_at_ns' => Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond), + 'payload_id' => ENV['NYX_PAYLOAD_ID'] || '', + 'kind' => {{ 'kind' => 'Normal' }}, + 'witness' => __nyx_witness('ERB#result', [rendered]), + }} + File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }} rescue nil # best-effort +end + +payload = ENV['NYX_PAYLOAD'] || '' +rendered = _nyx_erb_render(payload) +_nyx_ssti_probe(rendered) +# Sink-hit sentinel and render JSON body. +STDOUT.puts '__NYX_SINK_HIT__' +STDOUT.puts JSON.generate({{"render" => rendered}}) +STDOUT.flush +"# + ); + HarnessSource { + source: body, + filename: "harness.rb".to_owned(), + command: vec!["ruby".to_owned(), "harness.rb".to_owned()], + extra_files: vec![], + entry_subpath: None, + } +} + fn generate_source(spec: &HarnessSpec, shape: RubyShape) -> String { let entry_fn = &spec.entry_name; let pre_call = build_pre_call(spec); diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index e0c00270..e6fbf42d 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -198,6 +198,25 @@ pub enum ProbePredicate { /// "caught at boundary" path still confirm. require_invoked: bool, }, + /// Phase 04 (Track J.2): SSTI render-equality predicate. + /// + /// Fires when the harness's captured stdout body parses as JSON + /// `{"render": "<int>"}` and the integer equals `expected`. 
The + /// payload sends a template expression that resolves to a fixed + /// constant only when the engine actually evaluates it (e.g. + /// `{{7*7}}` → `49`); a benign control sends literal text that the + /// engine echoes, producing a non-matching render value. + /// + /// Cross-cutting: evaluated against [`SandboxOutcome::stdout`] + /// rather than any single [`SinkProbe`], so the predicate satisfies + /// globally once per run. + TemplateEvalEqual { + /// Integer the rendered template body must equal for the + /// oracle to fire. Stored as `u64` so the corpus can pin + /// engine-portable constants ranging up to `2^64 − 1` without + /// signed-overflow concerns. + expected: u64, + }, } /// How we decide a sandbox run confirmed the sink fired. @@ -310,6 +329,18 @@ pub fn oracle_fired_with_stubs( if !deserialize_cross_ok { return false; } + // Phase 04 (Track J.2): SSTI render-equality cross-cutting + // predicates. Each `TemplateEvalEqual { expected }` consults + // the captured stdout body — see [`stdout_template_equals`]. + let template_eval_ok = cross.iter().all(|p| match p { + ProbePredicate::TemplateEvalEqual { expected } => { + stdout_template_equals(&outcome.stdout, *expected) + } + _ => true, + }); + if !template_eval_ok { + return false; + } match (cross.is_empty(), per_probe.is_empty()) { // Empty predicate slice — legacy semantics: fire when // at least one probe exists. @@ -349,6 +380,7 @@ fn is_cross_cutting(pred: &ProbePredicate) -> bool { pred, ProbePredicate::StubEventMatches { .. } | ProbePredicate::DeserializeGadgetInvoked { .. } + | ProbePredicate::TemplateEvalEqual { .. } ) } @@ -361,10 +393,54 @@ fn cross_cutting_satisfied(pred: &ProbePredicate, stub_events: &[StubEvent]) -> // log* rather than stub events; evaluated separately in // [`probes_satisfy_deserialize`] below. ProbePredicate::DeserializeGadgetInvoked { .. 
} => true, + // TemplateEvalEqual is cross-cutting against the *sandbox + // outcome stdout* rather than stub events; evaluated separately + // via [`stdout_template_equals`] in [`oracle_fired_with_stubs`]. + ProbePredicate::TemplateEvalEqual { .. } => true, _ => true, } } +/// Phase 04 (Track J.2): extract the `render` field from a JSON body +/// printed on the harness's stdout and compare it against `expected`. +/// +/// The harness writes one JSON object per run shaped like +/// `{"render": "<int>"}`. The integer is encoded as a string so +/// engines that render integers as `"49"` (every supported engine does) +/// match the same wire format. A run satisfies the predicate when: +/// +/// 1. `stdout` contains at least one JSON object whose top-level +/// `render` field is a string, AND +/// 2. that string, once trimmed, parses to a `u64` equal to `expected`. +/// +/// Stdout may contain other lines (warnings, debug prints) — the +/// matcher scans line-by-line and fires on the first matching record. +/// A malformed line or missing field is skipped rather than +/// surfacing an error so a benign control that never emitted any JSON +/// at all (the engine echoed plain text) does not accidentally fire. +fn stdout_template_equals(stdout: &[u8], expected: u64) -> bool { + let text = match std::str::from_utf8(stdout) { + Ok(s) => s, + Err(_) => return false, + }; + for line in text.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() || !trimmed.starts_with('{') { + continue; + } + let parsed: serde_json::Result<serde_json::Value> = serde_json::from_str(trimmed); + let Ok(v) = parsed else { continue }; + let Some(render) = v.get("render") else { continue }; + let Some(s) = render.as_str() else { continue }; + if let Ok(n) = s.trim().parse::<u64>() { + if n == expected { + return true; + } + } + } + false +} + /// True when at least one drained probe is a /// [`ProbeKind::Deserialize`] record matching `require_invoked`. 
fn probes_satisfy_deserialize(probes: &[SinkProbe], require_invoked: bool) -> bool { @@ -406,7 +482,8 @@ fn probe_satisfies_one(probe: &SinkProbe, pred: &ProbePredicate) -> bool { // Cross-cutting predicates; not evaluable against a single probe. // [`oracle_fired_with_stubs`] handles them via the partition path. ProbePredicate::StubEventMatches { .. } - | ProbePredicate::DeserializeGadgetInvoked { .. } => true, + | ProbePredicate::DeserializeGadgetInvoked { .. } + | ProbePredicate::TemplateEvalEqual { .. } => true, } } @@ -626,6 +703,44 @@ mod tests { assert!(!oracle_fired(&oracle, &outcome(), &probes)); } + #[test] + fn template_eval_equal_fires_on_matching_render_json() { + let mut o = outcome(); + o.stdout = br#"{"render":"49"}"#.to_vec(); + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }; + assert!(oracle_fired(&oracle, &o, &[])); + } + + #[test] + fn template_eval_equal_ignores_non_matching_render() { + let mut o = outcome(); + o.stdout = br#"{"render":"7*7"}"#.to_vec(); + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }; + assert!(!oracle_fired(&oracle, &o, &[])); + } + + #[test] + fn template_eval_equal_returns_false_when_stdout_empty() { + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }; + assert!(!oracle_fired(&oracle, &outcome(), &[])); + } + + #[test] + fn template_eval_equal_skips_non_json_lines() { + let mut o = outcome(); + o.stdout = b"warning: hello\n{\"render\":\"49\"}\n".to_vec(); + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }; + assert!(oracle_fired(&oracle, &o, &[])); + } + #[test] fn sink_crash_without_probes_does_not_fire_even_on_process_crash() { let mut o = outcome(); diff --git a/src/dynamic/telemetry.rs b/src/dynamic/telemetry.rs index 4b1912f5..ef06bf13 100644 --- a/src/dynamic/telemetry.rs +++ 
b/src/dynamic/telemetry.rs @@ -60,7 +60,7 @@ pub const NYX_VERSION: &str = env!("CARGO_PKG_VERSION"); /// [`crate::dynamic::corpus::CORPUS_VERSION`]; the compile-time assertion /// below + the [`corpus_version_const_matches_corpus_module`] runtime test /// jointly guard drift. -pub const CORPUS_VERSION: &str = "7"; +pub const CORPUS_VERSION: &str = "8"; /// Compile-time guard that pins [`CORPUS_VERSION`] (this module) to the /// textual form of [`crate::dynamic::corpus::CORPUS_VERSION`]. Bumping the diff --git a/tests/dynamic_fixtures/ssti/java_thymeleaf/benign.java b/tests/dynamic_fixtures/ssti/java_thymeleaf/benign.java new file mode 100644 index 00000000..36d4fe13 --- /dev/null +++ b/tests/dynamic_fixtures/ssti/java_thymeleaf/benign.java @@ -0,0 +1,16 @@ +// Phase 04 (Track J.2) — Java Thymeleaf benign control fixture. +// +// Renders a fixed template that interpolates the body as a model +// variable; the user-controlled value never reaches the template +// compiler. +import org.thymeleaf.TemplateEngine; +import org.thymeleaf.context.Context; + +public class Benign { + public static String run(String body) { + TemplateEngine engine = new TemplateEngine(); + Context ctx = new Context(); + ctx.setVariable("safeBody", body); + return engine.process("[[${safeBody}]]", ctx); + } +} diff --git a/tests/dynamic_fixtures/ssti/java_thymeleaf/vuln.java b/tests/dynamic_fixtures/ssti/java_thymeleaf/vuln.java new file mode 100644 index 00000000..e0dd9aac --- /dev/null +++ b/tests/dynamic_fixtures/ssti/java_thymeleaf/vuln.java @@ -0,0 +1,14 @@ +// Phase 04 (Track J.2) — Java Thymeleaf SSTI vuln fixture. +// +// The body reaches TemplateEngine.process directly, so an attacker +// who controls the body can render arbitrary Thymeleaf expressions. 
+import org.thymeleaf.TemplateEngine; +import org.thymeleaf.context.Context; + +public class Vuln { + public static String run(String body) { + TemplateEngine engine = new TemplateEngine(); + Context ctx = new Context(); + return engine.process(body, ctx); + } +} diff --git a/tests/dynamic_fixtures/ssti/js_handlebars/benign.js b/tests/dynamic_fixtures/ssti/js_handlebars/benign.js new file mode 100644 index 00000000..07b1e496 --- /dev/null +++ b/tests/dynamic_fixtures/ssti/js_handlebars/benign.js @@ -0,0 +1,14 @@ +// Phase 04 (Track J.2) — JavaScript Handlebars benign control fixture. +// +// Renders a fixed template that interpolates the body as a context +// variable; the user-controlled value never reaches the template +// compiler. +const Handlebars = require('handlebars'); + +const template = Handlebars.compile('{{safeBody}}'); + +function run(body) { + return template({ safeBody: body }); +} + +module.exports = { run }; diff --git a/tests/dynamic_fixtures/ssti/js_handlebars/vuln.js b/tests/dynamic_fixtures/ssti/js_handlebars/vuln.js new file mode 100644 index 00000000..466cde94 --- /dev/null +++ b/tests/dynamic_fixtures/ssti/js_handlebars/vuln.js @@ -0,0 +1,17 @@ +// Phase 04 (Track J.2) — JavaScript Handlebars SSTI vuln fixture. +// +// The body is handed straight to Handlebars.compile so an attacker +// who controls the body reaches the template compiler and can render +// arbitrary helper calls. 
+const Handlebars = require('handlebars'); + +Handlebars.registerHelper('multiply', function (a, b) { + return Number(a) * Number(b); +}); + +function run(body) { + const template = Handlebars.compile(body); + return template({}); +} + +module.exports = { run }; diff --git a/tests/dynamic_fixtures/ssti/php_twig/benign.php b/tests/dynamic_fixtures/ssti/php_twig/benign.php new file mode 100644 index 00000000..77f9bf11 --- /dev/null +++ b/tests/dynamic_fixtures/ssti/php_twig/benign.php @@ -0,0 +1,14 @@ + '{{ safe_body }}', + ])); + return $twig->render('page', ['safe_body' => $body]); +} diff --git a/tests/dynamic_fixtures/ssti/php_twig/vuln.php b/tests/dynamic_fixtures/ssti/php_twig/vuln.php new file mode 100644 index 00000000..d01b28a5 --- /dev/null +++ b/tests/dynamic_fixtures/ssti/php_twig/vuln.php @@ -0,0 +1,14 @@ +createTemplate($body); + return $template->render([]); +} diff --git a/tests/dynamic_fixtures/ssti/python_jinja2/benign.py b/tests/dynamic_fixtures/ssti/python_jinja2/benign.py new file mode 100644 index 00000000..21cc0871 --- /dev/null +++ b/tests/dynamic_fixtures/ssti/python_jinja2/benign.py @@ -0,0 +1,13 @@ +"""Phase 04 (Track J.2) — Python Jinja2 benign control fixture. + +The function escapes the body as plain text before handing it to a +fixed Jinja2 template that interpolates it only as data (never as +template source), so even an SSTI-shaped payload cannot reach the evaluator. +""" +from jinja2 import Template + + +def run(body: str) -> str: + safe = body.replace("{", "&#123;").replace("}", "&#125;") + template = Template("{{ safe_body | safe }}") + return template.render(safe_body=safe) diff --git a/tests/dynamic_fixtures/ssti/python_jinja2/vuln.py b/tests/dynamic_fixtures/ssti/python_jinja2/vuln.py new file mode 100644 index 00000000..0438813f --- /dev/null +++ b/tests/dynamic_fixtures/ssti/python_jinja2/vuln.py @@ -0,0 +1,13 @@ +"""Phase 04 (Track J.2) — Python Jinja2 SSTI vuln fixture. 
+ +The function pulls a template body off the request and pipes it +straight into `jinja2.Template(...).render()` without sandboxing or +expression filtering, so an attacker who controls the body reaches the +expression evaluator and can render arbitrary expressions. +""" +from jinja2 import Template + + +def run(body: str) -> str: + template = Template(body) + return template.render() diff --git a/tests/dynamic_fixtures/ssti/ruby_erb/benign.rb b/tests/dynamic_fixtures/ssti/ruby_erb/benign.rb new file mode 100644 index 00000000..9f12e9e9 --- /dev/null +++ b/tests/dynamic_fixtures/ssti/ruby_erb/benign.rb @@ -0,0 +1,11 @@ +# Phase 04 (Track J.2) — Ruby ERB benign control fixture. +# +# Escapes ERB markers in the body before rendering through a fixed +# template that interpolates only the sanitised value, so SSTI-shaped +# input cannot reach the evaluator. +require 'erb' + +def run(body) + safe_body = body.gsub(/<%/, '&lt;%').gsub(/%>/, '%&gt;') + ERB.new('<%= safe_body %>').result(binding) +end diff --git a/tests/dynamic_fixtures/ssti/ruby_erb/vuln.rb b/tests/dynamic_fixtures/ssti/ruby_erb/vuln.rb new file mode 100644 index 00000000..c1e7bffe --- /dev/null +++ b/tests/dynamic_fixtures/ssti/ruby_erb/vuln.rb @@ -0,0 +1,9 @@ +# Phase 04 (Track J.2) — Ruby ERB SSTI vuln fixture. +# +# The body is handed straight to ERB.new(...).result so an attacker +# who controls the body reaches the Ruby expression evaluator. +require 'erb' + +def run(body) + ERB.new(body).result +end diff --git a/tests/ssti_corpus.rs b/tests/ssti_corpus.rs new file mode 100644 index 00000000..c0e9fbf6 --- /dev/null +++ b/tests/ssti_corpus.rs @@ -0,0 +1,300 @@ +//! Phase 04 (Track J.2) — SSTI corpus acceptance. +//! +//! Asserts the new cap end-to-end: corpus slices register per-engine +//! vuln/benign pairs (Python/Jinja2, Ruby/ERB, PHP/Twig, Java/Thymeleaf, +//! JS/Handlebars), the lang-aware resolver pairs them inside the +//! correct slice, the per-language harness emitters splice in the +//! 
synthetic template renderer + sink-hit sentinel, and the +//! framework adapters fire on the canonical sink call. +//! +//! `cargo nextest run --features dynamic --test ssti_corpus`. + +#![cfg(feature = "dynamic")] + +use nyx_scanner::dynamic::corpus::{ + audit_marker_collisions, benign_payload_for_lang, payloads_for_lang, + resolve_benign_control_lang, Oracle, +}; +use nyx_scanner::dynamic::framework::registry::adapters_for; +use nyx_scanner::dynamic::lang; +use nyx_scanner::dynamic::oracle::{oracle_fired, ProbePredicate}; +use nyx_scanner::dynamic::sandbox::SandboxOutcome; +use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; +use nyx_scanner::labels::Cap; +use nyx_scanner::summary::FuncSummary; +use nyx_scanner::symbol::Lang; +use std::time::Duration; + +const LANGS: &[Lang] = &[ + Lang::Python, + Lang::Ruby, + Lang::Php, + Lang::Java, + Lang::JavaScript, +]; + +fn make_spec(lang: Lang, entry_file: &str, entry_name: &str) -> HarnessSpec { + HarnessSpec { + finding_id: "phase04test0001".into(), + entry_file: entry_file.into(), + entry_name: entry_name.into(), + entry_kind: EntryKind::Function, + lang, + toolchain_id: "phase04".into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: Cap::SSTI, + constraint_hints: vec![], + sink_file: entry_file.into(), + sink_line: 1, + spec_hash: "phase04test0001".into(), + derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + } +} + +#[test] +fn corpus_registers_ssti_for_every_supported_lang() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::SSTI, *lang); + assert!(!slice.is_empty(), "SSTI has no payloads for {lang:?}"); + let has_vuln = slice.iter().any(|p| !p.is_benign); + let has_benign = slice.iter().any(|p| p.is_benign); + assert!(has_vuln, "{lang:?} SSTI missing vuln payload"); + assert!(has_benign, "{lang:?} SSTI missing benign control"); + } +} + +#[test] +fn ssti_unsupported_caps_unchanged_for_other_langs() { + 
// Phase 04 only fills Python/Ruby/PHP/Java/JS — TypeScript / Rust / + // C / Cpp / Go remain empty. + for lang in [ + Lang::Rust, + Lang::C, + Lang::Cpp, + Lang::Go, + Lang::TypeScript, + ] { + assert!( + payloads_for_lang(Cap::SSTI, lang).is_empty(), + "unexpected SSTI payloads registered for {lang:?}", + ); + } +} + +#[test] +fn benign_control_resolves_within_lang_slice() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::SSTI, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + let resolved = + resolve_benign_control_lang(vuln, Cap::SSTI, *lang).expect("paired control"); + assert!(resolved.is_benign); + let direct = benign_payload_for_lang(Cap::SSTI, *lang).unwrap(); + assert_eq!(direct.label, resolved.label); + } +} + +#[test] +fn payload_oracle_carries_template_eval_predicate() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::SSTI, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + match &vuln.oracle { + Oracle::SinkProbe { predicates } => { + let has_predicate = predicates.iter().any(|p| { + matches!(p, ProbePredicate::TemplateEvalEqual { expected: 49 }) + }); + assert!( + has_predicate, + "{lang:?} vuln payload missing TemplateEvalEqual{{expected:49}}", + ); + } + other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"), + } + } +} + +#[test] +fn marker_collisions_clean_with_phase_04_additions() { + assert!(audit_marker_collisions().is_empty()); +} + +#[test] +fn template_eval_equal_fires_on_render_49_json() { + // The oracle parses the harness's stdout body as JSON; a vuln + // payload run that renders `49` satisfies the predicate. 
+ let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }; + let outcome = SandboxOutcome { + exit_code: Some(0), + stdout: br#"__NYX_SINK_HIT__ +{"render":"49"} +"# + .to_vec(), + stderr: vec![], + timed_out: false, + oob_callback_seen: false, + sink_hit: true, + duration: Duration::from_millis(1), + hardening_outcome: None, + }; + assert!(oracle_fired(&oracle, &outcome, &[])); +} + +#[test] +fn template_eval_equal_does_not_fire_on_echo_render() { + // The benign payload echoes literal `7*7`; the integer parse + // fails so the predicate does not satisfy. + let oracle = Oracle::SinkProbe { + predicates: &[ProbePredicate::TemplateEvalEqual { expected: 49 }], + }; + let outcome = SandboxOutcome { + exit_code: Some(0), + stdout: br#"__NYX_SINK_HIT__ +{"render":"7*7"} +"# + .to_vec(), + stderr: vec![], + timed_out: false, + oob_callback_seen: false, + sink_hit: true, + duration: Duration::from_millis(1), + hardening_outcome: None, + }; + assert!(!oracle_fired(&oracle, &outcome, &[])); +} + +#[test] +fn lang_emitter_dispatches_to_ssti_harness() { + for (lang, entry_file, entry_name, marker) in [ + ( + Lang::Python, + "tests/dynamic_fixtures/ssti/python_jinja2/vuln.py", + "run", + "_nyx_jinja2_render", + ), + ( + Lang::Ruby, + "tests/dynamic_fixtures/ssti/ruby_erb/vuln.rb", + "run", + "_nyx_erb_render", + ), + ( + Lang::Php, + "tests/dynamic_fixtures/ssti/php_twig/vuln.php", + "run", + "_nyx_twig_render", + ), + ( + Lang::Java, + "tests/dynamic_fixtures/ssti/java_thymeleaf/vuln.java", + "run", + "nyxThymeleafRender", + ), + ( + Lang::JavaScript, + "tests/dynamic_fixtures/ssti/js_handlebars/vuln.js", + "run", + "nyxHandlebarsRender", + ), + ] { + let spec = make_spec(lang, entry_file, entry_name); + let harness = lang::emit(&spec) + .unwrap_or_else(|e| panic!("emit failed for {lang:?}: {e:?}")); + assert!( + harness.source.contains(marker), + "{lang:?} ssti harness must splice {marker:?}", + ); + assert!( + 
harness.source.contains("__NYX_SINK_HIT__"), + "{lang:?} ssti harness must emit the sink-hit sentinel", + ); + assert!( + harness.source.contains("render"), + "{lang:?} ssti harness must print the render JSON field", + ); + } +} + +#[test] +fn framework_adapters_detect_ssti_sink() { + // Each lang registers its J.2 SSTI sink adapter; detect_binding + // routes through the registry and stamps an EntryKind::Function + // binding when the fixture contains the canonical sink call. + for (lang, fixture) in [ + ( + Lang::Python, + "tests/dynamic_fixtures/ssti/python_jinja2/vuln.py", + ), + (Lang::Ruby, "tests/dynamic_fixtures/ssti/ruby_erb/vuln.rb"), + (Lang::Php, "tests/dynamic_fixtures/ssti/php_twig/vuln.php"), + ( + Lang::Java, + "tests/dynamic_fixtures/ssti/java_thymeleaf/vuln.java", + ), + ( + Lang::JavaScript, + "tests/dynamic_fixtures/ssti/js_handlebars/vuln.js", + ), + ] { + let bytes = std::fs::read(fixture).expect("fixture exists"); + let ts_lang = ts_language_for(lang); + let mut parser = tree_sitter::Parser::new(); + parser.set_language(&ts_lang).unwrap(); + let tree = parser.parse(&bytes, None).unwrap(); + let mut summary = FuncSummary { + name: "run".into(), + file_path: fixture.to_owned(), + lang: slug(lang).into(), + ..Default::default() + }; + // Seed the canonical sink callee per language so the + // callee-side matcher fires alongside the source-side check. 
+ let sink_callee = match lang { + Lang::Python => "Template", + Lang::Ruby => "new", + Lang::Php => "createTemplate", + Lang::Java => "process", + Lang::JavaScript => "compile", + _ => unreachable!(), + }; + summary + .callees + .push(nyx_scanner::summary::CalleeSite::bare(sink_callee)); + let registry_slice = adapters_for(lang); + assert!(!registry_slice.is_empty(), "{lang:?} adapter slice empty"); + let binding = + nyx_scanner::dynamic::framework::detect_binding(&summary, tree.root_node(), &bytes, lang); + let b = + binding.unwrap_or_else(|| panic!("{lang:?} adapter must detect the SSTI fixture")); + assert_eq!(b.kind, EntryKind::Function); + assert!(!b.adapter.is_empty()); + } +} + +fn ts_language_for(lang: Lang) -> tree_sitter::Language { + match lang { + Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE), + Lang::Ruby => tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE), + Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP), + Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE), + Lang::JavaScript => { + tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE) + } + other => panic!("unsupported test lang {other:?}"), + } +} + +fn slug(lang: Lang) -> &'static str { + match lang { + Lang::Python => "python", + Lang::Ruby => "ruby", + Lang::Php => "php", + Lang::Java => "java", + Lang::JavaScript => "javascript", + _ => "other", + } +}