[pitboss] phase 04: Track J.2 + Track L.2 — SSTI corpus + Jinja2 / ERB / Twig / Thymeleaf / Handlebars adapters

This commit is contained in:
pitboss 2026-05-17 18:51:13 -05:00
parent b5e6dddf2c
commit 8583b29796
34 changed files with 1868 additions and 29 deletions

View file

@ -0,0 +1,110 @@
//! Java [`super::super::FrameworkAdapter`] matching Thymeleaf SSTI
//! sinks.
//!
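//! Phase 04 (Track J.2). Fires when the function body invokes
//! `TemplateEngine::process(<tainted>)` (matched by the last segment
//! of the callee — the call graph normaliser drops the receiver) and
//! the file mentions `org.thymeleaf` or `TemplateEngine`. As a
//! fallback it also fires when such a file contains the text
//! `.process(` anywhere, even if the summary lists no matching callee.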
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless unit type that implements [`FrameworkAdapter`] for the
/// Thymeleaf SSTI sink in Java sources.
pub struct JavaThymeleafAdapter;
/// Identifier returned by [`FrameworkAdapter::name`] and copied into the
/// `adapter` field of every binding this adapter produces.
const ADAPTER_NAME: &str = "java-thymeleaf";
/// Returns `true` when the final `.`-separated segment of `name` is one
/// of the template-processing method names this adapter looks for
/// (`process` or `processSpring`).
///
/// A name without any `.` is compared as a whole.
fn callee_is_thymeleaf(name: &str) -> bool {
    // `rsplit` always yields at least one item, so the fallback is only
    // there to keep the expression total.
    let method = name.rsplit('.').next().unwrap_or(name);
    method == "process" || method == "processSpring"
}
impl FrameworkAdapter for JavaThymeleafAdapter {
    /// Stable adapter identifier (`"java-thymeleaf"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter is registered for Java sources.
    fn lang(&self) -> Lang {
        Lang::Java
    }

    /// Produces a function-level binding when the file looks like it uses
    /// Thymeleaf and a `process`-style call is present.
    ///
    /// The file must contain `org.thymeleaf` or `TemplateEngine`. Given
    /// that, the adapter fires if either the summary lists a callee
    /// accepted by [`callee_is_thymeleaf`] or the raw file text contains
    /// `.process(`. The AST argument is not consulted.
    ///
    /// Returns `None` when neither condition holds.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        // Byte-level substring test; every needle passed below is non-empty.
        fn has(haystack: &[u8], needle: &[u8]) -> bool {
            haystack.windows(needle.len()).any(|chunk| chunk == needle)
        }

        let callee_hit = super::any_callee_matches(summary, callee_is_thymeleaf);
        let mentions_thymeleaf =
            has(file_bytes, b"org.thymeleaf") || has(file_bytes, b"TemplateEngine");
        if !mentions_thymeleaf {
            return None;
        }
        // NOTE(review): the textual fallback scans all of `file_bytes`, not
        // just this function's body — confirm that breadth is intended.
        if !(callee_hit || has(file_bytes, b".process(")) {
            return None;
        }
        Some(FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Java grammar.
    fn parse_java(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
        let mut java_parser = tree_sitter::Parser::new();
        java_parser.set_language(&grammar).unwrap();
        java_parser.parse(src, None).unwrap()
    }

    /// A Thymeleaf import plus a `process` callee must yield a binding.
    #[test]
    fn fires_on_template_engine_process() {
        let src: &[u8] = b"import org.thymeleaf.TemplateEngine;\npublic class V { public static String run(String body) { TemplateEngine e = new TemplateEngine(); return e.process(body, null); } }\n";
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("process")],
            ..Default::default()
        };
        let tree = parse_java(src);
        let binding = JavaThymeleafAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A file with no Thymeleaf markers must not yield a binding.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] =
            b"public class V { public static String run(String b) { return b + b; } }\n";
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };
        let tree = parse_java(src);
        let binding = JavaThymeleafAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}

View file

@ -0,0 +1,95 @@
//! JavaScript [`super::super::FrameworkAdapter`] matching Handlebars
//! SSTI sinks.
//!
//! Phase 04 (Track J.2). Fires when the function body invokes
//! `Handlebars.compile(<tainted>)` (matched by the last segment of the
//! callee — the call graph normaliser drops the receiver).
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless unit type that implements [`FrameworkAdapter`] for the
/// Handlebars SSTI sink in JavaScript sources.
pub struct JsHandlebarsAdapter;
/// Identifier returned by [`FrameworkAdapter::name`] and copied into the
/// `adapter` field of every binding this adapter produces.
const ADAPTER_NAME: &str = "js-handlebars";
/// Returns `true` when the final `.`-separated segment of `name` is
/// `compile`, `precompile`, or `SafeString`.
///
/// A name without any `.` is compared as a whole.
fn callee_is_handlebars(name: &str) -> bool {
    // `.` is a single byte, so `dot + 1` is always a valid char boundary.
    let tail = match name.rfind('.') {
        Some(dot) => &name[dot + 1..],
        None => name,
    };
    ["compile", "precompile", "SafeString"].contains(&tail)
}
impl FrameworkAdapter for JsHandlebarsAdapter {
    /// Stable adapter identifier (`"js-handlebars"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter is registered for JavaScript sources.
    fn lang(&self) -> Lang {
        Lang::JavaScript
    }

    /// Produces a function-level binding when the summary lists a callee
    /// accepted by [`callee_is_handlebars`] and the file text mentions
    /// `handlebars` in any ASCII letter case.
    ///
    /// The AST argument is not consulted. Returns `None` otherwise.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        let matches_call = super::any_callee_matches(summary, callee_is_handlebars);
        // One case-insensitive scan is enough: it already accepts the
        // exact-case `Handlebars` spelling, so a second exact-match pass
        // over the file could never change the outcome.
        let matches_source = file_bytes
            .windows(b"handlebars".len())
            .any(|w| w.eq_ignore_ascii_case(b"handlebars"));
        if !(matches_call && matches_source) {
            return None;
        }
        Some(FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter JavaScript grammar.
    fn parse_js(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
        let mut js_parser = tree_sitter::Parser::new();
        js_parser.set_language(&grammar).unwrap();
        js_parser.parse(src, None).unwrap()
    }

    /// A `require('handlebars')` file with a `compile` callee must bind.
    #[test]
    fn fires_on_handlebars_compile() {
        let src: &[u8] = b"const Handlebars = require('handlebars');\nfunction render(body) {\n return Handlebars.compile(body)({});\n}\n";
        let summary = FuncSummary {
            name: "render".into(),
            callees: vec![crate::summary::CalleeSite::bare("compile")],
            ..Default::default()
        };
        let tree = parse_js(src);
        let binding = JsHandlebarsAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A file with no Handlebars markers must not bind.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"function add(a, b) { return a + b; }\n";
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };
        let tree = parse_js(src);
        let binding = JsHandlebarsAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}

View file

@ -1,21 +1,34 @@
//! Concrete [`super::FrameworkAdapter`] implementations.
//!
//! Phase 03 (Track J.1) lands the first four adapters — one per
//! language carrying the new `Cap::DESERIALIZE` corpus. Each adapter
//! detects the language's canonical deserialization sink inside a
//! function body and stamps a [`super::FrameworkBinding`] with
//! Phase 03 (Track J.1) landed the first four adapters — one per
//! language carrying the `Cap::DESERIALIZE` corpus. Phase 04 (Track
//! J.2) adds five more, one per template engine carrying the
//! `Cap::SSTI` corpus: Jinja2 (Python), ERB (Ruby), Twig (PHP),
//! Thymeleaf (Java), Handlebars (JavaScript). Each adapter detects
//! the language's canonical sink inside a function body and stamps a
//! [`super::FrameworkBinding`] with
//! [`crate::evidence::EntryKind::Function`]. Track L.1+ will register
//! the route / framework adapters; the per-cap sink adapters live here
//! so the per-language verticals can ship independently.
//! the route / framework adapters; the per-cap sink adapters live
//! here so the per-language verticals can ship independently.
pub mod java_deserialize;
pub mod java_thymeleaf;
pub mod js_handlebars;
pub mod php_twig;
pub mod php_unserialize;
pub mod python_jinja2;
pub mod python_pickle;
pub mod ruby_erb;
pub mod ruby_marshal;
pub use java_deserialize::JavaDeserializeAdapter;
pub use java_thymeleaf::JavaThymeleafAdapter;
pub use js_handlebars::JsHandlebarsAdapter;
pub use php_twig::PhpTwigAdapter;
pub use php_unserialize::PhpUnserializeAdapter;
pub use python_jinja2::PythonJinja2Adapter;
pub use python_pickle::PythonPickleAdapter;
pub use ruby_erb::RubyErbAdapter;
pub use ruby_marshal::RubyMarshalAdapter;
/// True when any callee in `summary.callees` matches `predicate`.

View file

@ -0,0 +1,107 @@
//! PHP [`super::super::FrameworkAdapter`] matching Twig SSTI sinks.
//!
//! Phase 04 (Track J.2). Fires when the function body invokes the
//! canonical Twig entry points with a tainted template body —
//! `Twig\Environment::createTemplate(<tainted>)` or
//! `$twig->render($tainted)`. Callee matching is last-segment so
//! receiver-prefixed calls (`$env->render`,
//! `Twig\Environment::createTemplate`) hit the same predicate.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless unit type that implements [`FrameworkAdapter`] for the Twig
/// SSTI sink in PHP sources.
pub struct PhpTwigAdapter;
/// Identifier returned by [`FrameworkAdapter::name`] and copied into the
/// `adapter` field of every binding this adapter produces.
const ADAPTER_NAME: &str = "php-twig";
/// Returns `true` when the last segment of `name` is one of the Twig
/// entry points this adapter looks for: `createTemplate`, `render`,
/// `renderBlock`, or `display`.
///
/// The receiver / namespace prefix is stripped at `.`, `::`, and `->`,
/// so `Twig\Environment::createTemplate` and `$env->render` both reduce
/// to their method name. A name with no separator is compared as a whole.
fn callee_is_twig(name: &str) -> bool {
    // Strip each separator in turn; the `.` and `::` steps keep their
    // original order so names without `->` resolve exactly as before.
    let last = [".", "::", "->"]
        .iter()
        .fold(name, |rest, sep| rest.rsplit_once(sep).map_or(rest, |(_, tail)| tail));
    matches!(
        last,
        "createTemplate" | "render" | "renderBlock" | "display"
    )
}
impl FrameworkAdapter for PhpTwigAdapter {
    /// Stable adapter identifier (`"php-twig"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter is registered for PHP sources.
    fn lang(&self) -> Lang {
        Lang::Php
    }

    /// Produces a function-level binding when the summary lists a callee
    /// accepted by [`callee_is_twig`] and the file text contains at least
    /// one Twig marker (`Twig\Environment`, `Twig_Environment`, `use Twig`,
    /// or `createTemplate`).
    ///
    /// The AST argument is not consulted. Returns `None` otherwise.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        // Textual markers; any one of them is enough to treat the file as
        // Twig-related.
        const MARKERS: [&[u8]; 4] = [
            b"Twig\\Environment",
            b"Twig_Environment",
            b"use Twig",
            b"createTemplate",
        ];

        let callee_hit = super::any_callee_matches(summary, callee_is_twig);
        let mentions_twig = MARKERS
            .iter()
            .any(|marker| file_bytes.windows(marker.len()).any(|chunk| chunk == *marker));
        if !(callee_hit && mentions_twig) {
            return None;
        }
        Some(FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter PHP grammar.
    fn parse_php(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
        let mut php_parser = tree_sitter::Parser::new();
        php_parser.set_language(&grammar).unwrap();
        php_parser.parse(src, None).unwrap()
    }

    /// A `use Twig\Environment` file with a `createTemplate` callee must bind.
    #[test]
    fn fires_on_create_template() {
        let src: &[u8] = b"<?php\nuse Twig\\Environment;\nfunction render($body, $twig) {\n $tpl = $twig->createTemplate($body);\n return $tpl->render([]);\n}\n";
        let summary = FuncSummary {
            name: "render".into(),
            callees: vec![crate::summary::CalleeSite::bare("createTemplate")],
            ..Default::default()
        };
        let tree = parse_php(src);
        let binding = PhpTwigAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A file with no Twig markers must not bind.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"<?php\nfunction add($a, $b) { return $a + $b; }\n";
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };
        let tree = parse_php(src);
        let binding = PhpTwigAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}

View file

@ -0,0 +1,120 @@
//! Python [`super::super::FrameworkAdapter`] matching Jinja2 SSTI sinks.
//!
//! Phase 04 (Track J.2). Fires when the function body invokes one of
//! the canonical Jinja2 entry points with a tainted template body —
//! `Template(<tainted>)`, `Environment(...).from_string(<tainted>)`, or
//! `render_template_string(<tainted>)`. Callee matching is
//! last-segment so receiver-prefixed calls (`env.from_string`,
//! `flask.render_template_string`) hit the same predicate.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless unit type that implements [`FrameworkAdapter`] for the
/// Jinja2 SSTI sink in Python sources.
pub struct PythonJinja2Adapter;
/// Identifier returned by [`FrameworkAdapter::name`] and copied into the
/// `adapter` field of every binding this adapter produces.
const ADAPTER_NAME: &str = "python-jinja2";
/// Returns `true` when the final `.`-separated segment of `name` is
/// `Template`, `from_string`, or `render_template_string`.
///
/// A name without any `.` is compared as a whole.
fn callee_is_jinja2(name: &str) -> bool {
    let func = name.rsplit_once('.').map_or(name, |(_, tail)| tail);
    const ENTRY_POINTS: [&str; 3] = ["Template", "from_string", "render_template_string"];
    ENTRY_POINTS.iter().any(|candidate| *candidate == func)
}
impl FrameworkAdapter for PythonJinja2Adapter {
    /// Stable adapter identifier (`"python-jinja2"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter is registered for Python sources.
    fn lang(&self) -> Lang {
        Lang::Python
    }

    /// Produces a function-level binding when the summary lists a callee
    /// accepted by [`callee_is_jinja2`] and the file text contains at
    /// least one of `jinja2`, `from_string`, or `render_template_string`.
    ///
    /// The AST argument is not consulted. Returns `None` otherwise.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        // Byte-level substring test; every needle passed below is non-empty.
        fn has(haystack: &[u8], needle: &[u8]) -> bool {
            haystack.windows(needle.len()).any(|chunk| chunk == needle)
        }

        let callee_hit = super::any_callee_matches(summary, callee_is_jinja2);
        let mentions_jinja = has(file_bytes, b"jinja2")
            || has(file_bytes, b"from_string")
            || has(file_bytes, b"render_template_string");
        (callee_hit && mentions_jinja).then(|| FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Python grammar.
    fn parse_python(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
        let mut py_parser = tree_sitter::Parser::new();
        py_parser.set_language(&grammar).unwrap();
        py_parser.parse(src, None).unwrap()
    }

    /// A `jinja2` import with a `Template` callee must bind.
    #[test]
    fn fires_when_source_imports_jinja2() {
        let src: &[u8] =
            b"from jinja2 import Template\ndef render(body):\n return Template(body).render()\n";
        let summary = FuncSummary {
            name: "render".into(),
            callees: vec![crate::summary::CalleeSite::bare("Template")],
            ..Default::default()
        };
        let tree = parse_python(src);
        let binding = PythonJinja2Adapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// Flask's `render_template_string` must bind without a `jinja2` import.
    #[test]
    fn fires_when_callee_is_render_template_string() {
        let src: &[u8] =
            b"from flask import render_template_string\ndef view(body):\n return render_template_string(body)\n";
        let summary = FuncSummary {
            name: "view".into(),
            callees: vec![crate::summary::CalleeSite::bare("render_template_string")],
            ..Default::default()
        };
        let tree = parse_python(src);
        let binding = PythonJinja2Adapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A file with no Jinja2 markers must not bind.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def run(x):\n return x + 1\n";
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };
        let tree = parse_python(src);
        let binding = PythonJinja2Adapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}

View file

@ -0,0 +1,115 @@
//! Ruby [`super::super::FrameworkAdapter`] matching ERB SSTI sinks.
//!
//! Phase 04 (Track J.2). Fires when the function body invokes
//! `ERB.new(<tainted>).result` (or the equivalent `result_with_hash`
//! variant). Callee matching is last-segment-aware so namespaced
//! receivers (`Erubi::Engine.new`) reduce to `new` + a string-level
//! check for the surrounding `ERB` / `Erubi` token in the source.
//! When that source check passes, the adapter also fires if the file
//! contains the text `.result` anywhere, even if the summary lists no
//! matching callee.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless unit type that implements [`FrameworkAdapter`] for the ERB
/// SSTI sink in Ruby sources.
pub struct RubyErbAdapter;
/// Identifier returned by [`FrameworkAdapter::name`] and copied into the
/// `adapter` field of every binding this adapter produces.
const ADAPTER_NAME: &str = "ruby-erb";
/// Returns `true` when the final `.`-separated segment of `name` is
/// `result`, `result_with_hash`, or `new`.
///
/// `new` on its own is very broad; `detect` pairs this predicate with a
/// textual check of the source file. A name without any `.` is compared
/// as a whole.
fn callee_is_erb(name: &str) -> bool {
    // `.` is a single byte, so `dot + 1` is always a valid char boundary.
    let method = match name.rfind('.') {
        Some(dot) => &name[dot + 1..],
        None => name,
    };
    method == "result" || method == "result_with_hash" || method == "new"
}
impl FrameworkAdapter for RubyErbAdapter {
    /// Stable adapter identifier (`"ruby-erb"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter is registered for Ruby sources.
    fn lang(&self) -> Lang {
        Lang::Ruby
    }

    /// Produces a function-level binding when the file looks like it uses
    /// ERB / Erubi and a render-style call is present.
    ///
    /// The file must contain one of `ERB.new`, `require 'erb'`,
    /// `require "erb"`, or `Erubi`. Given that, the adapter fires if either
    /// the summary lists a callee accepted by [`callee_is_erb`] or the raw
    /// file text contains `.result`. The AST argument is not consulted.
    ///
    /// Returns `None` when neither condition holds.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        // Textual markers; any one of them is enough to treat the file as
        // ERB-related.
        const MARKERS: [&[u8]; 4] = [
            b"ERB.new",
            b"require 'erb'",
            b"require \"erb\"",
            b"Erubi",
        ];
        // Byte-level substring test; every needle used here is non-empty.
        fn has(haystack: &[u8], needle: &[u8]) -> bool {
            haystack.windows(needle.len()).any(|chunk| chunk == needle)
        }

        let callee_hit = super::any_callee_matches(summary, callee_is_erb);
        let mentions_erb = MARKERS.iter().any(|marker| has(file_bytes, marker));
        if !mentions_erb {
            return None;
        }
        // NOTE(review): the textual fallback scans all of `file_bytes`, not
        // just this function's body — confirm that breadth is intended.
        if !(callee_hit || has(file_bytes, b".result")) {
            return None;
        }
        Some(FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Ruby grammar.
    fn parse_ruby(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
        let mut ruby_parser = tree_sitter::Parser::new();
        ruby_parser.set_language(&grammar).unwrap();
        ruby_parser.parse(src, None).unwrap()
    }

    /// With no callees in the summary, the textual `.result` fallback must
    /// still bind an `ERB.new(...).result` file.
    #[test]
    fn fires_on_erb_new_result() {
        let src: &[u8] = b"require 'erb'\ndef render(body)\n ERB.new(body).result\nend\n";
        let summary = FuncSummary {
            name: "render".into(),
            ..Default::default()
        };
        let tree = parse_ruby(src);
        let binding = RubyErbAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A file with no ERB markers must not bind.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def add(a, b)\n a + b\nend\n";
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };
        let tree = parse_ruby(src);
        let binding = RubyErbAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}

View file

@ -214,27 +214,36 @@ mod tests {
}
#[test]
fn registry_baseline_after_phase_03() {
// Phase 03 (Track J.1) registers one deserialize-sink adapter
// per supported language: Java, Python, PHP, Ruby. The other
fn registry_baseline_after_phase_04() {
// Phase 04 (Track J.2) adds the SSTI-sink adapter alongside the
// Phase-03 deserialize adapter for Java / Python / PHP / Ruby and
// introduces the first JavaScript adapter (Handlebars). Other
// languages still carry the Phase-01 empty baseline.
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] {
let registered = registry::adapters_for(lang);
assert_eq!(
registered.len(),
1,
"{:?} must have exactly the J.1 deserialize adapter registered",
2,
"{:?} must have the J.1 deserialize + J.2 ssti adapters",
lang,
);
assert_eq!(registered[0].lang(), lang);
for adapter in registered {
assert_eq!(adapter.lang(), lang);
}
}
let js_registered = registry::adapters_for(Lang::JavaScript);
assert_eq!(
js_registered.len(),
1,
"JavaScript must have exactly the J.2 Handlebars adapter",
);
assert_eq!(js_registered[0].lang(), Lang::JavaScript);
for lang in [
Lang::Rust,
Lang::C,
Lang::Cpp,
Lang::Go,
Lang::TypeScript,
Lang::JavaScript,
] {
assert!(
registry::adapters_for(lang).is_empty(),

View file

@ -39,18 +39,30 @@ pub fn adapters_for(lang: Lang) -> &'static [&'static dyn FrameworkAdapter] {
}
// Phase 03 (Track J.1) registers per-language deserialize-sink
// adapters into the matching language slice. Other Track-L verticals
// add route / framework adapters as they land.
// adapters into the matching language slice. Phase 04 (Track J.2)
// adds the SSTI-sink adapters. Within each slice adapters are
// listed in alphabetical order of [`FrameworkAdapter::name`] so a
// later phase that appends a new adapter cannot silently re-order
// the existing first-match.
static RUST: &[&dyn FrameworkAdapter] = &[];
static C: &[&dyn FrameworkAdapter] = &[];
static CPP: &[&dyn FrameworkAdapter] = &[];
static JAVA: &[&dyn FrameworkAdapter] =
&[&super::adapters::JavaDeserializeAdapter];
static JAVA: &[&dyn FrameworkAdapter] = &[
&super::adapters::JavaDeserializeAdapter,
&super::adapters::JavaThymeleafAdapter,
];
static GO: &[&dyn FrameworkAdapter] = &[];
static PHP: &[&dyn FrameworkAdapter] = &[&super::adapters::PhpUnserializeAdapter];
static PYTHON: &[&dyn FrameworkAdapter] =
&[&super::adapters::PythonPickleAdapter];
static RUBY: &[&dyn FrameworkAdapter] =
&[&super::adapters::RubyMarshalAdapter];
static PHP: &[&dyn FrameworkAdapter] = &[
&super::adapters::PhpTwigAdapter,
&super::adapters::PhpUnserializeAdapter,
];
static PYTHON: &[&dyn FrameworkAdapter] = &[
&super::adapters::PythonJinja2Adapter,
&super::adapters::PythonPickleAdapter,
];
static RUBY: &[&dyn FrameworkAdapter] = &[
&super::adapters::RubyErbAdapter,
&super::adapters::RubyMarshalAdapter,
];
static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[];
static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[];
static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[&super::adapters::JsHandlebarsAdapter];