mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss] phase 03: Track J.1 + Track L.1 — DESERIALIZE corpus + Java/Python/PHP/Ruby adapters
This commit is contained in:
parent
01fcaab310
commit
9dc60b51c0
33 changed files with 1625 additions and 53 deletions
97
src/dynamic/framework/adapters/java_deserialize.rs
Normal file
97
src/dynamic/framework/adapters/java_deserialize.rs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
//! Java [`super::super::FrameworkAdapter`] matching deserialization sinks.
|
||||
//!
|
||||
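//! Fires when the function's callee list contains a call whose last
//! `.`-separated name segment is `readObject`, `fromXML` or `deserialize`
//! (e.g. `ObjectInputStream.readObject` or `XMLDecoder.readObject`; only
//! the last segment is compared because the call graph normaliser drops
//! the receiver), or when the file's bytes contain `ObjectInputStream` or
//! `XMLDecoder`.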
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Stateless adapter for Java deserialization sinks; the detection rule
/// lives in its [`FrameworkAdapter`] implementation.
pub struct JavaDeserializeAdapter;
|
||||
|
||||
/// Identifier this adapter reports from `name()` and records in the
/// `adapter` field of every binding it emits.
const ADAPTER_NAME: &str = "java-deserialize";
|
||||
|
||||
/// Returns `true` when the final `.`-separated segment of `name` (or the
/// whole name when it contains no `.`) is `readObject`, `fromXML` or
/// `deserialize`. The comparison is exact and case-sensitive.
fn callee_is_java_deserialize(name: &str) -> bool {
    // `'.'` is a single byte, so `dot + 1` is always a valid char boundary.
    let method = match name.rfind('.') {
        Some(dot) => &name[dot + 1..],
        None => name,
    };
    ["readObject", "fromXML", "deserialize"].contains(&method)
}
|
||||
|
||||
impl FrameworkAdapter for JavaDeserializeAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Java
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_java_deserialize);
|
||||
let matches_source = file_bytes
|
||||
.windows(b"ObjectInputStream".len())
|
||||
.any(|w| w == b"ObjectInputStream")
|
||||
|| file_bytes
|
||||
.windows(b"XMLDecoder".len())
|
||||
.any(|w| w == b"XMLDecoder");
|
||||
if matches_call || matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter Java grammar and runs the
    /// adapter against a summary named `run` that is otherwise
    /// default-initialised.
    fn detect_in(source: &[u8]) -> Option<FrameworkBinding> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter::Language::from(tree_sitter_java::LANGUAGE))
            .unwrap();
        let tree = parser.parse(source, None).unwrap();
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };
        JavaDeserializeAdapter.detect(&summary, tree.root_node(), source)
    }

    #[test]
    fn fires_when_source_imports_object_input_stream() {
        // The import alone is enough: the file-level byte scan triggers.
        let found = detect_in(
            b"import java.io.ObjectInputStream;\npublic class V { public static void run(byte[] b) {} }\n",
        )
        .expect("must fire on ObjectInputStream source");
        assert_eq!(found.adapter, ADAPTER_NAME);
        assert_eq!(found.kind, EntryKind::Function);
    }

    #[test]
    fn skips_plain_function() {
        let found = detect_in(
            b"public class V { public static void run(String b) { System.out.println(b); } }\n",
        );
        assert!(found.is_none());
    }
}
|
||||
30
src/dynamic/framework/adapters/mod.rs
Normal file
30
src/dynamic/framework/adapters/mod.rs
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
//! Concrete [`super::FrameworkAdapter`] implementations.
|
||||
//!
|
||||
//! Phase 03 (Track J.1) lands the first four adapters — one per
|
||||
//! language carrying the new `Cap::DESERIALIZE` corpus. Each adapter
|
||||
//! detects the language's canonical deserialization sink inside a
|
||||
//! function body and stamps a [`super::FrameworkBinding`] with
|
||||
//! [`crate::evidence::EntryKind::Function`]. Track L.1+ will register
|
||||
//! the route / framework adapters; the per-cap sink adapters live here
|
||||
//! so the per-language verticals can ship independently.
|
||||
|
||||
pub mod java_deserialize;
|
||||
pub mod php_unserialize;
|
||||
pub mod python_pickle;
|
||||
pub mod ruby_marshal;
|
||||
|
||||
pub use java_deserialize::JavaDeserializeAdapter;
|
||||
pub use php_unserialize::PhpUnserializeAdapter;
|
||||
pub use python_pickle::PythonPickleAdapter;
|
||||
pub use ruby_marshal::RubyMarshalAdapter;
|
||||
|
||||
/// True when any callee in `summary.callees` matches `predicate`.
|
||||
fn any_callee_matches(
|
||||
summary: &crate::summary::FuncSummary,
|
||||
predicate: impl Fn(&str) -> bool,
|
||||
) -> bool {
|
||||
summary
|
||||
.callees
|
||||
.iter()
|
||||
.any(|c| predicate(c.name.as_str()))
|
||||
}
|
||||
88
src/dynamic/framework/adapters/php_unserialize.rs
Normal file
88
src/dynamic/framework/adapters/php_unserialize.rs
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
//! PHP [`super::super::FrameworkAdapter`] matching `unserialize` sinks.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Stateless adapter for PHP `unserialize` sinks; the detection rule
/// lives in its [`FrameworkAdapter`] implementation.
pub struct PhpUnserializeAdapter;
|
||||
|
||||
/// Identifier this adapter reports from `name()` and records in the
/// `adapter` field of every binding it emits.
const ADAPTER_NAME: &str = "php-unserialize";
|
||||
|
||||
/// Returns `true` when `name` refers to PHP's `unserialize`.
///
/// Any namespace qualifier (everything up to the last `\`) and any class
/// qualifier (everything up to the last `::`) is stripped first, so
/// `unserialize`, `\unserialize` and `Foo\Bar::unserialize` all match.
///
/// The comparison ignores ASCII case because PHP resolves function and
/// method names case-insensitively: `UNSERIALIZE($x)` invokes the same
/// sink as `unserialize($x)`.
fn callee_is_php_deserialize(name: &str) -> bool {
    let last = name.rsplit_once('\\').map_or(name, |(_, tail)| tail);
    let last = last.rsplit_once("::").map_or(last, |(_, tail)| tail);
    last.eq_ignore_ascii_case("unserialize")
}
|
||||
|
||||
impl FrameworkAdapter for PhpUnserializeAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Php
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_php_deserialize);
|
||||
let matches_source = file_bytes
|
||||
.windows(b"unserialize".len())
|
||||
.any(|w| w == b"unserialize");
|
||||
if matches_call || matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter PHP grammar and runs the
    /// adapter against a summary named `run` that is otherwise
    /// default-initialised.
    fn detect_in(source: &[u8]) -> Option<FrameworkBinding> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP))
            .unwrap();
        let tree = parser.parse(source, None).unwrap();
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };
        PhpUnserializeAdapter.detect(&summary, tree.root_node(), source)
    }

    #[test]
    fn fires_when_source_calls_unserialize() {
        let found = detect_in(b"<?php\nfunction run($blob) { return unserialize($blob); }\n");
        assert!(found.is_some());
    }

    #[test]
    fn skips_plain_function() {
        let found = detect_in(b"<?php\nfunction run($x) { return strtoupper($x); }\n");
        assert!(found.is_none());
    }
}
|
||||
97
src/dynamic/framework/adapters/python_pickle.rs
Normal file
97
src/dynamic/framework/adapters/python_pickle.rs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
//! Python [`super::super::FrameworkAdapter`] matching pickle / yaml
|
||||
//! deserialization sinks.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Stateless adapter for Python pickle / yaml deserialization sinks; the
/// detection rule lives in its [`FrameworkAdapter`] implementation.
pub struct PythonPickleAdapter;
|
||||
|
||||
/// Identifier this adapter reports from `name()` and records in the
/// `adapter` field of every binding it emits.
const ADAPTER_NAME: &str = "python-pickle";
|
||||
|
||||
/// Returns `true` when the final `.`-separated segment of `name` (or the
/// whole name when it contains no `.`) is one of `loads`, `load`,
/// `unsafe_load`, `Unpickler` or `find_class`.
///
/// Only the last segment is examined, so the receiver is ignored:
/// `pickle.loads` and `json.loads` both match.
fn callee_is_python_deserialize(name: &str) -> bool {
    const SINK_NAMES: [&str; 5] = ["loads", "load", "unsafe_load", "Unpickler", "find_class"];

    // `rsplit` always yields at least one item, so the fallback is only a
    // formality for the type checker.
    let attr = name.rsplit('.').next().unwrap_or(name);
    SINK_NAMES.contains(&attr)
}
|
||||
|
||||
impl FrameworkAdapter for PythonPickleAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Python
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_python_deserialize);
|
||||
let matches_source = file_bytes
|
||||
.windows(b"pickle".len())
|
||||
.any(|w| w == b"pickle")
|
||||
|| file_bytes
|
||||
.windows(b"yaml.unsafe_load".len())
|
||||
.any(|w| w == b"yaml.unsafe_load")
|
||||
|| file_bytes
|
||||
.windows(b"yaml.load".len())
|
||||
.any(|w| w == b"yaml.load");
|
||||
if matches_call || matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter Python grammar and runs the
    /// adapter against a summary named `run` that is otherwise
    /// default-initialised.
    fn detect_in(source: &[u8]) -> Option<FrameworkBinding> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE))
            .unwrap();
        let tree = parser.parse(source, None).unwrap();
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };
        PythonPickleAdapter.detect(&summary, tree.root_node(), source)
    }

    #[test]
    fn fires_when_source_imports_pickle() {
        let found = detect_in(b"import pickle\n\ndef run(blob):\n return pickle.loads(blob)\n");
        assert!(found.is_some());
    }

    #[test]
    fn skips_plain_function() {
        let found = detect_in(b"def run(x):\n return x + 1\n");
        assert!(found.is_none());
    }
}
|
||||
99
src/dynamic/framework/adapters/ruby_marshal.rs
Normal file
99
src/dynamic/framework/adapters/ruby_marshal.rs
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
//! Ruby [`super::super::FrameworkAdapter`] matching `Marshal.load` /
|
||||
//! `YAML.load` deserialization sinks.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Stateless adapter for Ruby `Marshal` / `YAML` deserialization sinks;
/// the detection rule lives in its [`FrameworkAdapter`] implementation.
pub struct RubyMarshalAdapter;
|
||||
|
||||
/// Identifier this adapter reports from `name()` and records in the
/// `adapter` field of every binding it emits.
const ADAPTER_NAME: &str = "ruby-marshal";
|
||||
|
||||
/// Returns `true` when `name` mentions `Marshal` or `YAML` anywhere and
/// its method segment is `load`, `restore`, `unsafe_load` or
/// `load_documents`.
///
/// The method segment is what remains after dropping everything up to
/// the last `.` and then everything up to the last `::`.
fn callee_is_ruby_deserialize(name: &str) -> bool {
    // Cheap receiver check first: without one of these the name never matches.
    if !(name.contains("Marshal") || name.contains("YAML")) {
        return false;
    }

    let after_dot = name.rsplit('.').next().unwrap_or(name);
    let method = after_dot.rsplit("::").next().unwrap_or(after_dot);
    ["load", "restore", "unsafe_load", "load_documents"].contains(&method)
}
|
||||
|
||||
impl FrameworkAdapter for RubyMarshalAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Ruby
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_ruby_deserialize);
|
||||
let matches_source = file_bytes
|
||||
.windows(b"Marshal.load".len())
|
||||
.any(|w| w == b"Marshal.load")
|
||||
|| file_bytes
|
||||
.windows(b"Marshal.restore".len())
|
||||
.any(|w| w == b"Marshal.restore")
|
||||
|| file_bytes
|
||||
.windows(b"YAML.load".len())
|
||||
.any(|w| w == b"YAML.load")
|
||||
|| file_bytes
|
||||
.windows(b"YAML.unsafe_load".len())
|
||||
.any(|w| w == b"YAML.unsafe_load");
|
||||
if matches_call || matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter Ruby grammar and runs the
    /// adapter against a summary named `run` that is otherwise
    /// default-initialised.
    fn detect_in(source: &[u8]) -> Option<FrameworkBinding> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE))
            .unwrap();
        let tree = parser.parse(source, None).unwrap();
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };
        RubyMarshalAdapter.detect(&summary, tree.root_node(), source)
    }

    #[test]
    fn fires_when_source_calls_marshal_load() {
        let found = detect_in(b"def run(blob)\n Marshal.load(blob)\nend\n");
        assert!(found.is_some());
    }

    #[test]
    fn skips_plain_function() {
        let found = detect_in(b"def run(x)\n x + 1\nend\n");
        assert!(found.is_none());
    }
}
|
||||
|
|
@ -14,6 +14,7 @@
|
|||
//! phase that adds a new adapter cannot silently re-order an existing
|
||||
//! match.
|
||||
|
||||
pub mod adapters;
|
||||
pub mod registry;
|
||||
|
||||
use crate::evidence::EntryKind;
|
||||
|
|
@ -213,28 +214,32 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn registry_is_empty_for_every_lang_phase_01() {
|
||||
// Regression guard: Phase 01 ships the trait + dispatch
|
||||
// machinery but registers zero adapters. Subsequent Track-L
|
||||
// phases register concrete adapters per language; this test
|
||||
// documents the starting baseline so accidental re-ordering
|
||||
// is caught by `tests/determinism_audit.rs`.
|
||||
fn registry_baseline_after_phase_03() {
|
||||
// Phase 03 (Track J.1) registers one deserialize-sink adapter
|
||||
// per supported language: Java, Python, PHP, Ruby. The other
|
||||
// languages still carry the Phase-01 empty baseline.
|
||||
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] {
|
||||
let registered = registry::adapters_for(lang);
|
||||
assert_eq!(
|
||||
registered.len(),
|
||||
1,
|
||||
"{:?} must have exactly the J.1 deserialize adapter registered",
|
||||
lang,
|
||||
);
|
||||
assert_eq!(registered[0].lang(), lang);
|
||||
}
|
||||
for lang in [
|
||||
Lang::Rust,
|
||||
Lang::C,
|
||||
Lang::Cpp,
|
||||
Lang::Java,
|
||||
Lang::Go,
|
||||
Lang::Php,
|
||||
Lang::Python,
|
||||
Lang::Ruby,
|
||||
Lang::TypeScript,
|
||||
Lang::JavaScript,
|
||||
] {
|
||||
assert!(
|
||||
registry::adapters_for(lang).is_empty(),
|
||||
"{:?} starts with zero registered adapters",
|
||||
lang
|
||||
"{:?} should still have zero adapters before its Track-L phase",
|
||||
lang,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,16 +38,19 @@ pub fn adapters_for(lang: Lang) -> &'static [&'static dyn FrameworkAdapter] {
|
|||
}
|
||||
}
|
||||
|
||||
// All slices intentionally empty in Phase 01. Later Track-L phases
|
||||
// register concrete adapters (Flask, Spring, axum, Express, …) into
|
||||
// the appropriate language slice.
|
||||
// Phase 03 (Track J.1) registers per-language deserialize-sink
|
||||
// adapters into the matching language slice. Other Track-L verticals
|
||||
// add route / framework adapters as they land.
|
||||
static RUST: &[&dyn FrameworkAdapter] = &[];
|
||||
static C: &[&dyn FrameworkAdapter] = &[];
|
||||
static CPP: &[&dyn FrameworkAdapter] = &[];
|
||||
static JAVA: &[&dyn FrameworkAdapter] = &[];
|
||||
static JAVA: &[&dyn FrameworkAdapter] =
|
||||
&[&super::adapters::JavaDeserializeAdapter];
|
||||
static GO: &[&dyn FrameworkAdapter] = &[];
|
||||
static PHP: &[&dyn FrameworkAdapter] = &[];
|
||||
static PYTHON: &[&dyn FrameworkAdapter] = &[];
|
||||
static RUBY: &[&dyn FrameworkAdapter] = &[];
|
||||
static PHP: &[&dyn FrameworkAdapter] = &[&super::adapters::PhpUnserializeAdapter];
|
||||
static PYTHON: &[&dyn FrameworkAdapter] =
|
||||
&[&super::adapters::PythonPickleAdapter];
|
||||
static RUBY: &[&dyn FrameworkAdapter] =
|
||||
&[&super::adapters::RubyMarshalAdapter];
|
||||
static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[];
|
||||
static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue