refactor(dynamic): enhance Rust receiver construction with recursive dependency resolution, add Liquibase changelog context detection, and expand test coverage

This commit is contained in:
elipeter 2026-05-24 22:18:59 -05:00
parent acec041676
commit 8786d1b71e
6 changed files with 546 additions and 25 deletions

View file

@ -13,7 +13,7 @@
//! mirror the Phase 21 binding-stealing audit applied to
//! `migration_flyway` and `migration_rails`.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding, FrameworkDetectionContext};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
@ -53,6 +53,178 @@ fn source_has_liquibase_shape(file_bytes: &[u8]) -> bool {
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
}
/// Collects the names of every class, interface, and enum that appears to be
/// declared in a Java source file.
///
/// For each declaration the simple name is returned and, when the file has a
/// `package` declaration, the fully-qualified `package.Name` form as well.
/// The result is sorted and de-duplicated. Input that is not valid UTF-8
/// yields an empty list.
///
/// This is a textual scan, not a parse: a keyword inside a comment or string
/// literal is treated like a declaration.
fn source_class_names(file_bytes: &[u8]) -> Vec<String> {
    let text = std::str::from_utf8(file_bytes).unwrap_or("");
    let package = parse_package_name(text);
    let mut out = Vec::new();
    for keyword in ["class", "interface", "enum"] {
        for (idx, matched) in text.match_indices(keyword) {
            // The keyword must be a standalone token. Accept any whitespace
            // (or start of file) before it and any whitespace after it, so a
            // declaration at the start of a line or after a tab is not missed,
            // while `Foo.class` and `@interface` are still rejected.
            let starts_token = text[..idx]
                .chars()
                .next_back()
                .map_or(true, char::is_whitespace);
            let after = &text[idx + matched.len()..];
            if !starts_token || !after.starts_with(char::is_whitespace) {
                continue;
            }
            let Some(name) = java_ident_prefix(after.trim_start()) else {
                continue;
            };
            out.push(name.to_owned());
            if let Some(pkg) = package.as_deref() {
                out.push(format!("{pkg}.{name}"));
            }
        }
    }
    out.sort();
    out.dedup();
    out
}
/// Returns the package named by the first usable `package …;` line in `text`.
///
/// A line qualifies when, after trimming, it starts with `package ` and
/// contains a `;`; the package is the trimmed text between the two. Lines that
/// start with `package ` but have no `;`, or name nothing, are skipped rather
/// than ending the search, so stray prose such as a comment line beginning
/// with "package " does not hide the real declaration further down.
fn parse_package_name(text: &str) -> Option<String> {
    text.lines().find_map(|line| {
        let rest = line.trim().strip_prefix("package ")?;
        let (pkg, _) = rest.split_once(';')?;
        let pkg = pkg.trim();
        if pkg.is_empty() {
            None
        } else {
            Some(pkg.to_owned())
        }
    })
}
/// Returns the longest leading slice of `text` that forms an ASCII Java
/// identifier, or `None` when `text` does not start with one.
///
/// The first character must be `_`, `$`, or an ASCII letter; following
/// characters may also be ASCII digits. Non-ASCII characters end the
/// identifier.
fn java_ident_prefix(text: &str) -> Option<&str> {
    let is_start = |c: char| c == '_' || c == '$' || c.is_ascii_alphabetic();
    let is_part = |c: char| c == '_' || c == '$' || c.is_ascii_alphanumeric();

    // Bail out unless the very first character can open an identifier.
    text.chars().next().filter(|&c| is_start(c))?;

    // The identifier runs up to the first character that cannot continue it.
    let len = text
        .char_indices()
        .skip(1)
        .find(|&(_, c)| !is_part(c))
        .map_or(text.len(), |(idx, _)| idx);
    Some(&text[..len])
}
/// Looks for a project changelog that references one of the classes declared
/// in `file_bytes`.
///
/// Changelog locations are probed in `LIQUIBASE_CHANGELOG_PATHS` order and,
/// within a file, class names in the order `source_class_names` returns them.
///
/// Returns `None` when the file declares no classes or no probed changelog
/// references any of them. Otherwise returns `Some(id)`, where `id` is
/// whatever `extract_changelog_id_for_class` recovers for the first match
/// (itself `None` when no id can be found).
fn project_liquibase_changeset_for_class(
    context: FrameworkDetectionContext<'_>,
    file_bytes: &[u8],
) -> Option<Option<String>> {
    let candidates = source_class_names(file_bytes);
    if candidates.is_empty() {
        return None;
    }
    LIQUIBASE_CHANGELOG_PATHS.iter().find_map(|rel| {
        let bytes = context.project_files.get(rel)?;
        // Non-UTF-8 changelogs read as empty text and are skipped by the
        // marker check below.
        let changelog = std::str::from_utf8(bytes).unwrap_or("");
        if !changelog_mentions_liquibase(changelog) {
            return None;
        }
        candidates
            .iter()
            .find(|name| changelog_references_class(changelog, name))
            .map(|name| extract_changelog_id_for_class(changelog, name))
    })
}
/// Project-relative paths probed for a Liquibase changelog, in lookup order.
///
/// Covers root-level `changelog.*` files and `db.changelog-master.*` under
/// `db/changelog/` and `src/main/resources/db/changelog/`, each in XML, YAML
/// (`.yaml` / `.yml`) and JSON form.
// NOTE(review): the `Lang::Java` arm of `framework_project_files_for_entry`
// lists the same paths; the two lists presumably need to stay in sync.
const LIQUIBASE_CHANGELOG_PATHS: &[&str] = &[
"changelog.xml",
"changelog.yaml",
"changelog.yml",
"changelog.json",
"db/changelog/db.changelog-master.xml",
"db/changelog/db.changelog-master.yaml",
"db/changelog/db.changelog-master.yml",
"db/changelog/db.changelog-master.json",
"src/main/resources/db/changelog/db.changelog-master.xml",
"src/main/resources/db/changelog/db.changelog-master.yaml",
"src/main/resources/db/changelog/db.changelog-master.yml",
"src/main/resources/db/changelog/db.changelog-master.json",
];
/// Reports whether `text` contains any Liquibase changelog marker:
/// `databaseChangeLog`, `changeSet`, or `customChange`.
///
/// The YAML spelling `customChange:` needs no separate check because it
/// already contains `customChange`.
fn changelog_mentions_liquibase(text: &str) -> bool {
    ["databaseChangeLog", "changeSet", "customChange"]
        .iter()
        .any(|marker| text.contains(*marker))
}
/// Reports whether a changelog names `class_name` as the value of a `class`
/// attribute or key.
///
/// Recognised spellings:
/// * XML: `class="Name"` / `class='Name'`
/// * YAML: `class: Name`, `class: "Name"`, `class: 'Name'`
/// * JSON: `"class": "Name"` / `"class":"Name"`
///
/// The quoted forms are delimited by their closing quote. The unquoted YAML
/// form has no delimiter, so it only counts when the name is not immediately
/// followed by another identifier character or `.`. Otherwise `Foo` would
/// match `class: FooBar` or `class: Foo.Inner`.
fn changelog_references_class(text: &str, class_name: &str) -> bool {
    let quoted_forms = [
        format!("class=\"{class_name}\""),
        format!("class='{class_name}'"),
        format!("class: \"{class_name}\""),
        format!("class: '{class_name}'"),
        format!("\"class\": \"{class_name}\""),
        format!("\"class\":\"{class_name}\""),
    ];
    if quoted_forms.iter().any(|form| text.contains(form.as_str())) {
        return true;
    }

    let bare_form = format!("class: {class_name}");
    text.match_indices(bare_form.as_str()).any(|(idx, hit)| {
        let next = text[idx + hit.len()..].chars().next();
        !matches!(next, Some(c) if c == '_' || c == '$' || c == '.' || c.is_alphanumeric())
    })
}
/// Recovers the id that most closely precedes the first occurrence of
/// `class_name` in a changelog.
///
/// Only the text before that occurrence is searched. The XML attribute form
/// is tried first, then the YAML key form, then the JSON key form. Returns
/// `None` when `class_name` does not occur or no id precedes it.
fn extract_changelog_id_for_class(text: &str, class_name: &str) -> Option<String> {
    let (preceding, _) = text.split_once(class_name)?;
    if let Some(id) = extract_last_attr_value(preceding, "id") {
        return Some(id);
    }
    if let Some(id) = extract_last_yaml_value(preceding, "id") {
        return Some(id);
    }
    extract_last_json_value(preceding, "id")
}
/// Returns the trimmed, quoted value of the last `key=` attribute in `text`.
///
/// An occurrence only counts when `key` is a whole attribute name, i.e. it is
/// not preceded by a character that can be part of an XML name (letter,
/// digit, `_`, `-`, `.`, `:`). Without that check a search for `id` would
/// pick up `uuid="…"` or `data-id="…"`.
///
/// Returns `None` when there is no such attribute, when the value of the last
/// one is not wrapped in single or double quotes, when the closing quote is
/// missing, or when the value is empty after trimming.
fn extract_last_attr_value(text: &str, key: &str) -> Option<String> {
    let needle = format!("{key}=");
    // Walk matches from the end and keep the last one that starts a name.
    let idx = text
        .rmatch_indices(needle.as_str())
        .map(|(idx, _)| idx)
        .find(|&idx| {
            let prev = text[..idx].chars().next_back();
            !matches!(prev, Some(c) if c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | ':'))
        })?;
    let quoted = text[idx + needle.len()..].trim_start();
    let quote = quoted.chars().next()?;
    if quote != '"' && quote != '\'' {
        return None;
    }
    // Both quote characters are one byte, so slicing at 1 is always valid.
    let body = &quoted[1..];
    let end = body.find(quote)?;
    let value = body[..end].trim();
    if value.is_empty() {
        None
    } else {
        Some(value.to_owned())
    }
}
/// Returns the value of the last `key:` line in `text` that carries a
/// non-empty value.
///
/// Lines are scanned bottom-up. A line matches when, after trimming, it
/// starts with `key:`; the remainder is trimmed and stripped of surrounding
/// single or double quotes. Lines whose value ends up empty are skipped.
fn extract_last_yaml_value(text: &str, key: &str) -> Option<String> {
    let prefix = format!("{key}:");
    text.lines().rev().find_map(|line| {
        let value = line.trim().strip_prefix(prefix.as_str())?;
        non_empty(value.trim().trim_matches(['"', '\'']))
    })
}
/// Returns the trimmed string value that follows the last `"key"` in `text`.
///
/// After the key, the text up to the first `:` is skipped; the value must
/// then open with a single or double quote and run to the next matching
/// quote. Returns `None` when any of those pieces is missing or the value is
/// empty after trimming. Unquoted values such as numbers therefore yield
/// `None`.
fn extract_last_json_value(text: &str, key: &str) -> Option<String> {
    let quoted_key = format!("\"{key}\"");
    let (_, after_key) = text.rsplit_once(quoted_key.as_str())?;
    let (_, after_colon) = after_key.split_once(':')?;
    let mut chars = after_colon.trim_start().chars();
    let delimiter = chars.next().filter(|&c| c == '"' || c == '\'')?;
    let (value, _) = chars.as_str().split_once(delimiter)?;
    non_empty(value.trim())
}
/// Converts `value` into an owned `String`, mapping the empty string to
/// `None`. Whitespace is not trimmed here.
fn non_empty(value: &str) -> Option<String> {
    match value {
        "" => None,
        text => Some(text.to_owned()),
    }
}
/// Reports whether `name` is one of the method names treated as a Liquibase
/// migration entry point: `execute` or `generateStatements` (exact,
/// case-sensitive match).
fn name_is_migration_entry(name: &str) -> bool {
    const ENTRY_NAMES: [&str; 2] = ["execute", "generateStatements"];
    ENTRY_NAMES.contains(&name)
}
@ -102,29 +274,53 @@ impl FrameworkAdapter for MigrationLiquibaseAdapter {
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
let has_shape = source_has_liquibase_shape(file_bytes);
let name_matches = name_is_migration_entry(&summary.name);
let body_runs_ddl = super::any_callee_matches(summary, callee_is_liquibase_ddl);
let binds = has_shape && (name_matches || body_runs_ddl);
if !binds {
return None;
}
Some(FrameworkBinding {
adapter: ADAPTER_NAME.to_owned(),
kind: EntryKind::Migration {
version: extract_version(file_bytes),
},
route: None,
request_params: Vec::new(),
response_writer: None,
middleware: Vec::new(),
})
detect_liquibase(summary, file_bytes, None)
}
/// Project-aware detection: same as the context-free path, but passes
/// `context` on to `detect_liquibase` so project changelog files can take
/// part in the decision. The AST node is not used.
fn detect_with_project_context(
&self,
summary: &FuncSummary,
context: FrameworkDetectionContext<'_>,
_ast: tree_sitter::Node<'_>,
file_bytes: &[u8],
) -> Option<FrameworkBinding> {
detect_liquibase(summary, file_bytes, Some(context))
}
}
/// Shared detection logic for the Liquibase migration adapter.
///
/// A function binds when both of the following hold:
/// * the file looks like Liquibase code (`source_has_liquibase_shape`) **or**
///   a project changelog references one of its classes (only checked when
///   `context` is supplied), and
/// * the function is named like a migration entry point **or** one of its
///   callees matches `callee_is_liquibase_ddl`.
///
/// The reported migration version prefers the id found in the project
/// changelog and falls back to `extract_version(file_bytes)`.
fn detect_liquibase(
    summary: &FuncSummary,
    file_bytes: &[u8],
    context: Option<FrameworkDetectionContext<'_>>,
) -> Option<FrameworkBinding> {
    let changelog_hit = match context {
        Some(ctx) => project_liquibase_changeset_for_class(ctx, file_bytes),
        None => None,
    };
    let looks_like_liquibase = source_has_liquibase_shape(file_bytes) || changelog_hit.is_some();
    let is_entry_name = name_is_migration_entry(&summary.name);
    let runs_ddl = super::any_callee_matches(summary, callee_is_liquibase_ddl);
    if !looks_like_liquibase || !(is_entry_name || runs_ddl) {
        return None;
    }

    // An id from the changelog wins; otherwise fall back to the source file.
    let version = match changelog_hit.flatten() {
        Some(id) => Some(id),
        None => extract_version(file_bytes),
    };
    Some(FrameworkBinding {
        adapter: ADAPTER_NAME.to_owned(),
        kind: EntryKind::Migration { version },
        route: None,
        request_params: Vec::new(),
        response_writer: None,
        middleware: Vec::new(),
    })
}
#[cfg(test)]
mod tests {
use super::*;
use crate::dynamic::framework::ProjectFileIndex;
use crate::summary::CalleeSite;
fn parse_java(src: &[u8]) -> tree_sitter::Tree {
let mut parser = tree_sitter::Parser::new();
@ -230,4 +426,101 @@ mod tests {
panic!("expected Migration entry kind");
}
}
// An `execute` method in a class that an XML changelog names through
// `<customChange class="…">` must bind, and the reported version must be the
// enclosing changeSet's `id`. The changelog uses the fully-qualified class
// name, so this also exercises the `package` + class-name pairing.
#[test]
fn binds_custom_change_from_xml_changelog() {
let src: &[u8] = b"package app.migrations;\n\
public class AddUsersIndex {\n\
public void execute(Object database) { }\n\
}\n";
let tree = parse_java(src);
let mut project_files = ProjectFileIndex::new();
project_files.insert(
"src/main/resources/db/changelog/db.changelog-master.xml",
br#"<databaseChangeLog>
<changeSet id="20260525-add-users-index" author="nyx">
<customChange class="app.migrations.AddUsersIndex"/>
</changeSet>
</databaseChangeLog>"#,
);
let context = FrameworkDetectionContext {
ssa_summary: None,
project_files: &project_files,
};
let summary = FuncSummary {
name: "execute".into(),
..Default::default()
};
let binding = MigrationLiquibaseAdapter
.detect_with_project_context(&summary, context, tree.root_node(), src)
.expect("xml changelog should bind custom change class");
assert_eq!(binding.adapter, "migration-liquibase");
if let EntryKind::Migration { version } = binding.kind {
assert_eq!(version.as_deref(), Some("20260525-add-users-index"));
} else {
panic!("expected Migration entry kind");
}
}
// A method whose name is not a migration entry name (`helper`) must still
// bind when a YAML changelog references its class by simple name and the
// summary lists a DDL-style callee. The version must come from the
// changeSet's `id:` line.
#[test]
fn binds_custom_change_from_yaml_changelog_with_ddl_body() {
let src: &[u8] = b"public class AddAuditTable {\n\
void helper(Connection c) throws Exception { c.createStatement().execute(\"create table audit(id int)\"); }\n\
}\n";
let tree = parse_java(src);
let mut project_files = ProjectFileIndex::new();
project_files.insert(
"db/changelog/db.changelog-master.yaml",
// The `\` line continuations drop the newline and any leading whitespace
// of the next source line, so the YAML bytes carry no indentation.
b"databaseChangeLog:\n\
- changeSet:\n\
id: audit-table\n\
changes:\n\
- customChange:\n\
class: AddAuditTable\n",
);
let context = FrameworkDetectionContext {
ssa_summary: None,
project_files: &project_files,
};
let summary = FuncSummary {
name: "helper".into(),
// presumably matched by `callee_is_liquibase_ddl`; confirm against that helper
callees: vec![CalleeSite::bare("stmt.execute")],
..Default::default()
};
let binding = MigrationLiquibaseAdapter
.detect_with_project_context(&summary, context, tree.root_node(), src)
.expect("yaml changelog plus DDL body should bind");
if let EntryKind::Migration { version } = binding.kind {
assert_eq!(version.as_deref(), Some("audit-table"));
} else {
panic!("expected Migration entry kind");
}
}
// Negative case: a changelog is present, but it names a different class
// (`OtherChange`), so an `execute` method in `Unrelated` must not bind just
// because the project uses Liquibase.
#[test]
fn skips_project_changelog_when_class_does_not_match() {
let src: &[u8] = b"public class Unrelated {\n\
public void execute(Object database) { }\n\
}\n";
let tree = parse_java(src);
let mut project_files = ProjectFileIndex::new();
project_files.insert(
"changelog.json",
br#"{"databaseChangeLog":[{"changeSet":{"id":"x","changes":[{"customChange":{"class":"OtherChange"}}]}}]}"#,
);
let context = FrameworkDetectionContext {
ssa_summary: None,
project_files: &project_files,
};
let summary = FuncSummary {
name: "execute".into(),
..Default::default()
};
assert!(
MigrationLiquibaseAdapter
.detect_with_project_context(&summary, context, tree.root_node(), src)
.is_none(),
"project changelog must not bind every execute method in the project",
);
}
}

View file

@ -2028,11 +2028,7 @@ fn emit_class_method_harness(spec: &HarnessSpec, class: &str, method: &str) -> H
let cargo_toml = generate_cargo_toml(spec.expected_cap);
let entry_label = format!("{class}::{method}");
let entry_src = read_entry_source(&spec.entry_file);
let ctor = if class_derives_default(&entry_src, class) {
"default"
} else {
"new"
};
let receiver_expr = rust_receiver_expr(&entry_src, class, 3);
let body = format!(
r#"//! Nyx dynamic harness — class method (Phase 19 / Track M.1).
mod entry;
@ -2041,7 +2037,7 @@ fn main() {{
let payload = nyx_payload();
let _ = &payload;
__nyx_install_crash_guard("{entry_label}");
let instance = entry::{class}::{ctor}();
let instance = {receiver_expr};
let _ = instance.{method}(&payload);
println!("__NYX_SINK_HIT__");
}}
@ -2088,9 +2084,9 @@ fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
Some(out)
}}
"#,
class = class,
method = method,
entry_label = entry_label,
receiver_expr = receiver_expr,
);
HarnessSource {
source: body,
@ -2101,6 +2097,150 @@ fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
}
}
/// Builds the Rust expression the harness uses to construct an instance of
/// `entry::{class}`.
///
/// Preference order: `::default()` when `class_derives_default` says so,
/// `::new()` when `class_has_new` finds a `new` function, then a struct
/// literal built by `rust_struct_literal` (bounded by `depth`). When none of
/// these applies, `::new()` is emitted as a last resort.
fn rust_receiver_expr(entry_src: &str, class: &str, depth: usize) -> String {
    let ctor = if class_derives_default(entry_src, class) {
        "default"
    } else if class_has_new(entry_src, class) {
        "new"
    } else if let Some(literal) = rust_struct_literal(entry_src, class, depth) {
        return literal;
    } else {
        "new"
    };
    format!("entry::{class}::{ctor}()")
}
fn class_has_new(entry_src: &str, class: &str) -> bool {
let impl_marker = format!("impl {class}");
let Some(mut pos) = entry_src.find(&impl_marker) else {
return false;
};
loop {
let after = &entry_src[pos + impl_marker.len()..];
if let Some(open_rel) = after.find('{') {
let body = &after[open_rel + 1..];
if let Some(close_rel) = body.find("\n}")
&& word_in_text(&body[..close_rel], "new")
&& body[..close_rel].contains("fn new")
{
return true;
}
}
let next_from = pos + impl_marker.len();
let Some(next_rel) = entry_src[next_from..].find(&impl_marker) else {
return false;
};
pos = next_from + next_rel;
}
}
/// Builds an `entry::{class} { field: value, … }` literal from the struct's
/// declared fields.
///
/// Each field value comes from `rust_value_for_type` with the recursion budget
/// reduced by one. Returns `None` when `depth` is zero or the struct's fields
/// cannot be determined.
fn rust_struct_literal(entry_src: &str, class: &str, depth: usize) -> Option<String> {
    let remaining = depth.checked_sub(1)?;
    let initializers: Vec<String> = rust_struct_fields(entry_src, class)?
        .into_iter()
        .map(|(field, ty)| {
            let value = rust_value_for_type(entry_src, &ty, remaining);
            format!("{field}: {value}")
        })
        .collect();
    Some(format!("entry::{class} {{ {} }}", initializers.join(", ")))
}
/// Extracts the named fields of `struct {class}` from `entry_src` as
/// `(field_name, field_type)` pairs, in declaration order.
///
/// Returns:
/// * `Some(fields)` for a braced struct; the list is empty for `struct X {}`.
/// * `Some(vec![])` for a unit struct (`struct X;`). Such a struct is
///   constructible as `X {}`, so callers can treat it like a field-less
///   braced struct.
/// * `None` when the struct is not found, is a tuple struct, has unbalanced
///   braces, or contains a field that cannot be split into `name: type`.
///
/// The struct name must end at an identifier boundary, so a lookup for `User`
/// does not land on `struct UserService`. Attributes, `//` comments and a
/// `pub` / `pub(...)` visibility prefix on a field are ignored.
fn rust_struct_fields(entry_src: &str, class: &str) -> Option<Vec<(String, String)>> {
    let marker = format!("struct {class}");
    let after = entry_src
        .match_indices(marker.as_str())
        .find_map(|(idx, hit)| {
            let rest = &entry_src[idx + hit.len()..];
            let name_continues = rest
                .chars()
                .next()
                .map_or(false, |c| c == '_' || c.is_alphanumeric());
            if name_continues { None } else { Some(rest) }
        })?;

    // A `;` before any `{` means the declaration has no braced body. Without
    // this check the `{` of a following `impl` block would be mistaken for
    // the struct body and its methods parsed as fields.
    let open = after.find('{');
    if let Some(semi) = after.find(';') {
        if open.map_or(true, |open| semi < open) {
            let is_unit = after[..semi].trim().is_empty();
            return if is_unit { Some(Vec::new()) } else { None };
        }
    }

    let body = balanced_block(&after[open?..])?;
    let inner = strip_line_comments(&body[1..body.len() - 1]);
    let mut out = Vec::new();
    for part in split_top_level_commas(&inner) {
        let mut text = part.trim();
        while text.starts_with("#[") {
            let end = text.find(']')?;
            text = text[end + 1..].trim_start();
        }
        if text.is_empty() {
            continue;
        }
        let text = strip_visibility(text);
        let colon = text.find(':')?;
        let name = text[..colon].trim();
        let ty = text[colon + 1..].trim();
        if !name.is_empty() && !ty.is_empty() {
            out.push((name.to_owned(), ty.to_owned()));
        }
    }
    Some(out)
}

/// Removes `//` comments (including `///` doc comments) from every line so
/// that commas and colons inside them cannot be mistaken for field syntax.
fn strip_line_comments(text: &str) -> String {
    text.lines()
        .map(|line| line.find("//").map_or(line, |at| &line[..at]))
        .collect::<Vec<_>>()
        .join("\n")
}

/// Drops a leading `pub` or `pub(...)` visibility qualifier from a field
/// declaration. Text that merely starts with the letters `pub` (for example a
/// field called `pubkey`) is returned unchanged.
fn strip_visibility(text: &str) -> &str {
    let Some(rest) = text.strip_prefix("pub") else {
        return text;
    };
    let rest = match rest.strip_prefix('(') {
        Some(scoped) => match scoped.find(')') {
            Some(close) => &scoped[close + 1..],
            None => return text,
        },
        None => rest,
    };
    if rest.starts_with(char::is_whitespace) {
        rest.trim_start()
    } else {
        text
    }
}

/// Returns the prefix of `text` up to and including the `}` that balances the
/// first `{`, or `None` when the braces never balance or a `}` appears before
/// any `{`. Braces inside string literals are not treated specially.
fn balanced_block(text: &str) -> Option<&str> {
    let mut depth = 0usize;
    for (idx, ch) in text.char_indices() {
        match ch {
            '{' => depth += 1,
            '}' => {
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(&text[..=idx]);
                }
            }
            _ => {}
        }
    }
    None
}

/// Splits `text` on commas that are not nested inside `<>`, `()`, `[]` or
/// `{}`. The final segment is always included, even when empty.
fn split_top_level_commas(text: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut depth = 0isize;
    let mut start = 0usize;
    let mut prev = None;
    for (idx, ch) in text.char_indices() {
        match ch {
            '<' | '(' | '[' | '{' => depth += 1,
            // The `>` of a `->` return arrow does not close a bracket;
            // counting it would push the depth negative and hide every
            // later top-level comma.
            '>' if prev == Some('-') => {}
            '>' | ')' | ']' | '}' => depth -= 1,
            ',' if depth == 0 => {
                parts.push(&text[start..idx]);
                start = idx + 1;
            }
            _ => {}
        }
        prev = Some(ch);
    }
    parts.push(&text[start..]);
    parts
}
/// Produces a Rust expression that yields a placeholder value for a field of
/// type `ty`.
///
/// Leading `&`s are ignored. The type's bare name (generic arguments and
/// module path removed) selects a literal for `String`, `str`, `bool`, `char`
/// and the numeric primitives. `Option<…>` becomes `None`, `Vec<…>` becomes
/// `Vec::new()`, and `Box<T>` wraps the value for `T`. A struct declared in
/// `entry_src` is built through `rust_receiver_expr` while `depth` is
/// non-zero. Everything else falls back to `Default::default()`.
fn rust_value_for_type(entry_src: &str, ty: &str, depth: usize) -> String {
    let clean = ty.trim().trim_start_matches('&').trim();
    let head = clean.split('<').next().unwrap_or(clean);
    let bare = head.rsplit("::").next().unwrap_or(clean).trim();

    let builtin = match bare {
        "String" => Some("String::new()"),
        "str" => Some("\"\""),
        "bool" => Some("false"),
        "char" => Some("'\\0'"),
        "usize" | "u8" | "u16" | "u32" | "u64" | "u128" | "isize" | "i8" | "i16" | "i32"
        | "i64" | "i128" => Some("0"),
        "f32" | "f64" => Some("0.0"),
        _ => None,
    };
    if let Some(expr) = builtin {
        return expr.to_owned();
    }
    if clean.starts_with("Option<") {
        return "None".to_owned();
    }
    if clean.starts_with("Vec<") {
        return "Vec::new()".to_owned();
    }
    if let Some(inner) = clean
        .strip_prefix("Box<")
        .and_then(|rest| rest.strip_suffix('>'))
    {
        // Boxing does not consume recursion budget; only struct nesting does.
        return format!("Box::new({})", rust_value_for_type(entry_src, inner, depth));
    }
    if depth > 0 && rust_struct_fields(entry_src, bare).is_some() {
        return rust_receiver_expr(entry_src, bare, depth);
    }
    "Default::default()".to_owned()
}
// ── Phase 21 (Track M.3) — synthetic entry-kind harnesses ─────────────────────
/// Phase 21 (Track M.3) — GraphQL resolver harness for Rust (Juniper).

View file

@ -1270,6 +1270,20 @@ fn framework_project_files_for_entry(entry_file: &str, lang: Lang) -> ProjectFil
"routes/api.php",
"app/Config/Routes.php",
],
Lang::Java => &[
"changelog.xml",
"changelog.yaml",
"changelog.yml",
"changelog.json",
"db/changelog/db.changelog-master.xml",
"db/changelog/db.changelog-master.yaml",
"db/changelog/db.changelog-master.yml",
"db/changelog/db.changelog-master.json",
"src/main/resources/db/changelog/db.changelog-master.xml",
"src/main/resources/db/changelog/db.changelog-master.yaml",
"src/main/resources/db/changelog/db.changelog-master.yml",
"src/main/resources/db/changelog/db.changelog-master.json",
],
_ => &[],
};
ProjectFileIndex::from_root(&root, rel_paths)

View file

@ -220,6 +220,20 @@ fn class_method_rust_uses_default_constructor() {
assert!(h.source.contains("instance.run"));
}
// A receiver with neither `Default` nor `new()` must be built as a struct
// literal whose fields are themselves constructed from same-file types.
#[test]
fn class_method_rust_builds_recursive_receiver_literal() {
    let fixture = "tests/dynamic_fixtures/class_method/rust_recursive_deps/vuln.rs";
    let mut spec = make_spec(Lang::Rust);
    spec.entry_file = fixture.into();
    spec.sink_file = fixture.into();

    let harness = lang::emit(&spec).expect("emit ok");
    let generated = &harness.source;

    assert!(generated.contains("entry::UserService { runner: entry::CommandRunner"));
    for forbidden in ["UserService::default()", "UserService::new()"] {
        assert!(!generated.contains(forbidden));
    }
}
#[test]
fn class_method_c_collapses_to_class_underscore_method_symbol() {
let spec = make_spec(Lang::C);
@ -428,6 +442,17 @@ mod e2e_phase_19 {
cap: Cap::CODE_EXEC,
bins: &["cargo"],
},
Case {
lang: Lang::Rust,
fixture_dir: "rust_recursive_deps",
vuln_file: "vuln.rs",
benign_file: "benign.rs",
vuln_class: "UserService",
benign_class: "UserService",
method: "run",
cap: Cap::CODE_EXEC,
bins: &["cargo"],
},
Case {
lang: Lang::C,
fixture_dir: "c",

View file

@ -0,0 +1,23 @@
// Benign control for recursive Rust class-method receiver construction.
// Field-less dependency of `UserService`; it has no `Default` impl and no
// `new()` constructor.
pub struct CommandRunner;
impl CommandRunner {
// Runs the `true` binary with `input` passed as one argument (no shell is
// involved) and returns its stdout, lossily decoded as UTF-8. Panics with
// "exec" if the process cannot be run.
pub fn run(&self, input: &str) -> String {
let out = std::process::Command::new("true")
.arg(input)
.output()
.expect("exec");
String::from_utf8_lossy(&out.stdout).into_owned()
}
}
// Receiver type under test: its only field is another same-file struct, and
// it has no `Default` impl and no `new()` constructor.
pub struct UserService {
pub runner: CommandRunner,
}
impl UserService {
// Entry point for the harness; forwards `input` to the runner unchanged.
pub fn run(&self, input: &str) -> String {
self.runner.run(input)
}
}

View file

@ -0,0 +1,26 @@
// Rust class-method fixture whose receiver has same-file dependencies
// but no Default or new() constructor.
// Field-less dependency of `UserService`; it has no `Default` impl and no
// `new()` constructor.
pub struct CommandRunner;
impl CommandRunner {
// Interpolates `input` into a command string and hands it to `sh -c`, so the
// input is interpreted by the shell. Returns stdout, lossily decoded as
// UTF-8. Panics with "exec" if the process cannot be run.
pub fn run(&self, input: &str) -> String {
let cmd = format!("true {}", input);
let out = std::process::Command::new("sh")
.arg("-c")
.arg(&cmd)
.output()
.expect("exec");
String::from_utf8_lossy(&out.stdout).into_owned()
}
}
// Receiver type under test: its only field is another same-file struct, and
// it has no `Default` impl and no `new()` constructor.
pub struct UserService {
pub runner: CommandRunner,
}
impl UserService {
// Entry point for the harness; forwards `input` to the runner unchanged.
pub fn run(&self, input: &str) -> String {
self.runner.run(input)
}
}