diff --git a/src/dynamic/corpus.rs b/src/dynamic/corpus.rs index e643c463..b4d6664a 100644 --- a/src/dynamic/corpus.rs +++ b/src/dynamic/corpus.rs @@ -50,6 +50,7 @@ pub mod registry; mod cmdi; mod deserialize; mod fmt_string; +mod ldap; mod path_trav; mod sqli; mod ssrf; @@ -88,7 +89,8 @@ pub use crate::dynamic::oracle::Oracle; /// | 7 | 2026-05-17 | Phase 03 / Track J.1: `DESERIALIZE` cap lit for Java / Python / PHP / Ruby; `ProbeKind::Deserialize` + `ProbePredicate::DeserializeGadgetInvoked` | /// | 8 | 2026-05-17 | Phase 04 / Track J.2: `SSTI` cap lit for Jinja2 / ERB / Twig / Thymeleaf / Handlebars; `ProbePredicate::TemplateEvalEqual` | /// | 9 | 2026-05-17 | Phase 05 / Track J.3: `XXE` cap lit for Java / Python / PHP / Ruby / Go; `ProbeKind::Xxe` + `ProbePredicate::XxeEntityExpanded` | -pub const CORPUS_VERSION: u32 = 9; +/// | 10 | 2026-05-17 | Phase 06 / Track J.4: `LDAP_INJECTION` cap lit for Java / Python / PHP; `ProbeKind::Ldap` + `ProbePredicate::LdapResultCountGreaterThan`; `StubKind::Ldap` + in-sandbox LDAP server stub | +pub const CORPUS_VERSION: u32 = 10; /// Where a payload originated. #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/dynamic/corpus/ldap/java.rs b/src/dynamic/corpus/ldap/java.rs new file mode 100644 index 00000000..e73364ed --- /dev/null +++ b/src/dynamic/corpus/ldap/java.rs @@ -0,0 +1,53 @@ +//! Java `Cap::LDAP_INJECTION` payloads — `LdapTemplate.search` / +//! `DirContext.search` filter injection. +//! +//! Vuln payload: a filter fragment whose `*)(uid=*` tail breaks out of +//! the host template's `(uid=…)` clause and rewraps the search as +//! `(|(uid=…)(uid=*))`, matching every user the directory carries. +//! The harness's instrumented LDAP client (talking to +//! [`crate::dynamic::stubs::ldap_server`]) records +//! `ProbeKind::Ldap { entries_returned: 3 }`. +//! +//! Benign control: the same intended username quoted through +//! `EscapeDN` so the LDAP filter stays pinned to a single entry; the +//! shim records `entries_returned: 1` and the oracle does not fire. + +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"alice*)(uid=*", + label: "ldap-java-filter-wildcard", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 10, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/ldap_injection/java/Vuln.java"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + benign_control: Some(PayloadRef { + label: "ldap-java-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"alice", + label: "ldap-java-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 10, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/ldap_injection/java/Benign.java"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/ldap/mod.rs b/src/dynamic/corpus/ldap/mod.rs new file mode 100644 index 00000000..a1b971a4 --- /dev/null +++ b/src/dynamic/corpus/ldap/mod.rs @@ -0,0 +1,30 @@ +//! LDAP filter injection (`Cap::LDAP_INJECTION`) per-language payload +//! slices. +//! +//! Phase 06 (Track J.4) carves LDAP filter injection across the three +//! most-common directory clients: Java (`LdapTemplate.search` / +//! `DirContext.search`), Python (`ldap.search_s`), and PHP +//! (`ldap_search`). Every vuln payload appends the canonical +//! `*)(uid=*` quote-escape break — once the host code substitutes the +//! attacker bytes into its filter template the synthesized LDAP +//! filter matches every entry the directory carries (the +//! [`crate::dynamic::stubs::ldap_server`] stub returns its three +//! provisioned users). The paired benign control quotes the same +//! bytes through `EscapeDN` / `ldap.dn.escape_filter_chars` / +//! `ldap_escape`, leaving the filter pinned to the originally +//! intended single user. +//! +//! The oracle's +//! [`crate::dynamic::oracle::ProbePredicate::LdapResultCountGreaterThan`] +//! checks the per-payload `ProbeKind::Ldap.entries_returned` against +//! `n = 1` — vuln passes (3 entries), benign clears (1 entry), +//! fulfilling the §4.1 differential rule. +//! +//! C# is intentionally omitted: the [`crate::symbol::Lang`] enum has +//! no `CSharp` variant, so the corpus has nowhere to register it. +//! Tracked in `.pitboss/play/deferred.md` alongside the Phase 05 +//! Lang::CSharp gap. + +pub mod java; +pub mod php; +pub mod python; diff --git a/src/dynamic/corpus/ldap/php.rs b/src/dynamic/corpus/ldap/php.rs new file mode 100644 index 00000000..ed5e54b6 --- /dev/null +++ b/src/dynamic/corpus/ldap/php.rs @@ -0,0 +1,51 @@ +//! PHP `Cap::LDAP_INJECTION` payloads — `ldap_search` filter injection. +//! +//! Vuln payload: a filter fragment whose `*)(uid=*` tail breaks out of +//! the host template's `(uid=…)` clause; the synthesized filter +//! becomes `(|(uid=…)(uid=*))` and matches every directory entry. +//! The harness's instrumented `ldap_search` records +//! `ProbeKind::Ldap { entries_returned: 3 }`. +//! +//! Benign control: the same intended username quoted via +//! `ldap_escape($value, "", LDAP_ESCAPE_FILTER)` — `entries_returned: +//! 1`, oracle clear. + +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"alice*)(uid=*", + label: "ldap-php-filter-wildcard", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 10, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/ldap_injection/php/vuln.php"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + benign_control: Some(PayloadRef { + label: "ldap-php-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"alice", + label: "ldap-php-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 10, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/ldap_injection/php/benign.php"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/ldap/python.rs b/src/dynamic/corpus/ldap/python.rs new file mode 100644 index 00000000..429c9ac7 --- /dev/null +++ b/src/dynamic/corpus/ldap/python.rs @@ -0,0 +1,52 @@ +//! Python `Cap::LDAP_INJECTION` payloads — `ldap.search_s` filter +//! injection. +//! +//! Vuln payload: a filter fragment whose `*)(uid=*` tail breaks out of +//! the host template's `(uid=…)` clause; the synthesized filter +//! becomes `(|(uid=…)(uid=*))` and matches every directory entry. +//! The harness's instrumented `ldap.search_s` records +//! `ProbeKind::Ldap { entries_returned: 3 }`. +//! +//! Benign control: the same intended username quoted via +//! `ldap.dn.escape_filter_chars`, leaving the filter pinned to a +//! single entry — `entries_returned: 1`, oracle clear. + +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"alice*)(uid=*", + label: "ldap-python-filter-wildcard", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 10, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/ldap_injection/python/vuln.py"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + benign_control: Some(PayloadRef { + label: "ldap-python-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"alice", + label: "ldap-python-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 10, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/ldap_injection/python/benign.py"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/registry.rs b/src/dynamic/corpus/registry.rs index d603ff41..5b71f308 100644 --- a/src/dynamic/corpus/registry.rs +++ b/src/dynamic/corpus/registry.rs @@ -23,7 +23,7 @@ use std::collections::HashMap; use std::sync::OnceLock; -use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, ssti, xss, xxe}; +use super::{cmdi, deserialize, fmt_string, ldap, path_trav, sqli, ssrf, ssti, xss, xxe}; use super::{CapCorpus, CuratedPayload, Oracle}; use crate::dynamic::oracle::ProbePredicate; use crate::labels::Cap; @@ -40,7 +40,6 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits() | Cap::CRYPTO.bits() | Cap::UNAUTHORIZED_ID.bits() | Cap::DATA_EXFIL.bits() - | Cap::LDAP_INJECTION.bits() | Cap::XPATH_INJECTION.bits() | Cap::HEADER_INJECTION.bits() | Cap::OPEN_REDIRECT.bits() @@ -69,6 +68,9 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[ (Cap::XXE, Lang::Php, xxe::php::PAYLOADS), (Cap::XXE, Lang::Ruby, xxe::ruby::PAYLOADS), (Cap::XXE, Lang::Go, xxe::go::PAYLOADS), + (Cap::LDAP_INJECTION, Lang::Java, ldap::java::PAYLOADS), + (Cap::LDAP_INJECTION, Lang::Python, ldap::python::PAYLOADS), + (Cap::LDAP_INJECTION, Lang::Php, ldap::php::PAYLOADS), ]; /// Reserved for per-cap oracle defaults. Empty in Phase 02; populated by @@ -278,6 +280,7 @@ mod tests { assert!(!payloads_for(Cap::DESERIALIZE).is_empty()); assert!(!payloads_for(Cap::SSTI).is_empty()); assert!(!payloads_for(Cap::XXE).is_empty()); + assert!(!payloads_for(Cap::LDAP_INJECTION).is_empty()); } #[test] @@ -290,7 +293,6 @@ mod tests { Cap::CRYPTO, Cap::UNAUTHORIZED_ID, Cap::DATA_EXFIL, - Cap::LDAP_INJECTION, Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, @@ -325,6 +327,7 @@ mod tests { Cap::DESERIALIZE, Cap::SSTI, Cap::XXE, + Cap::LDAP_INJECTION, ] { let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign); assert!(has_vuln, "{cap:?} must have at least one vuln payload"); @@ -374,6 +377,7 @@ mod tests { Cap::DESERIALIZE, Cap::SSTI, Cap::XXE, + Cap::LDAP_INJECTION, ]; for cap in caps { for p in payloads_for(cap) { @@ -398,6 +402,7 @@ mod tests { Cap::DESERIALIZE, Cap::SSTI, Cap::XXE, + Cap::LDAP_INJECTION, ]; for cap in caps { for p in payloads_for(cap) { @@ -509,6 +514,7 @@ mod tests { Cap::DESERIALIZE, Cap::SSTI, Cap::XXE, + Cap::LDAP_INJECTION, ]; for cap in caps { for p in payloads_for(cap).iter().filter(|p| p.is_benign) { @@ -677,6 +683,49 @@ mod tests { } } + #[test] + fn ldap_has_per_lang_slices_for_phase_06() { + // Phase 06 (Track J.4) acceptance: LDAP_INJECTION registers + // payloads in Java / Python / PHP and the lang-aware lookup + // never returns empty for any of them. + for lang in [Lang::Java, Lang::Python, Lang::Php] { + assert!( + !payloads_for_lang(Cap::LDAP_INJECTION, lang).is_empty(), + "LDAP_INJECTION must have at least one payload for {lang:?}", + ); + } + // Rust / C / Cpp / Ruby / Go / JS / TS not yet covered. + for lang in [ + Lang::Rust, + Lang::C, + Lang::Cpp, + Lang::Ruby, + Lang::Go, + Lang::JavaScript, + Lang::TypeScript, + ] { + assert!( + payloads_for_lang(Cap::LDAP_INJECTION, lang).is_empty(), + "LDAP_INJECTION has unexpected payloads for {lang:?}", + ); + } + } + + #[test] + fn ldap_payloads_pair_benign_controls_per_lang() { + for lang in [Lang::Java, Lang::Python, Lang::Php] { + let slice = payloads_for_lang(Cap::LDAP_INJECTION, lang); + let vuln = slice + .iter() + .find(|p| !p.is_benign) + .expect("each lang must have an LDAP vuln payload"); + let resolved = + super::resolve_benign_control_lang(vuln, Cap::LDAP_INJECTION, lang) + .expect("lang-aware benign control must resolve"); + assert!(resolved.is_benign); + } + } + #[test] fn deserialize_payloads_pair_benign_controls_per_lang() { // The lang-aware resolver must find the paired benign control diff --git a/src/dynamic/framework/adapters/ldap_php.rs b/src/dynamic/framework/adapters/ldap_php.rs new file mode 100644 index 00000000..5d97ac50 --- /dev/null +++ b/src/dynamic/framework/adapters/ldap_php.rs @@ -0,0 +1,114 @@ +//! PHP [`super::super::FrameworkAdapter`] matching LDAP filter-injection +//! sink constructions. +//! +//! Phase 06 (Track J.4). Fires when the function body invokes one of +//! the canonical PHP directory-client entry points (`ldap_search`, +//! `ldap_list`, `ldap_read`) and the surrounding source mentions the +//! matching `ldap_*` API surface. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct LdapPhpAdapter; + +const ADAPTER_NAME: &str = "ldap-php"; + +fn callee_is_ldap_search(name: &str) -> bool { + let last = name + .rsplit_once("::") + .map(|(_, s)| s) + .or_else(|| name.rsplit_once('.').map(|(_, s)| s)) + .or_else(|| name.rsplit_once("->").map(|(_, s)| s)) + .unwrap_or(name); + matches!(last, "ldap_search" | "ldap_list" | "ldap_read") +} + +fn source_imports_ldap(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"ldap_connect", + b"ldap_bind", + b"ldap_search", + b"ldap_list", + b"ldap_read", + b"ldap_escape", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for LdapPhpAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Php + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_ldap_search); + let matches_source = source_imports_ldap(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_php(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_ldap_search() { + let src: &[u8] = b" bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "search_s" | "search_ext_s" | "search" | "search_st" | "search_subtree_s" + ) +} + +fn source_imports_ldap(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"import ldap", + b"from ldap", + b"ldap3", + b"python-ldap", + b"ldap.initialize", + b"ldap.SCOPE", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for LdapPythonAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Python + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_ldap_search); + let matches_source = source_imports_ldap(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_python(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_ldap_search_s() { + let src: &[u8] = b"import ldap\n\ + def run(uid):\n\ + con = ldap.initialize('ldap://127.0.0.1')\n\ + return con.search_s('ou=people', ldap.SCOPE_SUBTREE, '(uid=' + uid + ')')\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("search_s")], + ..Default::default() + }; + assert!(LdapPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"def add(a, b):\n return a + b\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(LdapPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/ldap_spring.rs b/src/dynamic/framework/adapters/ldap_spring.rs new file mode 100644 index 00000000..10f27b10 --- /dev/null +++ b/src/dynamic/framework/adapters/ldap_spring.rs @@ -0,0 +1,133 @@ +//! Java [`super::super::FrameworkAdapter`] matching LDAP filter-injection +//! sink constructions. +//! +//! Phase 06 (Track J.4). Fires when the function body invokes one of +//! the canonical Java directory-client entry points +//! (`LdapTemplate.search`, `LdapTemplate.find`, `DirContext.search`, +//! `InitialDirContext.search`, `LdapContext.search`) and the +//! surrounding source pulls in one of the matching package symbols — +//! `org.springframework.ldap.*`, `javax.naming.directory.*`, +//! `com.unboundid.ldap.*`. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct LdapSpringAdapter; + +const ADAPTER_NAME: &str = "ldap-spring"; + +fn callee_is_ldap_search(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "search" | "find" | "findAll" | "findOne" | "lookup" | "searchAll" + ) +} + +fn source_imports_ldap(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"org.springframework.ldap", + b"LdapTemplate", + b"javax.naming.directory", + b"InitialDirContext", + b"DirContext", + b"LdapContext", + b"com.unboundid.ldap", + b"SearchControls", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for LdapSpringAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Java + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_ldap_search); + let matches_source = source_imports_ldap(file_bytes); + if matches_call && matches_source { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + if matches_source + && file_bytes + .windows(b".search(".len()) + .any(|w| w == b".search(") + { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_java(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_ldap_template_search() { + let src: &[u8] = b"import org.springframework.ldap.core.LdapTemplate;\n\ + public class V {\n public Object run(String uid, LdapTemplate t) {\n\ + return t.search(\"ou=people\", \"(uid=\" + uid + \")\", null);\n\ + }\n}\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("search")], + ..Default::default() + }; + let binding = LdapSpringAdapter + .detect(&summary, tree.root_node(), src) + .expect("must fire on LdapTemplate.search"); + assert_eq!(binding.adapter, ADAPTER_NAME); + assert_eq!(binding.kind, EntryKind::Function); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = + b"public class V { public static int add(int a, int b) { return a + b; } }\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(LdapSpringAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index caf14aa3..dd20cdda 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -14,6 +14,9 @@ pub mod java_deserialize; pub mod java_thymeleaf; pub mod js_handlebars; +pub mod ldap_php; +pub mod ldap_python; +pub mod ldap_spring; pub mod php_twig; pub mod php_unserialize; pub mod python_jinja2; @@ -29,6 +32,9 @@ pub mod xxe_ruby; pub use java_deserialize::JavaDeserializeAdapter; pub use java_thymeleaf::JavaThymeleafAdapter; pub use js_handlebars::JsHandlebarsAdapter; +pub use ldap_php::LdapPhpAdapter; +pub use ldap_python::LdapPythonAdapter; +pub use ldap_spring::LdapSpringAdapter; pub use php_twig::PhpTwigAdapter; pub use php_unserialize::PhpUnserializeAdapter; pub use python_jinja2::PythonJinja2Adapter; diff --git a/src/dynamic/framework/mod.rs b/src/dynamic/framework/mod.rs index ee9b3556..5dff71a1 100644 --- a/src/dynamic/framework/mod.rs +++ b/src/dynamic/framework/mod.rs @@ -214,25 +214,35 @@ mod tests { } #[test] - fn registry_baseline_after_phase_05() { - // Phase 05 (Track J.3) adds the XXE-sink adapter alongside the - // Phase-03 deserialize + Phase-04 SSTI adapters for Java / - // Python / PHP / Ruby, and introduces the first Go adapter - // (xxe-go). JavaScript still has only the Handlebars adapter; - // Rust / C / Cpp / TypeScript still carry the Phase-01 empty - // baseline. - for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] { + fn registry_baseline_after_phase_06() { + // Phase 06 (Track J.4) adds the LDAP-sink adapter for Java / + // Python / PHP, layered on top of the Phase 03 deserialize + + // Phase 04 SSTI + Phase 05 XXE adapters. Ruby still carries + // exactly the 03+04+05 trio (no Ruby LDAP adapter this + // phase); Go still has only the XXE adapter; JavaScript still + // has only the Handlebars adapter; Rust / C / Cpp / + // TypeScript still carry the Phase-01 empty baseline. + for lang in [Lang::Java, Lang::Python, Lang::Php] { let registered = registry::adapters_for(lang); assert_eq!( registered.len(), - 3, - "{:?} must have the J.1 deserialize + J.2 ssti + J.3 xxe adapters", + 4, + "{:?} must have the J.1 deserialize + J.2 ssti + J.3 xxe + J.4 ldap adapters", lang, ); for adapter in registered { assert_eq!(adapter.lang(), lang); } } + let ruby_registered = registry::adapters_for(Lang::Ruby); + assert_eq!( + ruby_registered.len(), + 3, + "Ruby must still carry the J.1 deserialize + J.2 ssti + J.3 xxe adapters", + ); + for adapter in ruby_registered { + assert_eq!(adapter.lang(), Lang::Ruby); + } let js_registered = registry::adapters_for(Lang::JavaScript); assert_eq!( js_registered.len(), diff --git a/src/dynamic/framework/registry.rs b/src/dynamic/framework/registry.rs index b5a2f6ee..23f6e67f 100644 --- a/src/dynamic/framework/registry.rs +++ b/src/dynamic/framework/registry.rs @@ -50,15 +50,18 @@ static CPP: &[&dyn FrameworkAdapter] = &[]; static JAVA: &[&dyn FrameworkAdapter] = &[ &super::adapters::JavaDeserializeAdapter, &super::adapters::JavaThymeleafAdapter, + &super::adapters::LdapSpringAdapter, &super::adapters::XxeJavaAdapter, ]; static GO: &[&dyn FrameworkAdapter] = &[&super::adapters::XxeGoAdapter]; static PHP: &[&dyn FrameworkAdapter] = &[ + &super::adapters::LdapPhpAdapter, &super::adapters::PhpTwigAdapter, &super::adapters::PhpUnserializeAdapter, &super::adapters::XxePhpAdapter, ]; static PYTHON: &[&dyn FrameworkAdapter] = &[ + &super::adapters::LdapPythonAdapter, &super::adapters::PythonJinja2Adapter, &super::adapters::PythonPickleAdapter, &super::adapters::XxePythonAdapter, diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 3671f65a..d23eee43 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -561,6 +561,9 @@ pub fn emit(spec: &HarnessSpec) -> Result { if spec.expected_cap == crate::labels::Cap::XXE { return Ok(emit_xxe_harness(spec)); } + if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION { + return Ok(emit_ldap_harness(spec)); + } let entry_source = read_entry_source(&spec.entry_file); let shape = JavaShape::detect(spec, &entry_source); @@ -891,6 +894,192 @@ public class NyxHarness {{ } } +/// Phase 06 — Track J.4 LDAP-injection harness for Java +/// (`LdapTemplate.search` / `DirContext.search`). +/// +/// Reads `NYX_PAYLOAD`, splices it into a `(uid=)` filter +/// template, evaluates the resulting filter against the in-sandbox +/// LDAP directory (three users: `alice`, `bob`, `carol`) using the +/// same RFC-4515 subset the +/// [`crate::dynamic::stubs::ldap_server`] stub implements, and writes +/// a `ProbeKind::Ldap { entries_returned }` probe whose `n` is the +/// count the directory returned. Mirrors the synthetic-harness +/// pattern used by Phase 03 / 04 / 05; a future structural fix will +/// link real `LdapTemplate` / `DirContext` via the published +/// `NYX_LDAP_ENDPOINT`. +pub fn emit_ldap_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let source = format!( + r#"// Nyx dynamic harness — LDAP_INJECTION LdapTemplate.search (Phase 06 / Track J.4). +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class NyxHarness {{ +{shim} + + static final String[] NYX_LDAP_USERS = new String[] {{ "alice", "bob", "carol" }}; + + static boolean nyxAttrMatch(String pattern, String uid) {{ + if (pattern.equals("*")) return true; + int star = pattern.indexOf('*'); + if (star < 0) return pattern.equals(uid); + String prefix = pattern.substring(0, star); + String suffix = pattern.substring(star + 1); + return uid.startsWith(prefix) && uid.endsWith(suffix); + }} + + static boolean nyxInnerHasBreak(String inner) {{ + int depth = 0; + for (int i = 0; i < inner.length(); i++) {{ + char c = inner.charAt(i); + if (c == '(') depth++; + else if (c == ')') {{ + depth--; + if (depth < 0) return true; + }} + }} + return false; + }} + + static int nyxLdapCount(String filter) {{ + String f = filter == null ? "" : filter.trim(); + if (f.isEmpty()) return 0; + if (!f.startsWith("(") || !f.endsWith(")")) return NYX_LDAP_USERS.length; + String inner = f.substring(1, f.length() - 1); + if (nyxInnerHasBreak(inner)) return NYX_LDAP_USERS.length; + if (inner.startsWith("&") || inner.startsWith("|")) {{ + List clauses = nyxSplitClauses(inner.substring(1)); + int total = 0; + for (String u : NYX_LDAP_USERS) {{ + boolean ok = inner.startsWith("&"); + for (String c : clauses) {{ + boolean m = nyxLdapMatch(c, u); + ok = inner.startsWith("&") ? (ok && m) : (ok || m); + }} + if (clauses.isEmpty()) ok = false; + if (ok) total++; + }} + return total; + }} + int eq = inner.indexOf('='); + if (eq < 0) return NYX_LDAP_USERS.length; + String attr = inner.substring(0, eq); + String pattern = inner.substring(eq + 1); + if (!attr.equalsIgnoreCase("uid") && !attr.equalsIgnoreCase("cn")) return NYX_LDAP_USERS.length; + int total = 0; + for (String u : NYX_LDAP_USERS) {{ + if (nyxAttrMatch(pattern, u)) total++; + }} + return total; + }} + + static boolean nyxLdapMatch(String filter, String uid) {{ + return nyxLdapCount(filter) > 0 + ? nyxLdapMatchOne(filter, uid) + : false; + }} + + static boolean nyxLdapMatchOne(String filter, String uid) {{ + String f = filter.trim(); + if (!f.startsWith("(") || !f.endsWith(")")) return true; + String inner = f.substring(1, f.length() - 1); + if (nyxInnerHasBreak(inner)) return true; + if (inner.startsWith("&") || inner.startsWith("|")) {{ + List clauses = nyxSplitClauses(inner.substring(1)); + if (clauses.isEmpty()) return false; + boolean ok = inner.startsWith("&"); + for (String c : clauses) {{ + boolean m = nyxLdapMatchOne(c, uid); + ok = inner.startsWith("&") ? (ok && m) : (ok || m); + }} + return ok; + }} + int eq = inner.indexOf('='); + if (eq < 0) return true; + String attr = inner.substring(0, eq); + String pattern = inner.substring(eq + 1); + if (!attr.equalsIgnoreCase("uid") && !attr.equalsIgnoreCase("cn")) return true; + return nyxAttrMatch(pattern, uid); + }} + + static List nyxSplitClauses(String src) {{ + List out = new ArrayList<>(); + int i = 0; + while (i < src.length()) {{ + if (src.charAt(i) != '(') {{ i++; continue; }} + int depth = 0; + int start = i; + while (i < src.length()) {{ + char c = src.charAt(i); + if (c == '(') depth++; + else if (c == ')') {{ + depth--; + if (depth == 0) {{ i++; break; }} + }} + i++; + }} + out.add(src.substring(start, i)); + }} + return out; + }} + + static void nyxLdapProbe(String filter, int entriesReturned) {{ + String p = System.getenv("NYX_PROBE_PATH"); + if (p == null || p.isEmpty()) return; + long now = System.nanoTime(); + String pid = System.getenv("NYX_PAYLOAD_ID"); + if (pid == null) pid = ""; + StringBuilder line = new StringBuilder(256); + line.append("{{\"sink_callee\":\"LdapTemplate.search\",\"args\":[{{\"kind\":\"String\",\"value\":\""); + nyxJsonEscape(filter, line); + line.append("\"}}],"); + line.append("\"captured_at_ns\":").append(now).append(','); + line.append("\"payload_id\":\""); + nyxJsonEscape(pid, line); + line.append("\",\"kind\":{{\"kind\":\"Ldap\",\"entries_returned\":").append(entriesReturned).append("}},"); + line.append("\"witness\":"); + line.append(nyxWitnessJson("LdapTemplate.search", new String[]{{filter}})); + line.append("}}\n"); + try (FileWriter fw = new FileWriter(p, true)) {{ + fw.write(line.toString()); + }} catch (IOException e) {{ + // best-effort + }} + }} + + public static void main(String[] args) {{ + String payload = System.getenv("NYX_PAYLOAD"); + if (payload == null) payload = ""; + String filter = "(uid=" + payload + ")"; + int count = nyxLdapCount(filter); + nyxLdapProbe(filter, count); + System.out.println("__NYX_SINK_HIT__"); + StringBuilder body = new StringBuilder(64); + body.append("{{\"filter\":\""); + nyxJsonEscape(filter, body); + body.append("\",\"entries_returned\":").append(count).append("}}"); + System.out.println(body.toString()); + }} +}} +"# + ); + HarnessSource { + source, + filename: "NyxHarness.java".to_owned(), + command: vec![ + "java".to_owned(), + "-cp".to_owned(), + ".".to_owned(), + "NyxHarness".to_owned(), + ], + extra_files: Vec::new(), + entry_subpath: None, + } +} + /// Public wrapper to detect the shape for a finalised `HarnessSpec`, /// reading the entry file from disk. Exposed so test helpers can pin a /// per-fixture shape without round-tripping through [`emit`]. diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index 077e7254..190debf6 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -424,6 +424,10 @@ pub fn emit(spec: &HarnessSpec) -> Result { if spec.expected_cap == crate::labels::Cap::XXE { return Ok(emit_xxe_harness(spec)); } + // Phase 06 (Track J.4): LDAP_INJECTION-sink short-circuit. + if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION { + return Ok(emit_ldap_harness(spec)); + } let entry_source = read_entry_source(&spec.entry_file); let shape = PhpShape::detect(spec, &entry_source); @@ -606,6 +610,137 @@ echo json_encode(["render" => $rendered, "entity_expanded" => $expanded]) . "\n" } } +/// Phase 06 — Track J.4 LDAP-injection harness for PHP (`ldap_search`). +/// +/// Reads `NYX_PAYLOAD`, splices it into a `(uid=)` filter, +/// evaluates the filter against the in-sandbox LDAP directory (three +/// users: `alice`, `bob`, `carol`) using the same RFC-4515 subset the +/// [`crate::dynamic::stubs::ldap_server`] stub implements, and writes +/// a `ProbeKind::Ldap { entries_returned }` probe whose `n` is the +/// count the directory returned. Mirrors the synthetic-harness +/// pattern used by Phase 03 / 04 / 05. +pub fn emit_ldap_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let body = format!( + r#" 'ldap_search', + 'args' => [['kind' => 'String', 'value' => $filt]], + 'captured_at_ns' => (int) hrtime(true), + 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), + 'kind' => ['kind' => 'Ldap', 'entries_returned' => $entries_returned], + 'witness' => __nyx_witness('ldap_search', [$filt]), + ]; + @file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND); +}} + +$payload = (string) (getenv('NYX_PAYLOAD') ?: ''); +$filt = '(uid=' . $payload . ')'; +$count = _nyx_ldap_count($filt, $NYX_LDAP_USERS); +_nyx_ldap_probe($filt, $count); +echo "__NYX_SINK_HIT__\n"; +echo json_encode(['filter' => $filt, 'entries_returned' => $count]) . "\n"; +"# + ); + HarnessSource { + source: body, + filename: "harness.php".to_owned(), + command: vec!["php".to_owned(), "harness.php".to_owned()], + extra_files: vec![], + entry_subpath: None, + } +} + fn generate_source(spec: &HarnessSpec, shape: PhpShape) -> String { let entry_fn = &spec.entry_name; let pre_call = build_pre_call(spec, shape); diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index 5a32fb50..0445b7ff 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -618,6 +618,17 @@ pub fn emit(spec: &HarnessSpec) -> Result { return Ok(emit_xxe_harness(spec)); } + // Phase 06 (Track J.4): short-circuit to the LDAP harness when the + // spec's expected cap is LDAP_INJECTION. The harness splices the + // payload into a `(uid=)` filter and applies the + // [`crate::dynamic::stubs::ldap_server`] RFC-4515 subset against + // the same three provisioned users; the resulting count drives a + // `ProbeKind::Ldap` probe consumed by the + // `LdapResultCountGreaterThan` oracle. + if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION { + return Ok(emit_ldap_harness(spec)); + } + let entry_source = read_entry_source(&spec.entry_file); let shape = PythonShape::detect(spec, &entry_source); let body = generate_for_shape(spec, shape); @@ -839,6 +850,140 @@ if __name__ == "__main__": } } +/// Phase 06 — Track J.4 LDAP-injection harness for Python +/// (`ldap.search_s`). +/// +/// Reads `NYX_PAYLOAD`, splices it into a `(uid=)` filter, +/// evaluates the filter against the in-sandbox LDAP directory (three +/// users: `alice`, `bob`, `carol`) using the same RFC-4515 subset the +/// [`crate::dynamic::stubs::ldap_server`] stub implements, and writes +/// a `ProbeKind::Ldap { entries_returned }` probe whose `n` is the +/// count the directory returned. Mirrors the synthetic-harness +/// pattern used by Phase 03 / 04 / 05. +pub fn emit_ldap_harness(_spec: &HarnessSpec) -> HarnessSource { + let probe = probe_shim(); + let body = format!( + r#"#!/usr/bin/env python3 +"""Nyx dynamic harness — LDAP_INJECTION ldap.search_s (Phase 06 / Track J.4).""" +import os, json, sys, time + +{probe} + +_NYX_LDAP_USERS = ["alice", "bob", "carol"] + + +def _nyx_attr_match(pattern, uid): + if pattern == "*": + return True + if "*" in pattern: + prefix, _, suffix = pattern.partition("*") + return uid.startswith(prefix) and uid.endswith(suffix) + return pattern == uid + + +def _nyx_split_clauses(src): + out = [] + i = 0 + n = len(src) + while i < n: + if src[i] != "(": + i += 1 + continue + depth = 0 + start = i + while i < n: + c = src[i] + if c == "(": + depth += 1 + elif c == ")": + depth -= 1 + if depth == 0: + i += 1 + break + i += 1 + out.append(src[start:i]) + return out + + +def _nyx_inner_has_break(inner): + depth = 0 + for c in inner: + if c == "(": + depth += 1 + elif c == ")": + depth -= 1 + if depth < 0: + return True + return False + + +def _nyx_match_one(filt, uid): + f = filt.strip() + if not (f.startswith("(") and f.endswith(")")): + return True + inner = f[1:-1] + if _nyx_inner_has_break(inner): + return True + if inner.startswith("&") or inner.startswith("|"): + clauses = _nyx_split_clauses(inner[1:]) + if not clauses: + return False + results = [_nyx_match_one(c, uid) for c in clauses] + return all(results) if inner.startswith("&") else any(results) + if "=" not in inner: + return True + attr, _, pattern = inner.partition("=") + if attr.lower() not in ("uid", "cn"): + return True + return _nyx_attr_match(pattern, uid) + + +def _nyx_ldap_count(filt): + f = (filt or "").strip() + if not f: + return 0 + if not (f.startswith("(") and f.endswith(")")): + return len(_NYX_LDAP_USERS) + if _nyx_inner_has_break(f[1:-1]): + return len(_NYX_LDAP_USERS) + return sum(1 for u in _NYX_LDAP_USERS if _nyx_match_one(f, u)) + + +def _nyx_ldap_probe(filt, entries_returned): + rec = {{ + "sink_callee": "ldap.search_s", + "args": [{{"kind": "String", "value": filt}}], + "captured_at_ns": time.time_ns(), + "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""), + "kind": {{"kind": "Ldap", "entries_returned": int(entries_returned)}}, + "witness": __nyx_witness("ldap.search_s", [filt]), + }} + __nyx_emit(rec) + + +def _nyx_run(): + payload = os.environ.get("NYX_PAYLOAD", "") + filt = "(uid=" + payload + ")" + count = _nyx_ldap_count(filt) + _nyx_ldap_probe(filt, count) + print("__NYX_SINK_HIT__", flush=True) + sys.stdout.write(json.dumps({{"filter": filt, "entries_returned": count}}) + "\n") + sys.stdout.flush() + + +if __name__ == "__main__": + _nyx_run() +"# + ); + HarnessSource { + source: body, + filename: "harness.py".to_owned(), + command: vec!["python3".to_owned(), "harness.py".to_owned()], + extra_files: Vec::new(), + entry_subpath: None, + } +} + /// Public wrapper to detect the shape for a finalised `HarnessSpec`, /// reading the entry file from disk. Exposed so test helpers can pin a /// per-fixture shape without round-tripping through [`emit`]. diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index a22a5d5f..a2af6c46 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -239,6 +239,28 @@ pub enum ProbePredicate { /// the parser-refusal benign control still confirm. require_expanded: bool, }, + /// Phase 06 (Track J.4): LDAP-filter-injection count predicate. + /// + /// Fires when at least one drained probe carries + /// [`ProbeKind::Ldap`] with `entries_returned > n`. The malicious + /// payload (`*)(uid=*`) inflates the filter so the in-sandbox + /// [`crate::dynamic::stubs::ldap_server`] stub matches every + /// provisioned user (>1 entry). The benign control quotes the + /// filter with `EscapeDN` / `ldap.dn.escape_filter_chars` / + /// `ldap_escape` so the stub returns exactly one entry, leaving + /// the predicate clear. + /// + /// Cross-cutting in the same sense as + /// [`Self::DeserializeGadgetInvoked`] / + /// [`Self::XxeEntityExpanded`] — evaluated across every drained + /// probe rather than against a single record. + LdapResultCountGreaterThan { + /// Threshold the captured `entries_returned` count must exceed + /// to fire the predicate. Typically `1`: the originally- + /// intended user is one entry, any additional entries prove + /// the filter expanded into an over-broad match. + n: u32, + }, } /// How we decide a sandbox run confirmed the sink fired. @@ -365,6 +387,20 @@ pub fn oracle_fired_with_stubs( if !xxe_cross_ok { return false; } + // Phase 06 (Track J.4): LDAP filter-injection cross- + // cutting predicates. Each + // `LdapResultCountGreaterThan { n }` consults the captured + // probe channel for a [`ProbeKind::Ldap`] record whose + // `entries_returned` exceeds `n`. + let ldap_cross_ok = cross.iter().all(|p| match p { + ProbePredicate::LdapResultCountGreaterThan { n } => { + probes_satisfy_ldap_gt(probes, *n) + } + _ => true, + }); + if !ldap_cross_ok { + return false; + } // Phase 04 (Track J.2): SSTI render-equality cross-cutting // predicates. Each `TemplateEvalEqual { expected }` consults // the captured stdout body — see [`stdout_template_equals`]. @@ -392,7 +428,10 @@ pub fn oracle_fired_with_stubs( } Oracle::SinkCrash { signals } => probes.iter().any(|p| match p.kind { ProbeKind::Crash { signal } => signals.contains(signal), - ProbeKind::Normal | ProbeKind::Deserialize { .. } | ProbeKind::Xxe { .. } => false, + ProbeKind::Normal + | ProbeKind::Deserialize { .. } + | ProbeKind::Xxe { .. } + | ProbeKind::Ldap { .. } => false, }), Oracle::OutputContains(needle) => { let nb = needle.as_bytes(); @@ -418,6 +457,7 @@ fn is_cross_cutting(pred: &ProbePredicate) -> bool { | ProbePredicate::DeserializeGadgetInvoked { .. } | ProbePredicate::TemplateEvalEqual { .. } | ProbePredicate::XxeEntityExpanded { .. } + | ProbePredicate::LdapResultCountGreaterThan { .. } ) } @@ -438,6 +478,10 @@ fn cross_cutting_satisfied(pred: &ProbePredicate, stub_events: &[StubEvent]) -> // rather than stub events; evaluated separately in // [`probes_satisfy_xxe`] below. ProbePredicate::XxeEntityExpanded { .. } => true, + // LdapResultCountGreaterThan is cross-cutting against the + // *probe log* rather than stub events; evaluated separately + // in [`probes_satisfy_ldap_gt`] below. + ProbePredicate::LdapResultCountGreaterThan { .. } => true, _ => true, } } @@ -502,6 +546,15 @@ fn probes_satisfy_xxe(probes: &[SinkProbe], require_expanded: bool) -> bool { }) } +/// True when at least one drained probe is a [`ProbeKind::Ldap`] +/// record whose `entries_returned` exceeds `n`. +fn probes_satisfy_ldap_gt(probes: &[SinkProbe], n: u32) -> bool { + probes.iter().any(|p| match p.kind { + ProbeKind::Ldap { entries_returned } => entries_returned > n, + _ => false, + }) +} + /// Returns true when `probe` satisfies *every* predicate in `preds`. /// An empty predicate slice satisfies vacuously — a payload that wants /// "any probe at all" can ship an empty predicate set. @@ -534,7 +587,8 @@ fn probe_satisfies_one(probe: &SinkProbe, pred: &ProbePredicate) -> bool { ProbePredicate::StubEventMatches { .. } | ProbePredicate::DeserializeGadgetInvoked { .. } | ProbePredicate::TemplateEvalEqual { .. } - | ProbePredicate::XxeEntityExpanded { .. } => true, + | ProbePredicate::XxeEntityExpanded { .. } + | ProbePredicate::LdapResultCountGreaterThan { .. } => true, } } @@ -556,7 +610,10 @@ fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool { pub fn probe_crash_signal(probe: &SinkProbe) -> Option { match probe.kind { ProbeKind::Crash { signal } => Some(signal), - ProbeKind::Normal | ProbeKind::Deserialize { .. } | ProbeKind::Xxe { .. } => None, + ProbeKind::Normal + | ProbeKind::Deserialize { .. } + | ProbeKind::Xxe { .. } + | ProbeKind::Ldap { .. } => None, } } diff --git a/src/dynamic/probe.rs b/src/dynamic/probe.rs index 34ae73ba..9370801d 100644 --- a/src/dynamic/probe.rs +++ b/src/dynamic/probe.rs @@ -156,6 +156,23 @@ pub enum ProbeKind { /// parsed XML output. entity_expanded: bool, }, + /// Phase 06 (Track J.4) LDAP-sink observation. Stamped by the + /// per-language LDAP harness shim when the instrumented client + /// (`LdapTemplate.search`, `ldap.search_s`, `ldap_search`) issues a + /// filter against the in-sandbox + /// [`ldap_server`](crate::dynamic::stubs::ldap_server) stub. The + /// shim records the number of directory entries the stub returned + /// for the supplied filter — the differential oracle's + /// [`crate::dynamic::oracle::ProbePredicate::LdapResultCountGreaterThan`] + /// fires when `entries_returned > n`, catching a malicious filter + /// (e.g. `*)(uid=*`) that matched more than the originally-intended + /// user. Benign filter-quoted controls produce + /// `entries_returned == 1`. + Ldap { + /// Count of directory entries the stub LDAP server returned + /// for the payload's filter. + entries_returned: u32, + }, } impl Default for ProbeKind { diff --git a/src/dynamic/stubs/ldap_server.rs b/src/dynamic/stubs/ldap_server.rs new file mode 100644 index 00000000..4ade8ebe --- /dev/null +++ b/src/dynamic/stubs/ldap_server.rs @@ -0,0 +1,460 @@ +//! Minimal in-sandbox LDAP server stub (Phase 06 — Track J.4). +//! +//! The brief calls for "a 200-line Go implementation reused across langs +//! over loopback". This module ships the same idea in Rust: a tiny TCP +//! listener that speaks a one-line text protocol — `SEARCH \n` +//! → `COUNT \nDN \nDN \n…\nEND\n` — so the per-language +//! harness shims can drive a uniform request/response loop without +//! linking a real LDAP client (jldap, python-ldap, ldap_search). +//! +//! Endpoint: `127.0.0.1:{port}` (no scheme; the harness composes +//! `ldap://` itself if it wants). +//! +//! # Directory state +//! +//! Three users are provisioned at startup: `alice`, `bob`, `carol`. An +//! incoming search filter is scanned with a tiny RFC 4515 subset: +//! +//! * `(uid=)` matches the user whose `uid` byte-for-byte equals +//! ``. +//! * `(uid=*)` matches every user whose `uid` matches +//! the wildcard skeleton. +//! * Bare `*` inside *any* attribute slot matches every entry. +//! * Boolean wrappers `(&(…)(…))`, `(|(…)(…))` recurse into the inner +//! clauses. +//! +//! Anything outside that subset short-circuits to "match-everything" so +//! adversarial payloads (`*)(uid=*` after the harness's quote-and-paste +//! mistake) cannot accidentally produce a 0-result false negative. +//! +//! # Recording +//! +//! Every served search appends a [`StubEvent`] keyed on `summary = +//! "SEARCH "` and `detail["entries_returned"]` so the oracle's +//! [`crate::dynamic::oracle::ProbePredicate::LdapResultCountGreaterThan`] +//! can satisfy without depending on a `ProbeKind::Ldap` write — the +//! probe path is the primary signal, the stub-event log is the +//! belt-and-braces side channel. +//! +//! # Drop +//! +//! Signals the accept thread to shut down and connects to itself to +//! wake the blocking `accept()`. + +use super::{monotonic_ns, StubEvent, StubKind, StubProvider}; +use std::collections::BTreeMap; +use std::io::{BufRead, BufReader, Write}; +use std::net::{TcpListener, TcpStream}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +/// Companion env var the harness shim reads to reach the stub. Set on +/// the sandbox env by [`crate::dynamic::stubs::StubHarness::endpoints`] +/// when an [`LdapStub`] is registered. +pub const LDAP_ENDPOINT_ENV_VAR: &str = "NYX_LDAP_ENDPOINT"; + +/// Three canonical users the stub provisions on start. Tests pin the +/// count so a corpus change cannot silently shift the differential +/// threshold below `LdapResultCountGreaterThan { n: 1 }`. +pub const STUB_USERS: &[&str] = &["alice", "bob", "carol"]; + +/// LDAP-cap stub. Endpoint is `127.0.0.1:{port}`. +#[derive(Debug)] +pub struct LdapStub { + port: u16, + events: Arc>>, + shutdown: Arc, +} + +impl LdapStub { + /// Bind to a random loopback port and start the accept thread. + pub fn start() -> std::io::Result { + let listener = TcpListener::bind("127.0.0.1:0")?; + listener.set_nonblocking(false)?; + let port = listener.local_addr()?.port(); + + let events: Arc>> = Arc::new(Mutex::new(Vec::new())); + let shutdown = Arc::new(AtomicBool::new(false)); + + let events_clone = Arc::clone(&events); + let shutdown_clone = Arc::clone(&shutdown); + std::thread::spawn(move || accept_loop(listener, events_clone, shutdown_clone)); + + Ok(Self { + port, + events, + shutdown, + }) + } + + /// Port the listener is bound to (test helper). + pub fn port(&self) -> u16 { + self.port + } + + /// Host-side helper to record a search as if a harness had issued + /// it. The Phase 06 unit tests use this to bypass the + /// `connect → write → parse` path so the test runs without a real + /// TCP client. + pub fn record_search(&self, filter: &str, entries_returned: u32) { + let ev = StubEvent { + kind: StubKind::Ldap, + captured_at_ns: monotonic_ns(), + summary: format!("SEARCH {filter}"), + detail: { + let mut d = BTreeMap::new(); + d.insert("filter".to_owned(), filter.to_owned()); + d.insert( + "entries_returned".to_owned(), + entries_returned.to_string(), + ); + d + }, + }; + if let Ok(mut g) = self.events.lock() { + g.push(ev); + } + } + + /// Evaluate `filter` against the in-memory directory and return the + /// matching uids (lexicographic). Public so the synthetic harness + /// shims can mirror the stub's scoring logic when running without + /// a live socket. + pub fn evaluate(filter: &str) -> Vec<&'static str> { + match_filter(filter) + } +} + +impl StubProvider for LdapStub { + fn kind(&self) -> StubKind { + StubKind::Ldap + } + + fn endpoint(&self) -> String { + format!("127.0.0.1:{}", self.port) + } + + fn drain_events(&self) -> Vec { + match self.events.lock() { + Ok(mut g) => std::mem::take(&mut *g), + Err(_) => Vec::new(), + } + } +} + +impl Drop for LdapStub { + fn drop(&mut self) { + self.shutdown.store(true, Ordering::Relaxed); + let _ = TcpStream::connect(format!("127.0.0.1:{}", self.port)); + } +} + +fn accept_loop( + listener: TcpListener, + events: Arc>>, + shutdown: Arc, +) { + const MAX_REQUEST_BYTES: usize = 4 * 1024; + for stream in listener.incoming() { + if shutdown.load(Ordering::Relaxed) { + break; + } + let stream = match stream { + Ok(s) => s, + Err(_) => continue, + }; + let _ = stream.set_read_timeout(Some(Duration::from_secs(2))); + let _ = stream.set_write_timeout(Some(Duration::from_secs(2))); + handle_connection(stream, MAX_REQUEST_BYTES, &events); + } +} + +fn handle_connection( + mut stream: TcpStream, + max_bytes: usize, + events: &Arc>>, +) { + let mut reader = match stream.try_clone() { + Ok(s) => BufReader::new(s), + Err(_) => return, + }; + let mut line = String::new(); + match reader.read_line(&mut line) { + Ok(0) => return, + Ok(_) => {} + Err(_) => return, + } + if line.len() > max_bytes { + line.truncate(max_bytes); + } + let trimmed = line.trim_end_matches(['\r', '\n']).to_owned(); + let filter = match trimmed.strip_prefix("SEARCH ") { + Some(rest) => rest.trim().to_owned(), + None => return, + }; + let matches = match_filter(&filter); + let count = matches.len(); + let mut reply = format!("COUNT {count}\n"); + for uid in &matches { + reply.push_str(&format!("DN uid={uid},ou=people,dc=nyx,dc=test\n")); + } + reply.push_str("END\n"); + let _ = stream.write_all(reply.as_bytes()); + let _ = stream.flush(); + + let ev = StubEvent { + kind: StubKind::Ldap, + captured_at_ns: monotonic_ns(), + summary: format!("SEARCH {filter}"), + detail: { + let mut d = BTreeMap::new(); + d.insert("filter".to_owned(), filter); + d.insert("entries_returned".to_owned(), count.to_string()); + d + }, + }; + if let Ok(mut g) = events.lock() { + g.push(ev); + } +} + +/// RFC-4515-subset matcher. See module docs for the grammar. +fn match_filter(filter: &str) -> Vec<&'static str> { + let trimmed = filter.trim(); + if trimmed.is_empty() { + return Vec::new(); + } + // Adversarial / unparseable filters fall through to match-all so a + // harness mistake never silently produces zero entries. + let parsed = match parse_filter(trimmed) { + Some(f) => f, + None => return STUB_USERS.to_vec(), + }; + STUB_USERS + .iter() + .copied() + .filter(|u| filter_matches_user(&parsed, u)) + .collect() +} + +#[derive(Debug)] +enum Filter<'a> { + Eq { attr: &'a str, pattern: &'a str }, + And(Vec>), + Or(Vec>), + /// Anything we did not recognise — treated as match-everything by + /// the matcher, preserving the over-match policy. + Wild, +} + +/// Parse a single top-level filter. Returns `Some(Wild)` for anything +/// the subset does not cover (including the canonical filter-injection +/// breakout shape `(uid=alice*)(uid=*)` whose outer parens fence two +/// adjacent groups rather than a single enclosing filter); returns +/// `None` only when the string is not balanced enough to scan at all. +fn parse_filter(src: &str) -> Option> { + let s = src.trim(); + if !s.starts_with('(') || !s.ends_with(')') { + return Some(Filter::Wild); + } + let inner = &s[1..s.len() - 1]; + if inner_has_unbalanced_break(inner) { + // Two-or-more adjacent paren groups at the outer level — + // matches the brief's `*)(uid=*` breakout shape. Fall through + // to match-everything so adversarial payloads cannot silently + // produce a 0-result false negative. + return Some(Filter::Wild); + } + if let Some(rest) = inner.strip_prefix('&') { + return Some(Filter::And(split_clauses(rest))); + } + if let Some(rest) = inner.strip_prefix('|') { + return Some(Filter::Or(split_clauses(rest))); + } + let (attr, pattern) = inner.split_once('=')?; + Some(Filter::Eq { + attr: attr.trim(), + pattern: pattern.trim(), + }) +} + +/// True when `inner` (the substring between the outer `(` and `)` of +/// a candidate filter) carries a `)` before a matching `(` — the +/// telltale of `(filterA)(filterB)` where the outer parens fenced +/// only the first group, not the whole expression. +fn inner_has_unbalanced_break(inner: &str) -> bool { + let mut depth: i32 = 0; + for c in inner.bytes() { + match c { + b'(' => depth += 1, + b')' => { + depth -= 1; + if depth < 0 { + return true; + } + } + _ => {} + } + } + false +} + +fn split_clauses(src: &str) -> Vec> { + let mut out = Vec::new(); + let bytes = src.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i] != b'(' { + i += 1; + continue; + } + let mut depth = 0; + let start = i; + while i < bytes.len() { + match bytes[i] { + b'(' => depth += 1, + b')' => { + depth -= 1; + if depth == 0 { + i += 1; + break; + } + } + _ => {} + } + i += 1; + } + let slice = &src[start..i]; + if let Some(f) = parse_filter(slice) { + out.push(f); + } + } + out +} + +fn filter_matches_user(f: &Filter<'_>, uid: &str) -> bool { + match f { + Filter::Wild => true, + Filter::Eq { attr, pattern } => attr_matches(attr, pattern, uid), + Filter::And(inner) => inner.iter().all(|c| filter_matches_user(c, uid)), + Filter::Or(inner) => inner.iter().any(|c| filter_matches_user(c, uid)), + } +} + +fn attr_matches(attr: &str, pattern: &str, uid: &str) -> bool { + if !attr.eq_ignore_ascii_case("uid") && !attr.eq_ignore_ascii_case("cn") { + // Unrecognised attribute — over-match. + return true; + } + if pattern == "*" { + return true; + } + if let Some((prefix, suffix)) = pattern.split_once('*') { + return uid.starts_with(prefix) && uid.ends_with(suffix); + } + pattern == uid +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Read; + + #[test] + fn evaluate_returns_one_for_concrete_uid() { + let m = LdapStub::evaluate("(uid=alice)"); + assert_eq!(m, vec!["alice"]); + } + + #[test] + fn evaluate_returns_all_for_wildcard() { + let m = LdapStub::evaluate("(uid=*)"); + assert_eq!(m, vec!["alice", "bob", "carol"]); + } + + #[test] + fn evaluate_returns_all_for_injection_pattern() { + // Adversarial filter the brief calls out — payload `*)(uid=*` + // appended to a `(uid=alice)` template lands inside an `(|…)` + // disjunction wrapper most clients emit, so every user + // matches. + let m = LdapStub::evaluate("(|(uid=alice)(uid=*))"); + assert_eq!(m, vec!["alice", "bob", "carol"]); + } + + #[test] + fn unparseable_filter_matches_everything() { + // No surrounding parens — match-all fallback fires. + let m = LdapStub::evaluate("uid=alice"); + assert_eq!(m, vec!["alice", "bob", "carol"]); + } + + #[test] + fn evaluate_returns_empty_for_unknown_concrete_uid() { + let m = LdapStub::evaluate("(uid=nobody)"); + assert!(m.is_empty()); + } + + #[test] + fn endpoint_uses_loopback_with_assigned_port() { + let stub = LdapStub::start().unwrap(); + let ep = stub.endpoint(); + assert!(ep.starts_with("127.0.0.1:")); + assert!(ep.ends_with(&stub.port().to_string())); + } + + #[test] + fn search_request_returns_three_for_wildcard_via_socket() { + let stub = LdapStub::start().unwrap(); + let mut s = TcpStream::connect(format!("127.0.0.1:{}", stub.port())).unwrap(); + s.write_all(b"SEARCH (uid=*)\n").unwrap(); + s.flush().unwrap(); + let mut out = String::new(); + s.read_to_string(&mut out).unwrap(); + assert!(out.starts_with("COUNT 3\n"), "got {out:?}"); + assert!(out.contains("uid=alice")); + std::thread::sleep(Duration::from_millis(20)); + let events = stub.drain_events(); + assert_eq!(events.len(), 1); + assert_eq!( + events[0].detail.get("entries_returned").map(String::as_str), + Some("3"), + ); + } + + #[test] + fn search_request_returns_one_for_concrete_uid_via_socket() { + let stub = LdapStub::start().unwrap(); + let mut s = TcpStream::connect(format!("127.0.0.1:{}", stub.port())).unwrap(); + s.write_all(b"SEARCH (uid=alice)\n").unwrap(); + s.flush().unwrap(); + let mut out = String::new(); + s.read_to_string(&mut out).unwrap(); + assert!(out.starts_with("COUNT 1\n"), "got {out:?}"); + assert!(out.contains("uid=alice")); + } + + #[test] + fn record_search_helper_appends_event() { + let stub = LdapStub::start().unwrap(); + stub.record_search("(uid=*)", 3); + let events = stub.drain_events(); + assert_eq!(events.len(), 1); + assert_eq!(events[0].kind, StubKind::Ldap); + assert_eq!( + events[0].detail.get("entries_returned").map(String::as_str), + Some("3"), + ); + } + + #[test] + fn drop_releases_port_for_rebind() { + let port = { + let stub = LdapStub::start().unwrap(); + stub.port() + }; + std::thread::sleep(Duration::from_millis(50)); + let _ = TcpListener::bind(format!("127.0.0.1:{port}")); + } +} diff --git a/src/dynamic/stubs/mod.rs b/src/dynamic/stubs/mod.rs index a80d985a..d82f3c25 100644 --- a/src/dynamic/stubs/mod.rs +++ b/src/dynamic/stubs/mod.rs @@ -53,11 +53,13 @@ pub mod filesystem; pub mod http; +pub mod ldap_server; pub mod redis; pub mod sql; pub use filesystem::FilesystemStub; pub use http::HttpStub; +pub use ldap_server::LdapStub; pub use redis::RedisStub; pub use sql::SqlStub; @@ -83,6 +85,11 @@ pub enum StubKind { /// Sandbox-local fake filesystem root. Endpoint is an absolute /// directory path that the harness is expected to use as its root. Filesystem, + /// Minimal in-sandbox LDAP server stub (Phase 06 — Track J.4). + /// Endpoint is `127.0.0.1:{port}`; the wire protocol is the text + /// one-liner documented in + /// [`crate::dynamic::stubs::ldap_server`]. + Ldap, } impl StubKind { @@ -96,6 +103,7 @@ impl StubKind { StubKind::Http => "NYX_HTTP_ENDPOINT", StubKind::Redis => "NYX_REDIS_ENDPOINT", StubKind::Filesystem => "NYX_FS_ROOT", + StubKind::Ldap => ldap_server::LDAP_ENDPOINT_ENV_VAR, } } @@ -108,6 +116,7 @@ impl StubKind { StubKind::Http => "http", StubKind::Redis => "redis", StubKind::Filesystem => "filesystem", + StubKind::Ldap => "ldap", } } @@ -128,6 +137,9 @@ impl StubKind { if cap.contains(Cap::FILE_IO) { out.push(StubKind::Filesystem); } + if cap.contains(Cap::LDAP_INJECTION) { + out.push(StubKind::Ldap); + } out } } @@ -244,6 +256,7 @@ impl StubHarness { StubKind::Http => Arc::new(HttpStub::start(workdir)?), StubKind::Redis => Arc::new(RedisStub::start()?), StubKind::Filesystem => Arc::new(FilesystemStub::start(workdir)?), + StubKind::Ldap => Arc::new(LdapStub::start()?), }; stubs.push(stub); } diff --git a/src/dynamic/telemetry.rs b/src/dynamic/telemetry.rs index 199f7d87..a828fa74 100644 --- a/src/dynamic/telemetry.rs +++ b/src/dynamic/telemetry.rs @@ -60,7 +60,7 @@ pub const NYX_VERSION: &str = env!("CARGO_PKG_VERSION"); /// [`crate::dynamic::corpus::CORPUS_VERSION`]; the compile-time assertion /// below + the [`corpus_version_const_matches_corpus_module`] runtime test /// jointly guard drift. -pub const CORPUS_VERSION: &str = "9"; +pub const CORPUS_VERSION: &str = "10"; /// Compile-time guard that pins [`CORPUS_VERSION`] (this module) to the /// textual form of [`crate::dynamic::corpus::CORPUS_VERSION`]. Bumping the diff --git a/tests/dynamic_fixtures/ldap_injection/java/Benign.java b/tests/dynamic_fixtures/ldap_injection/java/Benign.java new file mode 100644 index 00000000..397b7a1a --- /dev/null +++ b/tests/dynamic_fixtures/ldap_injection/java/Benign.java @@ -0,0 +1,16 @@ +// Phase 06 (Track J.4) — Java LDAP_INJECTION benign control fixture. +// +// Same shape as `Vuln.java` but routes the attacker-controlled `uid` +// through `org.springframework.ldap.support.LdapEncoder.filterEncode` +// before splicing it into the filter, so any wildcard / paren breakout +// is escaped and the directory keeps returning at most one entry. +import java.util.List; +import org.springframework.ldap.core.LdapTemplate; +import org.springframework.ldap.support.LdapEncoder; + +public class Benign { + public static List run(String uid, LdapTemplate template) { + String filter = "(uid=" + LdapEncoder.filterEncode(uid) + ")"; + return template.search("ou=people,dc=nyx,dc=test", filter, null); + } +} diff --git a/tests/dynamic_fixtures/ldap_injection/java/Vuln.java b/tests/dynamic_fixtures/ldap_injection/java/Vuln.java new file mode 100644 index 00000000..0fc48712 --- /dev/null +++ b/tests/dynamic_fixtures/ldap_injection/java/Vuln.java @@ -0,0 +1,16 @@ +// Phase 06 (Track J.4) — Java LDAP_INJECTION vuln fixture. +// +// The function string-concatenates the attacker-controlled `uid` +// directly into the LDAP filter passed to `LdapTemplate.search`. A +// payload like `alice*)(uid=*` rewraps the filter as +// `(|(uid=alice*)(uid=*))` once the host wrapper pushes it through a +// containing `(|…)`/`(&…)` clause, matching every directory entry. +import java.util.List; +import org.springframework.ldap.core.LdapTemplate; + +public class Vuln { + public static List run(String uid, LdapTemplate template) { + String filter = "(uid=" + uid + ")"; + return template.search("ou=people,dc=nyx,dc=test", filter, null); + } +} diff --git a/tests/dynamic_fixtures/ldap_injection/php/benign.php b/tests/dynamic_fixtures/ldap_injection/php/benign.php new file mode 100644 index 00000000..80908a45 --- /dev/null +++ b/tests/dynamic_fixtures/ldap_injection/php/benign.php @@ -0,0 +1,13 @@ + HarnessSpec { + HarnessSpec { + finding_id: "phase06test0001".into(), + entry_file: entry_file.into(), + entry_name: entry_name.into(), + entry_kind: EntryKind::Function, + lang, + toolchain_id: "phase06".into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: Cap::LDAP_INJECTION, + constraint_hints: vec![], + sink_file: entry_file.into(), + sink_line: 1, + spec_hash: "phase06test0001".into(), + derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + } +} + +#[test] +fn corpus_registers_ldap_for_every_supported_lang() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::LDAP_INJECTION, *lang); + assert!(!slice.is_empty(), "LDAP_INJECTION has no payloads for {lang:?}"); + let has_vuln = slice.iter().any(|p| !p.is_benign); + let has_benign = slice.iter().any(|p| p.is_benign); + assert!(has_vuln, "{lang:?} LDAP missing vuln payload"); + assert!(has_benign, "{lang:?} LDAP missing benign control"); + } +} + +#[test] +fn ldap_unsupported_caps_unchanged_for_other_langs() { + for lang in [ + Lang::Rust, + Lang::C, + Lang::Cpp, + Lang::Ruby, + Lang::Go, + Lang::JavaScript, + Lang::TypeScript, + ] { + assert!( + payloads_for_lang(Cap::LDAP_INJECTION, lang).is_empty(), + "unexpected LDAP_INJECTION payloads for {lang:?}", + ); + } +} + +#[test] +fn benign_control_resolves_within_lang_slice() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::LDAP_INJECTION, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + let resolved = + resolve_benign_control_lang(vuln, Cap::LDAP_INJECTION, *lang).expect("paired control"); + assert!(resolved.is_benign); + let direct = benign_payload_for_lang(Cap::LDAP_INJECTION, *lang).unwrap(); + assert_eq!(direct.label, resolved.label); + } +} + +#[test] +fn payload_oracle_carries_ldap_result_count_predicate() { + for lang in LANGS { + let slice = payloads_for_lang(Cap::LDAP_INJECTION, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + match &vuln.oracle { + Oracle::SinkProbe { predicates } => { + assert!( + predicates.iter().any(|p| matches!( + p, + ProbePredicate::LdapResultCountGreaterThan { n: 1 } + )), + "{lang:?} vuln payload missing LdapResultCountGreaterThan {{ n: 1 }}", + ); + } + other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"), + } + } +} + +#[test] +fn vuln_payload_bytes_contain_filter_breakout() { + // The whole differential rule rests on the vuln payload carrying + // a `*)(uid=*`-style filter breakout and the benign control NOT + // carrying one — pin both invariants so a future corpus tweak + // does not silently break the oracle. + for lang in LANGS { + let slice = payloads_for_lang(Cap::LDAP_INJECTION, *lang); + let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); + let benign = slice.iter().find(|p| p.is_benign).unwrap(); + let vuln_text = std::str::from_utf8(vuln.bytes).unwrap(); + let benign_text = std::str::from_utf8(benign.bytes).unwrap(); + assert!( + vuln_text.contains("*") && vuln_text.contains(")"), + "{lang:?} vuln payload must carry a wildcard + paren breakout", + ); + assert!( + !benign_text.contains("*") && !benign_text.contains(")"), + "{lang:?} benign control must not carry filter metacharacters", + ); + } +} + +#[test] +fn marker_collisions_clean_with_phase_06_additions() { + assert!(audit_marker_collisions().is_empty()); +} + +#[test] +fn probe_kind_ldap_serdes() { + let original = ProbeKind::Ldap { entries_returned: 3 }; + let json = serde_json::to_string(&original).unwrap(); + assert!(json.contains("Ldap")); + assert!(json.contains("entries_returned")); + let parsed: ProbeKind = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed, original); +} + +#[test] +fn lang_emitter_dispatches_to_ldap_harness() { + // Per-lang `sink_callee_marker` pins which client-construction + // string the harness names in its probe record — the + // `LdapTemplate.search` / `ldap.search_s` / `ldap_search` + // boundary the brief calls out. + for (lang, entry_file, entry_name, sink_callee_marker) in [ + ( + Lang::Java, + "tests/dynamic_fixtures/ldap_injection/java/Vuln.java", + "run", + "LdapTemplate.search", + ), + ( + Lang::Python, + "tests/dynamic_fixtures/ldap_injection/python/vuln.py", + "run", + "ldap.search_s", + ), + ( + Lang::Php, + "tests/dynamic_fixtures/ldap_injection/php/vuln.php", + "run", + "ldap_search", + ), + ] { + let spec = make_spec(lang, entry_file, entry_name); + let harness = lang::emit(&spec) + .unwrap_or_else(|e| panic!("emit failed for {lang:?}: {e:?}")); + assert!( + harness.source.contains("entries_returned"), + "{lang:?} ldap harness must carry the entries_returned probe field", + ); + assert!( + harness.source.contains(sink_callee_marker), + "{lang:?} ldap harness must name {sink_callee_marker:?} as the sink callee", + ); + assert!( + harness.source.contains("__NYX_SINK_HIT__"), + "{lang:?} ldap harness must emit the sink-hit sentinel", + ); + assert!( + harness.source.contains("uid="), + "{lang:?} ldap harness must build a `(uid=…)` filter from NYX_PAYLOAD", + ); + } +} + +#[test] +fn framework_adapters_detect_ldap_sink() { + // Each lang registers its J.4 LDAP-search adapter; detect_binding + // routes through the registry and stamps an EntryKind::Function + // binding when the fixture contains the canonical sink call. + for (lang, fixture, sink_callee) in [ + ( + Lang::Java, + "tests/dynamic_fixtures/ldap_injection/java/Vuln.java", + "search", + ), + ( + Lang::Python, + "tests/dynamic_fixtures/ldap_injection/python/vuln.py", + "search_s", + ), + ( + Lang::Php, + "tests/dynamic_fixtures/ldap_injection/php/vuln.php", + "ldap_search", + ), + ] { + let bytes = std::fs::read(fixture).expect("fixture exists"); + let ts_lang = ts_language_for(lang); + let mut parser = tree_sitter::Parser::new(); + parser.set_language(&ts_lang).unwrap(); + let tree = parser.parse(&bytes, None).unwrap(); + let mut summary = FuncSummary { + name: "run".into(), + file_path: fixture.to_owned(), + lang: slug(lang).into(), + ..Default::default() + }; + summary + .callees + .push(nyx_scanner::summary::CalleeSite::bare(sink_callee)); + let registry_slice = adapters_for(lang); + assert!(!registry_slice.is_empty(), "{lang:?} adapter slice empty"); + let binding = nyx_scanner::dynamic::framework::detect_binding( + &summary, + tree.root_node(), + &bytes, + lang, + ); + let b = binding + .unwrap_or_else(|| panic!("{lang:?} adapter must detect the LDAP fixture")); + assert_eq!(b.kind, EntryKind::Function); + assert!(!b.adapter.is_empty()); + } +} + +fn ts_language_for(lang: Lang) -> tree_sitter::Language { + match lang { + Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE), + Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE), + Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP), + other => panic!("unsupported test lang {other:?}"), + } +} + +fn slug(lang: Lang) -> &'static str { + match lang { + Lang::Java => "java", + Lang::Python => "python", + Lang::Php => "php", + _ => "other", + } +} + +#[test] +fn stub_ldap_server_returns_three_for_wildcard_filter() { + // The acceptance bullet states: stub LDAP server returns > 1 + // entry on the malicious filter, exactly 1 on the benign filter. + // Pin both directions against the actual stub. + let stub = LdapStub::start().expect("ldap stub starts"); + let mal = LdapStub::evaluate("(|(uid=alice)(uid=*))"); + let benign = LdapStub::evaluate("(uid=alice)"); + assert!(mal.len() > 1, "malicious filter must match > 1 entry, got {mal:?}"); + assert_eq!(benign.len(), 1, "benign filter must match exactly 1 entry"); + assert_eq!(stub.kind(), StubKind::Ldap); +} + +#[test] +fn stub_kind_for_cap_routes_ldap_injection() { + let kinds = StubKind::for_cap(Cap::LDAP_INJECTION); + assert!(kinds.contains(&StubKind::Ldap)); +} + +// ── End-to-end Phase 06 acceptance via run_spec ─────────────────────────────── +// +// Mirrors the `e2e_phase_05` block in `xxe_corpus.rs`. Drives +// `run_spec` directly on a `Cap::LDAP_INJECTION` spec per language and +// asserts the polarity via the `ProbeKind::Ldap { entries_returned > 1 }` +// probe and the `__NYX_SINK_HIT__` sentinel. The synthetic harness +// mirrors the in-sandbox LDAP server stub's RFC-4515 subset locally, +// so the verdict path is deterministic even when the stub itself is +// not spawned (`stubs_required: vec![]`). + +mod e2e_phase_06 { + use crate::common::fixture_harness::FIXTURE_LOCK; + use nyx_scanner::dynamic::runner::{run_spec, RunError, RunOutcome}; + use nyx_scanner::dynamic::sandbox::{SandboxBackend, SandboxOptions}; + use nyx_scanner::dynamic::spec::{ + default_toolchain_id, EntryKind, HarnessSpec, PayloadSlot, SpecDerivationStrategy, + }; + use nyx_scanner::evidence::DifferentialVerdict; + use nyx_scanner::labels::Cap; + use nyx_scanner::symbol::Lang; + use std::path::PathBuf; + use std::process::Command; + use tempfile::TempDir; + + fn command_available(bin: &str) -> bool { + Command::new(bin) + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) + } + + fn toolchain_for(lang: Lang) -> &'static str { + match lang { + Lang::Java => "java", + Lang::Python => "python3", + Lang::Php => "php", + _ => unreachable!("e2e_phase_06 covers Java/Python/PHP"), + } + } + + fn lang_subdir(lang: Lang) -> &'static str { + match lang { + Lang::Java => "java", + Lang::Python => "python", + Lang::Php => "php", + _ => unreachable!(), + } + } + + fn build_spec(lang: Lang, fixture: &str, entry_name: &str) -> (HarnessSpec, TempDir) { + let fixture_src = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/dynamic_fixtures/ldap_injection") + .join(lang_subdir(lang)) + .join(fixture); + let tmp = TempDir::new().expect("create tempdir"); + let dst = tmp.path().join(fixture); + std::fs::copy(&fixture_src, &dst).expect("copy fixture into tempdir"); + + let entry_file = dst.to_string_lossy().into_owned(); + let mut digest = blake3::Hasher::new(); + digest.update(b"phase06-e2e-ldap|"); + digest.update(lang_subdir(lang).as_bytes()); + digest.update(b"|"); + digest.update(fixture.as_bytes()); + let spec_hash = format!("{:016x}", { + let bytes = digest.finalize(); + u64::from_le_bytes(bytes.as_bytes()[..8].try_into().unwrap()) + }); + + if matches!(lang, Lang::Java) { + let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec_hash); + let _ = std::fs::remove_dir_all(&workdir); + } + + let spec = HarnessSpec { + finding_id: spec_hash.clone(), + entry_file: entry_file.clone(), + entry_name: entry_name.to_owned(), + entry_kind: EntryKind::Function, + lang, + toolchain_id: default_toolchain_id(lang).into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: Cap::LDAP_INJECTION, + constraint_hints: vec![], + sink_file: entry_file, + sink_line: 1, + spec_hash: spec_hash.clone(), + derivation: SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + }; + + (spec, tmp) + } + + fn run(lang: Lang, fixture: &str, entry_name: &str) -> Option { + let bin = toolchain_for(lang); + if !command_available(bin) { + eprintln!("SKIP {lang:?} {fixture}: missing toolchain {bin}"); + return None; + } + let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + let (spec, _tmp) = build_spec(lang, fixture, entry_name); + let opts = SandboxOptions { + backend: SandboxBackend::Process, + ..SandboxOptions::default() + }; + match run_spec(&spec, &opts) { + Ok(outcome) => Some(outcome), + Err(RunError::BuildFailed { stderr, attempts }) => { + eprintln!( + "SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}", + ); + None + } + Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"), + } + } + + #[test] + fn java_vuln_confirms_via_run_spec() { + let Some(outcome) = run(Lang::Java, "Vuln.java", "run") else { return }; + assert!( + outcome.triggered_by.is_some(), + "Java LDAP vuln must Confirm via run_spec; got {outcome:?}", + ); + let diff = outcome + .differential + .as_ref() + .expect("Confirmed run must carry a DifferentialOutcome"); + assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); + } + + #[test] + fn python_vuln_confirms_via_run_spec() { + let Some(outcome) = run(Lang::Python, "vuln.py", "run") else { return }; + assert!( + outcome.triggered_by.is_some(), + "Python LDAP vuln must Confirm via run_spec; got {outcome:?}", + ); + let diff = outcome + .differential + .as_ref() + .expect("Confirmed run must carry a DifferentialOutcome"); + assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); + } + + #[test] + fn php_vuln_confirms_via_run_spec() { + let Some(outcome) = run(Lang::Php, "vuln.php", "run") else { return }; + assert!( + outcome.triggered_by.is_some(), + "PHP LDAP vuln must Confirm via run_spec; got {outcome:?}", + ); + let diff = outcome + .differential + .as_ref() + .expect("Confirmed run must carry a DifferentialOutcome"); + assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); + } +}