//! Phase 05 (Track J.3) — XXE corpus acceptance. //! //! Asserts the new cap end-to-end: corpus slices register per-engine //! vuln/benign pairs for Java / Python / PHP / Ruby / Go, the //! lang-aware resolver pairs them inside the correct slice, the //! per-language harness emitters splice in the synthetic XML parser + //! entity-expansion probe + sink-hit sentinel, and the framework //! adapters fire on the canonical sink call. //! //! `cargo nextest run --features dynamic --test xxe_corpus`. #![cfg(feature = "dynamic")] use nyx_scanner::dynamic::corpus::{ audit_marker_collisions, benign_payload_for_lang, payloads_for_lang, resolve_benign_control_lang, Oracle, }; use nyx_scanner::dynamic::framework::registry::adapters_for; use nyx_scanner::dynamic::lang; use nyx_scanner::dynamic::oracle::ProbePredicate; use nyx_scanner::dynamic::probe::ProbeKind; use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use nyx_scanner::labels::Cap; use nyx_scanner::summary::FuncSummary; use nyx_scanner::symbol::Lang; const LANGS: &[Lang] = &[Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go]; fn make_spec(lang: Lang, entry_file: &str, entry_name: &str) -> HarnessSpec { HarnessSpec { finding_id: "phase05test0001".into(), entry_file: entry_file.into(), entry_name: entry_name.into(), entry_kind: EntryKind::Function, lang, toolchain_id: "phase05".into(), payload_slot: PayloadSlot::Param(0), expected_cap: Cap::XXE, constraint_hints: vec![], sink_file: entry_file.into(), sink_line: 1, spec_hash: "phase05test0001".into(), derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], framework: None, } } #[test] fn corpus_registers_xxe_for_every_supported_lang() { for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); assert!(!slice.is_empty(), "XXE has no payloads for {lang:?}"); let has_vuln = slice.iter().any(|p| !p.is_benign); let has_benign = slice.iter().any(|p| p.is_benign); assert!(has_vuln, "{lang:?} XXE missing vuln payload"); assert!(has_benign, "{lang:?} XXE missing benign control"); } } #[test] fn xxe_unsupported_caps_unchanged_for_other_langs() { // Phase 05 only fills Java / Python / PHP / Ruby / Go — Rust / C // / Cpp / JS / TS stay empty. for lang in [ Lang::Rust, Lang::C, Lang::Cpp, Lang::JavaScript, Lang::TypeScript, ] { assert!( payloads_for_lang(Cap::XXE, lang).is_empty(), "unexpected XXE payloads registered for {lang:?}", ); } } #[test] fn benign_control_resolves_within_lang_slice() { for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); let resolved = resolve_benign_control_lang(vuln, Cap::XXE, *lang).expect("paired control"); assert!(resolved.is_benign); let direct = benign_payload_for_lang(Cap::XXE, *lang).unwrap(); assert_eq!(direct.label, resolved.label); } } #[test] fn payload_oracle_carries_xxe_entity_expanded_predicate() { for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); match &vuln.oracle { Oracle::SinkProbe { predicates } => { assert!( predicates.iter().any(|p| matches!( p, ProbePredicate::XxeEntityExpanded { require_expanded: true } )), "{lang:?} vuln payload missing XxeEntityExpanded{{require_expanded:true}}", ); } other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"), } } } #[test] fn vuln_payload_bytes_contain_doctype_entity_declaration() { // The whole differential rule rests on the vuln payload carrying // an `` decl and the benign control NOT // carrying one — pin both invariants so a future corpus tweak // does not silently break the oracle. for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); let benign = slice.iter().find(|p| p.is_benign).unwrap(); let vuln_text = std::str::from_utf8(vuln.bytes).unwrap(); let benign_text = std::str::from_utf8(benign.bytes).unwrap(); assert!( vuln_text.contains(" tree_sitter::Language { match lang { Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE), Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE), Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP), Lang::Ruby => tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE), Lang::Go => tree_sitter::Language::from(tree_sitter_go::LANGUAGE), other => panic!("unsupported test lang {other:?}"), } } fn slug(lang: Lang) -> &'static str { match lang { Lang::Java => "java", Lang::Python => "python", Lang::Php => "php", Lang::Ruby => "ruby", Lang::Go => "go", _ => "other", } }