//! Phase 05 (Track J.3) — XXE corpus acceptance. //! //! Asserts the new cap end-to-end: corpus slices register per-engine //! vuln/benign pairs for Java / Python / PHP / Ruby / Go, the //! lang-aware resolver pairs them inside the correct slice, the //! per-language harness emitters splice in the synthetic XML parser + //! entity-expansion probe + sink-hit sentinel, and the framework //! adapters fire on the canonical sink call. //! //! `cargo nextest run --features dynamic --test xxe_corpus`. #![cfg(feature = "dynamic")] mod common; use nyx_scanner::dynamic::corpus::{ Oracle, audit_marker_collisions, benign_payload_for_lang, payloads_for_lang, resolve_benign_control_lang, }; use nyx_scanner::dynamic::framework::registry::adapters_for; use nyx_scanner::dynamic::lang; use nyx_scanner::dynamic::oracle::ProbePredicate; use nyx_scanner::dynamic::probe::ProbeKind; use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use nyx_scanner::labels::Cap; use nyx_scanner::summary::FuncSummary; use nyx_scanner::symbol::Lang; const LANGS: &[Lang] = &[Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go]; fn make_spec(lang: Lang, entry_file: &str, entry_name: &str) -> HarnessSpec { HarnessSpec { finding_id: "phase05test0001".into(), entry_file: entry_file.into(), entry_name: entry_name.into(), entry_kind: EntryKind::Function, lang, toolchain_id: "phase05".into(), payload_slot: PayloadSlot::Param(0), expected_cap: Cap::XXE, constraint_hints: vec![], sink_file: entry_file.into(), sink_line: 1, spec_hash: "phase05test0001".into(), derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], framework: None, java_toolchain: nyx_scanner::dynamic::spec::JavaToolchain::default(), } } #[test] fn corpus_registers_xxe_for_every_supported_lang() { for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); assert!(!slice.is_empty(), "XXE has no payloads for {lang:?}"); let has_vuln = slice.iter().any(|p| !p.is_benign); let has_benign = slice.iter().any(|p| p.is_benign); assert!(has_vuln, "{lang:?} XXE missing vuln payload"); assert!(has_benign, "{lang:?} XXE missing benign control"); } } #[test] fn xxe_unsupported_caps_unchanged_for_other_langs() { // Phase 05 only fills Java / Python / PHP / Ruby / Go — Rust / C // / Cpp / JS / TS stay empty. for lang in [ Lang::Rust, Lang::C, Lang::Cpp, Lang::JavaScript, Lang::TypeScript, ] { assert!( payloads_for_lang(Cap::XXE, lang).is_empty(), "unexpected XXE payloads registered for {lang:?}", ); } } #[test] fn benign_control_resolves_within_lang_slice() { for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); // Skip the OOB-nonce variant — it self-confirms via // [`Oracle::OobCallback`] and carries no paired benign control. let vuln = slice .iter() .find(|p| !p.is_benign && !p.oob_nonce_slot) .unwrap(); let resolved = resolve_benign_control_lang(vuln, Cap::XXE, *lang).expect("paired control"); assert!(resolved.is_benign); let direct = benign_payload_for_lang(Cap::XXE, *lang).unwrap(); assert_eq!(direct.label, resolved.label); } } #[test] fn payload_oracle_carries_xxe_entity_expanded_predicate() { for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); // The doctype-entity vuln carries the XxeEntityExpanded predicate. // The OOB-nonce variant uses [`Oracle::OobCallback`] and is exercised // by `python_xxe_oob_loopback_records_callback` instead. let vuln = slice .iter() .find(|p| !p.is_benign && !p.oob_nonce_slot) .unwrap(); match &vuln.oracle { Oracle::SinkProbe { predicates } => { assert!( predicates.iter().any(|p| matches!( p, ProbePredicate::XxeEntityExpanded { require_expanded: true } )), "{lang:?} vuln payload missing XxeEntityExpanded{{require_expanded:true}}", ); } other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"), } } } #[test] fn vuln_payload_bytes_contain_doctype_entity_declaration() { // The whole differential rule rests on the vuln payload carrying // an `` decl and the benign control NOT // carrying one — pin both invariants so a future corpus tweak // does not silently break the oracle. The OOB-nonce variant's // `bytes` field is unused (the runner materialises a URL at call // time and the harness wraps it into the DTD), so skip it here. for lang in LANGS { let slice = payloads_for_lang(Cap::XXE, *lang); let vuln = slice .iter() .find(|p| !p.is_benign && !p.oob_nonce_slot) .unwrap(); let benign = slice.iter().find(|p| p.is_benign).unwrap(); let vuln_text = std::str::from_utf8(vuln.bytes).unwrap(); let benign_text = std::str::from_utf8(benign.bytes).unwrap(); assert!( vuln_text.contains(" tree_sitter::Language { match lang { Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE), Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE), Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP), Lang::Ruby => tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE), Lang::Go => tree_sitter::Language::from(tree_sitter_go::LANGUAGE), other => panic!("unsupported test lang {other:?}"), } } fn slug(lang: Lang) -> &'static str { match lang { Lang::Java => "java", Lang::Python => "python", Lang::Php => "php", Lang::Ruby => "ruby", Lang::Go => "go", _ => "other", } } // ── End-to-end Phase 05 acceptance via run_spec ─────────────────────────────── // // Closes the second half of the Phase 05 deferred audit item: the // `lang_emitter_dispatches_to_xxe_harness` assertion pins the per- // language `sink_callee_marker` (`DocumentBuilder.parse` / // `lxml.etree.XMLParser.parse` / `simplexml_load_string` / // `REXML::Document.new` / `xml.Decoder.Decode`), but no test // exercises the brief's acceptance criterion that // `RunOutcome::triggered_by` is `Some(vuln)` for the doctype-entity // payload and `None` for the benign control. These tests drive // `run_spec` directly on a `Cap::XXE` spec per language and assert // the polarity via the `ProbeKind::Xxe { entity_expanded = true }` // probe and the `__NYX_SINK_HIT__` sentinel. // // The synthetic harness ignores `_spec` and uses a regex substitution // for `` declarations — deferred item 8 // (real-parser XML harness) is the structural fix. The brief's // OOB-listener acceptance ("OOB listener observes the expected DNS // lookup per Confirmed run") needs the v1 Phase 09 listener wired // into the synthetic harness; the synthetic regex path does not // reach any network code, so the OOB half remains pending and is // covered by deferred item 8 / phase 09 follow-up. // // Go is skipped: the `xxe/go/vuln.go` fixture declares `package vuln` // while the synthetic harness's `main.go` declares `package main`, so // `go build .` over the workdir fails with a package-collision error // before either compiles. Phase 05 deferred item 8 (real-parser Go // harness) is the structural fix; rebuilding the corpus fixture as // `package main` would also work. mod e2e_phase_05 { use crate::common::fixture_harness::FIXTURE_LOCK; use nyx_scanner::dynamic::runner::{RunError, RunOutcome, run_spec}; use nyx_scanner::dynamic::sandbox::{SandboxBackend, SandboxOptions}; use nyx_scanner::dynamic::spec::{ EntryKind, HarnessSpec, PayloadSlot, SpecDerivationStrategy, default_toolchain_id, }; use nyx_scanner::evidence::DifferentialVerdict; use nyx_scanner::labels::Cap; use nyx_scanner::symbol::Lang; use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; fn command_available(bin: &str) -> bool { Command::new(bin) .arg("--version") .output() .map(|o| o.status.success()) .unwrap_or(false) } fn toolchain_for(lang: Lang) -> &'static str { match lang { Lang::Java => "java", Lang::Python => "python3", Lang::Php => "php", Lang::Ruby => "ruby", Lang::Go => "go", _ => unreachable!("e2e_phase_05 covers Java/Python/PHP/Ruby/Go"), } } fn lang_subdir(lang: Lang) -> &'static str { match lang { Lang::Java => "java", Lang::Python => "python", Lang::Php => "php", Lang::Ruby => "ruby", Lang::Go => "go", _ => unreachable!(), } } fn build_spec(lang: Lang, fixture: &str, entry_name: &str) -> (HarnessSpec, TempDir) { let fixture_src = PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("tests/dynamic_fixtures/xxe") .join(lang_subdir(lang)) .join(fixture); let tmp = TempDir::new().expect("create tempdir"); let dst = tmp.path().join(fixture); std::fs::copy(&fixture_src, &dst).expect("copy fixture into tempdir"); let entry_file = dst.to_string_lossy().into_owned(); let mut digest = blake3::Hasher::new(); digest.update(b"phase05-e2e-xxe|"); digest.update(lang_subdir(lang).as_bytes()); digest.update(b"|"); digest.update(fixture.as_bytes()); let spec_hash = format!("{:016x}", { let bytes = digest.finalize(); u64::from_le_bytes(bytes.as_bytes()[..8].try_into().unwrap()) }); if matches!(lang, Lang::Java) { let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec_hash); let _ = std::fs::remove_dir_all(&workdir); } let spec = HarnessSpec { finding_id: spec_hash.clone(), entry_file: entry_file.clone(), entry_name: entry_name.to_owned(), entry_kind: EntryKind::Function, lang, toolchain_id: default_toolchain_id(lang).into(), payload_slot: PayloadSlot::Param(0), expected_cap: Cap::XXE, constraint_hints: vec![], sink_file: entry_file, sink_line: 1, spec_hash: spec_hash.clone(), derivation: SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], framework: None, java_toolchain: nyx_scanner::dynamic::spec::JavaToolchain::default(), }; (spec, tmp) } fn run(lang: Lang, fixture: &str, entry_name: &str) -> Option { let bin = toolchain_for(lang); if !command_available(bin) { eprintln!("SKIP {lang:?} {fixture}: missing toolchain {bin}"); return None; } let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner()); let (spec, _tmp) = build_spec(lang, fixture, entry_name); let opts = SandboxOptions { backend: SandboxBackend::Process, ..SandboxOptions::default() }; // JVM startup occasionally fails under heavy cross-binary nextest // load with "Error occurred during initialization of VM: Properties // init: Could not determine current working directory." This is a // macOS getcwd() race under massive fork() churn, not a regression. // Retry up to 3 times; the second attempt almost always succeeds. for attempt in 0..3 { match run_spec(&spec, &opts) { Ok(outcome) => { if is_jvm_cwd_flake(&outcome) && attempt < 2 { eprintln!("RETRY {lang:?} {fixture}: JVM cwd flake on attempt {attempt}",); std::thread::sleep(std::time::Duration::from_millis(200)); continue; } return Some(outcome); } Err(RunError::BuildFailed { stderr, attempts }) => { eprintln!( "SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}", ); return None; } Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"), } } None } fn is_jvm_cwd_flake(outcome: &RunOutcome) -> bool { outcome.attempts.iter().any(|a| { let stdout = std::str::from_utf8(&a.outcome.stdout).unwrap_or(""); let stderr = std::str::from_utf8(&a.outcome.stderr).unwrap_or(""); stdout.contains("Could not determine current working directory") || stderr.contains("Could not determine current working directory") }) } #[test] fn java_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Java, "Vuln.java", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "Java XXE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } #[test] fn python_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Python, "vuln.py", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "Python XXE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } #[test] fn php_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Php, "vuln.php", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "PHP XXE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } #[test] fn ruby_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Ruby, "vuln.rb", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "Ruby XXE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } #[test] fn go_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Go, "vuln.go", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "Go XXE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } /// Phase 05 OOB-loopback observation: when an [`nyx_scanner::dynamic::oob::OobListener`] /// is attached and the runner exercises the `xxe--oob-nonce` /// payload, the parser's external-entity hook performs a real HTTP /// GET against the loopback nonce URL and the listener records the /// hit. Asserts both halves of the Phase 05 OOB closure: the /// callback observation AND the verdict-tier promotion from /// `Confirmed` to `ConfirmedProvenOob` (the runner's /// `build_oob_self_confirmed_outcome` path treats the OOB-nonce /// payload as self-confirming since a benign URL structurally /// cannot hit a per-finding nonce). fn run_oob(lang: Lang, fixture: &str, entry_name: &str) -> Option { use nyx_scanner::dynamic::oob::OobListener; use nyx_scanner::dynamic::sandbox::NetworkPolicy; use std::sync::Arc; let bin = toolchain_for(lang); if !command_available(bin) { eprintln!("SKIP {lang:?} {fixture} (oob): missing toolchain {bin}"); return None; } let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner()); let listener = match OobListener::bind() { Ok(listener) => Arc::new(listener), Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { eprintln!("SKIP {lang:?} {fixture} (oob): loopback bind denied by sandbox"); return None; } Err(e) => panic!("bind OOB listener on loopback: {e}"), }; let (mut spec, _tmp) = build_spec(lang, fixture, entry_name); // Use a distinct workdir from the non-OOB e2e tests so the probe // channel files do not collide (both tests use the same fixture, so // the default spec_hash would resolve to the same // `/tmp/nyx-harness//__nyx_probes.jsonl` and the two runs // could clobber each other's drains under parallel nextest). spec.spec_hash = format!("{}-oob", spec.spec_hash); spec.finding_id = spec.spec_hash.clone(); if matches!(lang, Lang::Java) { let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec.spec_hash); let _ = std::fs::remove_dir_all(&workdir); } let opts = SandboxOptions { backend: SandboxBackend::Process, network_policy: NetworkPolicy::OobOutbound { listener: Arc::clone(&listener), }, ..SandboxOptions::default() }; for attempt in 0..3 { match run_spec(&spec, &opts) { Ok(outcome) => { if is_jvm_cwd_flake(&outcome) && attempt < 2 { eprintln!( "RETRY {lang:?} {fixture} (oob): JVM cwd flake on attempt {attempt}", ); std::thread::sleep(std::time::Duration::from_millis(200)); continue; } return Some(outcome); } Err(RunError::BuildFailed { stderr, attempts }) => { eprintln!( "SKIP {lang:?} {fixture} (oob): build failed after {attempts}: {stderr}", ); return None; } Err(e) => panic!("run_spec({lang:?} {fixture} oob) errored: {e:?}"), } } None } fn assert_oob_recorded(outcome: &RunOutcome, label: &str) { let oob_attempt = outcome .attempts .iter() .find(|a| a.payload_label == label) .unwrap_or_else(|| { panic!( "OOB payload {label:?} must run when listener is attached; outcome={outcome:?}" ) }); assert!( oob_attempt.outcome.oob_callback_seen, "parser external-entity hook must fetch loopback URL so OOB listener records the nonce; got attempt={oob_attempt:?}", ); // Phase 05 OOB closure: the listener observation must promote the // verdict tier from `Confirmed` to `ConfirmedProvenOob`. The // payload carries `oob_nonce_slot: true` + `benign_control: None` // so the runner's self-confirming path emits the upgraded verdict // and sets `triggered_by` on the OOB attempt itself. assert!( oob_attempt.triggered, "OOB attempt must mark triggered=true under the self-confirming OOB path; got attempt={oob_attempt:?}", ); let diff = outcome .differential .as_ref() .expect("self-confirming OOB run must carry a DifferentialOutcome"); assert_eq!( diff.verdict, DifferentialVerdict::ConfirmedProvenOob, "OOB callback observation must promote verdict tier; got diff={diff:?}", ); } #[test] fn python_xxe_oob_loopback_records_callback() { let Some(outcome) = run_oob(Lang::Python, "vuln.py", "run") else { return; }; assert_oob_recorded(&outcome, "xxe-python-oob-nonce"); } #[test] fn java_xxe_oob_loopback_records_callback() { let Some(outcome) = run_oob(Lang::Java, "Vuln.java", "run") else { return; }; assert_oob_recorded(&outcome, "xxe-java-oob-nonce"); } #[test] fn php_xxe_oob_loopback_records_callback() { let Some(outcome) = run_oob(Lang::Php, "vuln.php", "run") else { return; }; assert_oob_recorded(&outcome, "xxe-php-oob-nonce"); } #[test] fn ruby_xxe_oob_loopback_records_callback() { let Some(outcome) = run_oob(Lang::Ruby, "vuln.rb", "run") else { return; }; assert_oob_recorded(&outcome, "xxe-ruby-oob-nonce"); } #[test] fn go_xxe_oob_loopback_records_callback() { let Some(outcome) = run_oob(Lang::Go, "vuln.go", "run") else { return; }; assert_oob_recorded(&outcome, "xxe-go-oob-nonce"); } }