//! Phase 03 (Track J.1) — DESERIALIZE corpus acceptance. //! //! Asserts the new cap end-to-end: corpus slices register per-language //! vuln/benign pairs, the lang-aware resolver pairs them inside the //! correct slice, the per-language harness emitters splice in the //! `RestrictedObjectInputStream` / `find_class` / allowed-classes //! shims, and the framework adapters fire on the matching sink call. //! //! `cargo nextest run --features dynamic --test deserialize_corpus`. #![cfg(feature = "dynamic")] mod common; use nyx_scanner::dynamic::corpus::{ Oracle, audit_marker_collisions, benign_payload_for_lang, payloads_for_lang, resolve_benign_control_lang, }; use nyx_scanner::dynamic::framework::registry::adapters_for; use nyx_scanner::dynamic::lang; use nyx_scanner::dynamic::oracle::ProbePredicate; use nyx_scanner::dynamic::probe::ProbeKind; use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use nyx_scanner::labels::Cap; use nyx_scanner::summary::FuncSummary; use nyx_scanner::symbol::Lang; const LANGS: &[Lang] = &[Lang::Java, Lang::Python, Lang::Php, Lang::Ruby]; fn make_spec(lang: Lang, entry_file: &str, entry_name: &str) -> HarnessSpec { HarnessSpec { finding_id: "phase03test0001".into(), entry_file: entry_file.into(), entry_name: entry_name.into(), entry_kind: EntryKind::Function, lang, toolchain_id: "phase03".into(), payload_slot: PayloadSlot::Param(0), expected_cap: Cap::DESERIALIZE, constraint_hints: vec![], sink_file: entry_file.into(), sink_line: 1, spec_hash: "phase03test0001".into(), derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], framework: None, java_toolchain: nyx_scanner::dynamic::spec::JavaToolchain::default(), } } #[test] fn corpus_registers_deserialize_for_every_supported_lang() { for lang in LANGS { let slice = payloads_for_lang(Cap::DESERIALIZE, *lang); assert!( !slice.is_empty(), "DESERIALIZE has no payloads for {lang:?}", ); let has_vuln = slice.iter().any(|p| !p.is_benign); let has_benign = slice.iter().any(|p| p.is_benign); assert!(has_vuln, "{lang:?} DESERIALIZE missing vuln payload"); assert!(has_benign, "{lang:?} DESERIALIZE missing benign control"); } } #[test] fn deserialize_unsupported_caps_unchanged_for_other_langs() { // Phase 03 only fills Java/Python/PHP/Ruby — Rust/C/Go/JS/TS stay empty. for lang in [ Lang::Rust, Lang::C, Lang::Cpp, Lang::Go, Lang::JavaScript, Lang::TypeScript, ] { assert!( payloads_for_lang(Cap::DESERIALIZE, lang).is_empty(), "unexpected DESERIALIZE payloads registered for {lang:?}", ); } } #[test] fn benign_control_resolves_within_lang_slice() { for lang in LANGS { let slice = payloads_for_lang(Cap::DESERIALIZE, *lang); let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); let resolved = resolve_benign_control_lang(vuln, Cap::DESERIALIZE, *lang).expect("paired control"); assert!(resolved.is_benign); // benign_payload_for_lang returns the same entry. let direct = benign_payload_for_lang(Cap::DESERIALIZE, *lang).unwrap(); assert_eq!(direct.label, resolved.label); } } #[test] fn payload_oracle_carries_deserialize_predicate() { for lang in LANGS { let slice = payloads_for_lang(Cap::DESERIALIZE, *lang); let vuln = slice.iter().find(|p| !p.is_benign).unwrap(); match &vuln.oracle { Oracle::SinkProbe { predicates } => { assert!( predicates.iter().any(|p| matches!( p, ProbePredicate::DeserializeGadgetInvoked { require_invoked: true } )), "{lang:?} vuln payload missing DeserializeGadgetInvoked predicate", ); } other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"), } } } #[test] fn marker_collisions_clean_with_phase_03_additions() { assert!(audit_marker_collisions().is_empty()); } #[test] fn probe_kind_deserialize_serdes() { let original = ProbeKind::Deserialize { gadget_chain_invoked: true, }; let json = serde_json::to_string(&original).unwrap(); assert!(json.contains("Deserialize")); assert!(json.contains("gadget_chain_invoked")); let parsed: ProbeKind = serde_json::from_str(&json).unwrap(); assert_eq!(parsed, original); } #[test] fn lang_emitter_dispatches_to_deserialize_harness() { // `sink_callee_marker` is the per-language deserialize sink call // string the harness writes into the JSON probe record — the // resolveClass / find_class / unserialize / Marshal.load boundary // the brief calls out. Pinning the marker here keeps the test // honest about which guard each lang's harness names. for (lang, entry_file, entry_name, sink_callee_marker) in [ ( Lang::Java, "tests/dynamic_fixtures/deserialize/java/Vuln.java", "run", "ObjectInputStream.resolveClass", ), ( Lang::Python, "tests/dynamic_fixtures/deserialize/python/vuln.py", "run", "pickle.Unpickler.find_class", ), ( Lang::Php, "tests/dynamic_fixtures/deserialize/php/vuln.php", "run", "unserialize", ), ( Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/vuln.rb", "run", "Marshal.load", ), ] { let spec = make_spec(lang, entry_file, entry_name); let harness = lang::emit(&spec).unwrap_or_else(|e| panic!("emit failed for {lang:?}: {e:?}")); assert!( harness.source.contains("NYX_GADGET_CLASS:"), "{lang:?} deserialize harness must parse NYX_GADGET_CLASS marker", ); assert!( harness.source.contains(sink_callee_marker), "{lang:?} deserialize harness must name {sink_callee_marker:?} as the \ resolveClass / find_class equivalent sink callee", ); } } #[test] fn deserialize_harness_drives_entry_when_derivable() { // Java: reflectively load the fixture class and invoke the derived // entry method so the fixture's own resolveClass allowlist runs before // the gadget class resolves. let java = lang::emit(&make_spec( Lang::Java, "tests/dynamic_fixtures/deserialize/java/Benign.java", "run", )) .expect("java deser emit"); assert!( java.source.contains("Class.forName(\"Benign\")"), "Java deser harness must reflectively load the fixture class", ); assert!( java.source.contains("getMethod(\"run\""), "Java deser harness must invoke the derived entry method", ); assert!( java.source.contains("nyxCauseChainHas"), "Java deser harness must detect gadget resolution via the cause chain", ); // Ruby: require_relative the fixture and drive its entry so the // const-name guard runs before Marshal.load. let ruby = lang::emit(&make_spec( Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/benign.rb", "run", )) .expect("ruby deser emit"); assert!( ruby.source.contains("require_relative './benign'"), "Ruby deser harness must require_relative the fixture", ); assert!( ruby.source.contains("__send__(:'run'"), "Ruby deser harness must drive the derived entry function", ); } #[test] fn deserialize_harness_falls_back_to_synthetic_without_entry() { // No derivable enclosing entry → direct-sink synthetic path; the // harness must not attempt to load a fixture it cannot name. let java = lang::emit(&make_spec( Lang::Java, "tests/dynamic_fixtures/deserialize/java/Vuln.java", "", )) .expect("java deser emit"); assert!( !java.source.contains("Class.forName("), "Java deser harness must not reflect into a fixture when no entry is derivable", ); assert!( java.source.contains("nyxSyntheticDeserialize"), "Java synthetic fallback must drive the restricted-OIS path directly", ); let ruby = lang::emit(&make_spec( Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/vuln.rb", "", )) .expect("ruby deser emit"); assert!( !ruby.source.contains("require_relative"), "Ruby deser harness must not require the fixture when no entry is derivable", ); } #[test] fn framework_adapters_detect_deserialize_sink() { // Java + Python + PHP + Ruby all register their J.1 sink adapter; // detect_binding routes through the registry and stamps an // EntryKind::Function binding when the fixture contains the // canonical sink call. for (lang, fixture) in [ ( Lang::Java, "tests/dynamic_fixtures/deserialize/java/Vuln.java", ), ( Lang::Python, "tests/dynamic_fixtures/deserialize/python/vuln.py", ), (Lang::Php, "tests/dynamic_fixtures/deserialize/php/vuln.php"), ( Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/vuln.rb", ), ] { let bytes = std::fs::read(fixture).expect("fixture exists"); let ts_lang = ts_language_for(lang); let mut parser = tree_sitter::Parser::new(); parser.set_language(&ts_lang).unwrap(); let tree = parser.parse(&bytes, None).unwrap(); let summary = FuncSummary { name: "run".into(), file_path: fixture.to_owned(), lang: slug(lang).into(), ..Default::default() }; let registry_slice = adapters_for(lang); assert!(!registry_slice.is_empty(), "{lang:?} adapter slice empty",); let binding = nyx_scanner::dynamic::framework::detect_binding( &summary, tree.root_node(), &bytes, lang, ); let b = binding .unwrap_or_else(|| panic!("{lang:?} adapter must detect the deserialize sink fixture")); assert_eq!(b.kind, EntryKind::Function); assert!(!b.adapter.is_empty()); } } fn ts_language_for(lang: Lang) -> tree_sitter::Language { match lang { Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE), Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE), Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP), Lang::Ruby => tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE), other => panic!("unsupported test lang {other:?}"), } } fn slug(lang: Lang) -> &'static str { match lang { Lang::Java => "java", Lang::Python => "python", Lang::Php => "php", Lang::Ruby => "ruby", _ => "other", } } // ── End-to-end Phase 03 acceptance via run_spec ─────────────────────────────── // // Closes the second half of the Phase 03 deferred audit item: the // `lang_emitter_dispatches_to_deserialize_harness` assertion now pins // the per-lang `sink_callee_marker`, but no test exercises the brief's // acceptance criterion that `nyx scan --verify` reports `Confirmed` on // vuln/* fixtures and `NotConfirmed` (or non-Confirmed) on benign/*. // These tests drive `run_spec` directly on a `Cap::DESERIALIZE` spec // per language and assert `RunOutcome::triggered_by` matches the // expected polarity. // // The harness emitter is synthetic (see deferred item: harness ignores // `_spec` and pattern-matches `NYX_GADGET_CLASS:` payload // bytes) — so the toolchain still needs to compile and run the // synthesised `NyxHarness.java` / `harness.py` / `harness.php` / // `harness.rb`, but the fixture body is never invoked. A missing // toolchain triggers a structured skip, not a panic. mod e2e_phase_03 { use crate::common::fixture_harness::FIXTURE_LOCK; use nyx_scanner::dynamic::runner::{RunError, RunOutcome, run_spec}; use nyx_scanner::dynamic::sandbox::SandboxOptions; use nyx_scanner::dynamic::spec::{ EntryKind, HarnessSpec, PayloadSlot, SpecDerivationStrategy, default_toolchain_id, }; use nyx_scanner::evidence::DifferentialVerdict; use nyx_scanner::labels::Cap; use nyx_scanner::symbol::Lang; use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; fn command_available(bin: &str) -> bool { Command::new(bin) .arg("--version") .output() .map(|o| o.status.success()) .unwrap_or(false) } fn toolchain_for(lang: Lang) -> &'static str { match lang { Lang::Java => "java", Lang::Python => "python3", Lang::Php => "php", Lang::Ruby => "ruby", _ => unreachable!("e2e_phase_03 only covers Java/Python/PHP/Ruby"), } } fn lang_subdir(lang: Lang) -> &'static str { match lang { Lang::Java => "java", Lang::Python => "python", Lang::Php => "php", Lang::Ruby => "ruby", _ => unreachable!(), } } fn build_spec(lang: Lang, fixture: &str, entry_name: &str) -> (HarnessSpec, TempDir) { let fixture_src = PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("tests/dynamic_fixtures/deserialize") .join(lang_subdir(lang)) .join(fixture); let tmp = TempDir::new().expect("create tempdir"); let dst = tmp.path().join(fixture); std::fs::copy(&fixture_src, &dst).expect("copy fixture into tempdir"); let entry_file = dst.to_string_lossy().into_owned(); let mut digest = blake3::Hasher::new(); digest.update(b"phase03-e2e-deserialize|"); digest.update(lang_subdir(lang).as_bytes()); digest.update(b"|"); digest.update(fixture.as_bytes()); let spec_hash = format!("{:016x}", { let bytes = digest.finalize(); u64::from_le_bytes(bytes.as_bytes()[..8].try_into().unwrap()) }); // Wipe the per-spec workdir so stale .class / build artifacts // from a previous run cannot leak in. Mirrors the Java guard // in tests/common/fixture_harness.rs::run_shape_fixture_lang. if matches!(lang, Lang::Java) { let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec_hash); let _ = std::fs::remove_dir_all(&workdir); } let spec = HarnessSpec { finding_id: spec_hash.clone(), entry_file: entry_file.clone(), entry_name: entry_name.to_owned(), entry_kind: EntryKind::Function, lang, toolchain_id: default_toolchain_id(lang).into(), payload_slot: PayloadSlot::Param(0), expected_cap: Cap::DESERIALIZE, constraint_hints: vec![], sink_file: entry_file, sink_line: 1, spec_hash: spec_hash.clone(), derivation: SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], framework: None, java_toolchain: nyx_scanner::dynamic::spec::JavaToolchain::default(), }; (spec, tmp) } fn run(lang: Lang, fixture: &str, entry_name: &str) -> Option { let bin = toolchain_for(lang); if !command_available(bin) { eprintln!("SKIP {lang:?} {fixture}: missing toolchain {bin}"); return None; } let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner()); let (spec, _tmp) = build_spec(lang, fixture, entry_name); let opts = SandboxOptions { backend: nyx_scanner::dynamic::sandbox::SandboxBackend::Process, ..SandboxOptions::default() }; match run_spec(&spec, &opts) { Ok(outcome) => Some(outcome), Err(RunError::BuildFailed { stderr, attempts }) => { eprintln!( "SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}", ); None } Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"), } } /// For every supported lang, the vuln fixture must Confirm: the /// synthetic harness pattern-matches `NYX_GADGET_CLASS:` /// from the curated payload bytes, writes a probe, and the /// differential rule pairs against the benign control (which carries /// an allow-listed class name and writes no probe). #[test] fn java_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Java, "Vuln.java", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "Java DESERIALIZE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!( diff.verdict, DifferentialVerdict::Confirmed, "differential verdict must be Confirmed: {diff:?}", ); } #[test] fn python_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Python, "vuln.py", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "Python DESERIALIZE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } #[test] fn php_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Php, "vuln.php", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "PHP DESERIALIZE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } #[test] fn ruby_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::Ruby, "vuln.rb", "run") else { return; }; assert!( outcome.triggered_by.is_some(), "Ruby DESERIALIZE vuln must Confirm via run_spec; got {outcome:?}", ); let diff = outcome .differential .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } }