diff --git a/src/dynamic/lang/go.rs b/src/dynamic/lang/go.rs index f5c5ea6c..93757fba 100644 --- a/src/dynamic/lang/go.rs +++ b/src/dynamic/lang/go.rs @@ -838,7 +838,7 @@ func nyxHeaderProbe(name, value string) {{ }}, "captured_at_ns": uint64(time.Now().UnixNano()), "payload_id": os.Getenv("NYX_PAYLOAD_ID"), - "kind": map[string]interface{{}}{{"kind": "HeaderEmit", "name": name, "value": value}}, + "kind": map[string]interface{{}}{{"kind": "HeaderEmit", "name": name, "value": value, "protocol": "in-process"}}, "witness": __nyx_witness("http.ResponseWriter.Header.Set", []string{{name, value}}), }}) }} diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 43fc75e9..b879d573 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -1628,7 +1628,7 @@ public class NyxHarness {{ nyxJsonEscape(name, line); line.append("\",\"value\":\""); nyxJsonEscape(value, line); - line.append("\"}},"); + line.append("\",\"protocol\":\"in-process\"}},"); line.append("\"witness\":"); line.append(nyxWitnessJson("HttpServletResponse.setHeader", new String[]{{name, value}})); line.append("}}\n"); diff --git a/src/dynamic/lang/js_shared.rs b/src/dynamic/lang/js_shared.rs index 7c1f84aa..5f0037ee 100644 --- a/src/dynamic/lang/js_shared.rs +++ b/src/dynamic/lang/js_shared.rs @@ -1483,7 +1483,7 @@ function nyxHeaderProbe(name, value) {{ ], captured_at_ns: Number(process.hrtime.bigint()), payload_id: process.env.NYX_PAYLOAD_ID || '', - kind: {{ kind: 'HeaderEmit', name: name, value: value }}, + kind: {{ kind: 'HeaderEmit', name: name, value: value, protocol: 'in-process' }}, witness: __nyx_witness('http.ServerResponse#setHeader', [name, value]), }}; try {{ @@ -1766,21 +1766,19 @@ function nyxPrototypePollutionProbe(value) {{ }}); }})(); -function nyxDeepMerge(target, source) {{ - if (source === null || typeof source !== 'object') return target; - for (const key of Object.keys(source)) {{ - const sv = source[key]; - if (sv !== null && typeof sv === 'object') {{ - 
if (target[key] === null || typeof target[key] !== 'object') {{ - target[key] = {{}}; - }} - nyxDeepMerge(target[key], sv); - }} else {{ - target[key] = sv; - }} - }} - return target; -}} +// Phase 10 sink: route the parsed payload through the real +// `lodash.merge` pinned at lodash 4.17.4. Lodash hardened `_.merge` +// against the `__proto__` key starting in 4.17.5 (the CVE-2018-3721 +// fix, well before CVE-2018-16487 was patched at 4.17.11 for the +// same `_.merge` family), so the canary only fires against <= 4.17.4. The +// staged `package.json` pins this version exactly; `prepare_node` +// resolves the dep via `npm install` before the harness runs. +// Exercising the real merge implementation (vs the hand-rolled +// `nyxDeepMerge` that previously stood in) covers lodash's actual +// recursion / cycle / array-vs-object decision shape so a future +// fixture that hits a patched range can be added without re-shaping +// the harness. +const _lodashMerge = require('lodash').merge; const payload = process.env.NYX_PAYLOAD || ''; let parsed; @@ -1791,9 +1789,9 @@ try {{ }} const target = {{}}; try {{ - nyxDeepMerge(target, parsed); + _lodashMerge(target, parsed); }} catch (e) {{ - // Naive merge may throw on weird inputs; the canary observation + // lodash.merge can throw on weird inputs; the canary observation // already wrote any probe before the throw.
}} console.log('__NYX_SINK_HIT__'); @@ -1806,7 +1804,12 @@ console.log(JSON.stringify({{ source: body, filename: "harness.js".to_owned(), command: vec!["node".to_owned(), "harness.js".to_owned()], - extra_files: Vec::new(), + extra_files: vec![( + "package.json".to_owned(), + r#"{"name":"nyx-prototype-pollution-harness","private":true,"dependencies":{"lodash":"4.17.4"}} +"# + .to_owned(), + )], entry_subpath: None, } } diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index 77ca8bea..13634df9 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -1494,7 +1494,7 @@ function _nyx_header_probe(string $name, string $value): void {{ ], 'captured_at_ns' => (int) hrtime(true), 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), - 'kind' => ['kind' => 'HeaderEmit', 'name' => $name, 'value' => $value], + 'kind' => ['kind' => 'HeaderEmit', 'name' => $name, 'value' => $value, 'protocol' => 'in-process'], 'witness' => __nyx_witness('header()', [$name, $value]), ]; @file_put_contents($p, json_encode($rec) . 
"\n", FILE_APPEND); diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index 62378696..daf5fb8f 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -2038,58 +2038,31 @@ pub fn emit_xpath_harness(spec: &HarnessSpec) -> HarnessSource { import importlib import json import os -import re import sys import time {probe} -_NYX_XPATH_USERS = ["alice", "bob", "carol"] - - -def _nyx_xpath_select(expr): - needle = "//user[@name=" - if not expr.startswith(needle): - return 0 - rest = expr[len(needle):] - if not rest.endswith("]"): - return 0 - predicate = rest[:-1] - m = re.match(r"^'([^']*)'(.*)$", predicate) - if m is not None: - literal = m.group(1) - tail = m.group(2).strip() - if tail == "" or tail == "]": - return sum(1 for u in _NYX_XPATH_USERS if u == literal) - if re.match(r"^or\s+", tail, re.IGNORECASE): - return len(_NYX_XPATH_USERS) - m = re.match(r'^"([^"]*)"\s*$', predicate) - if m is not None: - literal = m.group(1) - return sum(1 for u in _NYX_XPATH_USERS if u == literal) - if re.match(r"^concat\(", predicate, re.IGNORECASE): - parts = re.findall(r"'([^']*)'", predicate) - joined = "".join(p for p in parts if p not in (',"',)) - joined = joined.replace(",\"'\",", "'") - return sum(1 for u in _NYX_XPATH_USERS if u == joined) - return len(_NYX_XPATH_USERS) - def _nyx_xpath_via_fixture(payload): # Phase 07 tier-(a): import the fixture and call its - # `{entry_name}` so the real `lxml.etree.xpath` (or other - # XPath evaluator the fixture chooses) runs against the staged - # corpus document. Returns the node count, or `None` when the - # import or call fails (e.g. lxml is not installed on the host) - # so the caller can fall back to the inline matcher. + # `{entry_name}` so the real `lxml.etree.xpath` runs against the + # staged corpus document. 
A missing `lxml` host install is the + # only structural reason the import fails; in that case we emit + # the conventional `NYX_IMPORT_ERROR:` stderr marker plus + # `sys.exit(77)` so the runner maps the outcome to + # `RunError::BuildFailed` and the e2e SKIP branch fires. sys.path.insert(0, ".") try: mod = importlib.import_module("{module_name}") - except Exception: - return None + except ImportError as _e: + print(f"NYX_IMPORT_ERROR: {{_e}}", file=sys.stderr, flush=True) + sys.exit(77) fn = getattr(mod, "{entry_name}", None) if fn is None: - return None + raise RuntimeError( + "Phase 07 XPath harness: entry function '{entry_name}' not found in fixture module '{module_name}'" + ) try: result = fn(payload) except Exception: @@ -2119,8 +2092,7 @@ def _nyx_run(): payload = os.environ.get("NYX_PAYLOAD", "") expr = "//user[@name='" + payload + "']" nodes = _nyx_xpath_via_fixture(payload) - if nodes is None: - nodes = _nyx_xpath_select(expr) + print("__NYX_XPATH_TIER_A__", flush=True) _nyx_xpath_probe(expr, nodes) print("__NYX_SINK_HIT__", flush=True) sys.stdout.write(json.dumps({{"expr": expr, "nodes_returned": nodes}}) + "\n") @@ -2131,7 +2103,10 @@ if __name__ == "__main__": _nyx_run() "# ); - let extra_files = vec![(corpus_filename.to_owned(), corpus_xml.to_owned())]; + let extra_files = vec![ + (corpus_filename.to_owned(), corpus_xml.to_owned()), + ("requirements.txt".to_owned(), "lxml\n".to_owned()), + ]; HarnessSource { source: body, filename: "harness.py".to_owned(), @@ -2270,7 +2245,7 @@ def _nyx_header_probe(name, value): ], "captured_at_ns": time.time_ns(), "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""), - "kind": {{"kind": "HeaderEmit", "name": name, "value": value}}, + "kind": {{"kind": "HeaderEmit", "name": name, "value": value, "protocol": "in-process"}}, "witness": __nyx_witness("flask.Response.headers.__setitem__", [name, value]), }} __nyx_emit(rec) @@ -3472,8 +3447,16 @@ mod tests { "tests/dynamic_fixtures/xpath_injection/python/vuln.py", "run", 
)); - assert_eq!(h.extra_files.len(), 1); + assert_eq!(h.extra_files.len(), 2); assert_eq!(h.extra_files[0].0, "xpath_corpus.xml"); + assert_eq!( + h.extra_files[1].0, "requirements.txt", + "Python XPath harness must stage requirements.txt so prepare_python pip-installs lxml", + ); + assert_eq!( + h.extra_files[1].1, "lxml\n", + "Python XPath harness requirements.txt must pin lxml so tier-(a) imports succeed", + ); assert!( h.source.contains("def _nyx_xpath_via_fixture(payload):"), "Python XPath harness must define the fixture-routing helper", @@ -3488,11 +3471,31 @@ mod tests { ); assert!( h.source.contains("nodes = _nyx_xpath_via_fixture(payload)"), - "Python XPath harness main must call the fixture-routing helper first", + "Python XPath harness main must call the fixture-routing helper", + ); + } + + #[test] + fn emit_xpath_harness_drops_inline_matcher_fallback() { + let h = emit_xpath_harness(&make_xpath_spec( + "tests/dynamic_fixtures/xpath_injection/python/vuln.py", + "run", + )); + assert!( + !h.source.contains("_nyx_xpath_select"), + "Python XPath harness must no longer carry the inline `_nyx_xpath_select` matcher fallback", ); assert!( - h.source.contains("nodes = _nyx_xpath_select(expr)"), - "Python XPath harness must keep the inline matcher as a fallback", + h.source.contains("NYX_IMPORT_ERROR:"), + "Python XPath harness must emit the conventional NYX_IMPORT_ERROR stderr marker so the runner SKIPs hosts without lxml installed", + ); + assert!( + h.source.contains("sys.exit(77)"), + "Python XPath harness must exit 77 on ImportError so RunError::BuildFailed fires", + ); + assert!( + h.source.contains("__NYX_XPATH_TIER_A__"), + "Python XPath harness must print the tier-(a) stdout marker after a successful fixture call so e2e assertions can pin tier-(a) execution", ); } diff --git a/src/dynamic/lang/ruby.rs b/src/dynamic/lang/ruby.rs index 2b0809d0..348dc7d4 100644 --- a/src/dynamic/lang/ruby.rs +++ b/src/dynamic/lang/ruby.rs @@ -1196,7 +1196,7 @@ def 
_nyx_header_probe(name, value) ], 'captured_at_ns' => Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond), 'payload_id' => ENV['NYX_PAYLOAD_ID'] || '', - 'kind' => {{ 'kind' => 'HeaderEmit', 'name' => name, 'value' => value }}, + 'kind' => {{ 'kind' => 'HeaderEmit', 'name' => name, 'value' => value, 'protocol' => 'in-process' }}, 'witness' => __nyx_witness('Rack::Response#set_header', [name, value]), }} File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }} diff --git a/src/dynamic/lang/rust.rs b/src/dynamic/lang/rust.rs index 6da67ba7..0dea5218 100644 --- a/src/dynamic/lang/rust.rs +++ b/src/dynamic/lang/rust.rs @@ -730,7 +730,7 @@ fn nyx_header_probe(name: &str, value: &str) {{ line.push_str(&nyx_json_escape(name)); line.push_str("\",\"value\":\""); line.push_str(&nyx_json_escape(value)); - line.push_str("\"}},\"witness\":{{}}}}\n"); + line.push_str("\",\"protocol\":\"in-process\"}},\"witness\":{{}}}}\n"); if let Ok(mut f) = OpenOptions::new().create(true).append(true).open(&p) {{ let _ = f.write_all(line.as_bytes()); }} diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index 1d0bca98..d12828c1 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -797,7 +797,7 @@ fn probes_satisfy_count_gt(probes: &[SinkProbe], n: u32) -> bool { /// [`ProbePredicate::HeaderInjected`] (Phase 08 — Track J.6). fn probes_satisfy_header_injected(probes: &[SinkProbe], header_name: &str) -> bool { probes.iter().any(|p| match &p.kind { - ProbeKind::HeaderEmit { name, value } => { + ProbeKind::HeaderEmit { name, value, .. } => { (header_name == "*" || name.eq_ignore_ascii_case(header_name)) && value.contains("\r\n") } _ => false, diff --git a/src/dynamic/probe.rs b/src/dynamic/probe.rs index 7d0f779b..1543f4bc 100644 --- a/src/dynamic/probe.rs +++ b/src/dynamic/probe.rs @@ -103,6 +103,39 @@ impl ProbeArg { } } +/// Transport layer that recorded a [`ProbeKind::HeaderEmit`] observation. 
+/// +/// Today every per-language harness shim monkey-patches the framework's +/// response object (`flask.Response.headers.__setitem__`, the Java +/// servlet stub's `setHeader`, the Node `nyxResponse.setHeader` mock, +/// etc.) so the bytes are captured *before* the host runtime's CRLF +/// validator could reject them. Those probes carry +/// [`HeaderEmitProtocol::InProcess`]. +/// +/// A future tier-(b) harness booting a real Tomcat / werkzeug / +/// `http.createServer` on loopback would tap the bytes the underlying +/// server actually wrote to the response socket and record them as +/// [`HeaderEmitProtocol::Wire`]. The variant exists now so an oracle +/// tightening landing later (e.g. a sibling +/// `ProbePredicate::HeaderSmuggledInWire` that scans wire-frame bytes +/// for two distinct `name:` lines) does not need to re-shape the +/// probe schema. +/// +/// Probe records emitted before this field existed deserialise as +/// [`HeaderEmitProtocol::InProcess`] via `#[serde(default)]` on the +/// containing [`ProbeKind::HeaderEmit`] field. +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +pub enum HeaderEmitProtocol { + /// Bytes captured by an in-process monkey-patch on the framework's + /// header setter, before the host runtime's CRLF validator ran. + #[default] + InProcess, + /// Bytes captured at the wire layer — the literal response frame + /// the underlying real server wrote to the response socket. + Wire, +} + /// Discriminator on a [`SinkProbe`] (Phase 08 — Track C.4). /// /// Distinguishes a probe written from the normal sink-instrumentation @@ -213,6 +246,20 @@ pub enum ProbeKind { /// host concatenates attacker bytes into this string without /// CRLF stripping; a benign host URL-encodes them (`%0d%0a`). value: String, + /// Transport layer at which the bytes were captured. 
Today's + /// per-language harness shims monkey-patch the framework's + /// response object before any CRLF validator runs and so + /// produce [`HeaderEmitProtocol::InProcess`]. A future + /// tier-(b) harness booting a real Tomcat / werkzeug / + /// `http.createServer` on loopback would record the bytes the + /// underlying server actually wrote to the response socket as + /// [`HeaderEmitProtocol::Wire`]. On-disk probe records + /// written before this field existed deserialise as + /// [`HeaderEmitProtocol::InProcess`] via `#[serde(default)]`, + /// so an oracle tightening landing later does not need to + /// migrate the on-disk channel format. + #[serde(default)] + protocol: HeaderEmitProtocol, }, /// Phase 09 (Track J.7) HTTP-redirect observation. Stamped by /// the per-language harness shim's instrumented redirect entry diff --git a/tests/header_injection_corpus.rs b/tests/header_injection_corpus.rs index f84d51c2..05e38056 100644 --- a/tests/header_injection_corpus.rs +++ b/tests/header_injection_corpus.rs @@ -22,7 +22,7 @@ use nyx_scanner::dynamic::corpus::{ use nyx_scanner::dynamic::framework::registry::adapters_for; use nyx_scanner::dynamic::lang; use nyx_scanner::dynamic::oracle::{ProbePredicate, oracle_fired}; -use nyx_scanner::dynamic::probe::{ProbeKind, ProbeWitness, SinkProbe}; +use nyx_scanner::dynamic::probe::{HeaderEmitProtocol, ProbeKind, ProbeWitness, SinkProbe}; use nyx_scanner::dynamic::sandbox::SandboxOutcome; use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use nyx_scanner::labels::Cap; @@ -158,15 +158,53 @@ fn probe_kind_header_emit_serdes() { let original = ProbeKind::HeaderEmit { name: "Set-Cookie".into(), value: "nyx-session\r\nSet-Cookie: nyx-injected=pwn".into(), + protocol: HeaderEmitProtocol::InProcess, }; let json = serde_json::to_string(&original).unwrap(); assert!(json.contains("HeaderEmit")); assert!(json.contains("name")); assert!(json.contains("value")); + assert!(json.contains("\"protocol\":\"in-process\""));
let parsed: ProbeKind = serde_json::from_str(&json).unwrap(); assert_eq!(parsed, original); } +#[test] +fn probe_kind_header_emit_serdes_wire_variant() { + let original = ProbeKind::HeaderEmit { + name: "Set-Cookie".into(), + value: "nyx-session\r\nSet-Cookie: nyx-injected=pwn".into(), + protocol: HeaderEmitProtocol::Wire, + }; + let json = serde_json::to_string(&original).unwrap(); + assert!(json.contains("\"protocol\":\"wire\"")); + let parsed: ProbeKind = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed, original); +} + +#[test] +fn probe_kind_header_emit_deserialises_legacy_records_as_in_process() { + // Probe records emitted before the protocol field existed must + // continue to deserialise via the `#[serde(default)]` hatch so the + // future oracle tightening landing does not need to migrate the + // on-disk channel format. + let legacy_json = + r#"{"kind":"HeaderEmit","name":"Set-Cookie","value":"nyx-session\r\nSet-Cookie: pwn"}"#; + let parsed: ProbeKind = serde_json::from_str(legacy_json).unwrap(); + match parsed { + ProbeKind::HeaderEmit { + name, + value, + protocol, + } => { + assert_eq!(name, "Set-Cookie"); + assert_eq!(value, "nyx-session\r\nSet-Cookie: pwn"); + assert_eq!(protocol, HeaderEmitProtocol::InProcess); + } + other => panic!("expected HeaderEmit, got {other:?}"), + } +} + #[test] fn header_injected_predicate_fires_on_crlf_value() { let oracle = Oracle::SinkProbe { @@ -182,6 +220,7 @@ fn header_injected_predicate_fires_on_crlf_value() { kind: ProbeKind::HeaderEmit { name: "Set-Cookie".into(), value: "nyx-session\r\nSet-Cookie: nyx-injected=pwn".into(), + protocol: HeaderEmitProtocol::InProcess, }, witness: ProbeWitness::empty(), }]; @@ -213,6 +252,7 @@ fn header_injected_predicate_clear_when_value_is_url_encoded() { kind: ProbeKind::HeaderEmit { name: "Set-Cookie".into(), value: "nyx-session%0D%0ASet-Cookie%3A%20nyx-injected%3Dpwn".into(), + protocol: HeaderEmitProtocol::InProcess, }, witness: ProbeWitness::empty(), }]; @@ -246,6 
+286,7 @@ fn header_injected_predicate_clear_on_unrelated_header() { kind: ProbeKind::HeaderEmit { name: "X-Trace-Id".into(), value: "trace\r\nX-Injected: 1".into(), + protocol: HeaderEmitProtocol::InProcess, }, witness: ProbeWitness::empty(), }]; diff --git a/tests/prototype_pollution_corpus.rs b/tests/prototype_pollution_corpus.rs index f3e995d9..f3971ccc 100644 --- a/tests/prototype_pollution_corpus.rs +++ b/tests/prototype_pollution_corpus.rs @@ -254,8 +254,23 @@ fn lang_emitter_dispatches_to_prototype_pollution_harness() { "{lang:?} harness must install the canary trap on Object.prototype", ); assert!( - harness.source.contains("nyxDeepMerge"), - "{lang:?} harness must inline the deep-merge sink", + harness.source.contains("require('lodash').merge"), + "{lang:?} harness must route through the real `lodash.merge` (Phase 10 follow-up swap)", + ); + assert!( + !harness.source.contains("function nyxDeepMerge"), + "{lang:?} harness must no longer declare the hand-rolled `nyxDeepMerge` shim", + ); + assert!( + !harness.source.contains("nyxDeepMerge(target,"), + "{lang:?} harness must no longer call the hand-rolled `nyxDeepMerge` shim", + ); + assert!( + harness + .extra_files + .iter() + .any(|(p, c)| p == "package.json" && c.contains("\"lodash\":\"4.17.4\"")), + "{lang:?} harness must publish a `package.json` pinning lodash 4.17.4 (last version before `_.merge` was hardened against `__proto__`); empirical bisect shows 4.17.5+ already filters the key so newer pins flip the vuln fixture to NotConfirmed", ); assert!( harness.source.contains("__NYX_SINK_HIT__"), diff --git a/tests/xpath_corpus.rs b/tests/xpath_corpus.rs index 2e6b615f..40e16ccb 100644 --- a/tests/xpath_corpus.rs +++ b/tests/xpath_corpus.rs @@ -549,6 +549,17 @@ mod e2e_phase_07 { .as_ref() .expect("Confirmed run must carry a DifferentialOutcome"); assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); + let tier_a_marker = b"__NYX_XPATH_TIER_A__"; + let saw_tier_a = outcome.attempts.iter().any(|a| { 
+ a.outcome + .stdout + .windows(tier_a_marker.len()) + .any(|w| w == tier_a_marker) + }); + assert!( + saw_tier_a, + "Python XPath vuln must reach the tier-(a) real-lxml path (stdout marker `__NYX_XPATH_TIER_A__`); the inline `_nyx_xpath_select` fallback was removed and the harness now SKIPs via NYX_IMPORT_ERROR + exit 77 when lxml is unavailable", + ); } #[test]