From 738f1fedbcc21934aa98f03cfd85619ea6f025b4 Mon Sep 17 00:00:00 2001 From: elipeter Date: Mon, 1 Jun 2026 12:34:38 -0500 Subject: [PATCH] feat(dynamic): implement entry-driven verification with fallback to synthetic direct-sink, enhance per-language emitters, and improve test coverage --- docs/dynamic.md | 23 ++-- src/dynamic/lang/java.rs | 151 +++++++++++++++++---- src/dynamic/lang/js_shared.rs | 202 +++++++++++++++++++++++----- src/dynamic/lang/ruby.rs | 113 ++++++++++++---- src/dynamic/spec.rs | 168 +++++++++++++++++++++-- src/dynamic/trace.rs | 9 ++ src/dynamic/verify.rs | 15 +++ tests/deserialize_corpus.rs | 73 ++++++++++ tests/prototype_pollution_corpus.rs | 48 ++++++- 9 files changed, 686 insertions(+), 116 deletions(-) diff --git a/docs/dynamic.md b/docs/dynamic.md index d1633275..3e283970 100644 --- a/docs/dynamic.md +++ b/docs/dynamic.md @@ -342,16 +342,19 @@ audits the tree for unseeded `rand` usage on every CI run. ## Limitations -- The harness drives the sink, not always the enclosing function. When a - finding's safety comes from a guard in the code around the sink (a merge - target built with `Object.create(null)`, an `ObjectInputStream` subclass - whose `resolveClass` enforces an allowlist, a const-name check before - `Marshal.load`), the synthesized harness can exercise the sink directly and - miss that guard, which over-confirms. Read `Confirmed` as "this sink is - reachable and fires on attacker input," not "this exact code path has no - in-line mitigation." Framework-level guards (auth middleware, helmet) are - recognized and demote to `ConfirmedWithKnownGuard`; custom in-function guards - are not yet captured. +- The harness drives the finding's enclosing entry function when one is + derivable, routing the payload to the tainted parameter, so a guard in the + code around the sink (a merge target built with `Object.create(null)`, an + `ObjectInputStream` subclass whose `resolveClass` enforces an allowlist, a + const-name check before `Marshal.load`) runs first and participates in the + verdict. The build-time choice is recorded on the verify trace as + `entry_invocation` (`mode=entry_function` or `mode=direct_sink`). When no + enclosing entry can be derived the harness falls back to driving the sink + directly, and that fallback can over-confirm a guard it never executes. Read + a `direct_sink` `Confirmed` as "this sink is reachable and fires on attacker + input," not "this exact code path has no in-line mitigation." Framework-level + guards (auth middleware, helmet) are also recognized and demote to + `ConfirmedWithKnownGuard`. - Per-language payload curation is uneven. Command and code injection ship for all ten languages; the classic data-style injection caps (SQL, path traversal, SSRF, XSS) ship a tuned set for Rust and fall back to a diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 75df026c..0758beee 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -759,8 +759,98 @@ pub fn emit(spec: &HarnessSpec) -> Result { /// boundary fires for both vuln and benign payloads; downstream /// instantiation failures (e.g. `serialVersionUID` mismatch on the /// allow-listed payload) are caught and treated as non-probe paths. -pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource { +pub fn emit_deserialize_harness(spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); + + // Tier-(a) main: drive the fixture's enclosing entry with the forged + // blob so a caller-side mitigation (a `resolveClass` allowlist / + // restricted ObjectInputStream subclass) runs before the gadget class + // is resolved. Detection is by exception type: a vanilla + // ObjectInputStream reaches `resolveClass(gadget)` and raises + // ClassNotFoundException (the gadget is not on the classpath) — that is + // unrestricted deserialization, so a probe fires. A guarded fixture + // raises InvalidClassException at its allowlist check *before* the + // class resolves, so no probe is written. Falls back to the tier-(b) + // synthetic restricted-OIS path when reflection setup fails. + let main_body = if spec.entry_is_derivable() { + let class_name = java_entry_class_name(spec); + let method_name = &spec.entry_name; + format!( + r#" public static void main(String[] args) {{ + String payload = System.getenv("NYX_PAYLOAD"); + if (payload == null) payload = ""; + String prefix = "NYX_GADGET_CLASS:"; + boolean drove = false; + if (payload.startsWith(prefix)) {{ + String cls = payload.substring(prefix.length()); + // Tier-(a): drive `{class_name}.{method_name}(byte[])` so the + // fixture's own (un)restricted deserialization path runs. + try {{ + byte[] blob = nyxForgeClassDescriptor(cls); + Class entryCls = Class.forName("{class_name}"); + java.lang.reflect.Method m = entryCls.getMethod("{method_name}", byte[].class); + drove = true; + try {{ + m.invoke(null, (Object) blob); + }} catch (java.lang.reflect.InvocationTargetException ite) {{ + if (nyxCauseChainHas(ite.getCause(), ClassNotFoundException.class)) {{ + // The fixture's deserializer reached and tried to + // resolve the gadget class (unrestricted path). + nyxDeserializeProbe(true); + }} + // InvalidClassException (a caller-side allowlist block) + // lands here too but is not a ClassNotFoundException, so + // a guarded fixture writes no probe. + }} catch (Throwable t) {{ + // Other reflective-call failure — non-probe path. + }} + }} catch (Throwable setup) {{ + // Reflection setup failed (class / method missing) — fall + // through to the tier-(b) synthetic path below. + drove = false; + }} + }} + if (!drove) {{ + // Tier-(b): the enclosing entry could not be driven — synthetic + // restricted-OIS direct path (recorded as direct-sink fallback). + nyxSyntheticDeserialize(payload); + }} + // Sink-reachability sentinel — runner's `vuln_fired && sink_hit` + // gate consumes this; without it differential confirmation cannot + // fire even when the probe was written. + System.out.println("__NYX_SINK_HIT__"); + }} + + /// True when `t` or any exception in its cause chain is an instance of + /// `want` — used to detect the gadget-class resolution attempt that a + /// vanilla ObjectInputStream surfaces as ClassNotFoundException. + static boolean nyxCauseChainHas(Throwable t, Class want) {{ + int hops = 0; + while (t != null && hops < 32) {{ + if (want.isInstance(t)) return true; + t = t.getCause(); + hops++; + }} + return false; + }} +"# + ) + } else { + // No derivable enclosing entry — drive the synthetic restricted-OIS + // path directly. + r#" public static void main(String[] args) { + String payload = System.getenv("NYX_PAYLOAD"); + if (payload == null) payload = ""; + nyxSyntheticDeserialize(payload); + // Sink-reachability sentinel — runner's `vuln_fired && sink_hit` + // gate consumes this; without it differential confirmation cannot + // fire even when the probe was written. + System.out.println("__NYX_SINK_HIT__"); + } +"# + .to_owned() + }; + let source = format!( r#"// Nyx dynamic harness — deserialize (Phase 03 / Track J.1). import java.io.ByteArrayInputStream; @@ -835,36 +925,33 @@ public class NyxHarness {{ return baos.toByteArray(); }} - public static void main(String[] args) {{ - String payload = System.getenv("NYX_PAYLOAD"); - if (payload == null) payload = ""; + /// Tier-(b) synthetic direct-sink: run the forged blob through a + /// restricted ObjectInputStream the harness controls. Bypasses any + /// caller-side guard, so it is used only when the fixture's own entry + /// could not be driven. + static void nyxSyntheticDeserialize(String payload) {{ String prefix = "NYX_GADGET_CLASS:"; - if (payload.startsWith(prefix)) {{ - String cls = payload.substring(prefix.length()); + if (!payload.startsWith(prefix)) return; + String cls = payload.substring(prefix.length()); + try {{ + byte[] blob = nyxForgeClassDescriptor(cls); + NyxRestrictedOIS ois = new NyxRestrictedOIS( + new ByteArrayInputStream(blob)); try {{ - byte[] blob = nyxForgeClassDescriptor(cls); - NyxRestrictedOIS ois = new NyxRestrictedOIS( - new ByteArrayInputStream(blob)); - try {{ - ois.readObject(); - }} finally {{ - try {{ ois.close(); }} catch (IOException ignored) {{}} - }} - }} catch (InvalidClassException e) {{ - // Restricted block — probe already written above. - }} catch (Throwable t) {{ - // Allow-listed but downstream instantiation fails (the - // minimal stream omits the field bytes the real class - // expects). resolveClass already fired; treat as a - // non-probe path. + ois.readObject(); + }} finally {{ + try {{ ois.close(); }} catch (IOException ignored) {{}} }} + }} catch (InvalidClassException e) {{ + // Restricted block — probe already written above. + }} catch (Throwable t) {{ + // Allow-listed but downstream instantiation fails (the minimal + // stream omits the field bytes the real class expects). + // resolveClass already fired; treat as a non-probe path. }} - // Sink-reachability sentinel — runner's `vuln_fired && sink_hit` - // gate consumes this; without it differential confirmation cannot - // fire even when the probe was written. - System.out.println("__NYX_SINK_HIT__"); }} -}} + +{main_body}}} "# ); HarnessSource { @@ -881,6 +968,18 @@ public class NyxHarness {{ } } +/// Derive the Java class that declares the entry method from the spec's +/// `entry_file` basename (Java's public-class-per-file convention: a sink +/// in `Vuln.java` lives in `public class Vuln`). Used by the +/// deserialize harness to reflectively load the fixture class. +fn java_entry_class_name(spec: &HarnessSpec) -> String { + std::path::Path::new(&spec.entry_file) + .file_stem() + .and_then(|s| s.to_str()) + .map(|s| s.to_owned()) + .unwrap_or_else(|| "NyxEntry".to_owned()) +} + /// Phase 04 — Track J.2 SSTI harness for Java (Thymeleaf). /// /// Reads `NYX_PAYLOAD`, simulates Thymeleaf's `[[${expr}]]` inlined- diff --git a/src/dynamic/lang/js_shared.rs b/src/dynamic/lang/js_shared.rs index e7b46287..d76662a4 100644 --- a/src/dynamic/lang/js_shared.rs +++ b/src/dynamic/lang/js_shared.rs @@ -634,7 +634,7 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result (m && m.__esModule ? m.default : m))(require('$2'));") + .replace(/^\s*import\s+['"]([^'"]+)['"];?\s*$/gm, "require('$1');"); + src = src.replace(/^\s*export\s+default\s+/gm, "module.exports.default = "); + src = src.replace(/^\s*export\s+((?:async\s+)?function|class|const|let|var)\s+([A-Za-z_$][\w$]*)/gm, function (m, kw, name) { tail.push([name, name]); return kw + " " + name; }); + src = src.replace(/^\s*export\s+\{([^}]*)\};?\s*$/gm, function (m, names) { + names.split(",").map(function (s) { return s.trim(); }).filter(Boolean).forEach(function (spec) { + const parts = spec.split(/\s+as\s+/); + const local = parts[0].trim(); + const exported = (parts.length > 1 ? parts[1] : parts[0]).trim(); + tail.push([exported, local]); + }); + return ""; + }); + let suffix = "\n"; + for (const pair of tail) { suffix += "module.exports[" + JSON.stringify(pair[0]) + "] = " + pair[1] + ";\n"; } + return src + suffix; +} + +function nyxLoadTsEntry(file) { + const fs = require('fs'); + const Module = require('module'); + const path = require('path'); + let src = fs.readFileSync(file, 'utf8'); + if (typeof Module.stripTypeScriptTypes === 'function') { + try { src = Module.stripTypeScriptTypes(src, { mode: 'transform' }); } catch (e) { /* fall through with raw source */ } + } + src = nyxEsmToCjs(src); + const m = new Module(file, module); + m.filename = path.resolve(file); + m.paths = Module._nodeModulePaths(path.dirname(m.filename)); + m._compile(src, m.filename); + return m.exports; +} + +"#; + /// Phase 10 — Track J.8 prototype-pollution harness for Node /// (`lodash.merge` / `Object.assign` / `JSON.parse`-then-deep-assign). /// @@ -2629,9 +2684,14 @@ function nyxFollowLocation(location) {{ /// literal has no `__proto__` key — or a fixture that constructs /// its target via `Object.create(null)` — leaves the prototype /// chain untouched and emits no probe. -pub fn emit_prototype_pollution_harness(_spec: &HarnessSpec) -> HarnessSource { +pub fn emit_prototype_pollution_harness(spec: &HarnessSpec, is_typescript: bool) -> HarnessSource { let shim = probe_shim(); - let body = format!( + + // Shared canary-trap preamble: installs the Object.prototype setter + // trap *before* any sink runs, so a write that lands on the shared + // prototype is observed regardless of whether it came from the + // fixture's own merge (tier-a) or the synthetic fallback (tier-b). + let preamble = format!( r#"// Nyx dynamic harness — PROTOTYPE_POLLUTION canary trap (Phase 10 / Track J.8). {shim} @@ -2699,40 +2759,96 @@ function nyxPrototypePollutionProbe(value) {{ }}); }})(); -// Phase 10 sink: route the parsed payload through the real -// `lodash.merge` pinned at lodash 4.17.4. Lodash hardened `_.merge` -// against the `__proto__` key starting in 4.17.5 (well before the -// official CVE-2018-16487 fix at 4.17.11 which targeted `_.set` / -// `_.setWith`), so the canary only fires against <= 4.17.4. The -// staged `package.json` pins this version exactly; `prepare_node` -// resolves the dep via `npm install` before the harness runs. -// Exercising the real merge implementation (vs the hand-rolled -// `nyxDeepMerge` that previously stood in) covers lodash's actual -// recursion / cycle / array-vs-object decision shape so a future -// fixture that hits a patched range can be added without re-shaping -// the harness. -const _lodashMerge = require('lodash').merge; - const payload = process.env.NYX_PAYLOAD || ''; -let parsed; -try {{ - parsed = JSON.parse(payload); -}} catch (e) {{ - parsed = {{}}; -}} -const target = {{}}; -try {{ - _lodashMerge(target, parsed); -}} catch (e) {{ - // lodash.merge can throw on weird inputs; the canary observation - // already wrote any probe before the throw. -}} -console.log('__NYX_SINK_HIT__'); -console.log(JSON.stringify({{ - canary_present: Object.prototype.hasOwnProperty(NYX_PP_CANARY), -}})); "# ); + + // Tier-(b) synthetic direct-sink block. Routes the parsed payload + // through the real `lodash.merge` pinned at lodash 4.17.4 (hardened + // against `__proto__` from 4.17.5) into a *vanilla* `{}` target. Used + // standalone when no enclosing entry is derivable, and as the runtime + // fallback inside the entry-driven harness when the fixture cannot be + // loaded. NOTE: this drives the sink directly and therefore bypasses + // any caller-side mitigation — it must run only when the fixture's own + // entry could not be driven. + let synthetic_sink = r#" const _lodashMerge = require('lodash').merge; + let parsed; + try { parsed = JSON.parse(payload); } catch (e) { parsed = {}; } + const target = {}; + try { + _lodashMerge(target, parsed); + } catch (e) { + // lodash.merge can throw on weird inputs; the canary observation + // already wrote any probe before the throw. + } +"#; + + let tail = r#"console.log('__NYX_SINK_HIT__'); +console.log(JSON.stringify({ + canary_present: Object.prototype.hasOwnProperty(NYX_PP_CANARY), +})); +"#; + + let (body, entry_subpath) = if spec.entry_is_derivable() { + let entry_subpath = if is_typescript { "entry.ts" } else { "entry.js" }; + let entry_name = &spec.entry_name; + let call_args = pp_entry_call_args(spec); + // TypeScript fixtures use ES-module imports + type annotations the + // bare CommonJS `require` cannot parse, and Node's native `.ts` + // loading applies ESM-namespace interop (so `import * as _ from + // 'lodash'` would not expose `_.merge`). Load TS through the + // type-stripping + ESM→CJS shim so `esModuleInterop`-style fixtures + // run as the author intended. JS fixtures are CommonJS — require + // them directly. + let loader_defs = if is_typescript { TS_ENTRY_LOADER_JS } else { "" }; + let entry_load_expr = if is_typescript { + format!("nyxLoadTsEntry('./{entry_subpath}')") + } else { + format!("require('./{entry_subpath}')") + }; + let body = format!( + r#"{preamble} +{loader_defs}// Tier-(a): drive the fixture's enclosing entry `{entry_name}` so a +// caller-side mitigation (a merge target built with `Object.create(null)`, +// an allowlist, …) runs *before* the merge sink. The Object.prototype +// canary trap above observes any write that reaches the shared prototype, +// so a benign fixture that builds a prototype-less target produces no +// probe even under the `__proto__` payload. +let _drove = false; +let _entry; +try {{ + _entry = {entry_load_expr}; +}} catch (e) {{ + // load failed (missing dep / unparseable source) — tier-(b) below. +}} +const _fn = _entry && (typeof _entry === 'function' + ? _entry + : (typeof _entry['{entry_name}'] === 'function' + ? _entry['{entry_name}'] + : (typeof _entry.run === 'function' ? _entry.run : null))); +if (typeof _fn === 'function') {{ + try {{ + _fn({call_args}); + }} catch (e) {{ + // The fixture threw after we drove it (e.g. JSON.parse failure or a + // guard that raises). We still drove the entry, so do not fall back. + }} + _drove = true; +}} +if (!_drove) {{ + // Tier-(b): the enclosing entry could not be driven at runtime — fall + // back to the synthetic direct-sink merge so the harness still emits a + // signal. Recorded as a direct-sink fallback in the VerifyTrace. +{synthetic_sink}}} +{tail}"# + ); + (body, Some(entry_subpath.to_owned())) + } else { + // No derivable enclosing entry — drive the sink directly. + let body = format!("{preamble}\n{synthetic_sink}{tail}"); + (body, None) + }; + HarnessSource { source: body, filename: "harness.js".to_owned(), @@ -2743,7 +2859,23 @@ console.log(JSON.stringify({{ "# .to_owned(), )], - entry_subpath: None, + entry_subpath, + } +} + +/// Build the JS argument list for invoking the prototype-pollution entry +/// with the payload routed to its tainted parameter. `PayloadSlot::Param(n)` +/// places the payload at position `n` (earlier positions filled with +/// `undefined`); every other slot passes the payload as the sole argument +/// (the fixture reads its own channel — env / argv — for the rest). +fn pp_entry_call_args(spec: &HarnessSpec) -> String { + match &spec.payload_slot { + crate::dynamic::spec::PayloadSlot::Param(n) => { + let mut parts = vec!["undefined"; *n]; + parts.push("payload"); + parts.join(", ") + } + _ => "payload".to_owned(), } } diff --git a/src/dynamic/lang/ruby.rs b/src/dynamic/lang/ruby.rs index 47a377be..8fe5b974 100644 --- a/src/dynamic/lang/ruby.rs +++ b/src/dynamic/lang/ruby.rs @@ -1210,15 +1210,21 @@ fn should_stage_framework_dependency_files(spec: &HarnessSpec) -> bool { /// Phase 03 — Track J.1 deserialize harness for Ruby. /// -/// Wraps a call to `Marshal.load(input)` with a const-lookup -/// instrumentation that asserts the requested constant is on the -/// allowlist (`Integer`, `String`, `Array`). When the marker class -/// is outside the allowlist the shim writes a -/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with -/// `gadget_chain_invoked: true`. -pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource { +/// Forges a Marshal v4.8 class-reference blob for the corpus +/// `NYX_GADGET_CLASS:` marker and observes whether the gadget class +/// is resolved. When the finding's enclosing entry is derivable +/// ([`HarnessSpec::entry_is_derivable`]) the harness drives that function +/// with the forged blob (tier-a) so a caller-side mitigation — a +/// const-name allowlist before `Marshal.load`, a restricted loader — runs +/// first and a guarded fixture produces no +/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe. When no entry +/// is derivable (or the fixture cannot be loaded at runtime) it falls back +/// to driving `Marshal.load` directly (tier-b), which bypasses any +/// caller-side guard; that fallback is recorded on the VerifyTrace. +pub fn emit_deserialize_harness(spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); - let body = format!( + // Shared helper definitions: probe writer + Marshal class-ref forger. + let preamble = format!( r#"# Nyx dynamic harness — deserialize (Phase 03 / Track J.1). require 'json' @@ -1256,33 +1262,82 @@ def _nyx_forge_marshal_class_ref(name) "\x04\x08c".b + len_byte + name.b end -allowlist = ['Integer', 'String', 'Array'] payload = ENV['NYX_PAYLOAD'] || '' -if payload.start_with?('NYX_GADGET_CLASS:') - cls = payload[('NYX_GADGET_CLASS:'.length)..] - begin - Marshal.load(_nyx_forge_marshal_class_ref(cls)) - rescue ArgumentError => e - # `undefined class/module ` — the Marshal class-resolution - # boundary refused the lookup. Real hardening would surface this - # via a `Marshal.const_defined?` pre-check + reject; we record the - # gadget-class invocation here. - if e.message.start_with?('undefined class/module') - _nyx_deserialize_probe(true) +"# + ); + + // Tier-(b) synthetic direct-sink: hand the forged blob straight to the + // real `Marshal.load`. Bypasses any caller-side guard, so it runs only + // when the fixture's own entry could not be driven. + let synthetic_sink = r#" if payload.start_with?('NYX_GADGET_CLASS:') + cls = payload[('NYX_GADGET_CLASS:'.length)..] + begin + Marshal.load(_nyx_forge_marshal_class_ref(cls)) + rescue ArgumentError => e + # `undefined class/module ` — Marshal's class-resolution + # boundary refused the lookup; record the gadget-class invocation. + _nyx_deserialize_probe(true) if e.message.start_with?('undefined class/module') + rescue TypeError, NameError + # Allow-listed class that resolves cleanly (e.g. `Integer`) — no probe. end - rescue TypeError, NameError - # Allow-listed class that exists at load time (e.g. `Integer`) - # resolves cleanly via `Object.const_get` and Marshal returns the - # class object — no rescue path. Other unexpected errors fall - # through without writing a probe. end -end -# Sink-reachability sentinel — runner's `vuln_fired && sink_hit` +"#; + + let tail = r#"# Sink-reachability sentinel — runner's `vuln_fired && sink_hit` # gate consumes this; without it differential confirmation cannot # fire even when the probe was written. STDOUT.puts '__NYX_SINK_HIT__' -"# - ); +"#; + + let body = if spec.entry_is_derivable() { + let entry_basename = derive_entry_basename(&spec.entry_file); + let entry_name = &spec.entry_name; + format!( + r#"{preamble} +drove = false +if payload.start_with?('NYX_GADGET_CLASS:') + cls = payload[('NYX_GADGET_CLASS:'.length)..] + blob = _nyx_forge_marshal_class_ref(cls) + # Tier-(a): drive the fixture's enclosing entry `{entry_name}` so a + # caller-side guard (const-name allowlist, restricted loader) runs + # before Marshal.load. A guarded fixture refuses the gadget blob with + # its own error and never reaches the unresolved-class boundary, so no + # probe is written. + loaded = false + begin + require_relative './{entry_basename}' + loaded = true + rescue Exception + loaded = false + end + if loaded && Object.new.respond_to?(:'{entry_name}', true) + drove = true + begin + Object.new.__send__(:'{entry_name}', blob) + rescue ArgumentError => e + # Vanilla Marshal.load reached the gadget class but could not + # resolve it → unrestricted deserialization. A caller-side guard + # that raises (e.g. "blocked: ...") also lands here but with a + # different message, so it does not write a probe. + _nyx_deserialize_probe(true) if e.message.start_with?('undefined class/module') + rescue TypeError, NameError + # Allow-listed class that resolves cleanly — no probe. + rescue Exception + # Any other failure inside the fixture — no probe. + end + end +end +unless drove + # Tier-(b): the enclosing entry could not be driven — synthetic + # direct-sink fallback (recorded as direct-sink on the VerifyTrace). +{synthetic_sink}end +{tail}"# + ) + } else { + // No derivable enclosing entry — drive Marshal.load directly. + format!("{preamble}\n{synthetic_sink}{tail}") + }; + HarnessSource { source: body, filename: "harness.rb".to_owned(), diff --git a/src/dynamic/spec.rs b/src/dynamic/spec.rs index 1230031d..81afa52a 100644 --- a/src/dynamic/spec.rs +++ b/src/dynamic/spec.rs @@ -418,6 +418,22 @@ impl HarnessSpec { supported.contains(&self.entry_kind.tag()) } + /// True when the spec names a concrete enclosing entry function the + /// harness can drive — i.e. `entry_name` resolved to a real symbol + /// rather than the `""` placeholder a rule-namespace finding + /// falls back to when no flow-step / summary / AST resolution can name + /// the function the sink sits in. + /// + /// The per-language harness emitters consult this to decide whether to + /// invoke the finding's enclosing function (so caller-side guards run + /// before the sink) or fall back to a synthetic direct-sink harness; + /// [`crate::dynamic::verify::verify_finding`] records the same decision + /// on the [`crate::dynamic::trace::VerifyTrace`] via + /// [`crate::dynamic::trace::TraceStage::EntryInvocation`]. + pub fn entry_is_derivable(&self) -> bool { + !self.entry_name.is_empty() && self.entry_name != "" + } + /// Returns the ordered list of derivation strategies that /// [`HarnessSpec::from_finding_opts`] attempts. Used by the verifier when /// it needs to report which candidates were tried before declaring an @@ -1395,17 +1411,99 @@ fn resolve_enclosing_function( if let Some(name) = enclosing_function_from_flow_steps(evidence) { return Some(name); } - let summaries = summaries?; - let mut hits = summaries - .iter() - .filter(|(k, _)| k.lang == lang) - .filter(|(_, s)| paths_match(&s.file_path, &diag.path)); - let first = hits.next()?; - if hits.next().is_some() { - // Ambiguous: multiple functions in this file; refuse to guess. - return None; + if let Some(summaries) = summaries { + let mut hits = summaries + .iter() + .filter(|(k, _)| k.lang == lang) + .filter(|(_, s)| paths_match(&s.file_path, &diag.path)); + if let Some(first) = hits.next() + && hits.next().is_none() + { + // Unambiguous: exactly one function in this file. + return Some(first.1.name.clone()); + } + // Ambiguous (or none): fall through to AST resolution below rather + // than refusing to guess — the sink line disambiguates. } - Some(first.1.name.clone()) + // Last resort: parse the file and name the innermost function whose + // line span contains the sink. Recovers a drivable entry for + // rule-namespace findings that carry no flow_steps and have no (or an + // ambiguous) summary — e.g. the deserialize fixtures verified with + // `--index off`. + resolve_enclosing_function_via_ast(&diag.path, diag.line, lang) +} + +/// Parse `path` and return the name of the innermost function/method +/// definition whose 1-based line span contains `line`. +/// +/// Used as the final fallback in [`resolve_enclosing_function`] so the +/// spec names the function a sink sits in even when the taint engine +/// produced no flow_steps and no [`GlobalSummaries`] were threaded +/// (the common `--index off` rule-namespace path). Best-effort: returns +/// `None` when the file cannot be read/parsed, the grammar is missing, or +/// the sink is at file top level with no enclosing function. +fn resolve_enclosing_function_via_ast(path: &str, line: usize, lang: Lang) -> Option { + let bytes = std::fs::read(path).ok()?; + let ts_lang = tree_sitter_lang_for(lang)?; + let mut parser = tree_sitter::Parser::new(); + parser.set_language(&ts_lang).ok()?; + let tree = parser.parse(&bytes, None)?; + let slug = lang_slug(lang); + let target_row = line.saturating_sub(1); + + // Walk every node spanning the target row, keeping the smallest-span + // `Kind::Function` node (the innermost enclosing function). + let mut best: Option<(usize, String)> = None; + let mut stack = vec![tree.root_node()]; + while let Some(node) = stack.pop() { + let start_row = node.start_position().row; + let end_row = node.end_position().row; + if start_row > target_row || end_row < target_row { + continue; + } + if crate::labels::lookup(slug, node.kind()) == crate::labels::Kind::Function + && let Some(name) = function_node_name(node, &bytes) + { + let span = end_row - start_row; + if best.as_ref().is_none_or(|(best_span, _)| span < *best_span) { + best = Some((span, name)); + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + stack.push(child); + } + } + best.map(|(_, name)| name) +} + +/// Extract the declared name of a `Kind::Function` AST node. +/// +/// Prefers the grammar's `name` field (present on Java `method_declaration`, +/// Ruby `method`, JS `function_declaration`, Python `function_definition`, +/// …); falls back to the first identifier-shaped child for grammars that do +/// not expose a `name` field. Returns `None` for anonymous functions. +fn function_node_name(node: tree_sitter::Node, bytes: &[u8]) -> Option { + if let Some(name_node) = node.child_by_field_name("name") + && let Ok(text) = name_node.utf8_text(bytes) + && !text.is_empty() + { + return Some(text.to_owned()); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + let kind = child.kind(); + if (kind == "identifier" + || kind == "name" + || kind == "field_identifier" + || kind.ends_with("_identifier")) + && let Ok(text) = child.utf8_text(bytes) + && !text.is_empty() + { + return Some(text.to_owned()); + } + } + None } /// Lookup a `FuncSummary` by `(lang, name)` and filter to one whose @@ -1895,6 +1993,27 @@ mod tests { use super::*; use crate::evidence::{Evidence, FlowStep, FlowStepKind}; + #[test] + fn ast_resolver_names_run_for_deser_fixtures() { + // The deserialize fixtures carry no flow_steps and resolve no + // summaries under `--index off`; AST resolution must still name the + // enclosing `run` function the sink sits in so the harness can drive + // it and the author's guard participates in the verdict. + let cases = [ + ("tests/dynamic_fixtures/deserialize/java/Benign.java", 36, Lang::Java), + ("tests/dynamic_fixtures/deserialize/java/Vuln.java", 14, Lang::Java), + ("tests/dynamic_fixtures/deserialize/ruby/benign.rb", 14, Lang::Ruby), + ("tests/dynamic_fixtures/deserialize/ruby/vuln.rb", 7, Lang::Ruby), + ]; + for (path, line, lang) in cases { + assert_eq!( + resolve_enclosing_function_via_ast(path, line, lang).as_deref(), + Some("run"), + "AST resolution should name `run` for {path}:{line}" + ); + } + } + fn source_step(file: &str, function: &str) -> FlowStep { FlowStep { step: 1, @@ -2016,6 +2135,35 @@ mod tests { assert_eq!(spec.entry_name, "process"); assert_eq!(spec.toolchain_id, "rust-stable"); assert!(!spec.spec_hash.is_empty()); + // A flow-step-named entry is drivable — the harness invokes it. + assert!(spec.entry_is_derivable()); + } + + #[test] + fn entry_is_derivable_distinguishes_real_name_from_placeholder() { + let mut spec = HarnessSpec { + finding_id: "0".into(), + entry_file: "src/app.rs".into(), + entry_name: "run".into(), + entry_kind: EntryKind::Function, + lang: Lang::Rust, + toolchain_id: "rust-stable".into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: crate::labels::Cap::SQL_QUERY, + constraint_hints: vec![], + sink_file: "src/app.rs".into(), + sink_line: 1, + spec_hash: "0".into(), + derivation: SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + java_toolchain: JavaToolchain::default(), + }; + assert!(spec.entry_is_derivable()); + spec.entry_name = "".into(); + assert!(!spec.entry_is_derivable()); + spec.entry_name = String::new(); + assert!(!spec.entry_is_derivable()); } #[test] diff --git a/src/dynamic/trace.rs b/src/dynamic/trace.rs index 78a55d6e..7713c19d 100644 --- a/src/dynamic/trace.rs +++ b/src/dynamic/trace.rs @@ -49,6 +49,13 @@ pub enum TraceStage { /// so a trace consumer can audit framework-detection coverage by /// counting `framework_adapter_*` events. FrameworkAdapterNone, + /// The harness-build decision about which entry the synthesized + /// harness drives. `detail` carries `mode=entry_function entry=` + /// when the finding's enclosing function was determinable (the harness + /// invokes it so caller-side guards run), or + /// `mode=direct_sink fallback=no_enclosing_entry` when no entry could + /// be derived and the harness falls back to driving the sink directly. + EntryInvocation, BuildStarted, BuildDone, SandboxStarted, @@ -78,6 +85,7 @@ impl TraceStage { Self::SpecDone => "spec_done", Self::FrameworkAdapterDetected => "framework_adapter_detected", Self::FrameworkAdapterNone => "framework_adapter_none", + Self::EntryInvocation => "entry_invocation", Self::BuildStarted => "build_started", Self::BuildDone => "build_done", Self::SandboxStarted => "sandbox_started", @@ -243,6 +251,7 @@ mod tests { // to these exact tokens so audit grep queries stay stable. assert_eq!(TraceStage::SpecStarted.as_str(), "spec_started"); assert_eq!(TraceStage::SpecDone.as_str(), "spec_done"); + assert_eq!(TraceStage::EntryInvocation.as_str(), "entry_invocation"); assert_eq!(TraceStage::BuildStarted.as_str(), "build_started"); assert_eq!(TraceStage::BuildDone.as_str(), "build_done"); assert_eq!(TraceStage::SandboxStarted.as_str(), "sandbox_started"); diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs index 0a7f846a..d88aabd7 100644 --- a/src/dynamic/verify.rs +++ b/src/dynamic/verify.rs @@ -633,6 +633,21 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { ), } + // Record whether the synthesized harness will drive the finding's + // enclosing entry function (so caller-side guards participate in the + // verdict) or fall back to a synthetic direct-sink invocation because + // no enclosing entry could be derived. The per-language emitters + // consult the same `entry_is_derivable()` predicate, so this trace + // event is the build-time source of truth for the entry-vs-sink choice. + trace.record( + crate::dynamic::trace::TraceStage::EntryInvocation, + Some(if spec.entry_is_derivable() { + format!("mode=entry_function entry={}", spec.entry_name) + } else { + "mode=direct_sink fallback=no_enclosing_entry".to_owned() + }), + ); + // Pre-flight gate: surface a structured `Inconclusive(EntryKindUnsupported)` // up-front when the spec's [`EntryKind`] is not in the lang emitter's // supported list. Without this, the same condition would degrade silently diff --git a/tests/deserialize_corpus.rs b/tests/deserialize_corpus.rs index bb798f0f..a651632c 100644 --- a/tests/deserialize_corpus.rs +++ b/tests/deserialize_corpus.rs @@ -182,6 +182,79 @@ fn lang_emitter_dispatches_to_deserialize_harness() { } } +#[test] +fn deserialize_harness_drives_entry_when_derivable() { + // Java: reflectively load the fixture class and invoke the derived + // entry method so the fixture's own resolveClass allowlist runs before + // the gadget class resolves. + let java = lang::emit(&make_spec( + Lang::Java, + "tests/dynamic_fixtures/deserialize/java/Benign.java", + "run", + )) + .expect("java deser emit"); + assert!( + java.source.contains("Class.forName(\"Benign\")"), + "Java deser harness must reflectively load the fixture class", + ); + assert!( + java.source.contains("getMethod(\"run\""), + "Java deser harness must invoke the derived entry method", + ); + assert!( + java.source.contains("nyxCauseChainHas"), + "Java deser harness must detect gadget resolution via the cause chain", + ); + + // Ruby: require_relative the fixture and drive its entry so the + // const-name guard runs before Marshal.load. + let ruby = lang::emit(&make_spec( + Lang::Ruby, + "tests/dynamic_fixtures/deserialize/ruby/benign.rb", + "run", + )) + .expect("ruby deser emit"); + assert!( + ruby.source.contains("require_relative './benign'"), + "Ruby deser harness must require_relative the fixture", + ); + assert!( + ruby.source.contains("__send__(:'run'"), + "Ruby deser harness must drive the derived entry function", + ); +} + +#[test] +fn deserialize_harness_falls_back_to_synthetic_without_entry() { + // No derivable enclosing entry → direct-sink synthetic path; the + // harness must not attempt to load a fixture it cannot name. + let java = lang::emit(&make_spec( + Lang::Java, + "tests/dynamic_fixtures/deserialize/java/Vuln.java", + "", + )) + .expect("java deser emit"); + assert!( + !java.source.contains("Class.forName("), + "Java deser harness must not reflect into a fixture when no entry is derivable", + ); + assert!( + java.source.contains("nyxSyntheticDeserialize"), + "Java synthetic fallback must drive the restricted-OIS path directly", + ); + + let ruby = lang::emit(&make_spec( + Lang::Ruby, + "tests/dynamic_fixtures/deserialize/ruby/vuln.rb", + "", + )) + .expect("ruby deser emit"); + assert!( + !ruby.source.contains("require_relative"), + "Ruby deser harness must not require the fixture when no entry is derivable", + ); +} + #[test] fn framework_adapters_detect_deserialize_sink() { // Java + Python + PHP + Ruby all register their J.1 sink adapter; diff --git a/tests/prototype_pollution_corpus.rs b/tests/prototype_pollution_corpus.rs index f3971ccc..76372091 100644 --- a/tests/prototype_pollution_corpus.rs +++ b/tests/prototype_pollution_corpus.rs @@ -418,12 +418,13 @@ fn slug(lang: Lang) -> &'static str { // into the prototype chain. // // Per-lang skips mirror the Phase 08 e2e block: -// - TypeScript: the synthetic harness short-circuits the entry -// source load entirely (`entry_subpath: None`), so no `tsx` / -// `ts-node` is needed at runtime — but on hosts without -// `tree_sitter_typescript` or the npm Node toolchain, the -// harness build will fall through `BuildFailed` and skip via the -// same branch. +// - TypeScript: the entry-driven harness now loads the fixture +// through an in-harness type-stripping + ESM→CJS shim +// (`nyxLoadTsEntry`), so no `tsx` / `ts-node` is needed at +// runtime — but on hosts without `tree_sitter_typescript`, a Node +// build lacking `module.stripTypeScriptTypes`, or the npm Node +// toolchain, the harness build/load falls through `BuildFailed` +// (or the runtime tier-(b) fallback) and skips via the same branch. mod e2e_phase_10 { use crate::common::fixture_harness::FIXTURE_LOCK; @@ -540,6 +541,25 @@ mod e2e_phase_10 { assert_eq!(diff.verdict, DifferentialVerdict::Confirmed); } + /// A benign control must NOT confirm: the entry-driven harness invokes + /// the fixture's own `run`, whose `Object.create(null)` merge target + /// keeps the `__proto__` payload off the shared prototype, so the + /// canary trap stays clear and the differential never confirms. + fn assert_not_confirmed(lang: Lang, outcome: &RunOutcome) { + assert!( + outcome.triggered_by.is_none(), + "{lang:?} PROTOTYPE_POLLUTION benign control must NOT confirm — the \ + caller-side `Object.create(null)` guard must participate; got {outcome:?}", + ); + if let Some(diff) = outcome.differential.as_ref() { + assert_ne!( + diff.verdict, + DifferentialVerdict::Confirmed, + "{lang:?} benign differential must not be Confirmed", + ); + } + } + #[test] fn js_vuln_confirms_via_run_spec() { let Some(outcome) = run(Lang::JavaScript, "vuln.js", "run") else { @@ -555,4 +575,20 @@ mod e2e_phase_10 { }; assert_confirmed(Lang::TypeScript, &outcome); } + + #[test] + fn js_benign_not_confirmed_via_run_spec() { + let Some(outcome) = run(Lang::JavaScript, "benign.js", "run") else { + return; + }; + assert_not_confirmed(Lang::JavaScript, &outcome); + } + + #[test] + fn ts_benign_not_confirmed_via_run_spec() { + let Some(outcome) = run(Lang::TypeScript, "benign.ts", "run") else { + return; + }; + assert_not_confirmed(Lang::TypeScript, &outcome); + } }