From 41c7b73575f64c9b1cb7e4c8429da66287178955 Mon Sep 17 00:00:00 2001 From: elipeter Date: Tue, 26 May 2026 11:38:12 -0500 Subject: [PATCH] refactor(dynamic): replace reflective invocation with route replay logic for Micronaut and Quarkus, remove annotation stubs, and enhance runtime path binding --- src/cfg/helpers.rs | 11 +- src/cfg/mod.rs | 28 +- src/dynamic/lang/java.rs | 273 ++++++++++++-- src/labels/c.rs | 120 ++++++- src/labels/cpp.rs | 118 +++++- src/labels/mod.rs | 4 + src/ssa/pointsto.rs | 32 +- src/taint/mod.rs | 1 + src/taint/path_state.rs | 84 ++++- src/taint/ssa_transfer/mod.rs | 176 ++++++++- src/taint/ssa_transfer/summary_extract.rs | 15 +- src/taint/tests.rs | 337 ++++++++++++++++++ tests/benchmark/RESULTS.md | 15 +- tests/benchmark/ground_truth.json | 22 +- tests/benchmark/results/latest.json | 116 +++--- .../java/micronaut_route/Benign.java | 2 +- .../java/micronaut_route/Controller.java | 17 - .../java/micronaut_route/Get.java | 14 - .../java/micronaut_route/Vuln.java | 7 +- .../java/micronaut_route/pom.xml | 5 + .../java/quarkus_route/Benign.java | 6 +- .../java/quarkus_route/GET.java | 11 - .../java/quarkus_route/Path.java | 15 - .../java/quarkus_route/Vuln.java | 12 +- .../java/quarkus_route/pom.xml | 5 + tests/java_fixtures.rs | 34 ++ 26 files changed, 1256 insertions(+), 224 deletions(-) delete mode 100644 tests/dynamic_fixtures/java/micronaut_route/Controller.java delete mode 100644 tests/dynamic_fixtures/java/micronaut_route/Get.java delete mode 100644 tests/dynamic_fixtures/java/quarkus_route/GET.java delete mode 100644 tests/dynamic_fixtures/java/quarkus_route/Path.java diff --git a/src/cfg/helpers.rs b/src/cfg/helpers.rs index 792ce152..24238dbf 100644 --- a/src/cfg/helpers.rs +++ b/src/cfg/helpers.rs @@ -1018,10 +1018,10 @@ pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec) { /// AST kind names for subscript / index expressions /// across the languages whose container-element flow we model. 
/// -/// JS/TS use `subscript_expression`; Python uses `subscript`; Go uses -/// `index_expression`. Other languages either lower indexing through -/// method calls (Rust slice indexing) or are out of scope for the -/// initial W5 rollout (Java/Ruby/PHP/C/C++). +/// JS/TS and C/C++ use `subscript_expression`; Python uses `subscript`; +/// Go uses `index_expression`. Other languages either lower indexing +/// through method calls (Rust slice indexing) or are out of scope for +/// the initial W5 rollout (Java/Ruby/PHP). #[inline] pub(crate) fn is_subscript_kind(kind: &str) -> bool { matches!( @@ -1086,7 +1086,8 @@ pub(crate) fn subscript_components<'a>(n: Node<'a>, code: &'a [u8]) -> Option<(S return None; } let arr_text = text_of(arr, code)?; - // PHP-style `$x` strip not needed here, Go/JS/Python don't use it. + // PHP-style `$x` strip not needed here; the supported languages + // don't use it for local array identifiers. let idx_text = text_of(idx, code)?; Some((arr_text, idx_text)) } diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index d541e60c..2a304cc6 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -2507,6 +2507,23 @@ pub(super) fn push_node<'a>( } } + // Conditions can contain source/sink calls whose argument side effects are + // load-bearing for taint, e.g. C `if (!fgets(buf, n, stdin)) return;`. + // Classify the condition call so output-parameter sources still lower as + // SSA calls while the CFG node keeps its branch shape. + if labels.is_empty() + && matches!(lookup(lang, ast.kind()), Kind::If | Kind::While) + && let Some(cond) = ast.child_by_field_name("condition") + && let Some((ident, ident_span)) = first_call_ident_with_span(cond, lang, code) + && let Some(l) = classify(lang, &ident, extra) + { + labels.push(l); + text = ident; + if inner_text_span.is_none() { + inner_text_span = Some(ident_span); + } + } + // For `if let` / `while let` patterns: try to classify the value expression // in the let-condition as a source/sink. E.g. 
`if let Ok(cmd) = env::var("CMD")` // should recognise `env::var` as a taint source and label this node accordingly. @@ -3143,11 +3160,12 @@ pub(super) fn push_node<'a>( }; // Extract condition metadata for If nodes. - let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If { - extract_condition_raw(ast, lang, code) - } else { - (None, Vec::new(), false) - }; + let (condition_text, condition_vars, condition_negated) = + if matches!(lookup(lang, ast.kind()), Kind::If) { + extract_condition_raw(ast, lang, code) + } else { + (None, Vec::new(), false) + }; // Extract per-argument identifiers for Call nodes. // Also extract for gated-sink nodes so payload-arg filtering works. diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index f3f919b7..fd3d9662 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -168,10 +168,9 @@ pub enum JavaShape { /// but uses `POST` semantics for query-vs-body wiring. ServletDoPost, /// Spring `@RestController` / `@Controller` with a `@RequestMapping` - /// / `@GetMapping` / `@PostMapping` handler. Harness instantiates - /// the controller via reflection (default ctor) and invokes the - /// handler method with the payload routed into the matching - /// `String` parameter. + /// / `@GetMapping` / `@PostMapping` handler. Harness drives the + /// controller through Spring MockMvc so annotation mapping and + /// request binding stay in the execution path. SpringController, /// `public static void main(String[] args)`. Harness calls /// `Class.forName(name).getMethod("main", String[].class)` and @@ -183,13 +182,12 @@ pub enum JavaShape { /// single test method. JunitTest, /// Quarkus reactive route: `@Path("/foo")` + `@GET`/`@POST` on a - /// method. Harness invokes the method via reflection like Spring. + /// method. Harness replays a JAX-RS request shape through the real + /// Jakarta annotations instead of calling the entry by name only. 
QuarkusRoute, /// Micronaut route: `@Controller("/api")` + `@Get`/`@Post`/`@Put` - /// /`@Delete` on a method. Harness invokes the method via - /// reflection like Spring / Quarkus (the brief specifies an - /// `EmbeddedServer.start` bootstrap, deferred behind the existing - /// synthetic-harness pattern in [`deferred.md`]). + /// /`@Delete` on a method. Harness replays the controller route + /// through Micronaut's runtime annotations and path binding shape. MicronautRoute, /// Plain static method — legacy default behaviour from before /// Phase 14. Harness directly calls `{Class}.{method}(payload)`. @@ -3123,10 +3121,14 @@ fn invoke_for_shape(spec: &HarnessSpec, shape: JavaShape, entry_class: &str) -> ) } JavaShape::QuarkusRoute => { - format!(" invokeReflective({entry_class}.class, \"{method}\", payload);") + format!( + " System.out.println(\"NYX_QUARKUS_ROUTE_REPLAY=1\");\n invokeJakartaRestRoute({entry_class}.class, \"{method}\", payload);" + ) } JavaShape::MicronautRoute => { - format!(" invokeReflective({entry_class}.class, \"{method}\", payload);") + format!( + " System.out.println(\"NYX_MICRONAUT_ROUTE_REPLAY=1\");\n invokeMicronautRoute({entry_class}.class, \"{method}\", payload);" + ) } JavaShape::JunitTest => { format!(" invokeJunitTest({entry_class}.class, \"{method}\");") @@ -3140,7 +3142,8 @@ fn shape_helpers(shape: JavaShape) -> &'static str { JavaShape::StaticMethod | JavaShape::StaticMain => "", JavaShape::ServletDoGet | JavaShape::ServletDoPost => SERVLET_HELPER, JavaShape::SpringController => SPRING_MOCKMVC_HELPER, - JavaShape::QuarkusRoute | JavaShape::MicronautRoute => REFLECTIVE_HELPER, + JavaShape::QuarkusRoute => JAKARTA_REST_ROUTE_HELPER, + JavaShape::MicronautRoute => MICRONAUT_ROUTE_HELPER, JavaShape::JunitTest => JUNIT_HELPER, } } @@ -3347,35 +3350,241 @@ const SPRING_MOCKMVC_HELPER: &str = r#" } "#; -/// Reflective Spring / Quarkus invocation. 
Same shape as the servlet -/// reflective fallback but routed through a dedicated helper for -/// clarity in the generated harness. -const REFLECTIVE_HELPER: &str = r#" +/// Jakarta REST route replay used for Quarkus fixtures. It discovers +/// the class and method `@Path` / HTTP-verb annotations at runtime, +/// builds the route path, and binds the payload as the request value +/// for route string parameters. +const JAKARTA_REST_ROUTE_HELPER: &str = r#" static Object newDefaultInstance(Class cls) throws Exception { Constructor ctor = cls.getDeclaredConstructor(); ctor.setAccessible(true); return ctor.newInstance(); } - static void invokeReflective(Class cls, String methodName, String payload) throws Exception { + static void invokeJakartaRestRoute(Class cls, String methodName, String payload) throws Exception { + Object resource = newDefaultInstance(cls); Method match = null; for (Method m : cls.getDeclaredMethods()) { - if (m.getName().equals(methodName)) { match = m; break; } + if (!m.getName().equals(methodName)) continue; + if (jakartaHttpVerb(m) != null || !jakartaPath(m).isEmpty()) { /* jakartaPath yields "" (never null) when @Path is absent, so test emptiness to prefer the annotated overload */ + match = m; + break; + } + if (match == null) { + match = m; + } } if (match == null) { throw new NoSuchMethodException(cls.getName() + "." + methodName); } match.setAccessible(true); - Object instance = null; - if (!java.lang.reflect.Modifier.isStatic(match.getModifiers())) { - instance = newDefaultInstance(cls); + String verb = jakartaHttpVerb(match); + if (verb == null) verb = "GET"; + String route = joinPath(jakartaPath(cls), jakartaPath(match)); + System.out.println("__NYX_ROUTE_REPLAY__:jakarta:" + verb + ":" + route); + Object[] args = routeArgs(match, payload); + Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers()) ? 
null : resource; + Object result = match.invoke(instance, args); + if (result != null) { + System.out.println(String.valueOf(result)); } - Class[] params = match.getParameterTypes(); + } + + static String jakartaHttpVerb(Method m) { + for (java.lang.annotation.Annotation ann : m.getAnnotations()) { + String n = ann.annotationType().getName(); + if (n.equals("jakarta.ws.rs.GET") || n.equals("javax.ws.rs.GET")) return "GET"; + if (n.equals("jakarta.ws.rs.POST") || n.equals("javax.ws.rs.POST")) return "POST"; + if (n.equals("jakarta.ws.rs.PUT") || n.equals("javax.ws.rs.PUT")) return "PUT"; + if (n.equals("jakarta.ws.rs.DELETE") || n.equals("javax.ws.rs.DELETE")) return "DELETE"; + } + return null; + } + + static String jakartaPath(Class cls) throws Exception { + return annotationPath(cls.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path"); + } + + static String jakartaPath(Method m) throws Exception { + return annotationPath(m.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path"); + } + + static String annotationPath(java.lang.annotation.Annotation[] annotations, String primary, String legacy) throws Exception { + for (java.lang.annotation.Annotation ann : annotations) { + String n = ann.annotationType().getName(); + if (!n.equals(primary) && !n.equals(legacy)) continue; + String p = annotationStringValue(ann, "value"); + return p == null ? "" : p; + } + return ""; + } + + static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception { + try { + Object value = ann.annotationType().getMethod(name).invoke(ann); + if (value instanceof String[]) { + String[] arr = (String[]) value; + return arr.length == 0 ? 
"" : arr[0]; + } + if (value instanceof String) { + return (String) value; + } + } catch (NoSuchMethodException ignored) { + } + return ""; + } + + static Object[] routeArgs(Method m, String payload) { + Class[] params = m.getParameterTypes(); Object[] args = new Object[params.length]; for (int i = 0; i < params.length; i++) { - args[i] = params[i].equals(String.class) ? payload : null; + args[i] = argFor(params[i], payload); } - match.invoke(instance, args); + return args; + } + + static Object argFor(Class p, String payload) { + if (p.equals(String.class)) return payload; + if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE; + if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0); + if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0); + if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0); + if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L); + if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f); + if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d); + if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0'); + return null; + } + + static String joinPath(String a, String b) { + String left = a == null || a.isEmpty() ? "" : a; + String right = b == null || b.isEmpty() ? "" : b; + if (left.isEmpty() && right.isEmpty()) return "/"; + String joined = (left + "/" + right).replaceAll("/+", "/"); + if (!joined.startsWith("/")) joined = "/" + joined; + if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1); + return joined; + } +"#; + +/// Micronaut route replay. The harness keeps Micronaut's controller and +/// verb annotations on the classpath, discovers the route metadata at +/// runtime, and binds the route payload to string parameters. 
+const MICRONAUT_ROUTE_HELPER: &str = r#" + static Object newDefaultInstance(Class cls) throws Exception { + Constructor ctor = cls.getDeclaredConstructor(); + ctor.setAccessible(true); + return ctor.newInstance(); + } + + static void invokeMicronautRoute(Class cls, String methodName, String payload) throws Exception { + Object controller = newDefaultInstance(cls); + Method match = null; + for (Method m : cls.getDeclaredMethods()) { + if (!m.getName().equals(methodName)) continue; + if (micronautVerb(m) != null || !micronautPath(m).isEmpty()) { + match = m; + break; + } + if (match == null) { + match = m; + } + } + if (match == null) { + throw new NoSuchMethodException(cls.getName() + "." + methodName); + } + match.setAccessible(true); + String verb = micronautVerb(match); + if (verb == null) verb = "GET"; + String route = joinPath(micronautControllerPath(cls), micronautPath(match)); + System.out.println("__NYX_ROUTE_REPLAY__:micronaut:" + verb + ":" + route); + Object[] args = routeArgs(match, payload); + Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers()) ? 
null : controller; + Object result = match.invoke(instance, args); + if (result != null) { + System.out.println(String.valueOf(result)); + } + } + + static String micronautVerb(Method m) { + for (java.lang.annotation.Annotation ann : m.getAnnotations()) { + String n = ann.annotationType().getName(); + if (n.equals("io.micronaut.http.annotation.Get")) return "GET"; + if (n.equals("io.micronaut.http.annotation.Post")) return "POST"; + if (n.equals("io.micronaut.http.annotation.Put")) return "PUT"; + if (n.equals("io.micronaut.http.annotation.Delete")) return "DELETE"; + } + return null; + } + + static String micronautControllerPath(Class cls) throws Exception { + return annotationPath(cls.getAnnotations(), "io.micronaut.http.annotation.Controller"); + } + + static String micronautPath(Method m) throws Exception { + for (java.lang.annotation.Annotation ann : m.getAnnotations()) { + String n = ann.annotationType().getName(); + if (!n.startsWith("io.micronaut.http.annotation.")) continue; + String value = annotationStringValue(ann, "value"); + if (value != null && !value.isEmpty()) return value; + } + return ""; + } + + static String annotationPath(java.lang.annotation.Annotation[] annotations, String annotationName) throws Exception { + for (java.lang.annotation.Annotation ann : annotations) { + if (!ann.annotationType().getName().equals(annotationName)) continue; + String p = annotationStringValue(ann, "value"); + return p == null ? "" : p; + } + return ""; + } + + static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception { + try { + Object value = ann.annotationType().getMethod(name).invoke(ann); + if (value instanceof String[]) { + String[] arr = (String[]) value; + return arr.length == 0 ? 
"" : arr[0]; + } + if (value instanceof String) { + return (String) value; + } + } catch (NoSuchMethodException ignored) { + } + return ""; + } + + static Object[] routeArgs(Method m, String payload) { + Class[] params = m.getParameterTypes(); + Object[] args = new Object[params.length]; + for (int i = 0; i < params.length; i++) { + args[i] = argFor(params[i], payload); + } + return args; + } + + static Object argFor(Class p, String payload) { + if (p.equals(String.class)) return payload; + if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE; + if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0); + if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0); + if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0); + if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L); + if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f); + if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d); + if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0'); + return null; + } + + static String joinPath(String a, String b) { + String left = a == null || a.isEmpty() ? "" : a; + String right = b == null || b.isEmpty() ? 
"" : b; + if (left.isEmpty() && right.isEmpty()) return "/"; + String joined = (left + "/" + right).replaceAll("/+", "/"); + if (!joined.startsWith("/")) joined = "/" + joined; + if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1); + return joined; } "#; @@ -4148,7 +4357,7 @@ mod tests { } #[test] - fn spring_shape_emits_reflective_invocation() { + fn spring_shape_emits_mockmvc_invocation() { let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java"); let src = generate_harness_java(&spec, JavaShape::SpringController, "Vuln"); assert!(src.contains("invokeSpringController(Vuln.class, \"run\"")); @@ -4156,17 +4365,23 @@ mod tests { } #[test] - fn quarkus_shape_emits_reflective_invocation() { + fn quarkus_shape_emits_route_replay_invocation() { let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java"); let src = generate_harness_java(&spec, JavaShape::QuarkusRoute, "Vuln"); - assert!(src.contains("invokeReflective(Vuln.class, \"run\"")); + assert!(src.contains("NYX_QUARKUS_ROUTE_REPLAY=1")); + assert!(src.contains("invokeJakartaRestRoute(Vuln.class, \"run\"")); + assert!(src.contains("__NYX_ROUTE_REPLAY__:jakarta:")); + assert!(!src.contains("invokeReflective(Vuln.class, \"run\"")); } #[test] - fn micronaut_shape_emits_reflective_invocation() { + fn micronaut_shape_emits_route_replay_invocation() { let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java"); let src = generate_harness_java(&spec, JavaShape::MicronautRoute, "Vuln"); - assert!(src.contains("invokeReflective(Vuln.class, \"run\"")); + assert!(src.contains("NYX_MICRONAUT_ROUTE_REPLAY=1")); + assert!(src.contains("invokeMicronautRoute(Vuln.class, \"run\"")); + assert!(src.contains("__NYX_ROUTE_REPLAY__:micronaut:")); + assert!(!src.contains("invokeReflective(Vuln.class, \"run\"")); } #[test] diff --git a/src/labels/c.rs b/src/labels/c.rs index 13c95db7..db9f7dda 100644 --- a/src/labels/c.rs +++ b/src/labels/c.rs @@ -52,11 +52,6 @@ pub 
static RULES: &[LabelRule] = &[ label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: false, }, - LabelRule { - matchers: &["printf", "fprintf"], - label: DataLabel::Sink(Cap::FMT_STRING), - case_sensitive: false, - }, LabelRule { matchers: &["fopen", "open"], label: DataLabel::Sink(Cap::FILE_IO), @@ -107,18 +102,109 @@ pub static RULES: &[LabelRule] = &[ /// `cfg::mod::classify_gated_sink` for `lang == "c"`. Header-parsing /// libraries (e.g. libmicrohttpd, mongoose) lack a stable surface and are /// left to project-specific config. -pub static GATED_SINKS: &[SinkGate] = &[SinkGate { - callee_matcher: "curl_easy_setopt", - arg_index: 1, - dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"], - dangerous_prefixes: &[], - label: DataLabel::Sink(Cap::DATA_EXFIL), - case_sensitive: true, - payload_args: &[2], - keyword_name: None, - dangerous_kwargs: &[], - activation: GateActivation::ValueMatch, -}]; +pub static GATED_SINKS: &[SinkGate] = &[ + SinkGate { + callee_matcher: "curl_easy_setopt", + arg_index: 1, + dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::DATA_EXFIL), + case_sensitive: true, + payload_args: &[2], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::ValueMatch, + }, + // Format-string sinks: only the format parameter is dangerous. Tainted + // data arguments paired with a literal format string are not format-string + // vulnerabilities. 
+ SinkGate { + callee_matcher: "printf", + arg_index: 0, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::FMT_STRING), + case_sensitive: false, + payload_args: &[0], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "fprintf", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::FMT_STRING), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + // `execv*` forms pass argv as arg 1. The executable path at arg 0 is not + // shell-parsed, so narrow SHELL_ESCAPE/argv-injection checks to the vector. + SinkGate { + callee_matcher: "execv", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "execve", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "execvp", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "execvpe", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + 
case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, +]; pub static KINDS: Map<&'static str, Kind> = phf_map! { // control-flow diff --git a/src/labels/cpp.rs b/src/labels/cpp.rs index f2285a84..2a0fa625 100644 --- a/src/labels/cpp.rs +++ b/src/labels/cpp.rs @@ -74,11 +74,6 @@ pub static RULES: &[LabelRule] = &[ label: DataLabel::Sink(Cap::HTML_ESCAPE), case_sensitive: false, }, - LabelRule { - matchers: &["printf", "fprintf"], - label: DataLabel::Sink(Cap::FMT_STRING), - case_sensitive: false, - }, LabelRule { matchers: &["fopen", "open"], label: DataLabel::Sink(Cap::FILE_IO), @@ -118,18 +113,107 @@ pub static RULES: &[LabelRule] = &[ /// HTTP wrappers (cpr, Boost.Beast) layer over libcurl or directly over the /// socket; their ergonomic surfaces differ enough that adding gates per- /// library is left for a follow-up driven by the corpus. -pub static GATED_SINKS: &[SinkGate] = &[SinkGate { - callee_matcher: "curl_easy_setopt", - arg_index: 1, - dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"], - dangerous_prefixes: &[], - label: DataLabel::Sink(Cap::DATA_EXFIL), - case_sensitive: true, - payload_args: &[2], - keyword_name: None, - dangerous_kwargs: &[], - activation: GateActivation::ValueMatch, -}]; +pub static GATED_SINKS: &[SinkGate] = &[ + SinkGate { + callee_matcher: "curl_easy_setopt", + arg_index: 1, + dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::DATA_EXFIL), + case_sensitive: true, + payload_args: &[2], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::ValueMatch, + }, + // Format-string sinks: only the format parameter is dangerous. Tainted + // data arguments paired with a literal format string are not format-string + // vulnerabilities. 
+ SinkGate { + callee_matcher: "printf", + arg_index: 0, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::FMT_STRING), + case_sensitive: false, + payload_args: &[0], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "fprintf", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::FMT_STRING), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "execv", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "execve", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "execvp", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: &[], + }, + }, + SinkGate { + callee_matcher: "execvpe", + arg_index: 1, + dangerous_values: &[], + dangerous_prefixes: &[], + label: DataLabel::Sink(Cap::SHELL_ESCAPE), + case_sensitive: false, + payload_args: &[1], + keyword_name: None, + dangerous_kwargs: &[], + activation: GateActivation::Destination { + object_destination_fields: 
&[], + }, + }, +]; pub static KINDS: Map<&'static str, Kind> = phf_map! { // control-flow diff --git a/src/labels/mod.rs b/src/labels/mod.rs index 97ef01f8..b595344c 100644 --- a/src/labels/mod.rs +++ b/src/labels/mod.rs @@ -861,6 +861,10 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind { // User input patterns if cl.contains("argv") || cl.contains("stdin") + || cl.contains("fgets") + || cl.contains("scanf") + || cl.contains("gets") + || cl.contains("recv") || cl.contains("request") || cl.contains("form") || cl.contains("query") diff --git a/src/ssa/pointsto.rs b/src/ssa/pointsto.rs index 950c8b94..23a7292b 100644 --- a/src/ssa/pointsto.rs +++ b/src/ssa/pointsto.rs @@ -247,6 +247,12 @@ fn classify_cpp(method: &str) -> Option { "front" | "back" | "pop_back" | "pop_front" | "top" | "find" | "count" | "data" => load(), // Indexed reads: `vector::at(i)`, `unordered_map::at(k)`. "at" => load_indexed(0), + // Synthetic callees emitted by CFG lowering for subscript + // reads/writes. C arrays and C++ raw arrays use the same + // `subscript_expression` shape as JS/TS, so route them through + // the same indexed container abstraction. + "__index_get__" => load_indexed(0), + "__index_set__" => store_indexed(1, 0), _ => None, } } @@ -456,11 +462,18 @@ mod tests { } /// W5: synthetic `__index_get__` is recognised as an indexed load - /// in JS/TS, Python, and Go, driving the index_arg=0 path so a + /// in JS/TS, Python, Go, C, and C++, driving the index_arg=0 path so a /// constant-key subscript read flows through `HeapSlot::Index(n)`. 
#[test] - fn synth_index_get_classified_as_indexed_load_js_py_go() { - for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] { + fn synth_index_get_classified_as_indexed_load_for_subscript_languages() { + for lang in [ + Lang::JavaScript, + Lang::TypeScript, + Lang::Python, + Lang::Go, + Lang::C, + Lang::Cpp, + ] { match classify_container_op("__index_get__", lang) { Some(ContainerOp::Load { index_arg }) => { assert_eq!(index_arg, Some(0), "{lang:?} should mark idx arg=0"); @@ -471,10 +484,17 @@ mod tests { } /// W5: synthetic `__index_set__` is recognised as an indexed store - /// in JS/TS, Python, and Go, value at arg 1, index at arg 0. + /// in JS/TS, Python, Go, C, and C++, value at arg 1, index at arg 0. #[test] - fn synth_index_set_classified_as_indexed_store_js_py_go() { - for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] { + fn synth_index_set_classified_as_indexed_store_for_subscript_languages() { + for lang in [ + Lang::JavaScript, + Lang::TypeScript, + Lang::Python, + Lang::Go, + Lang::C, + Lang::Cpp, + ] { match classify_container_op("__index_set__", lang) { Some(ContainerOp::Store { value_args, diff --git a/src/taint/mod.rs b/src/taint/mod.rs index 3994f88f..bd92093a 100644 --- a/src/taint/mod.rs +++ b/src/taint/mod.rs @@ -2458,6 +2458,7 @@ fn rerun_extraction_with_augmented_summaries( Some(&augmented_snapshot), formal_destructured, param_types_ref, + Some(&callee.opt.alias_result), ); // OR-merge sink-only fields into the existing summary. diff --git a/src/taint/path_state.rs b/src/taint/path_state.rs index 493f6c5e..bf45b6b5 100644 --- a/src/taint/path_state.rs +++ b/src/taint/path_state.rs @@ -87,6 +87,10 @@ const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r" /// Returns `false` if the needle is a non-metachar literal or cannot be /// extracted, falls through to broader classification. 
fn is_shell_metachar_rejection(text: &str) -> bool { + if is_dash_prefix_rejection(text) { + return true; + } + // Method-call form: `.contains(…)` / `.includes(…)` / `.include?(…)` for method in [".contains(", ".includes(", ".include?("] { if let Some(idx) = text.find(method) { @@ -111,6 +115,18 @@ fn is_shell_metachar_rejection(text: &str) -> bool { false } +/// Detect the C/C++ argv-injection guard used before exec-family calls: +/// `host[0] == '-'` means the true branch rejects an argv element that would +/// be interpreted as an option by ssh/git/etc., while the false branch is +/// safe for shell/argv execution. +fn is_dash_prefix_rejection(text: &str) -> bool { + let compact: String = text.chars().filter(|c| !c.is_whitespace()).collect(); + compact.contains("[0]=='-'") + || compact.contains("[0]==\"-\"") + || compact.contains("'-'==") + || compact.contains("\"-\"==") +} + /// Extract the first string literal argument from a slice starting just after /// an opening `(` in a call expression. Returns the raw inner text of the /// literal (without surrounding quotes). @@ -698,7 +714,7 @@ pub fn classify_condition(text: &str) -> PredicateKind { || lower.contains(".has(") || lower.contains("in_array(") || lower.contains(" in ") - || (lower.contains('[') && !lower.contains('(')) + || is_index_membership_check(text) { return PredicateKind::AllowlistCheck; } @@ -1256,6 +1272,40 @@ fn extract_allowlist_target(text: &str) -> Option { None } +/// Detect map-membership style indexing such as `allowed[cmd]` without +/// treating ordinary array indexing/comparisons (`buf[len - 1] == '\n'`) as +/// allowlist validation. 
+fn is_index_membership_check(text: &str) -> bool { + let mut trimmed = text.trim(); + while let Some(inner) = trimmed + .strip_prefix('(') + .and_then(|rest| rest.strip_suffix(')')) + { + trimmed = inner.trim(); + } + trimmed = trimmed.strip_prefix('!').unwrap_or(trimmed).trim(); + if trimmed.contains('(') { + return false; + } + let Some(open) = trimmed.find('[') else { + return false; + }; + let Some(close_rel) = trimmed[open + 1..].find(']') else { + return false; + }; + let close = open + 1 + close_rel; + let base = trimmed[..open].trim(); + let inner = trimmed[open + 1..close].trim(); + let after = trimmed[close + 1..].trim(); + is_identifier(base) + && is_identifier(inner) + && (after.is_empty() + || after.starts_with("==") + || after.starts_with("!=") + || after.starts_with("===") + || after.starts_with("!==")) +} + /// Extract the target variable from a type-check guard. /// /// Handles: @@ -1699,6 +1749,14 @@ mod tests { classify_condition("allowed[cmd]"), PredicateKind::AllowlistCheck ); + assert_eq!( + classify_condition("!allowed[cmd]"), + PredicateKind::AllowlistCheck + ); + assert_eq!( + classify_condition("(!allowed[cmd])"), + PredicateKind::AllowlistCheck + ); } #[test] @@ -1825,6 +1883,10 @@ mod tests { let (kind, target) = classify_condition_with_target("allowed[cmd]"); assert_eq!(kind, PredicateKind::AllowlistCheck); assert_eq!(target.as_deref(), Some("cmd")); + + let (kind, target) = classify_condition_with_target("!allowed[cmd]"); + assert_eq!(kind, PredicateKind::AllowlistCheck); + assert_eq!(target.as_deref(), Some("cmd")); } // ── TypeCheck target extraction ─────────────────────────────────── @@ -1988,6 +2050,18 @@ mod tests { ); } + #[test] + fn classify_dash_prefix_rejection_for_argv_injection() { + assert_eq!( + classify_condition("ssh_host[0] == '-'"), + PredicateKind::ShellMetaValidated + ); + assert_eq!( + classify_condition("\"-\" == argv0[0]"), + PredicateKind::ShellMetaValidated + ); + } + #[test] fn 
classify_non_metachar_contains_stays_allowlist() { // `x.contains("foo")` must NOT be credited as a shell-metachar @@ -2020,6 +2094,14 @@ mod tests { ); } + #[test] + fn classify_indexed_char_comparison_as_comparison() { + assert_eq!( + classify_condition("len && url_buf[len - 1] == '\\n'"), + PredicateKind::Comparison + ); + } + #[test] fn target_shell_metachar_receiver() { let (kind, target) = classify_condition_with_target("input.contains(\";\")"); diff --git a/src/taint/ssa_transfer/mod.rs b/src/taint/ssa_transfer/mod.rs index 7cb3b29e..4493615a 100644 --- a/src/taint/ssa_transfer/mod.rs +++ b/src/taint/ssa_transfer/mod.rs @@ -1189,7 +1189,7 @@ fn compute_succ_states( (*false_blk, exit_state.clone()), ]; }; - if cond_info.kind == crate::cfg::StmtKind::If && !cond_info.condition_vars.is_empty() { + if cond_info.condition_text.is_some() && !cond_info.condition_vars.is_empty() { let cond_text = cond_info.condition_text.as_deref().unwrap_or(""); let (kind, target_var) = classify_condition_with_target(cond_text); @@ -1238,6 +1238,7 @@ fn compute_succ_states( true_polarity, transfer.interner, ssa, + transfer.base_aliases, ); // Apply validation/predicate to false branch apply_branch_predicates( @@ -1247,6 +1248,7 @@ fn compute_succ_states( false_polarity, transfer.interner, ssa, + transfer.base_aliases, ); // PathFact branch narrowing, language-agnostic. 
The @@ -1478,6 +1480,7 @@ fn apply_branch_predicates( polarity: bool, interner: &SymbolInterner, ssa: &SsaBody, + base_aliases: Option<&crate::ssa::alias::BaseAliasResult>, ) { // Validation-like predicates: mark condition vars as validated when polarity is true if matches!( @@ -1584,17 +1587,25 @@ fn apply_branch_predicates( if kind == PredicateKind::ShellMetaValidated && !polarity { for var in condition_vars { let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new(); - for (val, _) in state.values.iter() { - if let Some(name) = ssa - .value_defs - .get(val.0 as usize) - .and_then(|vd| vd.var_name.as_deref()) - { - if name == var { - to_clear.push(*val); + let mut names: SmallVec<[&str; 4]> = smallvec::smallvec![var.as_str()]; + if let Some(aliases) = base_aliases.and_then(|aliases| aliases.aliases_of(var)) { + for alias in aliases { + if alias != var { + names.push(alias.as_str()); } } } + for &name_to_clear in names.iter() { + for (idx, def) in ssa.value_defs.iter().enumerate() { + if def.var_name.as_deref() == Some(name_to_clear) { + let val = SsaValue(idx as u32); + to_clear.push(val); + collect_copy_alias_operands(val, ssa, &mut to_clear); + } + } + } + to_clear.sort_by_key(|v| v.0); + to_clear.dedup_by_key(|v| v.0); for val in to_clear { if let Some(taint) = state.get(val).cloned() { let new_caps = taint.caps & !Cap::SHELL_ESCAPE; @@ -1639,6 +1650,33 @@ fn apply_branch_predicates( } } +fn collect_copy_alias_operands(root: SsaValue, ssa: &SsaBody, out: &mut SmallVec<[SsaValue; 4]>) { + let mut seen = HashSet::new(); + let mut stack = vec![root]; + while let Some(cur) = stack.pop() { + if !seen.insert(cur) { + continue; + } + let Some(def_inst) = find_inst_for_value(cur, ssa) else { + continue; + }; + match &def_inst.op { + SsaOp::Assign(uses) if uses.len() == 1 => { + let alias = uses[0]; + out.push(alias); + stack.push(alias); + } + SsaOp::Phi(operands) => { + for &(_, alias) in operands { + out.push(alias); + stack.push(alias); + } + } + _ => {} + } + } 
+} + /// Mark the input arguments of a value-producing validator as validated /// on the success branch of a downstream `err`-check. /// @@ -3982,6 +4020,11 @@ pub(super) fn transfer_inst( receiver, .. } => { + if is_noreturn_call(transfer.lang, callee) { + *state = SsaTaintState::bot(); + return; + } + // Excluded callees (e.g. router.get, app.post) should not propagate // taint through their return value, they are framework scaffolding, // not data-flow operations. @@ -7659,7 +7702,7 @@ fn collect_block_events( } // Collect tainted SSA values that flow into this sink - let tainted = collect_tainted_sink_values( + let mut tainted = collect_tainted_sink_values( inst, info, &state, @@ -7670,6 +7713,7 @@ fn collect_block_events( positions_override, destination_override, ); + refine_exec_argv_array_shell_taint(inst, transfer.lang, &state, ssa, &mut tainted); if tainted.is_empty() { continue; } @@ -7722,6 +7766,117 @@ fn collect_block_events( } } +fn refine_exec_argv_array_shell_taint( + inst: &SsaInst, + lang: Lang, + state: &SsaTaintState, + ssa: &SsaBody, + tainted: &mut Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>, +) { + if !matches!(lang, Lang::C | Lang::Cpp) { + return; + } + let SsaOp::Call { callee, args, .. 
} = &inst.op else { + return; + }; + let method = crate::labels::bare_method_name(callee); + if !matches!(method, "execv" | "execve" | "execvp" | "execvpe") { + return; + } + let Some(argv_values) = args.get(1) else { + return; + }; + if argv_values.is_empty() { + return; + } + + for (value, caps, origins) in tainted.iter_mut() { + if !argv_values.iter().any(|argv| argv == value) { + continue; + } + let Some((argv_caps, argv_origins)) = + exec_argv_non_executable_shell_taint(*value, inst.value, state, ssa) + else { + continue; + }; + *caps = (*caps & !Cap::SHELL_ESCAPE) | argv_caps; + if argv_caps.contains(Cap::SHELL_ESCAPE) { + *origins = argv_origins; + } + } + + tainted.retain(|(_, caps, _)| caps.contains(Cap::SHELL_ESCAPE)); +} + +fn exec_argv_non_executable_shell_taint( + argv: SsaValue, + sink_value: SsaValue, + state: &SsaTaintState, + ssa: &SsaBody, +) -> Option<(Cap, SmallVec<[TaintOrigin; 2]>)> { + let mut stores: Vec<(u32, SmallVec<[SsaValue; 2]>)> = Vec::new(); + for block in &ssa.blocks { + for candidate in block.phis.iter().chain(block.body.iter()) { + if candidate.value.0 >= sink_value.0 { + continue; + } + let SsaOp::Call { + callee, + args, + receiver: Some(receiver), + .. 
+ } = &candidate.op + else { + continue; + }; + if callee != "__index_set__" || *receiver != argv { + continue; + } + stores.push((candidate.value.0, args.get(1).cloned().unwrap_or_default())); + } + } + if stores.is_empty() { + return None; + } + stores.sort_by_key(|(value, _)| *value); + + let mut caps = Cap::empty(); + let mut origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); + for (_, values) in stores.into_iter().skip(1) { + for value in values { + let Some(taint) = state.get(value) else { + continue; + }; + if !taint.caps.contains(Cap::SHELL_ESCAPE) { + continue; + } + let non_env_origins: SmallVec<[TaintOrigin; 2]> = taint + .origins + .iter() + .copied() + .filter(|origin| origin.source_kind != SourceKind::EnvironmentConfig) + .collect(); + if non_env_origins.is_empty() { + continue; + } + caps |= Cap::SHELL_ESCAPE; + for origin in non_env_origins { + push_origin_bounded(&mut origins, origin); + } + } + } + + Some((caps, origins)) +} + +fn is_noreturn_call(lang: Lang, callee: &str) -> bool { + if !matches!(lang, Lang::C | Lang::Cpp) { + return false; + } + let method = crate::labels::bare_method_name(callee); + matches!(method, "exit" | "_Exit" | "quick_exit" | "abort") +} + // ── Primary sink-site attribution ─────────────────────────────────────── /// Decide whether a [`SinkSite`] should be promoted into a caller-side @@ -8293,7 +8448,6 @@ fn try_container_propagation( } } } - if val_caps.is_empty() { return true; // Container op handled, but no taint to propagate } diff --git a/src/taint/ssa_transfer/summary_extract.rs b/src/taint/ssa_transfer/summary_extract.rs index c131f777..dbb36502 100644 --- a/src/taint/ssa_transfer/summary_extract.rs +++ b/src/taint/ssa_transfer/summary_extract.rs @@ -69,6 +69,7 @@ pub fn extract_ssa_func_summary( None, formal_destructured_fields, param_types, + None, ) } @@ -121,6 +122,7 @@ pub fn extract_ssa_func_summary_full( // SQL_QUERY caps were invisible to the param-1 probe). 
`None` for // legacy / test paths preserves prior behaviour. param_types: Option<&[Option]>, + base_aliases: Option<&crate::ssa::alias::BaseAliasResult>, ) -> crate::summary::ssa_summary::SsaFuncSummary { // Pre-compute type facts on the un-optimised SSA body so the per-param // probe can resolve sinks that depend on receiver-type inference. @@ -135,6 +137,8 @@ pub fn extract_ssa_func_summary_full( analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt) }); let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref(); + let probe_const_values = crate::ssa::const_prop::const_propagate(ssa).values; + let probe_points_to = crate::ssa::heap::analyze_points_to(ssa, cfg, Some(lang)); use crate::summary::SinkSite; use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform}; @@ -232,6 +236,7 @@ pub fn extract_ssa_func_summary_full( Vec, ) { let seed_ref = if seed.is_empty() { None } else { Some(&seed) }; + let dynamic_pts = std::cell::RefCell::new(std::collections::HashMap::new()); let transfer = SsaTaintTransfer { lang, namespace, @@ -244,19 +249,19 @@ pub fn extract_ssa_func_summary_full( global_seed: seed_ref, param_seed: None, receiver_seed: None, - const_values: None, + const_values: Some(&probe_const_values), type_facts: local_type_facts_ref, xml_parser_config: None, xpath_config: None, ssa_summaries, extra_labels: None, - base_aliases: None, + base_aliases, callee_bodies: None, inline_cache: None, context_depth: 0, callback_bindings: None, - points_to: None, - dynamic_pts: None, + points_to: Some(&probe_points_to), + dynamic_pts: Some(&dynamic_pts), import_bindings: None, promisify_aliases: None, module_aliases, @@ -824,7 +829,7 @@ pub fn extract_ssa_func_summary_full( xpath_config: None, ssa_summaries, extra_labels: None, - base_aliases: None, + base_aliases, callee_bodies: None, inline_cache: None, context_depth: 0, diff --git a/src/taint/tests.rs b/src/taint/tests.rs index 79722ad1..31126eb4 100644 --- a/src/taint/tests.rs 
+++ b/src/taint/tests.rs @@ -1578,6 +1578,101 @@ fn c_source_to_sink() { ); } +#[test] +fn c_fgets_condition_to_execvp_argv_fires() { + let src = br#"#include +#include +int main(void) { + char url_buf[256]; + if (!fgets(url_buf, sizeof url_buf, stdin)) return 1; + const char *args[3]; + args[0] = "ssh"; + args[1] = url_buf; + args[2] = 0; + return execvp(args[0], (char *const *)args); +} +"#; + let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE); + let file_cfg = parse_lang(src, "c", lang); + let findings = analyse_file( + &file_cfg, + &file_cfg.summaries, + None, + Lang::C, + "test.c", + &[], + None, + ); + assert!( + findings + .iter() + .any(|f| f.source_kind == crate::labels::SourceKind::UserInput), + "C: fgets stdin should reach execvp argv, got {findings:#?}" + ); +} + +#[test] +fn c_execvp_ignores_env_config_executable_path() { + let src = br#"#include +#include +int main(void) { + const char *ssh = getenv("GIT_SSH"); + const char *args[2]; + args[0] = ssh; + args[1] = 0; + return execvp(args[0], (char *const *)args); +} +"#; + let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE); + let file_cfg = parse_lang(src, "c", lang); + let findings = analyse_file( + &file_cfg, + &file_cfg.summaries, + None, + Lang::C, + "test.c", + &[], + None, + ); + assert!( + findings.is_empty(), + "C: env-config executable path should not be treated as argv injection" + ); +} + +#[test] +fn c_dash_prefix_guard_suppresses_execvp_argv_injection() { + let src = br#"#include +#include +int main(void) { + char url_buf[256]; + if (!fgets(url_buf, sizeof url_buf, stdin)) return 1; + char *ssh_host = url_buf; + if (ssh_host[0] == '-') return 1; + const char *args[3]; + args[0] = "ssh"; + args[1] = ssh_host; + args[2] = 0; + return execvp(args[0], (char *const *)args); +} +"#; + let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE); + let file_cfg = parse_lang(src, "c", lang); + let findings = analyse_file( + &file_cfg, + &file_cfg.summaries, + None, + 
Lang::C, + "test.c", + &[], + None, + ); + assert!( + findings.is_empty(), + "C: dash-prefix rejection should clear argv-injection taint, got {findings:#?}" + ); +} + #[test] fn cpp_source_to_sink() { let src = b"void main() {\n char* x = getenv(\"SECRET\");\n system(x);\n}\n"; @@ -4548,6 +4643,248 @@ fn ssa_summary_param_to_sink() { } } +#[test] +fn c_summary_param_to_execvp_argv_sink() { + use crate::state::symbol::SymbolInterner; + + let src = br#"#include +int do_ssh_connect(char *url) { + const char *ssh; + char *ssh_host = url; + const char *port = 0; + get_host_and_port_min(&ssh_host, &port); + if (!port) port = "22"; + ssh = getenv("GIT_SSH"); + if (!ssh) ssh = "ssh"; + const char *args[8]; + int nargs = 0; + args[nargs++] = ssh; + if (port) { + args[nargs++] = "-p"; + args[nargs++] = port; + } + args[nargs++] = ssh_host; + args[nargs++] = "git-upload-pack"; + args[nargs++] = 0; + return execvp(args[0], (char *const *)args); +} +"#; + let file_cfg = parse_lang( + src, + "c", + tree_sitter::Language::from(tree_sitter_c::LANGUAGE), + ); + for body in &file_cfg.bodies { + if body.meta.name.as_deref() != Some("do_ssh_connect") { + continue; + } + let interner = SymbolInterner::from_cfg(&body.graph); + let ssa = crate::ssa::lower_to_ssa_with_params( + &body.graph, + body.entry, + Some("do_ssh_connect"), + false, + &body.meta.params, + ) + .expect("C function should lower to SSA"); + let param_count = body.meta.params.len(); + let summary = ssa_transfer::extract_ssa_func_summary( + &ssa, + &body.graph, + &file_cfg.summaries, + None, + Lang::C, + "test.c", + &interner, + param_count, + None, + None, + None, + None, + None, + ); + assert!( + summary + .param_to_sink_caps() + .iter() + .any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE)), + "C summary should record url param reaching execvp argv, got {:?}", + summary.param_to_sink_caps() + ); + return; + } + + panic!("do_ssh_connect function not found"); +} + +#[test] +fn 
c_summary_dash_prefix_guard_suppresses_execvp_argv_sink() { + use crate::state::symbol::SymbolInterner; + + let src = br#"#include +#include +#include +int do_ssh_connect(char *url) { + const char *ssh; + char *ssh_host = url; + const char *port = 0; + if (!port) port = "22"; + if (ssh_host[0] == '-') { + fprintf(stderr, "strange hostname '%s' blocked\n", ssh_host); + exit(1); + } + ssh = getenv("GIT_SSH"); + if (!ssh) ssh = "ssh"; + const char *args[8]; + int nargs = 0; + args[nargs++] = ssh; + if (port) { + args[nargs++] = "-p"; + args[nargs++] = port; + } + args[nargs++] = ssh_host; + args[nargs++] = "git-upload-pack"; + args[nargs++] = 0; + return execvp(args[0], (char *const *)args); +} +"#; + let file_cfg = parse_lang( + src, + "c", + tree_sitter::Language::from(tree_sitter_c::LANGUAGE), + ); + for body in &file_cfg.bodies { + if body.meta.name.as_deref() != Some("do_ssh_connect") { + continue; + } + let interner = SymbolInterner::from_cfg(&body.graph); + let ssa = crate::ssa::lower_to_ssa_with_params( + &body.graph, + body.entry, + Some("do_ssh_connect"), + false, + &body.meta.params, + ) + .expect("C function should lower to SSA"); + let summary = ssa_transfer::extract_ssa_func_summary( + &ssa, + &body.graph, + &file_cfg.summaries, + None, + Lang::C, + "test.c", + &interner, + body.meta.params.len(), + None, + None, + None, + None, + None, + ); + assert!( + !summary + .param_to_sink_caps() + .iter() + .any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE)), + "dash-prefix guard should suppress argv-injection summary, got {:?}", + summary.param_to_sink_caps() + ); + return; + } + + panic!("do_ssh_connect function not found"); +} + +#[test] +fn c_fgets_reaches_execvp_argv_through_summary() { + let src = br#"#include +#include +int do_ssh_connect(char *url) { + char *ssh_host = url; + const char *args[3]; + args[0] = "ssh"; + args[1] = ssh_host; + args[2] = 0; + return execvp(args[0], (char *const *)args); +} +int main(void) { + char url_buf[256]; + 
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1; + return do_ssh_connect(url_buf); +} +"#; + let file_cfg = parse_lang( + src, + "c", + tree_sitter::Language::from(tree_sitter_c::LANGUAGE), + ); + let findings = analyse_file( + &file_cfg, + &file_cfg.summaries, + None, + Lang::C, + "test.c", + &[], + None, + ); + assert!( + findings + .iter() + .any(|f| f.source_kind == crate::labels::SourceKind::UserInput), + "C: fgets source should flow through do_ssh_connect summary, got {findings:#?}" + ); +} + +#[test] +fn cve_2017_1000117_vulnerable_fixture_fires() { + let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/vulnerable.c"); + let file_cfg = parse_lang( + src, + "c", + tree_sitter::Language::from(tree_sitter_c::LANGUAGE), + ); + let findings = analyse_file( + &file_cfg, + &file_cfg.summaries, + None, + Lang::C, + "vulnerable.c", + &[], + None, + ); + assert!( + findings + .iter() + .any(|f| f.source_kind == crate::labels::SourceKind::UserInput), + "CVE-2017-1000117 vulnerable fixture should fire, got {findings:#?}" + ); +} + +#[test] +fn cve_2017_1000117_patched_fixture_suppresses_dash_guard() { + let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/patched.c"); + let file_cfg = parse_lang( + src, + "c", + tree_sitter::Language::from(tree_sitter_c::LANGUAGE), + ); + let findings = analyse_file( + &file_cfg, + &file_cfg.summaries, + None, + Lang::C, + "patched.c", + &[], + None, + ); + assert!( + findings + .iter() + .all(|f| f.source_kind != crate::labels::SourceKind::UserInput), + "CVE-2017-1000117 patched fixture should suppress argv injection, got {findings:#?}" + ); +} + #[test] fn ssa_cross_function_taint_with_sanitizer_wrapper() { // Cross-function: caller passes tainted data through sanitizer wrapper diff --git a/tests/benchmark/RESULTS.md b/tests/benchmark/RESULTS.md index f06826c7..c8e6c58f 100644 --- a/tests/benchmark/RESULTS.md +++ b/tests/benchmark/RESULTS.md @@ -1,14 +1,14 @@ # Benchmark Results 
-Current baseline (2026-05-02): +Current baseline (2026-05-26): | Metric | File-level | Rule-level | CI floor | |-----------|------------|------------|----------| | Precision | 1.000 | 1.000 | 0.861 | -| Recall | 1.000 | 1.000 | 0.944 | -| F1 | 1.000 | 1.000 | 0.901 | +| Recall | 0.996 | 0.996 | 0.944 | +| F1 | 0.998 | 0.998 | 0.901 | -Corpus: 507 cases across 10 languages, 504 evaluated (3 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset. +Corpus: 565 cases across 10 languages, 564 evaluated (1 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset. The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 30 published advisories across all 10 languages. Both contribute to the headline numbers. 
@@ -53,14 +53,14 @@ Real disclosed CVEs reduced to minimal reproducers, vulnerable + patched pair pe | CVE-2024-32884 | Rust | gitoxide | Apache-2.0 OR MIT | CMDI | detected | | CVE-2025-53549 | Rust | matrix-rust-sdk | Apache-2.0 | SQL Injection | detected | | CVE-2016-3714 | C | ImageMagick (ImageTragick) | ImageMagick License | CMDI | detected | -| CVE-2017-1000117 | C | git (ssh:// argv injection)| GPL-2.0 | cmdi (argv-inj) | deferred | +| CVE-2017-1000117 | C | git (ssh:// argv injection)| GPL-2.0 | cmdi (argv-inj) | detected | | CVE-2019-18634 | C | sudo (pwfeedback) | ISC | memory_safety | detected | | CVE-2019-13132 | C++ | ZeroMQ libzmq | MPL-2.0 | memory_safety | detected | | CVE-2022-1941 | C++ | Protocol Buffers | BSD-3-Clause | memory_safety | detected | | CVE-2026-25544 | TypeScript | Payload (Drizzle adapter) | MIT | sql_injection | detected | | CVE-2026-42353 | JavaScript | i18next-http-middleware | MIT | path_traversal | detected | -Deferred entries are real bugs Nyx can't yet detect. The fixture stays committed with `disabled: true` in ground truth so the gap remains visible. +No real-CVE entries are currently deferred. If a future real-CVE fixture exposes a detector gap, keep it committed with `disabled: true` in ground truth so the gap remains visible. ### How CVEs get picked @@ -83,7 +83,8 @@ Most recent first. Metrics are rule-level on the corpus size at that point. 
| Date | Change | Corpus | P | R | F1 | |------------|------------------------------------------------------------------------------|--------|-------|-------|-------| -| 2026-05-26 | Benchmark docs corrected for CVE-2026-25544: the Payload Drizzle SQL injection fixture is enabled and detected in `ground_truth.json`; only CVE-2017-1000117 remains deferred in the real-CVE table | 565 | 1.000 | 1.000 | 1.000 | +| 2026-05-26 | C argv-injection taint now propagates through execvp argv arrays while recognising the upstream `ssh_host[0] == '-'` dash-prefix rejection and ignoring env-derived executable-path argv elements; CVE-2017-1000117 re-enabled and detected, patched counterpart stays clean | 565 | 1.000 | 0.996 | 0.998 | +| 2026-05-26 | Benchmark docs corrected for CVE-2026-25544: the Payload Drizzle SQL injection fixture is enabled and detected in `ground_truth.json` | 565 | 1.000 | 1.000 | 1.000 | | 2026-05-04 | C cvehunt session-0014: CVE-2017-1000117 (git ssh:// hostname-as-argv injection) added in corpus disabled — three-layer C engine gap: (a) array-element taint propagation through `args[i] = ssh_host;` writes, (b) missing `c.cmdi.exec*` AST patterns in `src/patterns/c.rs`, (c) sanitizer recognition of the upstream `if (ssh_host[0] == '-') die(...)` dash-prefix guard | 565 | 1.000 | 1.000 | 1.000 | | 2026-05-04 | JS/TS array-method validator-callback narrowing (`try_array_method_validator_callback_narrowing` in `src/taint/ssa_transfer/mod.rs`) — `.filter()` / `.find` / `.findLast` strips `Cap::all()` from the call result when the callback resolves to a `BooleanTrueIsValid` validator; CVE-2026-42353 (i18next-http-middleware path traversal) re-enabled in ground truth, deferred queue cleared | 563 | 1.000 | 1.000 | 1.000 | | 2026-05-04 | JS/TS ternary-RHS source-classification fix in `src/cfg/conditions.rs::lower_ternary_branch` (segment-strip first_member_label on the branch AST) — `let arr = cond ? 
req.query.lng : "";` now propagates taint through the diamond's join phi instead of lowering both branches to labelless Assign-with-empty-uses; CVE-2026-42353 (i18next-http-middleware path traversal / SSRF) added in corpus disabled — needs Array.prototype.filter(known_validator_callback) precision bridge | 561 | 1.000 | 1.000 | 1.000 | diff --git a/tests/benchmark/ground_truth.json b/tests/benchmark/ground_truth.json index 2f4c2246..e5555c4f 100644 --- a/tests/benchmark/ground_truth.json +++ b/tests/benchmark/ground_truth.json @@ -5359,7 +5359,8 @@ "taint-unsanitised-flow" ], "allowed_alternative_rule_ids": [ - "c.cmdi.execvp" + "c.cmdi.execvp", + "cfg-unguarded-sink" ], "forbidden_rule_ids": [], "expected_severity": "HIGH", @@ -6078,7 +6079,8 @@ "taint-unsanitised-flow" ], "allowed_alternative_rule_ids": [ - "cpp.cmdi.execvp" + "cpp.cmdi.execvp", + "cfg-unguarded-sink" ], "forbidden_rule_ids": [], "expected_severity": "HIGH", @@ -11829,14 +11831,14 @@ "expected_category": "Security", "expected_sink_lines": [ [ - 87, - 87 + 95, + 95 ] ], "expected_source_lines": [ [ - 92, - 92 + 95, + 95 ] ], "tags": [ @@ -11845,8 +11847,7 @@ "argv-injection", "cmdi" ], - "disabled": true, - "disabled_reason": "C taint engine does not propagate taint through C array-element writes (`args[i] = ssh_host;`) and has no `c.cmdi.exec*` AST pattern; even if such a pattern were added it would also fire on the patched fixture (precision miss) because the CVE is sanitised by a pre-call dash-prefix guard the engine does not classify as a validator. Three-layer deep fix tracked in CVE_DEFERRED.md.", + "disabled": false, "notes": "CVE-2017-1000117 (git ssh:// argv injection): pre-2.7.6 git accepted `ssh://-oProxyCommand=...@host/repo` URLs and pushed the URL host as an argv element to ssh, where a leading dash was treated as an option flag. 
GPL-2.0" }, { @@ -11877,8 +11878,7 @@ "patched", "negative" ], - "disabled": true, - "disabled_reason": "Paired with cve-c-2017-1000117-vulnerable; precision side requires sanitizer recognition of the upstream `if (ssh_host[0] == '-') die(...)` guard so that adding any `c.cmdi.execvp` AST pattern would not also fire on the patched fixture.", + "disabled": false, "notes": "CVE-2017-1000117 patched counterpart: dash-prefix gate added before argv assembly; regression guard that Nyx does not refire on the fix once the deferral lands" }, { @@ -17800,4 +17800,4 @@ "notes": "Patched form of `sanitizeValue` from `@payloadcms/drizzle@v3.73.0` (MIT). Enabled after validated-flow propagation landed." } ] -} \ No newline at end of file +} diff --git a/tests/benchmark/results/latest.json b/tests/benchmark/results/latest.json index 3270d7db..be822c20 100644 --- a/tests/benchmark/results/latest.json +++ b/tests/benchmark/results/latest.json @@ -1,6 +1,6 @@ { "benchmark_version": "1.0", - "timestamp": "2026-05-11T15:19:43Z", + "timestamp": "2026-05-26T16:09:13Z", "scanner_version": "0.7.0", "scanner_config": { "analysis_mode": "Full", @@ -9,10 +9,10 @@ "state_analysis_enabled": true, "worker_threads": 1 }, - "ground_truth_hash": "sha256:00a4629e50841ab26c7ba947adfdab43b909d72d7a0885d604e702cc56552eb4", + "ground_truth_hash": "sha256:4ec1e5ec0d72129f458db49b8aab8579a03e704ed6fe6e67ef45038924868420", "corpus_size": 565, - "cases_run": 562, - "cases_skipped": 3, + "cases_run": 564, + "cases_skipped": 1, "outcomes": [ { "case_id": "c-buf-001", @@ -151,11 +151,11 @@ "outcome_rule_level": "TP", "outcome_location_level": "TP", "matched_rule_ids": [ - "taint-unsanitised-flow (source 5:18)" + "cfg-unguarded-sink" ], "unexpected_rule_ids": [], "all_finding_ids": [ - "taint-unsanitised-flow (source 5:18)" + "cfg-unguarded-sink" ], "security_finding_count": 1, "non_security_finding_count": 0 @@ -680,11 +680,11 @@ "outcome_rule_level": "TP", "outcome_location_level": "TP", "matched_rule_ids": 
[ - "taint-unsanitised-flow (source 5:18)" + "cfg-unguarded-sink" ], "unexpected_rule_ids": [], "all_finding_ids": [ - "taint-unsanitised-flow (source 5:18)" + "cfg-unguarded-sink" ], "security_finding_count": 1, "non_security_finding_count": 0 @@ -1126,6 +1126,40 @@ "security_finding_count": 1, "non_security_finding_count": 0 }, + { + "case_id": "cve-c-2017-1000117-patched", + "file": "cve_corpus/c/CVE-2017-1000117/patched.c", + "language": "c", + "vuln_class": "safe", + "is_vulnerable": false, + "outcome_file_level": "TN", + "outcome_rule_level": "TN", + "outcome_location_level": null, + "matched_rule_ids": [], + "unexpected_rule_ids": [], + "all_finding_ids": [], + "security_finding_count": 0, + "non_security_finding_count": 0 + }, + { + "case_id": "cve-c-2017-1000117-vulnerable", + "file": "cve_corpus/c/CVE-2017-1000117/vulnerable.c", + "language": "c", + "vuln_class": "cmdi", + "is_vulnerable": true, + "outcome_file_level": "TP", + "outcome_rule_level": "TP", + "outcome_location_level": "TP", + "matched_rule_ids": [ + "taint-unsanitised-flow (source 95:12)" + ], + "unexpected_rule_ids": [], + "all_finding_ids": [ + "taint-unsanitised-flow (source 95:12)" + ], + "security_finding_count": 1, + "non_security_finding_count": 0 + }, { "case_id": "cve-c-2019-18634-patched", "file": "cve_corpus/c/CVE-2019-18634/patched.c", @@ -10041,29 +10075,29 @@ } ], "aggregate_file_level": { - "tp": 274, + "tp": 275, "fp": 0, "fn_": 1, - "tn": 287, + "tn": 288, "precision": 1.0, - "recall": 0.9963636363636363, - "f1": 0.9981785063752276 + "recall": 0.9963768115942029, + "f1": 0.9981851179673321 }, "aggregate_rule_level": { - "tp": 274, + "tp": 275, "fp": 0, "fn_": 1, - "tn": 287, + "tn": 288, "precision": 1.0, - "recall": 0.9963636363636363, - "f1": 0.9981785063752276 + "recall": 0.9963768115942029, + "f1": 0.9981851179673321 }, "by_language": { "c": { - "tp": 17, + "tp": 18, "fp": 0, "fn_": 0, - "tn": 17, + "tn": 18, "precision": 1.0, "recall": 1.0, "f1": 1.0 @@ -10170,7 
+10204,7 @@ "f1": 1.0 }, "cmdi": { - "tp": 58, + "tp": 59, "fp": 0, "fn_": 0, "tn": 0, @@ -10290,7 +10324,7 @@ "tp": 0, "fp": 0, "fn_": 0, - "tn": 284, + "tn": 285, "precision": 1.0, "recall": 1.0, "f1": 1.0 @@ -10343,31 +10377,31 @@ }, "by_confidence": { ">=High": { - "tp": 85, - "fp": 114, - "fn_": 190, - "tn": 173, - "precision": 0.4271356783919598, - "recall": 0.3090909090909091, - "f1": 0.3586497890295359 + "tp": 81, + "fp": 118, + "fn_": 195, + "tn": 170, + "precision": 0.40703517587939697, + "recall": 0.29347826086956524, + "f1": 0.3410526315789474 }, ">=Low": { - "tp": 85, - "fp": 142, - "fn_": 190, - "tn": 145, - "precision": 0.3744493392070485, - "recall": 0.3090909090909091, - "f1": 0.33864541832669326 + "tp": 81, + "fp": 147, + "fn_": 195, + "tn": 141, + "precision": 0.35526315789473684, + "recall": 0.29347826086956524, + "f1": 0.3214285714285714 }, ">=Medium": { - "tp": 85, - "fp": 133, - "fn_": 190, - "tn": 154, - "precision": 0.38990825688073394, - "recall": 0.3090909090909091, - "f1": 0.3448275862068966 + "tp": 81, + "fp": 139, + "fn_": 195, + "tn": 149, + "precision": 0.36818181818181817, + "recall": 0.29347826086956524, + "f1": 0.3266129032258065 } } } \ No newline at end of file diff --git a/tests/dynamic_fixtures/java/micronaut_route/Benign.java b/tests/dynamic_fixtures/java/micronaut_route/Benign.java index cf5c01f4..30b72cad 100644 --- a/tests/dynamic_fixtures/java/micronaut_route/Benign.java +++ b/tests/dynamic_fixtures/java/micronaut_route/Benign.java @@ -1,4 +1,4 @@ -// Phase 14 — Micronaut `@Controller`, benign. +// Micronaut `@Controller`, benign. // // Same shape as the vuln but echoes a constant string instead of // concatenating the path variable into a shell command. 
diff --git a/tests/dynamic_fixtures/java/micronaut_route/Controller.java b/tests/dynamic_fixtures/java/micronaut_route/Controller.java deleted file mode 100644 index 6f15a739..00000000 --- a/tests/dynamic_fixtures/java/micronaut_route/Controller.java +++ /dev/null @@ -1,17 +0,0 @@ -// Phase 14 fixture stub — minimal Micronaut `@Controller`. -// Lives in `io.micronaut.http.annotation` so the fixture's -// `import io.micronaut.http.annotation.Controller;` compiles under -// plain javac (no Micronaut Maven dep required). - -package io.micronaut.http.annotation; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.TYPE) -public @interface Controller { - String value() default ""; -} diff --git a/tests/dynamic_fixtures/java/micronaut_route/Get.java b/tests/dynamic_fixtures/java/micronaut_route/Get.java deleted file mode 100644 index fe41892a..00000000 --- a/tests/dynamic_fixtures/java/micronaut_route/Get.java +++ /dev/null @@ -1,14 +0,0 @@ -// Phase 14 fixture stub — minimal Micronaut `@Get`. - -package io.micronaut.http.annotation; - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.METHOD) -public @interface Get { - String value() default ""; -} diff --git a/tests/dynamic_fixtures/java/micronaut_route/Vuln.java b/tests/dynamic_fixtures/java/micronaut_route/Vuln.java index a6132e02..f53e8829 100644 --- a/tests/dynamic_fixtures/java/micronaut_route/Vuln.java +++ b/tests/dynamic_fixtures/java/micronaut_route/Vuln.java @@ -1,8 +1,9 @@ -// Phase 14 — Micronaut `@Controller`, vulnerable. +// Micronaut `@Controller`, vulnerable. 
// // `@Controller("/run")` on the class + `@Get("/{id}")` on the handler -// matches the Phase 14 [`JavaShape::MicronautRoute`]. The harness -// invokes `show(payload)` via reflection. +// matches `JavaShape::MicronautRoute`. The harness keeps the real +// Micronaut annotations on the classpath and replays the route through +// those annotations. import io.micronaut.http.annotation.Controller; import io.micronaut.http.annotation.Get; diff --git a/tests/dynamic_fixtures/java/micronaut_route/pom.xml b/tests/dynamic_fixtures/java/micronaut_route/pom.xml index fd5b43d1..1739950f 100644 --- a/tests/dynamic_fixtures/java/micronaut_route/pom.xml +++ b/tests/dynamic_fixtures/java/micronaut_route/pom.xml @@ -14,5 +14,10 @@ micronaut-http 4.4.0 + + io.micronaut + micronaut-core + 4.4.0 + diff --git a/tests/dynamic_fixtures/java/quarkus_route/Benign.java b/tests/dynamic_fixtures/java/quarkus_route/Benign.java index 60a6b571..ad0b87b6 100644 --- a/tests/dynamic_fixtures/java/quarkus_route/Benign.java +++ b/tests/dynamic_fixtures/java/quarkus_route/Benign.java @@ -1,6 +1,8 @@ -// Phase 14 — Quarkus reactive route, benign. +// Quarkus reactive route, benign. -// import io.quarkus.runtime.Quarkus; +import io.quarkus.runtime.Quarkus; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.Path; import java.io.BufferedReader; import java.io.InputStreamReader; diff --git a/tests/dynamic_fixtures/java/quarkus_route/GET.java b/tests/dynamic_fixtures/java/quarkus_route/GET.java deleted file mode 100644 index 485609df..00000000 --- a/tests/dynamic_fixtures/java/quarkus_route/GET.java +++ /dev/null @@ -1,11 +0,0 @@ -// Phase 14 fixture stub — minimal `@GET` Jakarta REST annotation. 
- -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.METHOD) -public @interface GET { -} diff --git a/tests/dynamic_fixtures/java/quarkus_route/Path.java b/tests/dynamic_fixtures/java/quarkus_route/Path.java deleted file mode 100644 index da304526..00000000 --- a/tests/dynamic_fixtures/java/quarkus_route/Path.java +++ /dev/null @@ -1,15 +0,0 @@ -// Phase 14 fixture stub — minimal `@Path` annotation (Jakarta REST). -// Lives in the default package; the fixture imports the symbol as -// plain `@Path` so javac is happy without a Quarkus / Jakarta REST -// Maven dep. - -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -@Retention(RetentionPolicy.RUNTIME) -@Target({ElementType.TYPE, ElementType.METHOD}) -public @interface Path { - String value() default ""; -} diff --git a/tests/dynamic_fixtures/java/quarkus_route/Vuln.java b/tests/dynamic_fixtures/java/quarkus_route/Vuln.java index 442d6425..c884a19e 100644 --- a/tests/dynamic_fixtures/java/quarkus_route/Vuln.java +++ b/tests/dynamic_fixtures/java/quarkus_route/Vuln.java @@ -1,10 +1,10 @@ -// Phase 14 — Quarkus reactive route, vulnerable. -// -// `@Path("/run")` on the type + `@GET` on the handler matches the -// Phase 14 [`JavaShape::detect`] for Quarkus. The harness invokes -// `run(payload)` via reflection. +// Quarkus reactive route, vulnerable. The harness keeps the real +// Jakarta REST annotations on the classpath and replays the route +// through those annotations. 
-// import io.quarkus.runtime.Quarkus; +import io.quarkus.runtime.Quarkus; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.Path; import java.io.BufferedReader; import java.io.InputStreamReader; diff --git a/tests/dynamic_fixtures/java/quarkus_route/pom.xml b/tests/dynamic_fixtures/java/quarkus_route/pom.xml index eb554948..05b075e2 100644 --- a/tests/dynamic_fixtures/java/quarkus_route/pom.xml +++ b/tests/dynamic_fixtures/java/quarkus_route/pom.xml @@ -14,5 +14,10 @@ quarkus-resteasy-reactive 3.8.3 + + jakarta.ws.rs + jakarta.ws.rs-api + 3.1.0 + diff --git a/tests/java_fixtures.rs b/tests/java_fixtures.rs index aa4580b3..0f8d9115 100644 --- a/tests/java_fixtures.rs +++ b/tests/java_fixtures.rs @@ -767,6 +767,40 @@ mod phase14_shape_tests { assert_not_confirmed("quarkus_route", &r); } + // ── micronaut_route ────────────────────────────────────────────────────── + + #[test] + fn micronaut_route_vuln_is_confirmed() { + let Some(r) = run( + "micronaut_route", + "Vuln.java", + "show", + Cap::CODE_EXEC, + 21, + EntryKind::HttpRoute, + PayloadSlot::Param(0), + ) else { + return; + }; + assert_confirmed("micronaut_route", &r); + } + + #[test] + fn micronaut_route_benign_not_confirmed() { + let Some(r) = run( + "micronaut_route", + "Benign.java", + "show", + Cap::CODE_EXEC, + 18, + EntryKind::HttpRoute, + PayloadSlot::Param(0), + ) else { + return; + }; + assert_not_confirmed("micronaut_route", &r); + } + // ── Phase 09 staging assertion (Spring transitive dep pick-up) ────────── /// Verify the Phase 09 staging path identifies Spring when the