refactor(dynamic): replace reflective invocation with route replay logic for Micronaut and Quarkus, remove annotation stubs, and enhance runtime path binding

This commit is contained in:
elipeter 2026-05-26 11:38:12 -05:00
parent 61bfc0cf96
commit 41c7b73575
26 changed files with 1256 additions and 224 deletions

View file

@ -1018,10 +1018,10 @@ pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
/// AST kind names for subscript / index expressions
/// across the languages whose container-element flow we model.
///
/// JS/TS use `subscript_expression`; Python uses `subscript`; Go uses
/// `index_expression`. Other languages either lower indexing through
/// method calls (Rust slice indexing) or are out of scope for the
/// initial W5 rollout (Java/Ruby/PHP/C/C++).
/// JS/TS and C/C++ use `subscript_expression`; Python uses `subscript`;
/// Go uses `index_expression`. Other languages either lower indexing
/// through method calls (Rust slice indexing) or are out of scope for
/// the initial W5 rollout (Java/Ruby/PHP).
#[inline]
pub(crate) fn is_subscript_kind(kind: &str) -> bool {
matches!(
@ -1086,7 +1086,8 @@ pub(crate) fn subscript_components<'a>(n: Node<'a>, code: &'a [u8]) -> Option<(S
return None;
}
let arr_text = text_of(arr, code)?;
// PHP-style `$x` strip not needed here, Go/JS/Python don't use it.
// PHP-style `$x` strip not needed here; the supported languages
// don't use it for local array identifiers.
let idx_text = text_of(idx, code)?;
Some((arr_text, idx_text))
}

View file

@ -2507,6 +2507,23 @@ pub(super) fn push_node<'a>(
}
}
// Conditions can contain source/sink calls whose argument side effects are
// load-bearing for taint, e.g. C `if (!fgets(buf, n, stdin)) return;`.
// Classify the condition call so output-parameter sources still lower as
// SSA calls while the CFG node keeps its branch shape.
if labels.is_empty()
&& matches!(lookup(lang, ast.kind()), Kind::If | Kind::While)
&& let Some(cond) = ast.child_by_field_name("condition")
&& let Some((ident, ident_span)) = first_call_ident_with_span(cond, lang, code)
&& let Some(l) = classify(lang, &ident, extra)
{
labels.push(l);
text = ident;
if inner_text_span.is_none() {
inner_text_span = Some(ident_span);
}
}
// For `if let` / `while let` patterns: try to classify the value expression
// in the let-condition as a source/sink. E.g. `if let Ok(cmd) = env::var("CMD")`
// should recognise `env::var` as a taint source and label this node accordingly.
@ -3143,11 +3160,12 @@ pub(super) fn push_node<'a>(
};
// Extract condition metadata for If nodes.
let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false)
};
let (condition_text, condition_vars, condition_negated) =
if matches!(lookup(lang, ast.kind()), Kind::If) {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false)
};
// Extract per-argument identifiers for Call nodes.
// Also extract for gated-sink nodes so payload-arg filtering works.

View file

@ -168,10 +168,9 @@ pub enum JavaShape {
/// but uses `POST` semantics for query-vs-body wiring.
ServletDoPost,
/// Spring `@RestController` / `@Controller` with a `@RequestMapping`
/// / `@GetMapping` / `@PostMapping` handler. Harness instantiates
/// the controller via reflection (default ctor) and invokes the
/// handler method with the payload routed into the matching
/// `String` parameter.
/// / `@GetMapping` / `@PostMapping` handler. Harness drives the
/// controller through Spring MockMvc so annotation mapping and
/// request binding stay in the execution path.
SpringController,
/// `public static void main(String[] args)`. Harness calls
/// `Class.forName(name).getMethod("main", String[].class)` and
@ -183,13 +182,12 @@ pub enum JavaShape {
/// single test method.
JunitTest,
/// Quarkus reactive route: `@Path("/foo")` + `@GET`/`@POST` on a
/// method. Harness invokes the method via reflection like Spring.
/// method. Harness replays a JAX-RS request shape through the real
/// Jakarta annotations instead of calling the entry by name only.
QuarkusRoute,
/// Micronaut route: `@Controller("/api")` + `@Get`/`@Post`/`@Put`
/// /`@Delete` on a method. Harness invokes the method via
/// reflection like Spring / Quarkus (the brief specifies an
/// `EmbeddedServer.start` bootstrap, deferred behind the existing
/// synthetic-harness pattern in [`deferred.md`]).
/// /`@Delete` on a method. Harness replays the controller route
/// through Micronaut's runtime annotations and path binding shape.
MicronautRoute,
/// Plain static method — legacy default behaviour from before
/// Phase 14. Harness directly calls `{Class}.{method}(payload)`.
@ -3123,10 +3121,14 @@ fn invoke_for_shape(spec: &HarnessSpec, shape: JavaShape, entry_class: &str) ->
)
}
JavaShape::QuarkusRoute => {
format!(" invokeReflective({entry_class}.class, \"{method}\", payload);")
format!(
" System.out.println(\"NYX_QUARKUS_ROUTE_REPLAY=1\");\n invokeJakartaRestRoute({entry_class}.class, \"{method}\", payload);"
)
}
JavaShape::MicronautRoute => {
format!(" invokeReflective({entry_class}.class, \"{method}\", payload);")
format!(
" System.out.println(\"NYX_MICRONAUT_ROUTE_REPLAY=1\");\n invokeMicronautRoute({entry_class}.class, \"{method}\", payload);"
)
}
JavaShape::JunitTest => {
format!(" invokeJunitTest({entry_class}.class, \"{method}\");")
@ -3140,7 +3142,8 @@ fn shape_helpers(shape: JavaShape) -> &'static str {
JavaShape::StaticMethod | JavaShape::StaticMain => "",
JavaShape::ServletDoGet | JavaShape::ServletDoPost => SERVLET_HELPER,
JavaShape::SpringController => SPRING_MOCKMVC_HELPER,
JavaShape::QuarkusRoute | JavaShape::MicronautRoute => REFLECTIVE_HELPER,
JavaShape::QuarkusRoute => JAKARTA_REST_ROUTE_HELPER,
JavaShape::MicronautRoute => MICRONAUT_ROUTE_HELPER,
JavaShape::JunitTest => JUNIT_HELPER,
}
}
@ -3347,35 +3350,241 @@ const SPRING_MOCKMVC_HELPER: &str = r#"
}
"#;
/// Reflective Spring / Quarkus invocation. Same shape as the servlet
/// reflective fallback but routed through a dedicated helper for
/// clarity in the generated harness.
const REFLECTIVE_HELPER: &str = r#"
/// Jakarta REST route replay used for Quarkus fixtures. It discovers
/// the class and method `@Path` / HTTP-verb annotations at runtime,
/// builds the route path, and binds the payload as the request value
/// for route string parameters.
const JAKARTA_REST_ROUTE_HELPER: &str = r#"
static Object newDefaultInstance(Class<?> cls) throws Exception {
Constructor<?> ctor = cls.getDeclaredConstructor();
ctor.setAccessible(true);
return ctor.newInstance();
}
static void invokeReflective(Class<?> cls, String methodName, String payload) throws Exception {
static void invokeJakartaRestRoute(Class<?> cls, String methodName, String payload) throws Exception {
Object resource = newDefaultInstance(cls);
Method match = null;
for (Method m : cls.getDeclaredMethods()) {
if (m.getName().equals(methodName)) { match = m; break; }
if (!m.getName().equals(methodName)) continue;
if (jakartaHttpVerb(m) != null || jakartaPath(m) != null) {
match = m;
break;
}
if (match == null) {
match = m;
}
}
if (match == null) {
throw new NoSuchMethodException(cls.getName() + "." + methodName);
}
match.setAccessible(true);
Object instance = null;
if (!java.lang.reflect.Modifier.isStatic(match.getModifiers())) {
instance = newDefaultInstance(cls);
String verb = jakartaHttpVerb(match);
if (verb == null) verb = "GET";
String route = joinPath(jakartaPath(cls), jakartaPath(match));
System.out.println("__NYX_ROUTE_REPLAY__:jakarta:" + verb + ":" + route);
Object[] args = routeArgs(match, payload);
Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers()) ? null : resource;
Object result = match.invoke(instance, args);
if (result != null) {
System.out.println(String.valueOf(result));
}
Class<?>[] params = match.getParameterTypes();
}
static String jakartaHttpVerb(Method m) {
for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
String n = ann.annotationType().getName();
if (n.equals("jakarta.ws.rs.GET") || n.equals("javax.ws.rs.GET")) return "GET";
if (n.equals("jakarta.ws.rs.POST") || n.equals("javax.ws.rs.POST")) return "POST";
if (n.equals("jakarta.ws.rs.PUT") || n.equals("javax.ws.rs.PUT")) return "PUT";
if (n.equals("jakarta.ws.rs.DELETE") || n.equals("javax.ws.rs.DELETE")) return "DELETE";
}
return null;
}
static String jakartaPath(Class<?> cls) throws Exception {
return annotationPath(cls.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path");
}
static String jakartaPath(Method m) throws Exception {
return annotationPath(m.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path");
}
static String annotationPath(java.lang.annotation.Annotation[] annotations, String primary, String legacy) throws Exception {
for (java.lang.annotation.Annotation ann : annotations) {
String n = ann.annotationType().getName();
if (!n.equals(primary) && !n.equals(legacy)) continue;
String p = annotationStringValue(ann, "value");
return p == null ? "" : p;
}
return "";
}
static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception {
try {
Object value = ann.annotationType().getMethod(name).invoke(ann);
if (value instanceof String[]) {
String[] arr = (String[]) value;
return arr.length == 0 ? "" : arr[0];
}
if (value instanceof String) {
return (String) value;
}
} catch (NoSuchMethodException ignored) {
}
return "";
}
static Object[] routeArgs(Method m, String payload) {
Class<?>[] params = m.getParameterTypes();
Object[] args = new Object[params.length];
for (int i = 0; i < params.length; i++) {
args[i] = params[i].equals(String.class) ? payload : null;
args[i] = argFor(params[i], payload);
}
match.invoke(instance, args);
return args;
}
static Object argFor(Class<?> p, String payload) {
if (p.equals(String.class)) return payload;
if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE;
if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0);
if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0);
if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0);
if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L);
if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f);
if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d);
if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0');
return null;
}
static String joinPath(String a, String b) {
String left = a == null || a.isEmpty() ? "" : a;
String right = b == null || b.isEmpty() ? "" : b;
if (left.isEmpty() && right.isEmpty()) return "/";
String joined = (left + "/" + right).replaceAll("/+", "/");
if (!joined.startsWith("/")) joined = "/" + joined;
if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1);
return joined;
}
"#;
/// Micronaut route replay. The harness keeps Micronaut's controller and
/// verb annotations on the classpath, discovers the route metadata at
/// runtime, and binds the route payload to string parameters.
///
/// The method-level route path is read only from Micronaut's URI-mapping
/// annotations (`@Get`, `@Post`, `@Put`, `@Delete`, `@Patch`, `@Head`,
/// `@Options`, `@Trace`, `@CustomHttpMethod`, `@UriMapping`). Other
/// annotations in `io.micronaut.http.annotation` (for example
/// `@Produces("text/plain")` or `@Consumes(...)`) also expose a `value`
/// member and must not be mistaken for the route path.
///
/// The emitted Java relies on `java.lang.reflect.Constructor` and
/// `java.lang.reflect.Method` being imported by the surrounding harness.
const MICRONAUT_ROUTE_HELPER: &str = r#"
    static Object newDefaultInstance(Class<?> cls) throws Exception {
        Constructor<?> ctor = cls.getDeclaredConstructor();
        ctor.setAccessible(true);
        return ctor.newInstance();
    }

    static void invokeMicronautRoute(Class<?> cls, String methodName, String payload) throws Exception {
        Object controller = newDefaultInstance(cls);
        Method match = null;
        for (Method m : cls.getDeclaredMethods()) {
            if (!m.getName().equals(methodName)) continue;
            // Prefer the overload that carries route metadata; otherwise
            // fall back to the first method with the requested name.
            if (micronautVerb(m) != null || !micronautPath(m).isEmpty()) {
                match = m;
                break;
            }
            if (match == null) {
                match = m;
            }
        }
        if (match == null) {
            throw new NoSuchMethodException(cls.getName() + "." + methodName);
        }
        match.setAccessible(true);
        String verb = micronautVerb(match);
        if (verb == null) verb = "GET";
        String route = joinPath(micronautControllerPath(cls), micronautPath(match));
        System.out.println("__NYX_ROUTE_REPLAY__:micronaut:" + verb + ":" + route);
        Object[] args = routeArgs(match, payload);
        Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers()) ? null : controller;
        Object result = match.invoke(instance, args);
        if (result != null) {
            System.out.println(String.valueOf(result));
        }
    }

    static String micronautVerb(Method m) {
        for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
            String n = ann.annotationType().getName();
            if (n.equals("io.micronaut.http.annotation.Get")) return "GET";
            if (n.equals("io.micronaut.http.annotation.Post")) return "POST";
            if (n.equals("io.micronaut.http.annotation.Put")) return "PUT";
            if (n.equals("io.micronaut.http.annotation.Delete")) return "DELETE";
        }
        return null;
    }

    // True only for annotations whose `value` member is a route URI.
    // Sibling annotations such as @Produces / @Consumes / @Header also
    // declare `value`, but it holds a media type or header, not a path.
    static boolean isMicronautRouteAnnotation(String name) {
        return name.equals("io.micronaut.http.annotation.Get")
            || name.equals("io.micronaut.http.annotation.Post")
            || name.equals("io.micronaut.http.annotation.Put")
            || name.equals("io.micronaut.http.annotation.Delete")
            || name.equals("io.micronaut.http.annotation.Patch")
            || name.equals("io.micronaut.http.annotation.Head")
            || name.equals("io.micronaut.http.annotation.Options")
            || name.equals("io.micronaut.http.annotation.Trace")
            || name.equals("io.micronaut.http.annotation.CustomHttpMethod")
            || name.equals("io.micronaut.http.annotation.UriMapping");
    }

    static String micronautControllerPath(Class<?> cls) throws Exception {
        return annotationPath(cls.getAnnotations(), "io.micronaut.http.annotation.Controller");
    }

    static String micronautPath(Method m) throws Exception {
        for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
            String n = ann.annotationType().getName();
            if (!isMicronautRouteAnnotation(n)) continue;
            String value = annotationStringValue(ann, "value");
            if (value != null && !value.isEmpty()) return value;
        }
        return "";
    }

    static String annotationPath(java.lang.annotation.Annotation[] annotations, String annotationName) throws Exception {
        for (java.lang.annotation.Annotation ann : annotations) {
            if (!ann.annotationType().getName().equals(annotationName)) continue;
            String p = annotationStringValue(ann, "value");
            return p == null ? "" : p;
        }
        return "";
    }

    static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception {
        try {
            Object value = ann.annotationType().getMethod(name).invoke(ann);
            if (value instanceof String[]) {
                String[] arr = (String[]) value;
                return arr.length == 0 ? "" : arr[0];
            }
            if (value instanceof String) {
                return (String) value;
            }
        } catch (NoSuchMethodException ignored) {
        }
        return "";
    }

    static Object[] routeArgs(Method m, String payload) {
        Class<?>[] params = m.getParameterTypes();
        Object[] args = new Object[params.length];
        for (int i = 0; i < params.length; i++) {
            args[i] = argFor(params[i], payload);
        }
        return args;
    }

    // String parameters receive the payload; primitives and their boxes get
    // a zero value so reflective invocation does not fail on unboxing null.
    static Object argFor(Class<?> p, String payload) {
        if (p.equals(String.class)) return payload;
        if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE;
        if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0);
        if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0);
        if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0);
        if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L);
        if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f);
        if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d);
        if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0');
        return null;
    }

    static String joinPath(String a, String b) {
        String left = a == null || a.isEmpty() ? "" : a;
        String right = b == null || b.isEmpty() ? "" : b;
        if (left.isEmpty() && right.isEmpty()) return "/";
        String joined = (left + "/" + right).replaceAll("/+", "/");
        if (!joined.startsWith("/")) joined = "/" + joined;
        if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1);
        return joined;
    }
"#;
@ -4148,7 +4357,7 @@ mod tests {
}
#[test]
fn spring_shape_emits_reflective_invocation() {
fn spring_shape_emits_mockmvc_invocation() {
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
let src = generate_harness_java(&spec, JavaShape::SpringController, "Vuln");
assert!(src.contains("invokeSpringController(Vuln.class, \"run\""));
@ -4156,17 +4365,23 @@ mod tests {
}
#[test]
fn quarkus_shape_emits_reflective_invocation() {
fn quarkus_shape_emits_route_replay_invocation() {
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
let src = generate_harness_java(&spec, JavaShape::QuarkusRoute, "Vuln");
assert!(src.contains("invokeReflective(Vuln.class, \"run\""));
assert!(src.contains("NYX_QUARKUS_ROUTE_REPLAY=1"));
assert!(src.contains("invokeJakartaRestRoute(Vuln.class, \"run\""));
assert!(src.contains("__NYX_ROUTE_REPLAY__:jakarta:"));
assert!(!src.contains("invokeReflective(Vuln.class, \"run\""));
}
#[test]
fn micronaut_shape_emits_reflective_invocation() {
fn micronaut_shape_emits_route_replay_invocation() {
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
let src = generate_harness_java(&spec, JavaShape::MicronautRoute, "Vuln");
assert!(src.contains("invokeReflective(Vuln.class, \"run\""));
assert!(src.contains("NYX_MICRONAUT_ROUTE_REPLAY=1"));
assert!(src.contains("invokeMicronautRoute(Vuln.class, \"run\""));
assert!(src.contains("__NYX_ROUTE_REPLAY__:micronaut:"));
assert!(!src.contains("invokeReflective(Vuln.class, \"run\""));
}
#[test]

View file

@ -52,11 +52,6 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
LabelRule {
matchers: &["printf", "fprintf"],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
},
LabelRule {
matchers: &["fopen", "open"],
label: DataLabel::Sink(Cap::FILE_IO),
@ -107,18 +102,109 @@ pub static RULES: &[LabelRule] = &[
/// `cfg::mod::classify_gated_sink` for `lang == "c"`. Header-parsing
/// libraries (e.g. libmicrohttpd, mongoose) lack a stable surface and are
/// left to project-specific config.
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
}];
pub static GATED_SINKS: &[SinkGate] = &[
SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// Format-string sinks: only the format parameter is dangerous. Tainted
// data arguments paired with a literal format string are not format-string
// vulnerabilities.
SinkGate {
callee_matcher: "printf",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "fprintf",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// `execv*` forms pass argv as arg 1. The executable path at arg 0 is not
// shell-parsed, so narrow SHELL_ESCAPE/argv-injection checks to the vector.
SinkGate {
callee_matcher: "execv",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execve",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvp",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvpe",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {
// control-flow

View file

@ -74,11 +74,6 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
LabelRule {
matchers: &["printf", "fprintf"],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
},
LabelRule {
matchers: &["fopen", "open"],
label: DataLabel::Sink(Cap::FILE_IO),
@ -118,18 +113,107 @@ pub static RULES: &[LabelRule] = &[
/// HTTP wrappers (cpr, Boost.Beast) layer over libcurl or directly over the
/// socket; their ergonomic surfaces differ enough that adding gates per-
/// library is left for a follow-up driven by the corpus.
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
}];
pub static GATED_SINKS: &[SinkGate] = &[
// libcurl request body: the gate inspects the option constant at arg 1 and,
// for the two POSTFIELDS options listed, treats arg 2 as the payload.
SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// Format-string sinks: only the format parameter is dangerous. Tainted
// data arguments paired with a literal format string are not format-string
// vulnerabilities.
SinkGate {
callee_matcher: "printf",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// `fprintf` takes the stream first, so the format string is arg 1.
SinkGate {
callee_matcher: "fprintf",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// `execv*` forms pass argv as arg 1. The executable path at arg 0 is not
// shell-parsed, so narrow SHELL_ESCAPE/argv-injection checks to the vector.
// (Mirrors the gates declared for C.)
SinkGate {
callee_matcher: "execv",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execve",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvp",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvpe",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {
// control-flow

View file

@ -861,6 +861,10 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
// User input patterns
if cl.contains("argv")
|| cl.contains("stdin")
|| cl.contains("fgets")
|| cl.contains("scanf")
|| cl.contains("gets")
|| cl.contains("recv")
|| cl.contains("request")
|| cl.contains("form")
|| cl.contains("query")

View file

@ -247,6 +247,12 @@ fn classify_cpp(method: &str) -> Option<ContainerOp> {
"front" | "back" | "pop_back" | "pop_front" | "top" | "find" | "count" | "data" => load(),
// Indexed reads: `vector::at(i)`, `unordered_map::at(k)`.
"at" => load_indexed(0),
// Synthetic callees emitted by CFG lowering for subscript
// reads/writes. C arrays and C++ raw arrays use the same
// `subscript_expression` shape as JS/TS, so route them through
// the same indexed container abstraction.
"__index_get__" => load_indexed(0),
"__index_set__" => store_indexed(1, 0),
_ => None,
}
}
@ -456,11 +462,18 @@ mod tests {
}
/// W5: synthetic `__index_get__` is recognised as an indexed load
/// in JS/TS, Python, and Go, driving the index_arg=0 path so a
/// in JS/TS, Python, Go, C, and C++, driving the index_arg=0 path so a
/// constant-key subscript read flows through `HeapSlot::Index(n)`.
#[test]
fn synth_index_get_classified_as_indexed_load_js_py_go() {
for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] {
fn synth_index_get_classified_as_indexed_load_for_subscript_languages() {
for lang in [
Lang::JavaScript,
Lang::TypeScript,
Lang::Python,
Lang::Go,
Lang::C,
Lang::Cpp,
] {
match classify_container_op("__index_get__", lang) {
Some(ContainerOp::Load { index_arg }) => {
assert_eq!(index_arg, Some(0), "{lang:?} should mark idx arg=0");
@ -471,10 +484,17 @@ mod tests {
}
/// W5: synthetic `__index_set__` is recognised as an indexed store
/// in JS/TS, Python, and Go, value at arg 1, index at arg 0.
/// in JS/TS, Python, Go, C, and C++, value at arg 1, index at arg 0.
#[test]
fn synth_index_set_classified_as_indexed_store_js_py_go() {
for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] {
fn synth_index_set_classified_as_indexed_store_for_subscript_languages() {
for lang in [
Lang::JavaScript,
Lang::TypeScript,
Lang::Python,
Lang::Go,
Lang::C,
Lang::Cpp,
] {
match classify_container_op("__index_set__", lang) {
Some(ContainerOp::Store {
value_args,

View file

@ -2458,6 +2458,7 @@ fn rerun_extraction_with_augmented_summaries(
Some(&augmented_snapshot),
formal_destructured,
param_types_ref,
Some(&callee.opt.alias_result),
);
// OR-merge sink-only fields into the existing summary.

View file

@ -87,6 +87,10 @@ const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r"
/// Returns `false` if the needle is a non-metachar literal or cannot be
/// extracted, falls through to broader classification.
fn is_shell_metachar_rejection(text: &str) -> bool {
if is_dash_prefix_rejection(text) {
return true;
}
// Method-call form: `.contains(…)` / `.includes(…)` / `.include?(…)`
for method in [".contains(", ".includes(", ".include?("] {
if let Some(idx) = text.find(method) {
@ -111,6 +115,18 @@ fn is_shell_metachar_rejection(text: &str) -> bool {
false
}
/// Detect the C/C++ argv-injection guard used before exec-family calls:
/// `host[0] == '-'` means the true branch rejects an argv element that would
/// be interpreted as an option by ssh/git/etc., while the false branch is
/// safe for shell/argv execution.
///
/// Both operand orders are recognised (`host[0] == '-'` and
/// `'-' == host[0]`), with either quote style, and whitespace is ignored.
/// In the reversed order the operand compared against the dash literal must
/// itself end in a `[0]` access: a whole-value comparison such as
/// `'-' == mode` says nothing about the first character of an argv element
/// and must not be credited as a dash-prefix guard.
fn is_dash_prefix_rejection(text: &str) -> bool {
    // (forward pattern, reversed-literal prefix) for each quote style.
    const FORMS: [(&str, &str); 2] = [("[0]=='-'", "'-'=="), ("[0]==\"-\"", "\"-\"==")];
    // Characters that end the operand following a reversed literal.
    const OPERAND_END: &[char] = &[')', '&', '|', ';', ',', '?', '+', '=', '<', '!'];

    let compact: String = text.chars().filter(|c| !c.is_whitespace()).collect();
    FORMS.iter().any(|&(forward, reversed)| {
        compact.contains(forward)
            || compact.match_indices(reversed).any(|(at, _)| {
                // Skip a third `=` so strict equality (`===`) is still accepted.
                let operand = compact[at + reversed.len()..].trim_start_matches('=');
                let end = operand.find(OPERAND_END).unwrap_or(operand.len());
                operand[..end].ends_with("[0]")
            })
    })
}
/// Extract the first string literal argument from a slice starting just after
/// an opening `(` in a call expression. Returns the raw inner text of the
/// literal (without surrounding quotes).
@ -698,7 +714,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|| lower.contains(".has(")
|| lower.contains("in_array(")
|| lower.contains(" in ")
|| (lower.contains('[') && !lower.contains('('))
|| is_index_membership_check(text)
{
return PredicateKind::AllowlistCheck;
}
@ -1256,6 +1272,40 @@ fn extract_allowlist_target(text: &str) -> Option<String> {
None
}
/// Recognise map-membership indexing such as `allowed[cmd]`, optionally
/// negated, parenthesised, or followed by an equality comparison.
///
/// Ordinary array indexing and comparisons (`buf[len - 1] == '\n'`) are
/// deliberately not matched: both the indexed base and the index must be
/// plain identifiers, and the expression must not contain a call.
fn is_index_membership_check(text: &str) -> bool {
    // Peel any number of enclosing `( ... )` pairs.
    let mut expr = text.trim();
    loop {
        match expr.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
            Some(unwrapped) => expr = unwrapped.trim(),
            None => break,
        }
    }
    // A single leading negation does not change the membership shape.
    if let Some(negated) = expr.strip_prefix('!') {
        expr = negated.trim();
    }
    // Any remaining parenthesis means a call or grouped sub-expression.
    if expr.contains('(') {
        return false;
    }

    let Some((base, rest)) = expr.split_once('[') else {
        return false;
    };
    let Some((index, tail)) = rest.split_once(']') else {
        return false;
    };
    let tail = tail.trim();
    // `==` / `!=` prefixes also cover the strict `===` / `!==` operators.
    let followed_by_equality = tail.starts_with("==") || tail.starts_with("!=");

    is_identifier(base.trim())
        && is_identifier(index.trim())
        && (tail.is_empty() || followed_by_equality)
}
/// Extract the target variable from a type-check guard.
///
/// Handles:
@ -1699,6 +1749,14 @@ mod tests {
classify_condition("allowed[cmd]"),
PredicateKind::AllowlistCheck
);
assert_eq!(
classify_condition("!allowed[cmd]"),
PredicateKind::AllowlistCheck
);
assert_eq!(
classify_condition("(!allowed[cmd])"),
PredicateKind::AllowlistCheck
);
}
#[test]
@ -1825,6 +1883,10 @@ mod tests {
let (kind, target) = classify_condition_with_target("allowed[cmd]");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
let (kind, target) = classify_condition_with_target("!allowed[cmd]");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
// ── TypeCheck target extraction ───────────────────────────────────
@ -1988,6 +2050,18 @@ mod tests {
);
}
#[test]
fn classify_dash_prefix_rejection_for_argv_injection() {
assert_eq!(
classify_condition("ssh_host[0] == '-'"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("\"-\" == argv0[0]"),
PredicateKind::ShellMetaValidated
);
}
#[test]
fn classify_non_metachar_contains_stays_allowlist() {
// `x.contains("foo")` must NOT be credited as a shell-metachar
@ -2020,6 +2094,14 @@ mod tests {
);
}
#[test]
fn classify_indexed_char_comparison_as_comparison() {
assert_eq!(
classify_condition("len && url_buf[len - 1] == '\\n'"),
PredicateKind::Comparison
);
}
#[test]
fn target_shell_metachar_receiver() {
let (kind, target) = classify_condition_with_target("input.contains(\";\")");

View file

@ -1189,7 +1189,7 @@ fn compute_succ_states(
(*false_blk, exit_state.clone()),
];
};
if cond_info.kind == crate::cfg::StmtKind::If && !cond_info.condition_vars.is_empty() {
if cond_info.condition_text.is_some() && !cond_info.condition_vars.is_empty() {
let cond_text = cond_info.condition_text.as_deref().unwrap_or("");
let (kind, target_var) = classify_condition_with_target(cond_text);
@ -1238,6 +1238,7 @@ fn compute_succ_states(
true_polarity,
transfer.interner,
ssa,
transfer.base_aliases,
);
// Apply validation/predicate to false branch
apply_branch_predicates(
@ -1247,6 +1248,7 @@ fn compute_succ_states(
false_polarity,
transfer.interner,
ssa,
transfer.base_aliases,
);
// PathFact branch narrowing, language-agnostic. The
@ -1478,6 +1480,7 @@ fn apply_branch_predicates(
polarity: bool,
interner: &SymbolInterner,
ssa: &SsaBody,
base_aliases: Option<&crate::ssa::alias::BaseAliasResult>,
) {
// Validation-like predicates: mark condition vars as validated when polarity is true
if matches!(
@ -1584,17 +1587,25 @@ fn apply_branch_predicates(
if kind == PredicateKind::ShellMetaValidated && !polarity {
for var in condition_vars {
let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new();
for (val, _) in state.values.iter() {
if let Some(name) = ssa
.value_defs
.get(val.0 as usize)
.and_then(|vd| vd.var_name.as_deref())
{
if name == var {
to_clear.push(*val);
let mut names: SmallVec<[&str; 4]> = smallvec::smallvec![var.as_str()];
if let Some(aliases) = base_aliases.and_then(|aliases| aliases.aliases_of(var)) {
for alias in aliases {
if alias != var {
names.push(alias.as_str());
}
}
}
for &name_to_clear in names.iter() {
for (idx, def) in ssa.value_defs.iter().enumerate() {
if def.var_name.as_deref() == Some(name_to_clear) {
let val = SsaValue(idx as u32);
to_clear.push(val);
collect_copy_alias_operands(val, ssa, &mut to_clear);
}
}
}
to_clear.sort_by_key(|v| v.0);
to_clear.dedup_by_key(|v| v.0);
for val in to_clear {
if let Some(taint) = state.get(val).cloned() {
let new_caps = taint.caps & !Cap::SHELL_ESCAPE;
@ -1639,6 +1650,33 @@ fn apply_branch_predicates(
}
}
/// Append to `out` every SSA value that `root` was copied from, following
/// copy chains transitively.
///
/// Starting at `root`, the defining instruction of each visited value is
/// inspected:
///
/// * a single-operand `Assign` contributes its one operand,
/// * a `Phi` contributes every incoming operand,
/// * any other op (or a value with no defining instruction) ends the walk
///   along that path.
///
/// `root` itself is not appended. A value may be appended more than once
/// when it is reachable along several paths; each value's definition is
/// expanded only once, so cyclic phi chains terminate.
fn collect_copy_alias_operands(root: SsaValue, ssa: &SsaBody, out: &mut SmallVec<[SsaValue; 4]>) {
    let mut visited = HashSet::new();
    let mut worklist = vec![root];
    while let Some(current) = worklist.pop() {
        // `insert` returns false when the value was already expanded.
        if !visited.insert(current) {
            continue;
        }
        let Some(def_inst) = find_inst_for_value(current, ssa) else {
            continue;
        };
        let copied_from: Vec<SsaValue> = match &def_inst.op {
            SsaOp::Assign(uses) if uses.len() == 1 => vec![uses[0]],
            SsaOp::Phi(operands) => operands.iter().map(|&(_, operand)| operand).collect(),
            _ => continue,
        };
        for operand in copied_from {
            out.push(operand);
            worklist.push(operand);
        }
    }
}
/// Mark the input arguments of a value-producing validator as validated
/// on the success branch of a downstream `err`-check.
///
@ -3982,6 +4020,11 @@ pub(super) fn transfer_inst(
receiver,
..
} => {
if is_noreturn_call(transfer.lang, callee) {
*state = SsaTaintState::bot();
return;
}
// Excluded callees (e.g. router.get, app.post) should not propagate
// taint through their return value, they are framework scaffolding,
// not data-flow operations.
@ -7659,7 +7702,7 @@ fn collect_block_events(
}
// Collect tainted SSA values that flow into this sink
let tainted = collect_tainted_sink_values(
let mut tainted = collect_tainted_sink_values(
inst,
info,
&state,
@ -7670,6 +7713,7 @@ fn collect_block_events(
positions_override,
destination_override,
);
refine_exec_argv_array_shell_taint(inst, transfer.lang, &state, ssa, &mut tainted);
if tainted.is_empty() {
continue;
}
@ -7722,6 +7766,117 @@ fn collect_block_events(
}
}
/// Refine the `SHELL_ESCAPE` taint reported for the argv array of a C/C++
/// `execv`-family call.
///
/// Only applies when `lang` is C or C++ and `inst` is a call whose bare
/// method name is `execv`, `execve`, `execvp` or `execvpe` with a non-empty
/// second argument group (the argv array). For every entry of `tainted`
/// whose value is one of those argv values, the `SHELL_ESCAPE` bit is
/// recomputed by [`exec_argv_non_executable_shell_taint`]; when the
/// recomputed caps still carry `SHELL_ESCAPE`, the entry's origins are
/// replaced by the recomputed origins. Entries for which the helper returns
/// `None` are left unchanged.
///
/// Finally, every entry of `tainted` (refined or not) that does not carry
/// `SHELL_ESCAPE` is removed.
fn refine_exec_argv_array_shell_taint(
    inst: &SsaInst,
    lang: Lang,
    state: &SsaTaintState,
    ssa: &SsaBody,
    tainted: &mut Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>,
) {
    let is_c_family = matches!(lang, Lang::C | Lang::Cpp);
    if !is_c_family {
        return;
    }
    // Pick out the argv operand group of an exec*-with-vector call.
    let argv_values = match &inst.op {
        SsaOp::Call { callee, args, .. }
            if matches!(
                crate::labels::bare_method_name(callee),
                "execv" | "execve" | "execvp" | "execvpe"
            ) =>
        {
            match args.get(1) {
                Some(values) if !values.is_empty() => values,
                _ => return,
            }
        }
        _ => return,
    };
    for entry in tainted.iter_mut() {
        let sink_operand = entry.0;
        let is_argv_operand = argv_values
            .iter()
            .any(|candidate| *candidate == sink_operand);
        if !is_argv_operand {
            continue;
        }
        if let Some((argv_caps, argv_origins)) =
            exec_argv_non_executable_shell_taint(sink_operand, inst.value, state, ssa)
        {
            // Swap the coarse SHELL_ESCAPE bit for the per-element result,
            // keeping every other capability bit untouched.
            entry.1 = (entry.1 & !Cap::SHELL_ESCAPE) | argv_caps;
            if argv_caps.contains(Cap::SHELL_ESCAPE) {
                entry.2 = argv_origins;
            }
        }
    }
    tainted.retain(|entry| entry.1.contains(Cap::SHELL_ESCAPE));
}
/// Compute the `SHELL_ESCAPE` taint contributed by the elements stored into
/// an exec argv array, excluding the first store.
///
/// Collects every `__index_set__` call in `ssa` whose receiver is `argv` and
/// whose SSA value number is lower than `sink_value`'s, orders them by SSA
/// value number, and skips the lowest-numbered one (presumably the
/// `argv[0]` executable slot; this relies on SSA numbering following store
/// order, TODO confirm). For each remaining store, the values in its second
/// argument group are looked up in `state`; a value contributes when its
/// taint carries `SHELL_ESCAPE` and has at least one origin whose source
/// kind is not `SourceKind::EnvironmentConfig`. Only those non-environment
/// origins are recorded.
///
/// Returns `None` when no matching store exists. Otherwise returns the
/// accumulated caps (empty, or exactly `SHELL_ESCAPE`) and origins.
fn exec_argv_non_executable_shell_taint(
    argv: SsaValue,
    sink_value: SsaValue,
    state: &SsaTaintState,
    ssa: &SsaBody,
) -> Option<(Cap, SmallVec<[TaintOrigin; 2]>)> {
    let mut element_stores: Vec<(u32, SmallVec<[SsaValue; 2]>)> = Vec::new();
    for block in &ssa.blocks {
        let insts = block.phis.iter().chain(block.body.iter());
        // Only stores numbered before the sink call are considered.
        for inst in insts.filter(|inst| inst.value.0 < sink_value.0) {
            if let SsaOp::Call {
                callee,
                args,
                receiver: Some(receiver),
                ..
            } = &inst.op
            {
                if callee == "__index_set__" && *receiver == argv {
                    let stored = args.get(1).cloned().unwrap_or_default();
                    element_stores.push((inst.value.0, stored));
                }
            }
        }
    }
    if element_stores.is_empty() {
        return None;
    }
    element_stores.sort_by_key(|(value_id, _)| *value_id);

    let mut caps = Cap::empty();
    let mut origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
    // Skip the first store and flatten the rest into their stored values.
    let stored_values = element_stores
        .into_iter()
        .skip(1)
        .flat_map(|(_, values)| values);
    for value in stored_values {
        let taint = match state.get(value) {
            Some(taint) if taint.caps.contains(Cap::SHELL_ESCAPE) => taint,
            _ => continue,
        };
        let mut contributed = false;
        let non_env_origins = taint
            .origins
            .iter()
            .copied()
            .filter(|origin| origin.source_kind != SourceKind::EnvironmentConfig);
        for origin in non_env_origins {
            contributed = true;
            push_origin_bounded(&mut origins, origin);
        }
        if contributed {
            caps |= Cap::SHELL_ESCAPE;
        }
    }
    Some((caps, origins))
}
/// Report whether `callee` names a C/C++ process-termination function that
/// never returns to its caller.
///
/// Always `false` for languages other than C and C++. For C/C++ the bare
/// method name of `callee` is compared against the ISO C termination
/// functions (`exit`, `_Exit`, `quick_exit`, `abort`) and the POSIX `_exit`,
/// which is the usual way to terminate a forked child around `exec*` calls.
fn is_noreturn_call(lang: Lang, callee: &str) -> bool {
    if !matches!(lang, Lang::C | Lang::Cpp) {
        return false;
    }
    let method = crate::labels::bare_method_name(callee);
    matches!(
        method,
        "exit" | "_exit" | "_Exit" | "quick_exit" | "abort"
    )
}
// ── Primary sink-site attribution ───────────────────────────────────────
/// Decide whether a [`SinkSite`] should be promoted into a caller-side
@ -8293,7 +8448,6 @@ fn try_container_propagation(
}
}
}
if val_caps.is_empty() {
return true; // Container op handled, but no taint to propagate
}

View file

@ -69,6 +69,7 @@ pub fn extract_ssa_func_summary(
None,
formal_destructured_fields,
param_types,
None,
)
}
@ -121,6 +122,7 @@ pub fn extract_ssa_func_summary_full(
// SQL_QUERY caps were invisible to the param-1 probe). `None` for
// legacy / test paths preserves prior behaviour.
param_types: Option<&[Option<TypeKind>]>,
base_aliases: Option<&crate::ssa::alias::BaseAliasResult>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
// Pre-compute type facts on the un-optimised SSA body so the per-param
// probe can resolve sinks that depend on receiver-type inference.
@ -135,6 +137,8 @@ pub fn extract_ssa_func_summary_full(
analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt)
});
let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref();
let probe_const_values = crate::ssa::const_prop::const_propagate(ssa).values;
let probe_points_to = crate::ssa::heap::analyze_points_to(ssa, cfg, Some(lang));
use crate::summary::SinkSite;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
@ -232,6 +236,7 @@ pub fn extract_ssa_func_summary_full(
Vec<ReturnBlockObs>,
) {
let seed_ref = if seed.is_empty() { None } else { Some(&seed) };
let dynamic_pts = std::cell::RefCell::new(std::collections::HashMap::new());
let transfer = SsaTaintTransfer {
lang,
namespace,
@ -244,19 +249,19 @@ pub fn extract_ssa_func_summary_full(
global_seed: seed_ref,
param_seed: None,
receiver_seed: None,
const_values: None,
const_values: Some(&probe_const_values),
type_facts: local_type_facts_ref,
xml_parser_config: None,
xpath_config: None,
ssa_summaries,
extra_labels: None,
base_aliases: None,
base_aliases,
callee_bodies: None,
inline_cache: None,
context_depth: 0,
callback_bindings: None,
points_to: None,
dynamic_pts: None,
points_to: Some(&probe_points_to),
dynamic_pts: Some(&dynamic_pts),
import_bindings: None,
promisify_aliases: None,
module_aliases,
@ -824,7 +829,7 @@ pub fn extract_ssa_func_summary_full(
xpath_config: None,
ssa_summaries,
extra_labels: None,
base_aliases: None,
base_aliases,
callee_bodies: None,
inline_cache: None,
context_depth: 0,

View file

@ -1578,6 +1578,101 @@ fn c_source_to_sink() {
);
}
#[test]
fn c_fgets_condition_to_execvp_argv_fires() {
    // `fgets` is called inside an `if` condition; the buffer it fills is then
    // stored into the argv array handed to `execvp`.
    let src = br#"#include <stdio.h>
#include <unistd.h>
int main(void) {
char url_buf[256];
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
const char *args[3];
args[0] = "ssh";
args[1] = url_buf;
args[2] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let file_cfg = parse_lang(
        src,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    let has_user_input_finding = findings
        .iter()
        .any(|finding| finding.source_kind == crate::labels::SourceKind::UserInput);
    assert!(
        has_user_input_finding,
        "C: fgets stdin should reach execvp argv, got {findings:#?}"
    );
}
#[test]
fn c_execvp_ignores_env_config_executable_path() {
    // The only non-null argv element is the executable path, and it comes
    // from `getenv`; no finding is expected.
    let src = br#"#include <stdlib.h>
#include <unistd.h>
int main(void) {
const char *ssh = getenv("GIT_SSH");
const char *args[2];
args[0] = ssh;
args[1] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let file_cfg = parse_lang(
        src,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    let no_findings = findings.is_empty();
    assert!(
        no_findings,
        "C: env-config executable path should not be treated as argv injection"
    );
}
#[test]
fn c_dash_prefix_guard_suppresses_execvp_argv_injection() {
    // `ssh_host` aliases the tainted buffer and is rejected when it starts
    // with '-', so the later `execvp` must not be reported.
    let src = br#"#include <stdio.h>
#include <unistd.h>
int main(void) {
char url_buf[256];
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
char *ssh_host = url_buf;
if (ssh_host[0] == '-') return 1;
const char *args[3];
args[0] = "ssh";
args[1] = ssh_host;
args[2] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let file_cfg = parse_lang(
        src,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    let no_findings = findings.is_empty();
    assert!(
        no_findings,
        "C: dash-prefix rejection should clear argv-injection taint, got {findings:#?}"
    );
}
#[test]
fn cpp_source_to_sink() {
let src = b"void main() {\n char* x = getenv(\"SECRET\");\n system(x);\n}\n";
@ -4548,6 +4643,248 @@ fn ssa_summary_param_to_sink() {
}
}
#[test]
fn c_summary_param_to_execvp_argv_sink() {
    use crate::state::symbol::SymbolInterner;

    // `url` (parameter 0) is aliased by `ssh_host`, which is stored into the
    // argv array passed to `execvp`.
    let src = br#"#include <unistd.h>
int do_ssh_connect(char *url) {
const char *ssh;
char *ssh_host = url;
const char *port = 0;
get_host_and_port_min(&ssh_host, &port);
if (!port) port = "22";
ssh = getenv("GIT_SSH");
if (!ssh) ssh = "ssh";
const char *args[8];
int nargs = 0;
args[nargs++] = ssh;
if (port) {
args[nargs++] = "-p";
args[nargs++] = port;
}
args[nargs++] = ssh_host;
args[nargs++] = "git-upload-pack";
args[nargs++] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let file_cfg = parse_lang(
        src,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let target = (&file_cfg.bodies)
        .into_iter()
        .find(|body| body.meta.name.as_deref() == Some("do_ssh_connect"));
    let Some(body) = target else {
        panic!("do_ssh_connect function not found");
    };
    let interner = SymbolInterner::from_cfg(&body.graph);
    let ssa = crate::ssa::lower_to_ssa_with_params(
        &body.graph,
        body.entry,
        Some("do_ssh_connect"),
        false,
        &body.meta.params,
    )
    .expect("C function should lower to SSA");
    let summary = ssa_transfer::extract_ssa_func_summary(
        &ssa,
        &body.graph,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &interner,
        body.meta.params.len(),
        None,
        None,
        None,
        None,
        None,
    );
    let url_reaches_shell_sink = summary
        .param_to_sink_caps()
        .iter()
        .any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE));
    assert!(
        url_reaches_shell_sink,
        "C summary should record url param reaching execvp argv, got {:?}",
        summary.param_to_sink_caps()
    );
}
#[test]
fn c_summary_dash_prefix_guard_suppresses_execvp_argv_sink() {
    use crate::state::symbol::SymbolInterner;

    // Same shape as the vulnerable summary case, but a leading '-' in
    // `ssh_host` leads to `exit(1)` before the argv array is built.
    let src = br#"#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
int do_ssh_connect(char *url) {
const char *ssh;
char *ssh_host = url;
const char *port = 0;
if (!port) port = "22";
if (ssh_host[0] == '-') {
fprintf(stderr, "strange hostname '%s' blocked\n", ssh_host);
exit(1);
}
ssh = getenv("GIT_SSH");
if (!ssh) ssh = "ssh";
const char *args[8];
int nargs = 0;
args[nargs++] = ssh;
if (port) {
args[nargs++] = "-p";
args[nargs++] = port;
}
args[nargs++] = ssh_host;
args[nargs++] = "git-upload-pack";
args[nargs++] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let file_cfg = parse_lang(
        src,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let target = (&file_cfg.bodies)
        .into_iter()
        .find(|body| body.meta.name.as_deref() == Some("do_ssh_connect"));
    let Some(body) = target else {
        panic!("do_ssh_connect function not found");
    };
    let interner = SymbolInterner::from_cfg(&body.graph);
    let ssa = crate::ssa::lower_to_ssa_with_params(
        &body.graph,
        body.entry,
        Some("do_ssh_connect"),
        false,
        &body.meta.params,
    )
    .expect("C function should lower to SSA");
    let param_count = body.meta.params.len();
    let summary = ssa_transfer::extract_ssa_func_summary(
        &ssa,
        &body.graph,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &interner,
        param_count,
        None,
        None,
        None,
        None,
        None,
    );
    let url_reaches_shell_sink = summary
        .param_to_sink_caps()
        .iter()
        .any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE));
    assert!(
        !url_reaches_shell_sink,
        "dash-prefix guard should suppress argv-injection summary, got {:?}",
        summary.param_to_sink_caps()
    );
}
#[test]
fn c_fgets_reaches_execvp_argv_through_summary() {
    // The source (`fgets` in `main`) and the sink (`execvp` in
    // `do_ssh_connect`) live in different functions of the same file.
    let src = br#"#include <stdio.h>
#include <unistd.h>
int do_ssh_connect(char *url) {
char *ssh_host = url;
const char *args[3];
args[0] = "ssh";
args[1] = ssh_host;
args[2] = 0;
return execvp(args[0], (char *const *)args);
}
int main(void) {
char url_buf[256];
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
return do_ssh_connect(url_buf);
}
"#;
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    let has_user_input_finding = findings
        .iter()
        .any(|finding| finding.source_kind == crate::labels::SourceKind::UserInput);
    assert!(
        has_user_input_finding,
        "C: fgets source should flow through do_ssh_connect summary, got {findings:#?}"
    );
}
#[test]
fn cve_2017_1000117_vulnerable_fixture_fires() {
    // Runs the analysis over the committed vulnerable fixture and expects at
    // least one finding whose source kind is user input.
    let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/vulnerable.c");
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "vulnerable.c",
        &[],
        None,
    );
    let has_user_input_finding = findings
        .iter()
        .any(|finding| finding.source_kind == crate::labels::SourceKind::UserInput);
    assert!(
        has_user_input_finding,
        "CVE-2017-1000117 vulnerable fixture should fire, got {findings:#?}"
    );
}
#[test]
fn cve_2017_1000117_patched_fixture_suppresses_dash_guard() {
    // Runs the analysis over the committed patched fixture and expects no
    // finding whose source kind is user input.
    let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/patched.c");
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "patched.c",
        &[],
        None,
    );
    let no_user_input_finding = findings
        .iter()
        .all(|finding| finding.source_kind != crate::labels::SourceKind::UserInput);
    assert!(
        no_user_input_finding,
        "CVE-2017-1000117 patched fixture should suppress argv injection, got {findings:#?}"
    );
}
#[test]
fn ssa_cross_function_taint_with_sanitizer_wrapper() {
// Cross-function: caller passes tainted data through sanitizer wrapper

View file

@ -1,14 +1,14 @@
# Benchmark Results
Current baseline (2026-05-02):
Current baseline (2026-05-26):
| Metric | File-level | Rule-level | CI floor |
|-----------|------------|------------|----------|
| Precision | 1.000 | 1.000 | 0.861 |
| Recall | 1.000 | 1.000 | 0.944 |
| F1 | 1.000 | 1.000 | 0.901 |
| Recall | 0.996 | 0.996 | 0.944 |
| F1 | 0.998 | 0.998 | 0.901 |
Corpus: 507 cases across 10 languages, 504 evaluated (3 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
Corpus: 565 cases across 10 languages, 564 evaluated (1 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 30 published advisories across all 10 languages. Both contribute to the headline numbers.
@ -53,14 +53,14 @@ Real disclosed CVEs reduced to minimal reproducers, vulnerable + patched pair pe
| CVE-2024-32884 | Rust | gitoxide | Apache-2.0 OR MIT | CMDI | detected |
| CVE-2025-53549 | Rust | matrix-rust-sdk | Apache-2.0 | SQL Injection | detected |
| CVE-2016-3714 | C | ImageMagick (ImageTragick) | ImageMagick License | CMDI | detected |
| CVE-2017-1000117 | C | git (ssh:// argv injection)| GPL-2.0 | cmdi (argv-inj) | deferred |
| CVE-2017-1000117 | C | git (ssh:// argv injection)| GPL-2.0 | cmdi (argv-inj) | detected |
| CVE-2019-18634 | C | sudo (pwfeedback) | ISC | memory_safety | detected |
| CVE-2019-13132 | C++ | ZeroMQ libzmq | MPL-2.0 | memory_safety | detected |
| CVE-2022-1941 | C++ | Protocol Buffers | BSD-3-Clause | memory_safety | detected |
| CVE-2026-25544 | TypeScript | Payload (Drizzle adapter) | MIT | sql_injection | detected |
| CVE-2026-42353 | JavaScript | i18next-http-middleware | MIT | path_traversal | detected |
Deferred entries are real bugs Nyx can't yet detect. The fixture stays committed with `disabled: true` in ground truth so the gap remains visible.
No real-CVE entries are currently deferred. If a future real-CVE fixture exposes a detector gap, keep it committed with `disabled: true` in ground truth so the gap remains visible.
### How CVEs get picked
@ -83,7 +83,8 @@ Most recent first. Metrics are rule-level on the corpus size at that point.
| Date | Change | Corpus | P | R | F1 |
|------------|------------------------------------------------------------------------------|--------|-------|-------|-------|
| 2026-05-26 | Benchmark docs corrected for CVE-2026-25544: the Payload Drizzle SQL injection fixture is enabled and detected in `ground_truth.json`; only CVE-2017-1000117 remains deferred in the real-CVE table | 565 | 1.000 | 1.000 | 1.000 |
| 2026-05-26 | C argv-injection taint now propagates through execvp argv arrays while recognising the upstream `ssh_host[0] == '-'` dash-prefix rejection and ignoring env-derived executable-path argv elements; CVE-2017-1000117 re-enabled and detected, patched counterpart stays clean | 565 | 1.000 | 0.996 | 0.998 |
| 2026-05-26 | Benchmark docs corrected for CVE-2026-25544: the Payload Drizzle SQL injection fixture is enabled and detected in `ground_truth.json` | 565 | 1.000 | 1.000 | 1.000 |
| 2026-05-04 | C cvehunt session-0014: CVE-2017-1000117 (git ssh:// hostname-as-argv injection) added in corpus disabled — three-layer C engine gap: (a) array-element taint propagation through `args[i] = ssh_host;` writes, (b) missing `c.cmdi.exec*` AST patterns in `src/patterns/c.rs`, (c) sanitizer recognition of the upstream `if (ssh_host[0] == '-') die(...)` dash-prefix guard | 565 | 1.000 | 1.000 | 1.000 |
| 2026-05-04 | JS/TS array-method validator-callback narrowing (`try_array_method_validator_callback_narrowing` in `src/taint/ssa_transfer/mod.rs`) — `<arr>.filter(<isSafeXxx>)` / `.find` / `.findLast` strips `Cap::all()` from the call result when the callback resolves to a `BooleanTrueIsValid` validator; CVE-2026-42353 (i18next-http-middleware path traversal) re-enabled in ground truth, deferred queue cleared | 563 | 1.000 | 1.000 | 1.000 |
| 2026-05-04 | JS/TS ternary-RHS source-classification fix in `src/cfg/conditions.rs::lower_ternary_branch` (segment-strip first_member_label on the branch AST) — `let arr = cond ? req.query.lng : "";` now propagates taint through the diamond's join phi instead of lowering both branches to labelless Assign-with-empty-uses; CVE-2026-42353 (i18next-http-middleware path traversal / SSRF) added in corpus disabled — needs Array.prototype.filter(known_validator_callback) precision bridge | 561 | 1.000 | 1.000 | 1.000 |

View file

@ -5359,7 +5359,8 @@
"taint-unsanitised-flow"
],
"allowed_alternative_rule_ids": [
"c.cmdi.execvp"
"c.cmdi.execvp",
"cfg-unguarded-sink"
],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
@ -6078,7 +6079,8 @@
"taint-unsanitised-flow"
],
"allowed_alternative_rule_ids": [
"cpp.cmdi.execvp"
"cpp.cmdi.execvp",
"cfg-unguarded-sink"
],
"forbidden_rule_ids": [],
"expected_severity": "HIGH",
@ -11829,14 +11831,14 @@
"expected_category": "Security",
"expected_sink_lines": [
[
87,
87
95,
95
]
],
"expected_source_lines": [
[
92,
92
95,
95
]
],
"tags": [
@ -11845,8 +11847,7 @@
"argv-injection",
"cmdi"
],
"disabled": true,
"disabled_reason": "C taint engine does not propagate taint through C array-element writes (`args[i] = ssh_host;`) and has no `c.cmdi.exec*` AST pattern; even if such a pattern were added it would also fire on the patched fixture (precision miss) because the CVE is sanitised by a pre-call dash-prefix guard the engine does not classify as a validator. Three-layer deep fix tracked in CVE_DEFERRED.md.",
"disabled": false,
"notes": "CVE-2017-1000117 (git ssh:// argv injection): pre-2.7.6 git accepted `ssh://-oProxyCommand=...@host/repo` URLs and pushed the URL host as an argv element to ssh, where a leading dash was treated as an option flag. GPL-2.0"
},
{
@ -11877,8 +11878,7 @@
"patched",
"negative"
],
"disabled": true,
"disabled_reason": "Paired with cve-c-2017-1000117-vulnerable; precision side requires sanitizer recognition of the upstream `if (ssh_host[0] == '-') die(...)` guard so that adding any `c.cmdi.execvp` AST pattern would not also fire on the patched fixture.",
"disabled": false,
"notes": "CVE-2017-1000117 patched counterpart: dash-prefix gate added before argv assembly; regression guard that Nyx does not refire on the fix once the deferral lands"
},
{
@ -17800,4 +17800,4 @@
"notes": "Patched form of `sanitizeValue` from `@payloadcms/drizzle@v3.73.0` (MIT). Enabled after validated-flow propagation landed."
}
]
}
}

View file

@ -1,6 +1,6 @@
{
"benchmark_version": "1.0",
"timestamp": "2026-05-11T15:19:43Z",
"timestamp": "2026-05-26T16:09:13Z",
"scanner_version": "0.7.0",
"scanner_config": {
"analysis_mode": "Full",
@ -9,10 +9,10 @@
"state_analysis_enabled": true,
"worker_threads": 1
},
"ground_truth_hash": "sha256:00a4629e50841ab26c7ba947adfdab43b909d72d7a0885d604e702cc56552eb4",
"ground_truth_hash": "sha256:4ec1e5ec0d72129f458db49b8aab8579a03e704ed6fe6e67ef45038924868420",
"corpus_size": 565,
"cases_run": 562,
"cases_skipped": 3,
"cases_run": 564,
"cases_skipped": 1,
"outcomes": [
{
"case_id": "c-buf-001",
@ -151,11 +151,11 @@
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 5:18)"
"cfg-unguarded-sink"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 5:18)"
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
@ -680,11 +680,11 @@
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 5:18)"
"cfg-unguarded-sink"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 5:18)"
"cfg-unguarded-sink"
],
"security_finding_count": 1,
"non_security_finding_count": 0
@ -1126,6 +1126,40 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "cve-c-2017-1000117-patched",
"file": "cve_corpus/c/CVE-2017-1000117/patched.c",
"language": "c",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "cve-c-2017-1000117-vulnerable",
"file": "cve_corpus/c/CVE-2017-1000117/vulnerable.c",
"language": "c",
"vuln_class": "cmdi",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 95:12)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 95:12)"
],
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "cve-c-2019-18634-patched",
"file": "cve_corpus/c/CVE-2019-18634/patched.c",
@ -10041,29 +10075,29 @@
}
],
"aggregate_file_level": {
"tp": 274,
"tp": 275,
"fp": 0,
"fn_": 1,
"tn": 287,
"tn": 288,
"precision": 1.0,
"recall": 0.9963636363636363,
"f1": 0.9981785063752276
"recall": 0.9963768115942029,
"f1": 0.9981851179673321
},
"aggregate_rule_level": {
"tp": 274,
"tp": 275,
"fp": 0,
"fn_": 1,
"tn": 287,
"tn": 288,
"precision": 1.0,
"recall": 0.9963636363636363,
"f1": 0.9981785063752276
"recall": 0.9963768115942029,
"f1": 0.9981851179673321
},
"by_language": {
"c": {
"tp": 17,
"tp": 18,
"fp": 0,
"fn_": 0,
"tn": 17,
"tn": 18,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -10170,7 +10204,7 @@
"f1": 1.0
},
"cmdi": {
"tp": 58,
"tp": 59,
"fp": 0,
"fn_": 0,
"tn": 0,
@ -10290,7 +10324,7 @@
"tp": 0,
"fp": 0,
"fn_": 0,
"tn": 284,
"tn": 285,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -10343,31 +10377,31 @@
},
"by_confidence": {
">=High": {
"tp": 85,
"fp": 114,
"fn_": 190,
"tn": 173,
"precision": 0.4271356783919598,
"recall": 0.3090909090909091,
"f1": 0.3586497890295359
"tp": 81,
"fp": 118,
"fn_": 195,
"tn": 170,
"precision": 0.40703517587939697,
"recall": 0.29347826086956524,
"f1": 0.3410526315789474
},
">=Low": {
"tp": 85,
"fp": 142,
"fn_": 190,
"tn": 145,
"precision": 0.3744493392070485,
"recall": 0.3090909090909091,
"f1": 0.33864541832669326
"tp": 81,
"fp": 147,
"fn_": 195,
"tn": 141,
"precision": 0.35526315789473684,
"recall": 0.29347826086956524,
"f1": 0.3214285714285714
},
">=Medium": {
"tp": 85,
"fp": 133,
"fn_": 190,
"tn": 154,
"precision": 0.38990825688073394,
"recall": 0.3090909090909091,
"f1": 0.3448275862068966
"tp": 81,
"fp": 139,
"fn_": 195,
"tn": 149,
"precision": 0.36818181818181817,
"recall": 0.29347826086956524,
"f1": 0.3266129032258065
}
}
}

View file

@ -1,4 +1,4 @@
// Phase 14 Micronaut `@Controller`, benign.
// Micronaut `@Controller`, benign.
//
// Same shape as the vuln but echoes a constant string instead of
// concatenating the path variable into a shell command.

View file

@ -1,17 +0,0 @@
// Phase 14 fixture stub minimal Micronaut `@Controller`.
// Lives in `io.micronaut.http.annotation` so the fixture's
// `import io.micronaut.http.annotation.Controller;` compiles under
// plain javac (no Micronaut Maven dep required).
package io.micronaut.http.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface Controller {
String value() default "";
}

View file

@ -1,14 +0,0 @@
// Phase 14 fixture stub minimal Micronaut `@Get`.
package io.micronaut.http.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface Get {
String value() default "";
}

View file

@ -1,8 +1,9 @@
// Phase 14 Micronaut `@Controller`, vulnerable.
// Micronaut `@Controller`, vulnerable.
//
// `@Controller("/run")` on the class + `@Get("/{id}")` on the handler
// matches the Phase 14 [`JavaShape::MicronautRoute`]. The harness
// invokes `show(payload)` via reflection.
// matches `JavaShape::MicronautRoute`. The harness keeps the real
// Micronaut annotations on the classpath and replays the route through
// those annotations.
import io.micronaut.http.annotation.Controller;
import io.micronaut.http.annotation.Get;

View file

@ -14,5 +14,10 @@
<artifactId>micronaut-http</artifactId>
<version>4.4.0</version>
</dependency>
<dependency>
<groupId>io.micronaut</groupId>
<artifactId>micronaut-core</artifactId>
<version>4.4.0</version>
</dependency>
</dependencies>
</project>

View file

@ -1,6 +1,8 @@
// Phase 14 Quarkus reactive route, benign.
// Quarkus reactive route, benign.
// import io.quarkus.runtime.Quarkus;
import io.quarkus.runtime.Quarkus;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import java.io.BufferedReader;
import java.io.InputStreamReader;

View file

@ -1,11 +0,0 @@
// Phase 14 fixture stub minimal `@GET` Jakarta REST annotation.
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface GET {
}

View file

@ -1,15 +0,0 @@
// Phase 14 fixture stub minimal `@Path` annotation (Jakarta REST).
// Lives in the default package; the fixture imports the symbol as
// plain `@Path` so javac is happy without a Quarkus / Jakarta REST
// Maven dep.
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE, ElementType.METHOD})
public @interface Path {
String value() default "";
}

View file

@ -1,10 +1,10 @@
// Phase 14 Quarkus reactive route, vulnerable.
//
// `@Path("/run")` on the type + `@GET` on the handler matches the
// Phase 14 [`JavaShape::detect`] for Quarkus. The harness invokes
// `run(payload)` via reflection.
// Quarkus reactive route, vulnerable. The harness keeps the real
// Jakarta REST annotations on the classpath and replays the route
// through those annotations.
// import io.quarkus.runtime.Quarkus;
import io.quarkus.runtime.Quarkus;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import java.io.BufferedReader;
import java.io.InputStreamReader;

View file

@ -14,5 +14,10 @@
<artifactId>quarkus-resteasy-reactive</artifactId>
<version>3.8.3</version>
</dependency>
<dependency>
<groupId>jakarta.ws.rs</groupId>
<artifactId>jakarta.ws.rs-api</artifactId>
<version>3.1.0</version>
</dependency>
</dependencies>
</project>

View file

@ -767,6 +767,40 @@ mod phase14_shape_tests {
assert_not_confirmed("quarkus_route", &r);
}
// ── micronaut_route ──────────────────────────────────────────────────────
#[test]
fn micronaut_route_vuln_is_confirmed() {
    // `run` yields `None` when the case cannot be executed; the test then
    // returns without asserting.
    if let Some(outcome) = run(
        "micronaut_route",
        "Vuln.java",
        "show",
        Cap::CODE_EXEC,
        21,
        EntryKind::HttpRoute,
        PayloadSlot::Param(0),
    ) {
        assert_confirmed("micronaut_route", &outcome);
    }
}
#[test]
fn micronaut_route_benign_not_confirmed() {
    // `run` yields `None` when the case cannot be executed; the test then
    // returns without asserting.
    if let Some(outcome) = run(
        "micronaut_route",
        "Benign.java",
        "show",
        Cap::CODE_EXEC,
        18,
        EntryKind::HttpRoute,
        PayloadSlot::Param(0),
    ) {
        assert_not_confirmed("micronaut_route", &outcome);
    }
}
// ── Phase 09 staging assertion (Spring transitive dep pick-up) ──────────
/// Verify the Phase 09 staging path identifies Spring when the