refactor(dynamic): replace reflective invocation with route replay logic for Micronaut and Quarkus, remove annotation stubs, and enhance runtime path binding

This commit is contained in:
elipeter 2026-05-26 11:38:12 -05:00
parent 61bfc0cf96
commit 41c7b73575
26 changed files with 1256 additions and 224 deletions

View file

@ -1018,10 +1018,10 @@ pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
/// AST kind names for subscript / index expressions
/// across the languages whose container-element flow we model.
///
/// JS/TS use `subscript_expression`; Python uses `subscript`; Go uses
/// `index_expression`. Other languages either lower indexing through
/// method calls (Rust slice indexing) or are out of scope for the
/// initial W5 rollout (Java/Ruby/PHP/C/C++).
/// JS/TS and C/C++ use `subscript_expression`; Python uses `subscript`;
/// Go uses `index_expression`. Other languages either lower indexing
/// through method calls (Rust slice indexing) or are out of scope for
/// the initial W5 rollout (Java/Ruby/PHP).
#[inline]
pub(crate) fn is_subscript_kind(kind: &str) -> bool {
matches!(
@ -1086,7 +1086,8 @@ pub(crate) fn subscript_components<'a>(n: Node<'a>, code: &'a [u8]) -> Option<(S
return None;
}
let arr_text = text_of(arr, code)?;
// PHP-style `$x` strip not needed here, Go/JS/Python don't use it.
// PHP-style `$x` strip not needed here; the supported languages
// don't use it for local array identifiers.
let idx_text = text_of(idx, code)?;
Some((arr_text, idx_text))
}

View file

@ -2507,6 +2507,23 @@ pub(super) fn push_node<'a>(
}
}
// Conditions can contain source/sink calls whose argument side effects are
// load-bearing for taint, e.g. C `if (!fgets(buf, n, stdin)) return;`.
// Classify the condition call so output-parameter sources still lower as
// SSA calls while the CFG node keeps its branch shape.
if labels.is_empty()
&& matches!(lookup(lang, ast.kind()), Kind::If | Kind::While)
&& let Some(cond) = ast.child_by_field_name("condition")
&& let Some((ident, ident_span)) = first_call_ident_with_span(cond, lang, code)
&& let Some(l) = classify(lang, &ident, extra)
{
labels.push(l);
text = ident;
if inner_text_span.is_none() {
inner_text_span = Some(ident_span);
}
}
// For `if let` / `while let` patterns: try to classify the value expression
// in the let-condition as a source/sink. E.g. `if let Ok(cmd) = env::var("CMD")`
// should recognise `env::var` as a taint source and label this node accordingly.
@ -3143,11 +3160,12 @@ pub(super) fn push_node<'a>(
};
// Extract condition metadata for If nodes.
let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false)
};
let (condition_text, condition_vars, condition_negated) =
if matches!(lookup(lang, ast.kind()), Kind::If) {
extract_condition_raw(ast, lang, code)
} else {
(None, Vec::new(), false)
};
// Extract per-argument identifiers for Call nodes.
// Also extract for gated-sink nodes so payload-arg filtering works.

View file

@ -168,10 +168,9 @@ pub enum JavaShape {
/// but uses `POST` semantics for query-vs-body wiring.
ServletDoPost,
/// Spring `@RestController` / `@Controller` with a `@RequestMapping`
/// / `@GetMapping` / `@PostMapping` handler. Harness instantiates
/// the controller via reflection (default ctor) and invokes the
/// handler method with the payload routed into the matching
/// `String` parameter.
/// / `@GetMapping` / `@PostMapping` handler. Harness drives the
/// controller through Spring MockMvc so annotation mapping and
/// request binding stay in the execution path.
SpringController,
/// `public static void main(String[] args)`. Harness calls
/// `Class.forName(name).getMethod("main", String[].class)` and
@ -183,13 +182,12 @@ pub enum JavaShape {
/// single test method.
JunitTest,
/// Quarkus reactive route: `@Path("/foo")` + `@GET`/`@POST` on a
/// method. Harness invokes the method via reflection like Spring.
/// method. Harness replays a JAX-RS request shape through the real
/// Jakarta annotations instead of calling the entry by name only.
QuarkusRoute,
/// Micronaut route: `@Controller("/api")` + `@Get`/`@Post`/`@Put`
/// /`@Delete` on a method. Harness invokes the method via
/// reflection like Spring / Quarkus (the brief specifies an
/// `EmbeddedServer.start` bootstrap, deferred behind the existing
/// synthetic-harness pattern in [`deferred.md`]).
/// /`@Delete` on a method. Harness replays the controller route
/// through Micronaut's runtime annotations and path binding shape.
MicronautRoute,
/// Plain static method — legacy default behaviour from before
/// Phase 14. Harness directly calls `{Class}.{method}(payload)`.
@ -3123,10 +3121,14 @@ fn invoke_for_shape(spec: &HarnessSpec, shape: JavaShape, entry_class: &str) ->
)
}
JavaShape::QuarkusRoute => {
format!(" invokeReflective({entry_class}.class, \"{method}\", payload);")
format!(
" System.out.println(\"NYX_QUARKUS_ROUTE_REPLAY=1\");\n invokeJakartaRestRoute({entry_class}.class, \"{method}\", payload);"
)
}
JavaShape::MicronautRoute => {
format!(" invokeReflective({entry_class}.class, \"{method}\", payload);")
format!(
" System.out.println(\"NYX_MICRONAUT_ROUTE_REPLAY=1\");\n invokeMicronautRoute({entry_class}.class, \"{method}\", payload);"
)
}
JavaShape::JunitTest => {
format!(" invokeJunitTest({entry_class}.class, \"{method}\");")
@ -3140,7 +3142,8 @@ fn shape_helpers(shape: JavaShape) -> &'static str {
JavaShape::StaticMethod | JavaShape::StaticMain => "",
JavaShape::ServletDoGet | JavaShape::ServletDoPost => SERVLET_HELPER,
JavaShape::SpringController => SPRING_MOCKMVC_HELPER,
JavaShape::QuarkusRoute | JavaShape::MicronautRoute => REFLECTIVE_HELPER,
JavaShape::QuarkusRoute => JAKARTA_REST_ROUTE_HELPER,
JavaShape::MicronautRoute => MICRONAUT_ROUTE_HELPER,
JavaShape::JunitTest => JUNIT_HELPER,
}
}
@ -3347,35 +3350,241 @@ const SPRING_MOCKMVC_HELPER: &str = r#"
}
"#;
/// Jakarta REST route replay used for Quarkus fixtures.
///
/// The embedded Java helper resolves the entry method by name (preferring
/// an overload that carries a JAX-RS verb or a non-empty `@Path`), reads
/// the class- and method-level `@Path` values at runtime, prints a
/// `__NYX_ROUTE_REPLAY__:jakarta:<VERB>:<route>` marker, and invokes the
/// method with the payload bound to every `String` parameter. Primitive
/// and boxed parameters receive zero values; everything else is `null`.
/// Both `jakarta.ws.rs` and legacy `javax.ws.rs` annotation names are
/// recognised.
const JAKARTA_REST_ROUTE_HELPER: &str = r#"
    static Object newDefaultInstance(Class<?> cls) throws Exception {
        Constructor<?> ctor = cls.getDeclaredConstructor();
        ctor.setAccessible(true);
        return ctor.newInstance();
    }

    static void invokeJakartaRestRoute(Class<?> cls, String methodName, String payload) throws Exception {
        Method match = null;
        for (Method m : cls.getDeclaredMethods()) {
            if (!m.getName().equals(methodName)) continue;
            // Prefer the overload that carries route metadata. The path
            // lookup yields "" (never null) when no @Path is present, so
            // the check has to be on emptiness.
            if (jakartaHttpVerb(m) != null || !jakartaPath(m).isEmpty()) {
                match = m;
                break;
            }
            // Otherwise remember the first same-named method as a fallback.
            if (match == null) {
                match = m;
            }
        }
        if (match == null) {
            throw new NoSuchMethodException(cls.getName() + "." + methodName);
        }
        match.setAccessible(true);
        // Static handlers need no receiver, so do not require a default
        // constructor for them.
        Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers())
            ? null
            : newDefaultInstance(cls);
        String verb = jakartaHttpVerb(match);
        if (verb == null) verb = "GET";
        String route = joinPath(jakartaPath(cls), jakartaPath(match));
        System.out.println("__NYX_ROUTE_REPLAY__:jakarta:" + verb + ":" + route);
        Object[] args = routeArgs(match, payload);
        Object result = match.invoke(instance, args);
        if (result != null) {
            System.out.println(String.valueOf(result));
        }
    }

    static String jakartaHttpVerb(Method m) {
        for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
            String n = ann.annotationType().getName();
            if (n.equals("jakarta.ws.rs.GET") || n.equals("javax.ws.rs.GET")) return "GET";
            if (n.equals("jakarta.ws.rs.POST") || n.equals("javax.ws.rs.POST")) return "POST";
            if (n.equals("jakarta.ws.rs.PUT") || n.equals("javax.ws.rs.PUT")) return "PUT";
            if (n.equals("jakarta.ws.rs.DELETE") || n.equals("javax.ws.rs.DELETE")) return "DELETE";
        }
        return null;
    }

    static String jakartaPath(Class<?> cls) throws Exception {
        return annotationPath(cls.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path");
    }

    static String jakartaPath(Method m) throws Exception {
        return annotationPath(m.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path");
    }

    static String annotationPath(java.lang.annotation.Annotation[] annotations, String primary, String legacy) throws Exception {
        for (java.lang.annotation.Annotation ann : annotations) {
            String n = ann.annotationType().getName();
            if (!n.equals(primary) && !n.equals(legacy)) continue;
            String p = annotationStringValue(ann, "value");
            return p == null ? "" : p;
        }
        return "";
    }

    static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception {
        try {
            Object value = ann.annotationType().getMethod(name).invoke(ann);
            if (value instanceof String[]) {
                String[] arr = (String[]) value;
                return arr.length == 0 ? "" : arr[0];
            }
            if (value instanceof String) {
                return (String) value;
            }
        } catch (NoSuchMethodException ignored) {
        }
        return "";
    }

    static Object[] routeArgs(Method m, String payload) {
        Class<?>[] params = m.getParameterTypes();
        Object[] args = new Object[params.length];
        for (int i = 0; i < params.length; i++) {
            args[i] = argFor(params[i], payload);
        }
        return args;
    }

    static Object argFor(Class<?> p, String payload) {
        if (p.equals(String.class)) return payload;
        if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE;
        if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0);
        if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0);
        if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0);
        if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L);
        if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f);
        if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d);
        if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0');
        return null;
    }

    static String joinPath(String a, String b) {
        String left = a == null || a.isEmpty() ? "" : a;
        String right = b == null || b.isEmpty() ? "" : b;
        if (left.isEmpty() && right.isEmpty()) return "/";
        String joined = (left + "/" + right).replaceAll("/+", "/");
        if (!joined.startsWith("/")) joined = "/" + joined;
        if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1);
        return joined;
    }
"#;
/// Micronaut route replay.
///
/// The embedded Java helper resolves the entry method by name (preferring
/// an overload annotated with a Micronaut HTTP-verb annotation), reads the
/// `@Controller` base path and the verb annotation's `value` at runtime,
/// prints a `__NYX_ROUTE_REPLAY__:micronaut:<VERB>:<route>` marker, and
/// invokes the method with the payload bound to every `String` parameter.
/// Primitive and boxed parameters receive zero values; everything else is
/// `null`.
///
/// Only verb annotations contribute a method-level path: other
/// `io.micronaut.http.annotation.*` annotations such as `@Produces` also
/// expose a string `value` and must not be mistaken for a route.
const MICRONAUT_ROUTE_HELPER: &str = r#"
    static Object newDefaultInstance(Class<?> cls) throws Exception {
        Constructor<?> ctor = cls.getDeclaredConstructor();
        ctor.setAccessible(true);
        return ctor.newInstance();
    }

    static void invokeMicronautRoute(Class<?> cls, String methodName, String payload) throws Exception {
        Method match = null;
        for (Method m : cls.getDeclaredMethods()) {
            if (!m.getName().equals(methodName)) continue;
            // Prefer the overload that carries a route (verb) annotation.
            if (micronautVerb(m) != null) {
                match = m;
                break;
            }
            // Otherwise remember the first same-named method as a fallback.
            if (match == null) {
                match = m;
            }
        }
        if (match == null) {
            throw new NoSuchMethodException(cls.getName() + "." + methodName);
        }
        match.setAccessible(true);
        // Static handlers need no receiver, so do not require a default
        // constructor for them.
        Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers())
            ? null
            : newDefaultInstance(cls);
        String verb = micronautVerb(match);
        if (verb == null) verb = "GET";
        String route = joinPath(micronautControllerPath(cls), micronautPath(match));
        System.out.println("__NYX_ROUTE_REPLAY__:micronaut:" + verb + ":" + route);
        Object[] args = routeArgs(match, payload);
        Object result = match.invoke(instance, args);
        if (result != null) {
            System.out.println(String.valueOf(result));
        }
    }

    // Maps a single annotation to its HTTP verb, or null when the
    // annotation is not a Micronaut verb annotation.
    static String micronautVerbOf(java.lang.annotation.Annotation ann) {
        String n = ann.annotationType().getName();
        if (n.equals("io.micronaut.http.annotation.Get")) return "GET";
        if (n.equals("io.micronaut.http.annotation.Post")) return "POST";
        if (n.equals("io.micronaut.http.annotation.Put")) return "PUT";
        if (n.equals("io.micronaut.http.annotation.Delete")) return "DELETE";
        if (n.equals("io.micronaut.http.annotation.Patch")) return "PATCH";
        if (n.equals("io.micronaut.http.annotation.Head")) return "HEAD";
        if (n.equals("io.micronaut.http.annotation.Options")) return "OPTIONS";
        if (n.equals("io.micronaut.http.annotation.Trace")) return "TRACE";
        return null;
    }

    static String micronautVerb(Method m) {
        for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
            String verb = micronautVerbOf(ann);
            if (verb != null) return verb;
        }
        return null;
    }

    static String micronautControllerPath(Class<?> cls) throws Exception {
        return annotationPath(cls.getAnnotations(), "io.micronaut.http.annotation.Controller");
    }

    static String micronautPath(Method m) throws Exception {
        for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
            // Only verb annotations carry the route path; skip @Produces,
            // @Consumes and friends even though they expose a `value`.
            if (micronautVerbOf(ann) == null) continue;
            String value = annotationStringValue(ann, "value");
            if (value != null && !value.isEmpty()) return value;
        }
        return "";
    }

    static String annotationPath(java.lang.annotation.Annotation[] annotations, String annotationName) throws Exception {
        for (java.lang.annotation.Annotation ann : annotations) {
            if (!ann.annotationType().getName().equals(annotationName)) continue;
            String p = annotationStringValue(ann, "value");
            return p == null ? "" : p;
        }
        return "";
    }

    static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception {
        try {
            Object value = ann.annotationType().getMethod(name).invoke(ann);
            if (value instanceof String[]) {
                String[] arr = (String[]) value;
                return arr.length == 0 ? "" : arr[0];
            }
            if (value instanceof String) {
                return (String) value;
            }
        } catch (NoSuchMethodException ignored) {
        }
        return "";
    }

    static Object[] routeArgs(Method m, String payload) {
        Class<?>[] params = m.getParameterTypes();
        Object[] args = new Object[params.length];
        for (int i = 0; i < params.length; i++) {
            args[i] = argFor(params[i], payload);
        }
        return args;
    }

    static Object argFor(Class<?> p, String payload) {
        if (p.equals(String.class)) return payload;
        if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE;
        if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0);
        if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0);
        if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0);
        if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L);
        if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f);
        if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d);
        if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0');
        return null;
    }

    static String joinPath(String a, String b) {
        String left = a == null || a.isEmpty() ? "" : a;
        String right = b == null || b.isEmpty() ? "" : b;
        if (left.isEmpty() && right.isEmpty()) return "/";
        String joined = (left + "/" + right).replaceAll("/+", "/");
        if (!joined.startsWith("/")) joined = "/" + joined;
        if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1);
        return joined;
    }
"#;
@ -4148,7 +4357,7 @@ mod tests {
}
#[test]
fn spring_shape_emits_reflective_invocation() {
fn spring_shape_emits_mockmvc_invocation() {
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
let src = generate_harness_java(&spec, JavaShape::SpringController, "Vuln");
assert!(src.contains("invokeSpringController(Vuln.class, \"run\""));
@ -4156,17 +4365,23 @@ mod tests {
}
#[test]
fn quarkus_shape_emits_reflective_invocation() {
fn quarkus_shape_emits_route_replay_invocation() {
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
let src = generate_harness_java(&spec, JavaShape::QuarkusRoute, "Vuln");
assert!(src.contains("invokeReflective(Vuln.class, \"run\""));
assert!(src.contains("NYX_QUARKUS_ROUTE_REPLAY=1"));
assert!(src.contains("invokeJakartaRestRoute(Vuln.class, \"run\""));
assert!(src.contains("__NYX_ROUTE_REPLAY__:jakarta:"));
assert!(!src.contains("invokeReflective(Vuln.class, \"run\""));
}
#[test]
fn micronaut_shape_emits_reflective_invocation() {
fn micronaut_shape_emits_route_replay_invocation() {
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
let src = generate_harness_java(&spec, JavaShape::MicronautRoute, "Vuln");
assert!(src.contains("invokeReflective(Vuln.class, \"run\""));
assert!(src.contains("NYX_MICRONAUT_ROUTE_REPLAY=1"));
assert!(src.contains("invokeMicronautRoute(Vuln.class, \"run\""));
assert!(src.contains("__NYX_ROUTE_REPLAY__:micronaut:"));
assert!(!src.contains("invokeReflective(Vuln.class, \"run\""));
}
#[test]

View file

@ -52,11 +52,6 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
LabelRule {
matchers: &["printf", "fprintf"],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
},
LabelRule {
matchers: &["fopen", "open"],
label: DataLabel::Sink(Cap::FILE_IO),
@ -107,18 +102,109 @@ pub static RULES: &[LabelRule] = &[
/// `cfg::mod::classify_gated_sink` for `lang == "c"`. Header-parsing
/// libraries (e.g. libmicrohttpd, mongoose) lack a stable surface and are
/// left to project-specific config.
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
}];
pub static GATED_SINKS: &[SinkGate] = &[
SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// Format-string sinks: only the format parameter is dangerous. Tainted
// data arguments paired with a literal format string are not format-string
// vulnerabilities.
SinkGate {
callee_matcher: "printf",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "fprintf",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
// `execv*` forms pass argv as arg 1. The executable path at arg 0 is not
// shell-parsed, so narrow SHELL_ESCAPE/argv-injection checks to the vector.
SinkGate {
callee_matcher: "execv",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execve",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvp",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvpe",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {
// control-flow

View file

@ -74,11 +74,6 @@ pub static RULES: &[LabelRule] = &[
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
},
LabelRule {
matchers: &["printf", "fprintf"],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
},
LabelRule {
matchers: &["fopen", "open"],
label: DataLabel::Sink(Cap::FILE_IO),
@ -118,18 +113,107 @@ pub static RULES: &[LabelRule] = &[
/// HTTP wrappers (cpr, Boost.Beast) layer over libcurl or directly over the
/// socket; their ergonomic surfaces differ enough that adding gates per-
/// library is left for a follow-up driven by the corpus.
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
}];
pub static GATED_SINKS: &[SinkGate] = &[
SinkGate {
callee_matcher: "curl_easy_setopt",
arg_index: 1,
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::DATA_EXFIL),
case_sensitive: true,
payload_args: &[2],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::ValueMatch,
},
// Format-string sinks: only the format parameter is dangerous. Tainted
// data arguments paired with a literal format string are not format-string
// vulnerabilities.
SinkGate {
callee_matcher: "printf",
arg_index: 0,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[0],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "fprintf",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::FMT_STRING),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execv",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execve",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvp",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
SinkGate {
callee_matcher: "execvpe",
arg_index: 1,
dangerous_values: &[],
dangerous_prefixes: &[],
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
case_sensitive: false,
payload_args: &[1],
keyword_name: None,
dangerous_kwargs: &[],
activation: GateActivation::Destination {
object_destination_fields: &[],
},
},
];
pub static KINDS: Map<&'static str, Kind> = phf_map! {
// control-flow

View file

@ -861,6 +861,10 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
// User input patterns
if cl.contains("argv")
|| cl.contains("stdin")
|| cl.contains("fgets")
|| cl.contains("scanf")
|| cl.contains("gets")
|| cl.contains("recv")
|| cl.contains("request")
|| cl.contains("form")
|| cl.contains("query")

View file

@ -247,6 +247,12 @@ fn classify_cpp(method: &str) -> Option<ContainerOp> {
"front" | "back" | "pop_back" | "pop_front" | "top" | "find" | "count" | "data" => load(),
// Indexed reads: `vector::at(i)`, `unordered_map::at(k)`.
"at" => load_indexed(0),
// Synthetic callees emitted by CFG lowering for subscript
// reads/writes. C arrays and C++ raw arrays use the same
// `subscript_expression` shape as JS/TS, so route them through
// the same indexed container abstraction.
"__index_get__" => load_indexed(0),
"__index_set__" => store_indexed(1, 0),
_ => None,
}
}
@ -456,11 +462,18 @@ mod tests {
}
/// W5: synthetic `__index_get__` is recognised as an indexed load
/// in JS/TS, Python, and Go, driving the index_arg=0 path so a
/// in JS/TS, Python, Go, C, and C++, driving the index_arg=0 path so a
/// constant-key subscript read flows through `HeapSlot::Index(n)`.
#[test]
fn synth_index_get_classified_as_indexed_load_js_py_go() {
for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] {
fn synth_index_get_classified_as_indexed_load_for_subscript_languages() {
for lang in [
Lang::JavaScript,
Lang::TypeScript,
Lang::Python,
Lang::Go,
Lang::C,
Lang::Cpp,
] {
match classify_container_op("__index_get__", lang) {
Some(ContainerOp::Load { index_arg }) => {
assert_eq!(index_arg, Some(0), "{lang:?} should mark idx arg=0");
@ -471,10 +484,17 @@ mod tests {
}
/// W5: synthetic `__index_set__` is recognised as an indexed store
/// in JS/TS, Python, and Go, value at arg 1, index at arg 0.
/// in JS/TS, Python, Go, C, and C++, value at arg 1, index at arg 0.
#[test]
fn synth_index_set_classified_as_indexed_store_js_py_go() {
for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] {
fn synth_index_set_classified_as_indexed_store_for_subscript_languages() {
for lang in [
Lang::JavaScript,
Lang::TypeScript,
Lang::Python,
Lang::Go,
Lang::C,
Lang::Cpp,
] {
match classify_container_op("__index_set__", lang) {
Some(ContainerOp::Store {
value_args,

View file

@ -2458,6 +2458,7 @@ fn rerun_extraction_with_augmented_summaries(
Some(&augmented_snapshot),
formal_destructured,
param_types_ref,
Some(&callee.opt.alias_result),
);
// OR-merge sink-only fields into the existing summary.

View file

@ -87,6 +87,10 @@ const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r"
/// Returns `false` if the needle is a non-metachar literal or cannot be
/// extracted, falls through to broader classification.
fn is_shell_metachar_rejection(text: &str) -> bool {
if is_dash_prefix_rejection(text) {
return true;
}
// Method-call form: `.contains(…)` / `.includes(…)` / `.include?(…)`
for method in [".contains(", ".includes(", ".include?("] {
if let Some(idx) = text.find(method) {
@ -111,6 +115,18 @@ fn is_shell_metachar_rejection(text: &str) -> bool {
false
}
/// Detect the C/C++ argv-injection guard used before exec-family calls:
/// `host[0] == '-'` means the true branch rejects an argv element that would
/// be interpreted as an option by ssh/git/etc., while the false branch is
/// safe for shell/argv execution.
///
/// Both operand orders are accepted (`host[0] == '-'` and `'-' == host[0]`),
/// with either a char or a string literal for the dash. In the reversed
/// form the compared operand must itself end in `[0]`; a plain comparison
/// such as `'-' == sep` is not a first-character guard and must not be
/// credited as one.
///
/// Whitespace is ignored. Returns `true` only when such a guard is found.
fn is_dash_prefix_rejection(text: &str) -> bool {
    let compact: String = text.chars().filter(|c| !c.is_whitespace()).collect();
    ["'-'", "\"-\""].iter().any(|lit| {
        // Indexed operand on the left: `host[0] == '-'`.
        if compact.contains(&format!("[0]=={lit}")) {
            return true;
        }
        // Literal on the left: the operand that follows `==` (up to the next
        // operator or closing delimiter) has to be a `[0]` subscript.
        let reversed = format!("{lit}==");
        compact.match_indices(reversed.as_str()).any(|(at, _)| {
            let rhs = &compact[at + reversed.len()..];
            let end = rhs
                .find(|c: char| matches!(c, '&' | '|' | ')' | '?' | ':' | ';' | ','))
                .unwrap_or(rhs.len());
            rhs[..end].ends_with("[0]")
        })
    })
}
/// Extract the first string literal argument from a slice starting just after
/// an opening `(` in a call expression. Returns the raw inner text of the
/// literal (without surrounding quotes).
@ -698,7 +714,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|| lower.contains(".has(")
|| lower.contains("in_array(")
|| lower.contains(" in ")
|| (lower.contains('[') && !lower.contains('('))
|| is_index_membership_check(text)
{
return PredicateKind::AllowlistCheck;
}
@ -1256,6 +1272,40 @@ fn extract_allowlist_target(text: &str) -> Option<String> {
None
}
/// Recognise a bare map-membership subscript such as `allowed[cmd]`.
///
/// The expression may be wrapped in balanced outer parentheses and may carry
/// a single leading `!`. After that it must be `<identifier>[<identifier>]`,
/// optionally followed by an equality or inequality comparison. Anything
/// containing a call, or a computed index such as `buf[len - 1] == '\n'`,
/// is ordinary array indexing and is rejected.
fn is_index_membership_check(text: &str) -> bool {
    // Peel matching outer parentheses: `((x[y]))` -> `x[y]`.
    let mut expr = text.trim();
    loop {
        match expr.strip_prefix('(').and_then(|rest| rest.strip_suffix(')')) {
            Some(unwrapped) => expr = unwrapped.trim(),
            None => break,
        }
    }
    // Drop one negation, then refuse anything that still contains a paren.
    let expr = expr.strip_prefix('!').unwrap_or(expr).trim();
    if expr.contains('(') {
        return false;
    }

    // Split around the first `[` and the first `]` that follows it.
    let Some((base, rest)) = expr.split_once('[') else {
        return false;
    };
    let Some((index, tail)) = rest.split_once(']') else {
        return false;
    };
    let tail = tail.trim();

    // `==` / `!=` prefixes also cover the `===` / `!==` spellings.
    is_identifier(base.trim())
        && is_identifier(index.trim())
        && (tail.is_empty() || tail.starts_with("==") || tail.starts_with("!="))
}
/// Extract the target variable from a type-check guard.
///
/// Handles:
@ -1699,6 +1749,14 @@ mod tests {
classify_condition("allowed[cmd]"),
PredicateKind::AllowlistCheck
);
assert_eq!(
classify_condition("!allowed[cmd]"),
PredicateKind::AllowlistCheck
);
assert_eq!(
classify_condition("(!allowed[cmd])"),
PredicateKind::AllowlistCheck
);
}
#[test]
@ -1825,6 +1883,10 @@ mod tests {
let (kind, target) = classify_condition_with_target("allowed[cmd]");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
let (kind, target) = classify_condition_with_target("!allowed[cmd]");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
// ── TypeCheck target extraction ───────────────────────────────────
@ -1988,6 +2050,18 @@ mod tests {
);
}
/// A leading-dash guard is classified as shell-metachar validation in
/// either operand order.
#[test]
fn classify_dash_prefix_rejection_for_argv_injection() {
    let indexed_first = classify_condition("ssh_host[0] == '-'");
    assert_eq!(indexed_first, PredicateKind::ShellMetaValidated);

    let literal_first = classify_condition("\"-\" == argv0[0]");
    assert_eq!(literal_first, PredicateKind::ShellMetaValidated);
}
#[test]
fn classify_non_metachar_contains_stays_allowlist() {
// `x.contains("foo")` must NOT be credited as a shell-metachar
@ -2020,6 +2094,14 @@ mod tests {
);
}
/// A computed-index character comparison is a plain comparison, not an
/// allowlist membership check.
#[test]
fn classify_indexed_char_comparison_as_comparison() {
    let condition = "len && url_buf[len - 1] == '\\n'";
    let kind = classify_condition(condition);
    assert_eq!(kind, PredicateKind::Comparison);
}
#[test]
fn target_shell_metachar_receiver() {
let (kind, target) = classify_condition_with_target("input.contains(\";\")");

View file

@ -1189,7 +1189,7 @@ fn compute_succ_states(
(*false_blk, exit_state.clone()),
];
};
if cond_info.kind == crate::cfg::StmtKind::If && !cond_info.condition_vars.is_empty() {
if cond_info.condition_text.is_some() && !cond_info.condition_vars.is_empty() {
let cond_text = cond_info.condition_text.as_deref().unwrap_or("");
let (kind, target_var) = classify_condition_with_target(cond_text);
@ -1238,6 +1238,7 @@ fn compute_succ_states(
true_polarity,
transfer.interner,
ssa,
transfer.base_aliases,
);
// Apply validation/predicate to false branch
apply_branch_predicates(
@ -1247,6 +1248,7 @@ fn compute_succ_states(
false_polarity,
transfer.interner,
ssa,
transfer.base_aliases,
);
// PathFact branch narrowing, language-agnostic. The
@ -1478,6 +1480,7 @@ fn apply_branch_predicates(
polarity: bool,
interner: &SymbolInterner,
ssa: &SsaBody,
base_aliases: Option<&crate::ssa::alias::BaseAliasResult>,
) {
// Validation-like predicates: mark condition vars as validated when polarity is true
if matches!(
@ -1584,17 +1587,25 @@ fn apply_branch_predicates(
if kind == PredicateKind::ShellMetaValidated && !polarity {
for var in condition_vars {
let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new();
for (val, _) in state.values.iter() {
if let Some(name) = ssa
.value_defs
.get(val.0 as usize)
.and_then(|vd| vd.var_name.as_deref())
{
if name == var {
to_clear.push(*val);
let mut names: SmallVec<[&str; 4]> = smallvec::smallvec![var.as_str()];
if let Some(aliases) = base_aliases.and_then(|aliases| aliases.aliases_of(var)) {
for alias in aliases {
if alias != var {
names.push(alias.as_str());
}
}
}
for &name_to_clear in names.iter() {
for (idx, def) in ssa.value_defs.iter().enumerate() {
if def.var_name.as_deref() == Some(name_to_clear) {
let val = SsaValue(idx as u32);
to_clear.push(val);
collect_copy_alias_operands(val, ssa, &mut to_clear);
}
}
}
to_clear.sort_by_key(|v| v.0);
to_clear.dedup_by_key(|v| v.0);
for val in to_clear {
if let Some(taint) = state.get(val).cloned() {
let new_caps = taint.caps & !Cap::SHELL_ESCAPE;
@ -1639,6 +1650,33 @@ fn apply_branch_predicates(
}
}
/// Appends to `out` the SSA values that `root` is transitively copied from.
///
/// Starting at `root`, the defining instruction of each visited value is
/// looked up with `find_inst_for_value`. Two instruction shapes are followed:
/// an `SsaOp::Assign` with exactly one use, and an `SsaOp::Phi` (every
/// operand). Each value reached this way is pushed onto `out` and later
/// expanded itself. The same value can be pushed more than once when several
/// paths lead to it, so callers that need a set must de-duplicate.
fn collect_copy_alias_operands(root: SsaValue, ssa: &SsaBody, out: &mut SmallVec<[SsaValue; 4]>) {
    let mut expanded = HashSet::new();
    let mut pending = vec![root];
    while let Some(current) = pending.pop() {
        // `insert` returns false for a value that was already expanded;
        // skipping it lets cyclic phi chains terminate.
        if !expanded.insert(current) {
            continue;
        }
        let Some(inst) = find_inst_for_value(current, ssa) else {
            continue;
        };
        // Gather the values this instruction copies from, in operand order.
        let sources: Vec<SsaValue> = match &inst.op {
            SsaOp::Phi(operands) => operands.iter().map(|&(_, incoming)| incoming).collect(),
            SsaOp::Assign(uses) if uses.len() == 1 => vec![uses[0]],
            _ => continue,
        };
        out.extend(sources.iter().copied());
        pending.extend(sources);
    }
}
/// Mark the input arguments of a value-producing validator as validated
/// on the success branch of a downstream `err`-check.
///
@ -3982,6 +4020,11 @@ pub(super) fn transfer_inst(
receiver,
..
} => {
if is_noreturn_call(transfer.lang, callee) {
*state = SsaTaintState::bot();
return;
}
// Excluded callees (e.g. router.get, app.post) should not propagate
// taint through their return value; they are framework scaffolding,
// not data-flow operations.
@ -7659,7 +7702,7 @@ fn collect_block_events(
}
// Collect tainted SSA values that flow into this sink
let tainted = collect_tainted_sink_values(
let mut tainted = collect_tainted_sink_values(
inst,
info,
&state,
@ -7670,6 +7713,7 @@ fn collect_block_events(
positions_override,
destination_override,
);
refine_exec_argv_array_shell_taint(inst, transfer.lang, &state, ssa, &mut tainted);
if tainted.is_empty() {
continue;
}
@ -7722,6 +7766,117 @@ fn collect_block_events(
}
}
/// Narrows the shell-escape taint reported for the `argv` array of a C/C++
/// exec-family call.
///
/// Does nothing unless `lang` is C or C++, `inst` is a call whose bare method
/// name is `execv`, `execve`, `execvp` or `execvpe`, and the call has a
/// non-empty second argument (index 1, the `argv` array).
///
/// For every entry of `tainted` whose value is one of the `argv` argument
/// values, the `Cap::SHELL_ESCAPE` bit is replaced by whatever
/// `exec_argv_non_executable_shell_taint` computes for that array. When the
/// recomputed caps still contain `Cap::SHELL_ESCAPE`, the entry's origins are
/// replaced as well. Entries for which that helper returns `None` are left
/// untouched.
///
/// Finally, every entry of `tainted` that no longer carries
/// `Cap::SHELL_ESCAPE` is removed.
fn refine_exec_argv_array_shell_taint(
    inst: &SsaInst,
    lang: Lang,
    state: &SsaTaintState,
    ssa: &SsaBody,
    tainted: &mut Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>,
) {
    let (callee, args) = match &inst.op {
        SsaOp::Call { callee, args, .. } if matches!(lang, Lang::C | Lang::Cpp) => (callee, args),
        _ => return,
    };
    let is_exec_family = matches!(
        crate::labels::bare_method_name(callee),
        "execv" | "execve" | "execvp" | "execvpe"
    );
    if !is_exec_family {
        return;
    }
    // Argument index 1 of the exec family is the argv array.
    let argv_values = match args.get(1) {
        Some(values) if !values.is_empty() => values,
        _ => return,
    };

    for entry in tainted.iter_mut() {
        let (value, caps, origins) = entry;
        if !argv_values.contains(value) {
            continue;
        }
        if let Some((argv_caps, argv_origins)) =
            exec_argv_non_executable_shell_taint(*value, inst.value, state, ssa)
        {
            // Swap the coarse array-level bit for the per-element result.
            *caps = (*caps & !Cap::SHELL_ESCAPE) | argv_caps;
            if argv_caps.contains(Cap::SHELL_ESCAPE) {
                *origins = argv_origins;
            }
        }
    }

    tainted.retain(|entry| entry.1.contains(Cap::SHELL_ESCAPE));
}
/// Recomputes shell-escape taint for an exec-style `argv` array from the
/// element stores made into it.
///
/// Scans every phi and body instruction of `ssa` for `__index_set__` calls
/// whose receiver is `argv` and whose SSA value number is below that of
/// `sink_value`. Returns `None` when no such store exists.
///
/// Otherwise the stores are ordered by SSA value number and the first one is
/// skipped. NOTE(review): the skipped store is presumably the `argv[0]`
/// executable slot; this relies on the lowest-numbered store being index 0,
/// so confirm against the lowering.
///
/// For each remaining store, every stored value (call argument 1) that
/// carries `Cap::SHELL_ESCAPE` in `state` contributes its origins, except
/// origins whose source kind is `SourceKind::EnvironmentConfig`.
///
/// The returned caps are `Cap::SHELL_ESCAPE` when at least one origin
/// survived and empty otherwise, paired with the surviving origins.
fn exec_argv_non_executable_shell_taint(
    argv: SsaValue,
    sink_value: SsaValue,
    state: &SsaTaintState,
    ssa: &SsaBody,
) -> Option<(Cap, SmallVec<[TaintOrigin; 2]>)> {
    let mut element_stores: Vec<(u32, SmallVec<[SsaValue; 2]>)> = ssa
        .blocks
        .iter()
        .flat_map(|block| block.phis.iter().chain(block.body.iter()))
        .filter(|inst| inst.value.0 < sink_value.0)
        .filter_map(|inst| match &inst.op {
            SsaOp::Call {
                callee,
                args,
                receiver: Some(receiver),
                ..
            } if callee == "__index_set__" && *receiver == argv => {
                Some((inst.value.0, args.get(1).cloned().unwrap_or_default()))
            }
            _ => None,
        })
        .collect();
    if element_stores.is_empty() {
        return None;
    }
    element_stores.sort_by_key(|store| store.0);

    let mut caps = Cap::empty();
    let mut origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
    let stored_values = element_stores
        .into_iter()
        .skip(1)
        .flat_map(|(_, values)| values);
    for stored in stored_values {
        let Some(taint) = state.get(stored) else {
            continue;
        };
        if !taint.caps.contains(Cap::SHELL_ESCAPE) {
            continue;
        }
        let mut kept_any = false;
        let surviving = taint
            .origins
            .iter()
            .filter(|origin| origin.source_kind != SourceKind::EnvironmentConfig);
        for origin in surviving {
            kept_any = true;
            push_origin_bounded(&mut origins, *origin);
        }
        // Only values with at least one non-environment origin count.
        if kept_any {
            caps |= Cap::SHELL_ESCAPE;
        }
    }
    Some((caps, origins))
}
/// Reports whether `callee` names a C/C++ function that never returns to its
/// caller.
///
/// Always `false` for languages other than C and C++. Otherwise the bare
/// method name of `callee` (as produced by `crate::labels::bare_method_name`)
/// is matched against the process-terminating functions `exit`, `_Exit`,
/// `_exit`, `quick_exit` and `abort`.
///
/// `_exit` is the POSIX counterpart of C's `_Exit` and likewise never
/// returns, so a guard that ends in `_exit(...)` is treated the same as one
/// that ends in `exit(...)`.
fn is_noreturn_call(lang: Lang, callee: &str) -> bool {
    if !matches!(lang, Lang::C | Lang::Cpp) {
        return false;
    }
    matches!(
        crate::labels::bare_method_name(callee),
        "exit" | "_Exit" | "_exit" | "quick_exit" | "abort"
    )
}
// ── Primary sink-site attribution ───────────────────────────────────────
/// Decide whether a [`SinkSite`] should be promoted into a caller-side
@ -8293,7 +8448,6 @@ fn try_container_propagation(
}
}
}
if val_caps.is_empty() {
return true; // Container op handled, but no taint to propagate
}

View file

@ -69,6 +69,7 @@ pub fn extract_ssa_func_summary(
None,
formal_destructured_fields,
param_types,
None,
)
}
@ -121,6 +122,7 @@ pub fn extract_ssa_func_summary_full(
// SQL_QUERY caps were invisible to the param-1 probe). `None` for
// legacy / test paths preserves prior behaviour.
param_types: Option<&[Option<TypeKind>]>,
base_aliases: Option<&crate::ssa::alias::BaseAliasResult>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
// Pre-compute type facts on the un-optimised SSA body so the per-param
// probe can resolve sinks that depend on receiver-type inference.
@ -135,6 +137,8 @@ pub fn extract_ssa_func_summary_full(
analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt)
});
let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref();
let probe_const_values = crate::ssa::const_prop::const_propagate(ssa).values;
let probe_points_to = crate::ssa::heap::analyze_points_to(ssa, cfg, Some(lang));
use crate::summary::SinkSite;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
@ -232,6 +236,7 @@ pub fn extract_ssa_func_summary_full(
Vec<ReturnBlockObs>,
) {
let seed_ref = if seed.is_empty() { None } else { Some(&seed) };
let dynamic_pts = std::cell::RefCell::new(std::collections::HashMap::new());
let transfer = SsaTaintTransfer {
lang,
namespace,
@ -244,19 +249,19 @@ pub fn extract_ssa_func_summary_full(
global_seed: seed_ref,
param_seed: None,
receiver_seed: None,
const_values: None,
const_values: Some(&probe_const_values),
type_facts: local_type_facts_ref,
xml_parser_config: None,
xpath_config: None,
ssa_summaries,
extra_labels: None,
base_aliases: None,
base_aliases,
callee_bodies: None,
inline_cache: None,
context_depth: 0,
callback_bindings: None,
points_to: None,
dynamic_pts: None,
points_to: Some(&probe_points_to),
dynamic_pts: Some(&dynamic_pts),
import_bindings: None,
promisify_aliases: None,
module_aliases,
@ -824,7 +829,7 @@ pub fn extract_ssa_func_summary_full(
xpath_config: None,
ssa_summaries,
extra_labels: None,
base_aliases: None,
base_aliases,
callee_bodies: None,
inline_cache: None,
context_depth: 0,

View file

@ -1578,6 +1578,101 @@ fn c_source_to_sink() {
);
}
/// `fgets` is called inside an `if` condition and fills `url_buf` from
/// `stdin`; the buffer is then stored into `args[1]` and the array is handed
/// to `execvp`. The analysis must report a finding sourced from user input.
#[test]
fn c_fgets_condition_to_execvp_argv_fires() {
    use crate::labels::SourceKind;

    let src = br#"#include <stdio.h>
#include <unistd.h>
int main(void) {
char url_buf[256];
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
const char *args[3];
args[0] = "ssh";
args[1] = url_buf;
args[2] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let file_cfg = parse_lang(
        src,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    let user_input_reported = findings
        .iter()
        .any(|finding| finding.source_kind == SourceKind::UserInput);
    assert!(
        user_input_reported,
        "C: fgets stdin should reach execvp argv, got {findings:#?}"
    );
}
/// The only tainted value is the executable path in `args[0]`, read from the
/// `GIT_SSH` environment variable. No finding may be reported for the
/// `execvp` call.
#[test]
fn c_execvp_ignores_env_config_executable_path() {
    let src = br#"#include <stdlib.h>
#include <unistd.h>
int main(void) {
const char *ssh = getenv("GIT_SSH");
const char *args[2];
args[0] = ssh;
args[1] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", lang);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    // Print the unexpected findings on failure, as the sibling tests do.
    assert!(
        findings.is_empty(),
        "C: env-config executable path should not be treated as argv injection, got {findings:#?}"
    );
}
/// `ssh_host` aliases the stdin-filled `url_buf`, and the function returns
/// early when `ssh_host[0] == '-'`. With that guard in place, storing
/// `ssh_host` into `args[1]` and calling `execvp` must produce no findings.
#[test]
fn c_dash_prefix_guard_suppresses_execvp_argv_injection() {
    let src = br#"#include <stdio.h>
#include <unistd.h>
int main(void) {
char url_buf[256];
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
char *ssh_host = url_buf;
if (ssh_host[0] == '-') return 1;
const char *args[3];
args[0] = "ssh";
args[1] = ssh_host;
args[2] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let file_cfg = parse_lang(
        src,
        "c",
        tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    let nothing_reported = findings.is_empty();
    assert!(
        nothing_reported,
        "C: dash-prefix rejection should clear argv-injection taint, got {findings:#?}"
    );
}
#[test]
fn cpp_source_to_sink() {
let src = b"void main() {\n char* x = getenv(\"SECRET\");\n system(x);\n}\n";
@ -4548,6 +4643,248 @@ fn ssa_summary_param_to_sink() {
}
}
/// Lowers `do_ssh_connect` to SSA, extracts its function summary, and checks
/// that parameter 0 (`url`) is recorded as reaching a sink with
/// `Cap::SHELL_ESCAPE`. In the C source, `url` is copied to `ssh_host`, which
/// is stored into the `args` array passed to `execvp`.
#[test]
fn c_summary_param_to_execvp_argv_sink() {
    use crate::state::symbol::SymbolInterner;

    let src = br#"#include <unistd.h>
int do_ssh_connect(char *url) {
const char *ssh;
char *ssh_host = url;
const char *port = 0;
get_host_and_port_min(&ssh_host, &port);
if (!port) port = "22";
ssh = getenv("GIT_SSH");
if (!ssh) ssh = "ssh";
const char *args[8];
int nargs = 0;
args[nargs++] = ssh;
if (port) {
args[nargs++] = "-p";
args[nargs++] = port;
}
args[nargs++] = ssh_host;
args[nargs++] = "git-upload-pack";
args[nargs++] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);

    // Locate the one function under test; anything else in the file is ignored.
    let body = (&file_cfg.bodies)
        .into_iter()
        .find(|candidate| candidate.meta.name.as_deref() == Some("do_ssh_connect"))
        .expect("do_ssh_connect function not found");

    let interner = SymbolInterner::from_cfg(&body.graph);
    let ssa = crate::ssa::lower_to_ssa_with_params(
        &body.graph,
        body.entry,
        Some("do_ssh_connect"),
        false,
        &body.meta.params,
    )
    .expect("C function should lower to SSA");
    let summary = ssa_transfer::extract_ssa_func_summary(
        &ssa,
        &body.graph,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &interner,
        body.meta.params.len(),
        None,
        None,
        None,
        None,
        None,
    );

    let url_reaches_shell_sink = summary
        .param_to_sink_caps()
        .iter()
        .any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE));
    assert!(
        url_reaches_shell_sink,
        "C summary should record url param reaching execvp argv, got {:?}",
        summary.param_to_sink_caps()
    );
}
/// Same shape as the unguarded summary test, but the C source calls
/// `exit(1)` when `ssh_host[0] == '-'`. The extracted summary must not record
/// parameter 0 (`url`) as reaching a sink with `Cap::SHELL_ESCAPE`.
#[test]
fn c_summary_dash_prefix_guard_suppresses_execvp_argv_sink() {
    use crate::state::symbol::SymbolInterner;

    let src = br#"#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
int do_ssh_connect(char *url) {
const char *ssh;
char *ssh_host = url;
const char *port = 0;
if (!port) port = "22";
if (ssh_host[0] == '-') {
fprintf(stderr, "strange hostname '%s' blocked\n", ssh_host);
exit(1);
}
ssh = getenv("GIT_SSH");
if (!ssh) ssh = "ssh";
const char *args[8];
int nargs = 0;
args[nargs++] = ssh;
if (port) {
args[nargs++] = "-p";
args[nargs++] = port;
}
args[nargs++] = ssh_host;
args[nargs++] = "git-upload-pack";
args[nargs++] = 0;
return execvp(args[0], (char *const *)args);
}
"#;
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);

    // Locate the one function under test; anything else in the file is ignored.
    let body = (&file_cfg.bodies)
        .into_iter()
        .find(|candidate| candidate.meta.name.as_deref() == Some("do_ssh_connect"))
        .expect("do_ssh_connect function not found");

    let interner = SymbolInterner::from_cfg(&body.graph);
    let ssa = crate::ssa::lower_to_ssa_with_params(
        &body.graph,
        body.entry,
        Some("do_ssh_connect"),
        false,
        &body.meta.params,
    )
    .expect("C function should lower to SSA");
    let param_count = body.meta.params.len();
    let summary = ssa_transfer::extract_ssa_func_summary(
        &ssa,
        &body.graph,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &interner,
        param_count,
        None,
        None,
        None,
        None,
        None,
    );

    let url_reaches_shell_sink = summary
        .param_to_sink_caps()
        .iter()
        .any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE));
    assert!(
        !url_reaches_shell_sink,
        "dash-prefix guard should suppress argv-injection summary, got {:?}",
        summary.param_to_sink_caps()
    );
}
/// `main` fills `url_buf` from `stdin` via `fgets` and passes it to
/// `do_ssh_connect`, which stores its parameter into the `args` array handed
/// to `execvp`. Whole-file analysis must report a finding sourced from user
/// input.
#[test]
fn c_fgets_reaches_execvp_argv_through_summary() {
    use crate::labels::SourceKind;

    let src = br#"#include <stdio.h>
#include <unistd.h>
int do_ssh_connect(char *url) {
char *ssh_host = url;
const char *args[3];
args[0] = "ssh";
args[1] = ssh_host;
args[2] = 0;
return execvp(args[0], (char *const *)args);
}
int main(void) {
char url_buf[256];
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
return do_ssh_connect(url_buf);
}
"#;
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "test.c",
        &[],
        None,
    );
    let user_input_reported = findings
        .iter()
        .any(|finding| finding.source_kind == SourceKind::UserInput);
    assert!(
        user_input_reported,
        "C: fgets source should flow through do_ssh_connect summary, got {findings:#?}"
    );
}
/// Analyses the vulnerable CVE-2017-1000117 corpus fixture and requires at
/// least one finding whose source kind is `UserInput`.
#[test]
fn cve_2017_1000117_vulnerable_fixture_fires() {
    use crate::labels::SourceKind;

    let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/vulnerable.c");
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "vulnerable.c",
        &[],
        None,
    );
    let user_input_reported = findings
        .iter()
        .any(|finding| finding.source_kind == SourceKind::UserInput);
    assert!(
        user_input_reported,
        "CVE-2017-1000117 vulnerable fixture should fire, got {findings:#?}"
    );
}
/// Analyses the patched CVE-2017-1000117 corpus fixture and requires that no
/// finding has source kind `UserInput`. Findings of other source kinds are
/// tolerated.
#[test]
fn cve_2017_1000117_patched_fixture_suppresses_dash_guard() {
    use crate::labels::SourceKind;

    let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/patched.c");
    let c_language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let file_cfg = parse_lang(src, "c", c_language);
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::C,
        "patched.c",
        &[],
        None,
    );
    let user_input_reported = findings
        .iter()
        .any(|finding| finding.source_kind == SourceKind::UserInput);
    assert!(
        !user_input_reported,
        "CVE-2017-1000117 patched fixture should suppress argv injection, got {findings:#?}"
    );
}
#[test]
fn ssa_cross_function_taint_with_sanitizer_wrapper() {
// Cross-function: caller passes tainted data through sanitizer wrapper