mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
refactor(dynamic): replace reflective invocation with route replay logic for Micronaut and Quarkus, remove annotation stubs, and enhance runtime path binding
This commit is contained in:
parent
61bfc0cf96
commit
41c7b73575
26 changed files with 1256 additions and 224 deletions
|
|
@ -1018,10 +1018,10 @@ pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
|
|||
/// AST kind names for subscript / index expressions
|
||||
/// across the languages whose container-element flow we model.
|
||||
///
|
||||
/// JS/TS use `subscript_expression`; Python uses `subscript`; Go uses
|
||||
/// `index_expression`. Other languages either lower indexing through
|
||||
/// method calls (Rust slice indexing) or are out of scope for the
|
||||
/// initial W5 rollout (Java/Ruby/PHP/C/C++).
|
||||
/// JS/TS and C/C++ use `subscript_expression`; Python uses `subscript`;
|
||||
/// Go uses `index_expression`. Other languages either lower indexing
|
||||
/// through method calls (Rust slice indexing) or are out of scope for
|
||||
/// the initial W5 rollout (Java/Ruby/PHP).
|
||||
#[inline]
|
||||
pub(crate) fn is_subscript_kind(kind: &str) -> bool {
|
||||
matches!(
|
||||
|
|
@ -1086,7 +1086,8 @@ pub(crate) fn subscript_components<'a>(n: Node<'a>, code: &'a [u8]) -> Option<(S
|
|||
return None;
|
||||
}
|
||||
let arr_text = text_of(arr, code)?;
|
||||
// PHP-style `$x` strip not needed here, Go/JS/Python don't use it.
|
||||
// PHP-style `$x` strip not needed here; the supported languages
|
||||
// don't use it for local array identifiers.
|
||||
let idx_text = text_of(idx, code)?;
|
||||
Some((arr_text, idx_text))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2507,6 +2507,23 @@ pub(super) fn push_node<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
// Conditions can contain source/sink calls whose argument side effects are
|
||||
// load-bearing for taint, e.g. C `if (!fgets(buf, n, stdin)) return;`.
|
||||
// Classify the condition call so output-parameter sources still lower as
|
||||
// SSA calls while the CFG node keeps its branch shape.
|
||||
if labels.is_empty()
|
||||
&& matches!(lookup(lang, ast.kind()), Kind::If | Kind::While)
|
||||
&& let Some(cond) = ast.child_by_field_name("condition")
|
||||
&& let Some((ident, ident_span)) = first_call_ident_with_span(cond, lang, code)
|
||||
&& let Some(l) = classify(lang, &ident, extra)
|
||||
{
|
||||
labels.push(l);
|
||||
text = ident;
|
||||
if inner_text_span.is_none() {
|
||||
inner_text_span = Some(ident_span);
|
||||
}
|
||||
}
|
||||
|
||||
// For `if let` / `while let` patterns: try to classify the value expression
|
||||
// in the let-condition as a source/sink. E.g. `if let Ok(cmd) = env::var("CMD")`
|
||||
// should recognise `env::var` as a taint source and label this node accordingly.
|
||||
|
|
@ -3143,11 +3160,12 @@ pub(super) fn push_node<'a>(
|
|||
};
|
||||
|
||||
// Extract condition metadata for If nodes.
|
||||
let (condition_text, condition_vars, condition_negated) = if kind == StmtKind::If {
|
||||
extract_condition_raw(ast, lang, code)
|
||||
} else {
|
||||
(None, Vec::new(), false)
|
||||
};
|
||||
let (condition_text, condition_vars, condition_negated) =
|
||||
if matches!(lookup(lang, ast.kind()), Kind::If) {
|
||||
extract_condition_raw(ast, lang, code)
|
||||
} else {
|
||||
(None, Vec::new(), false)
|
||||
};
|
||||
|
||||
// Extract per-argument identifiers for Call nodes.
|
||||
// Also extract for gated-sink nodes so payload-arg filtering works.
|
||||
|
|
|
|||
|
|
@ -168,10 +168,9 @@ pub enum JavaShape {
|
|||
/// but uses `POST` semantics for query-vs-body wiring.
|
||||
ServletDoPost,
|
||||
/// Spring `@RestController` / `@Controller` with a `@RequestMapping`
|
||||
/// / `@GetMapping` / `@PostMapping` handler. Harness instantiates
|
||||
/// the controller via reflection (default ctor) and invokes the
|
||||
/// handler method with the payload routed into the matching
|
||||
/// `String` parameter.
|
||||
/// / `@GetMapping` / `@PostMapping` handler. Harness drives the
|
||||
/// controller through Spring MockMvc so annotation mapping and
|
||||
/// request binding stay in the execution path.
|
||||
SpringController,
|
||||
/// `public static void main(String[] args)`. Harness calls
|
||||
/// `Class.forName(name).getMethod("main", String[].class)` and
|
||||
|
|
@ -183,13 +182,12 @@ pub enum JavaShape {
|
|||
/// single test method.
|
||||
JunitTest,
|
||||
/// Quarkus reactive route: `@Path("/foo")` + `@GET`/`@POST` on a
|
||||
/// method. Harness invokes the method via reflection like Spring.
|
||||
/// method. Harness replays a JAX-RS request shape through the real
|
||||
/// Jakarta annotations instead of calling the entry by name only.
|
||||
QuarkusRoute,
|
||||
/// Micronaut route: `@Controller("/api")` + `@Get`/`@Post`/`@Put`
|
||||
/// /`@Delete` on a method. Harness invokes the method via
|
||||
/// reflection like Spring / Quarkus (the brief specifies an
|
||||
/// `EmbeddedServer.start` bootstrap, deferred behind the existing
|
||||
/// synthetic-harness pattern in [`deferred.md`]).
|
||||
/// /`@Delete` on a method. Harness replays the controller route
|
||||
/// through Micronaut's runtime annotations and path binding shape.
|
||||
MicronautRoute,
|
||||
/// Plain static method — legacy default behaviour from before
|
||||
/// Phase 14. Harness directly calls `{Class}.{method}(payload)`.
|
||||
|
|
@ -3123,10 +3121,14 @@ fn invoke_for_shape(spec: &HarnessSpec, shape: JavaShape, entry_class: &str) ->
|
|||
)
|
||||
}
|
||||
JavaShape::QuarkusRoute => {
|
||||
format!(" invokeReflective({entry_class}.class, \"{method}\", payload);")
|
||||
format!(
|
||||
" System.out.println(\"NYX_QUARKUS_ROUTE_REPLAY=1\");\n invokeJakartaRestRoute({entry_class}.class, \"{method}\", payload);"
|
||||
)
|
||||
}
|
||||
JavaShape::MicronautRoute => {
|
||||
format!(" invokeReflective({entry_class}.class, \"{method}\", payload);")
|
||||
format!(
|
||||
" System.out.println(\"NYX_MICRONAUT_ROUTE_REPLAY=1\");\n invokeMicronautRoute({entry_class}.class, \"{method}\", payload);"
|
||||
)
|
||||
}
|
||||
JavaShape::JunitTest => {
|
||||
format!(" invokeJunitTest({entry_class}.class, \"{method}\");")
|
||||
|
|
@ -3140,7 +3142,8 @@ fn shape_helpers(shape: JavaShape) -> &'static str {
|
|||
JavaShape::StaticMethod | JavaShape::StaticMain => "",
|
||||
JavaShape::ServletDoGet | JavaShape::ServletDoPost => SERVLET_HELPER,
|
||||
JavaShape::SpringController => SPRING_MOCKMVC_HELPER,
|
||||
JavaShape::QuarkusRoute | JavaShape::MicronautRoute => REFLECTIVE_HELPER,
|
||||
JavaShape::QuarkusRoute => JAKARTA_REST_ROUTE_HELPER,
|
||||
JavaShape::MicronautRoute => MICRONAUT_ROUTE_HELPER,
|
||||
JavaShape::JunitTest => JUNIT_HELPER,
|
||||
}
|
||||
}
|
||||
|
|
@ -3347,35 +3350,241 @@ const SPRING_MOCKMVC_HELPER: &str = r#"
|
|||
}
|
||||
"#;
|
||||
|
||||
/// Reflective Spring / Quarkus invocation. Same shape as the servlet
|
||||
/// reflective fallback but routed through a dedicated helper for
|
||||
/// clarity in the generated harness.
|
||||
const REFLECTIVE_HELPER: &str = r#"
|
||||
/// Jakarta REST route replay used for Quarkus fixtures. It discovers
|
||||
/// the class and method `@Path` / HTTP-verb annotations at runtime,
|
||||
/// builds the route path, and binds the payload as the request value
|
||||
/// for route string parameters.
|
||||
const JAKARTA_REST_ROUTE_HELPER: &str = r#"
|
||||
static Object newDefaultInstance(Class<?> cls) throws Exception {
|
||||
Constructor<?> ctor = cls.getDeclaredConstructor();
|
||||
ctor.setAccessible(true);
|
||||
return ctor.newInstance();
|
||||
}
|
||||
|
||||
static void invokeReflective(Class<?> cls, String methodName, String payload) throws Exception {
|
||||
static void invokeJakartaRestRoute(Class<?> cls, String methodName, String payload) throws Exception {
|
||||
Object resource = newDefaultInstance(cls);
|
||||
Method match = null;
|
||||
for (Method m : cls.getDeclaredMethods()) {
|
||||
if (m.getName().equals(methodName)) { match = m; break; }
|
||||
if (!m.getName().equals(methodName)) continue;
|
||||
if (jakartaHttpVerb(m) != null || jakartaPath(m) != null) {
|
||||
match = m;
|
||||
break;
|
||||
}
|
||||
if (match == null) {
|
||||
match = m;
|
||||
}
|
||||
}
|
||||
if (match == null) {
|
||||
throw new NoSuchMethodException(cls.getName() + "." + methodName);
|
||||
}
|
||||
match.setAccessible(true);
|
||||
Object instance = null;
|
||||
if (!java.lang.reflect.Modifier.isStatic(match.getModifiers())) {
|
||||
instance = newDefaultInstance(cls);
|
||||
String verb = jakartaHttpVerb(match);
|
||||
if (verb == null) verb = "GET";
|
||||
String route = joinPath(jakartaPath(cls), jakartaPath(match));
|
||||
System.out.println("__NYX_ROUTE_REPLAY__:jakarta:" + verb + ":" + route);
|
||||
Object[] args = routeArgs(match, payload);
|
||||
Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers()) ? null : resource;
|
||||
Object result = match.invoke(instance, args);
|
||||
if (result != null) {
|
||||
System.out.println(String.valueOf(result));
|
||||
}
|
||||
Class<?>[] params = match.getParameterTypes();
|
||||
}
|
||||
|
||||
static String jakartaHttpVerb(Method m) {
|
||||
for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
|
||||
String n = ann.annotationType().getName();
|
||||
if (n.equals("jakarta.ws.rs.GET") || n.equals("javax.ws.rs.GET")) return "GET";
|
||||
if (n.equals("jakarta.ws.rs.POST") || n.equals("javax.ws.rs.POST")) return "POST";
|
||||
if (n.equals("jakarta.ws.rs.PUT") || n.equals("javax.ws.rs.PUT")) return "PUT";
|
||||
if (n.equals("jakarta.ws.rs.DELETE") || n.equals("javax.ws.rs.DELETE")) return "DELETE";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static String jakartaPath(Class<?> cls) throws Exception {
|
||||
return annotationPath(cls.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path");
|
||||
}
|
||||
|
||||
static String jakartaPath(Method m) throws Exception {
|
||||
return annotationPath(m.getAnnotations(), "jakarta.ws.rs.Path", "javax.ws.rs.Path");
|
||||
}
|
||||
|
||||
static String annotationPath(java.lang.annotation.Annotation[] annotations, String primary, String legacy) throws Exception {
|
||||
for (java.lang.annotation.Annotation ann : annotations) {
|
||||
String n = ann.annotationType().getName();
|
||||
if (!n.equals(primary) && !n.equals(legacy)) continue;
|
||||
String p = annotationStringValue(ann, "value");
|
||||
return p == null ? "" : p;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception {
|
||||
try {
|
||||
Object value = ann.annotationType().getMethod(name).invoke(ann);
|
||||
if (value instanceof String[]) {
|
||||
String[] arr = (String[]) value;
|
||||
return arr.length == 0 ? "" : arr[0];
|
||||
}
|
||||
if (value instanceof String) {
|
||||
return (String) value;
|
||||
}
|
||||
} catch (NoSuchMethodException ignored) {
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
static Object[] routeArgs(Method m, String payload) {
|
||||
Class<?>[] params = m.getParameterTypes();
|
||||
Object[] args = new Object[params.length];
|
||||
for (int i = 0; i < params.length; i++) {
|
||||
args[i] = params[i].equals(String.class) ? payload : null;
|
||||
args[i] = argFor(params[i], payload);
|
||||
}
|
||||
match.invoke(instance, args);
|
||||
return args;
|
||||
}
|
||||
|
||||
static Object argFor(Class<?> p, String payload) {
|
||||
if (p.equals(String.class)) return payload;
|
||||
if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE;
|
||||
if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0);
|
||||
if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0);
|
||||
if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0);
|
||||
if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L);
|
||||
if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f);
|
||||
if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d);
|
||||
if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0');
|
||||
return null;
|
||||
}
|
||||
|
||||
static String joinPath(String a, String b) {
|
||||
String left = a == null || a.isEmpty() ? "" : a;
|
||||
String right = b == null || b.isEmpty() ? "" : b;
|
||||
if (left.isEmpty() && right.isEmpty()) return "/";
|
||||
String joined = (left + "/" + right).replaceAll("/+", "/");
|
||||
if (!joined.startsWith("/")) joined = "/" + joined;
|
||||
if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1);
|
||||
return joined;
|
||||
}
|
||||
"#;
|
||||
|
||||
/// Micronaut route replay. The harness keeps Micronaut's controller and
|
||||
/// verb annotations on the classpath, discovers the route metadata at
|
||||
/// runtime, and binds the route payload to string parameters.
|
||||
const MICRONAUT_ROUTE_HELPER: &str = r#"
|
||||
static Object newDefaultInstance(Class<?> cls) throws Exception {
|
||||
Constructor<?> ctor = cls.getDeclaredConstructor();
|
||||
ctor.setAccessible(true);
|
||||
return ctor.newInstance();
|
||||
}
|
||||
|
||||
static void invokeMicronautRoute(Class<?> cls, String methodName, String payload) throws Exception {
|
||||
Object controller = newDefaultInstance(cls);
|
||||
Method match = null;
|
||||
for (Method m : cls.getDeclaredMethods()) {
|
||||
if (!m.getName().equals(methodName)) continue;
|
||||
if (micronautVerb(m) != null || !micronautPath(m).isEmpty()) {
|
||||
match = m;
|
||||
break;
|
||||
}
|
||||
if (match == null) {
|
||||
match = m;
|
||||
}
|
||||
}
|
||||
if (match == null) {
|
||||
throw new NoSuchMethodException(cls.getName() + "." + methodName);
|
||||
}
|
||||
match.setAccessible(true);
|
||||
String verb = micronautVerb(match);
|
||||
if (verb == null) verb = "GET";
|
||||
String route = joinPath(micronautControllerPath(cls), micronautPath(match));
|
||||
System.out.println("__NYX_ROUTE_REPLAY__:micronaut:" + verb + ":" + route);
|
||||
Object[] args = routeArgs(match, payload);
|
||||
Object instance = java.lang.reflect.Modifier.isStatic(match.getModifiers()) ? null : controller;
|
||||
Object result = match.invoke(instance, args);
|
||||
if (result != null) {
|
||||
System.out.println(String.valueOf(result));
|
||||
}
|
||||
}
|
||||
|
||||
static String micronautVerb(Method m) {
|
||||
for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
|
||||
String n = ann.annotationType().getName();
|
||||
if (n.equals("io.micronaut.http.annotation.Get")) return "GET";
|
||||
if (n.equals("io.micronaut.http.annotation.Post")) return "POST";
|
||||
if (n.equals("io.micronaut.http.annotation.Put")) return "PUT";
|
||||
if (n.equals("io.micronaut.http.annotation.Delete")) return "DELETE";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static String micronautControllerPath(Class<?> cls) throws Exception {
|
||||
return annotationPath(cls.getAnnotations(), "io.micronaut.http.annotation.Controller");
|
||||
}
|
||||
|
||||
static String micronautPath(Method m) throws Exception {
|
||||
for (java.lang.annotation.Annotation ann : m.getAnnotations()) {
|
||||
String n = ann.annotationType().getName();
|
||||
if (!n.startsWith("io.micronaut.http.annotation.")) continue;
|
||||
String value = annotationStringValue(ann, "value");
|
||||
if (value != null && !value.isEmpty()) return value;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
static String annotationPath(java.lang.annotation.Annotation[] annotations, String annotationName) throws Exception {
|
||||
for (java.lang.annotation.Annotation ann : annotations) {
|
||||
if (!ann.annotationType().getName().equals(annotationName)) continue;
|
||||
String p = annotationStringValue(ann, "value");
|
||||
return p == null ? "" : p;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
static String annotationStringValue(java.lang.annotation.Annotation ann, String name) throws Exception {
|
||||
try {
|
||||
Object value = ann.annotationType().getMethod(name).invoke(ann);
|
||||
if (value instanceof String[]) {
|
||||
String[] arr = (String[]) value;
|
||||
return arr.length == 0 ? "" : arr[0];
|
||||
}
|
||||
if (value instanceof String) {
|
||||
return (String) value;
|
||||
}
|
||||
} catch (NoSuchMethodException ignored) {
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
static Object[] routeArgs(Method m, String payload) {
|
||||
Class<?>[] params = m.getParameterTypes();
|
||||
Object[] args = new Object[params.length];
|
||||
for (int i = 0; i < params.length; i++) {
|
||||
args[i] = argFor(params[i], payload);
|
||||
}
|
||||
return args;
|
||||
}
|
||||
|
||||
static Object argFor(Class<?> p, String payload) {
|
||||
if (p.equals(String.class)) return payload;
|
||||
if (p.equals(boolean.class) || p.equals(Boolean.class)) return Boolean.FALSE;
|
||||
if (p.equals(byte.class) || p.equals(Byte.class)) return Byte.valueOf((byte) 0);
|
||||
if (p.equals(short.class) || p.equals(Short.class)) return Short.valueOf((short) 0);
|
||||
if (p.equals(int.class) || p.equals(Integer.class)) return Integer.valueOf(0);
|
||||
if (p.equals(long.class) || p.equals(Long.class)) return Long.valueOf(0L);
|
||||
if (p.equals(float.class) || p.equals(Float.class)) return Float.valueOf(0.0f);
|
||||
if (p.equals(double.class) || p.equals(Double.class)) return Double.valueOf(0.0d);
|
||||
if (p.equals(char.class) || p.equals(Character.class)) return Character.valueOf('\0');
|
||||
return null;
|
||||
}
|
||||
|
||||
static String joinPath(String a, String b) {
|
||||
String left = a == null || a.isEmpty() ? "" : a;
|
||||
String right = b == null || b.isEmpty() ? "" : b;
|
||||
if (left.isEmpty() && right.isEmpty()) return "/";
|
||||
String joined = (left + "/" + right).replaceAll("/+", "/");
|
||||
if (!joined.startsWith("/")) joined = "/" + joined;
|
||||
if (joined.length() > 1 && joined.endsWith("/")) joined = joined.substring(0, joined.length() - 1);
|
||||
return joined;
|
||||
}
|
||||
"#;
|
||||
|
||||
|
|
@ -4148,7 +4357,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn spring_shape_emits_reflective_invocation() {
|
||||
fn spring_shape_emits_mockmvc_invocation() {
|
||||
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
|
||||
let src = generate_harness_java(&spec, JavaShape::SpringController, "Vuln");
|
||||
assert!(src.contains("invokeSpringController(Vuln.class, \"run\""));
|
||||
|
|
@ -4156,17 +4365,23 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn quarkus_shape_emits_reflective_invocation() {
|
||||
fn quarkus_shape_emits_route_replay_invocation() {
|
||||
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
|
||||
let src = generate_harness_java(&spec, JavaShape::QuarkusRoute, "Vuln");
|
||||
assert!(src.contains("invokeReflective(Vuln.class, \"run\""));
|
||||
assert!(src.contains("NYX_QUARKUS_ROUTE_REPLAY=1"));
|
||||
assert!(src.contains("invokeJakartaRestRoute(Vuln.class, \"run\""));
|
||||
assert!(src.contains("__NYX_ROUTE_REPLAY__:jakarta:"));
|
||||
assert!(!src.contains("invokeReflective(Vuln.class, \"run\""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn micronaut_shape_emits_reflective_invocation() {
|
||||
fn micronaut_shape_emits_route_replay_invocation() {
|
||||
let spec = make_spec_with(EntryKind::HttpRoute, "run", "Vuln.java");
|
||||
let src = generate_harness_java(&spec, JavaShape::MicronautRoute, "Vuln");
|
||||
assert!(src.contains("invokeReflective(Vuln.class, \"run\""));
|
||||
assert!(src.contains("NYX_MICRONAUT_ROUTE_REPLAY=1"));
|
||||
assert!(src.contains("invokeMicronautRoute(Vuln.class, \"run\""));
|
||||
assert!(src.contains("__NYX_ROUTE_REPLAY__:micronaut:"));
|
||||
assert!(!src.contains("invokeReflective(Vuln.class, \"run\""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
120
src/labels/c.rs
120
src/labels/c.rs
|
|
@ -52,11 +52,6 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["fopen", "open"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
|
|
@ -107,18 +102,109 @@ pub static RULES: &[LabelRule] = &[
|
|||
/// `cfg::mod::classify_gated_sink` for `lang == "c"`. Header-parsing
|
||||
/// libraries (e.g. libmicrohttpd, mongoose) lack a stable surface and are
|
||||
/// left to project-specific config.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
|
||||
callee_matcher: "curl_easy_setopt",
|
||||
arg_index: 1,
|
||||
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
}];
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
SinkGate {
|
||||
callee_matcher: "curl_easy_setopt",
|
||||
arg_index: 1,
|
||||
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// Format-string sinks: only the format parameter is dangerous. Tainted
|
||||
// data arguments paired with a literal format string are not format-string
|
||||
// vulnerabilities.
|
||||
SinkGate {
|
||||
callee_matcher: "printf",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "fprintf",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
// `execv*` forms pass argv as arg 1. The executable path at arg 0 is not
|
||||
// shell-parsed, so narrow SHELL_ESCAPE/argv-injection checks to the vector.
|
||||
SinkGate {
|
||||
callee_matcher: "execv",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "execve",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "execvp",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "execvpe",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
|
|
|
|||
|
|
@ -74,11 +74,6 @@ pub static RULES: &[LabelRule] = &[
|
|||
label: DataLabel::Sink(Cap::HTML_ESCAPE),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["printf", "fprintf"],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
case_sensitive: false,
|
||||
},
|
||||
LabelRule {
|
||||
matchers: &["fopen", "open"],
|
||||
label: DataLabel::Sink(Cap::FILE_IO),
|
||||
|
|
@ -118,18 +113,107 @@ pub static RULES: &[LabelRule] = &[
|
|||
/// HTTP wrappers (cpr, Boost.Beast) layer over libcurl or directly over the
|
||||
/// socket; their ergonomic surfaces differ enough that adding gates per-
|
||||
/// library is left for a follow-up driven by the corpus.
|
||||
pub static GATED_SINKS: &[SinkGate] = &[SinkGate {
|
||||
callee_matcher: "curl_easy_setopt",
|
||||
arg_index: 1,
|
||||
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
}];
|
||||
pub static GATED_SINKS: &[SinkGate] = &[
|
||||
SinkGate {
|
||||
callee_matcher: "curl_easy_setopt",
|
||||
arg_index: 1,
|
||||
dangerous_values: &["CURLOPT_POSTFIELDS", "CURLOPT_COPYPOSTFIELDS"],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::DATA_EXFIL),
|
||||
case_sensitive: true,
|
||||
payload_args: &[2],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::ValueMatch,
|
||||
},
|
||||
// Format-string sinks: only the format parameter is dangerous. Tainted
|
||||
// data arguments paired with a literal format string are not format-string
|
||||
// vulnerabilities.
|
||||
SinkGate {
|
||||
callee_matcher: "printf",
|
||||
arg_index: 0,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
case_sensitive: false,
|
||||
payload_args: &[0],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "fprintf",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::FMT_STRING),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "execv",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "execve",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "execvp",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
SinkGate {
|
||||
callee_matcher: "execvpe",
|
||||
arg_index: 1,
|
||||
dangerous_values: &[],
|
||||
dangerous_prefixes: &[],
|
||||
label: DataLabel::Sink(Cap::SHELL_ESCAPE),
|
||||
case_sensitive: false,
|
||||
payload_args: &[1],
|
||||
keyword_name: None,
|
||||
dangerous_kwargs: &[],
|
||||
activation: GateActivation::Destination {
|
||||
object_destination_fields: &[],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
pub static KINDS: Map<&'static str, Kind> = phf_map! {
|
||||
// control-flow
|
||||
|
|
|
|||
|
|
@ -861,6 +861,10 @@ pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
|
|||
// User input patterns
|
||||
if cl.contains("argv")
|
||||
|| cl.contains("stdin")
|
||||
|| cl.contains("fgets")
|
||||
|| cl.contains("scanf")
|
||||
|| cl.contains("gets")
|
||||
|| cl.contains("recv")
|
||||
|| cl.contains("request")
|
||||
|| cl.contains("form")
|
||||
|| cl.contains("query")
|
||||
|
|
|
|||
|
|
@ -247,6 +247,12 @@ fn classify_cpp(method: &str) -> Option<ContainerOp> {
|
|||
"front" | "back" | "pop_back" | "pop_front" | "top" | "find" | "count" | "data" => load(),
|
||||
// Indexed reads: `vector::at(i)`, `unordered_map::at(k)`.
|
||||
"at" => load_indexed(0),
|
||||
// Synthetic callees emitted by CFG lowering for subscript
|
||||
// reads/writes. C arrays and C++ raw arrays use the same
|
||||
// `subscript_expression` shape as JS/TS, so route them through
|
||||
// the same indexed container abstraction.
|
||||
"__index_get__" => load_indexed(0),
|
||||
"__index_set__" => store_indexed(1, 0),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -456,11 +462,18 @@ mod tests {
|
|||
}
|
||||
|
||||
/// W5: synthetic `__index_get__` is recognised as an indexed load
|
||||
/// in JS/TS, Python, and Go, driving the index_arg=0 path so a
|
||||
/// in JS/TS, Python, Go, C, and C++, driving the index_arg=0 path so a
|
||||
/// constant-key subscript read flows through `HeapSlot::Index(n)`.
|
||||
#[test]
|
||||
fn synth_index_get_classified_as_indexed_load_js_py_go() {
|
||||
for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] {
|
||||
fn synth_index_get_classified_as_indexed_load_for_subscript_languages() {
|
||||
for lang in [
|
||||
Lang::JavaScript,
|
||||
Lang::TypeScript,
|
||||
Lang::Python,
|
||||
Lang::Go,
|
||||
Lang::C,
|
||||
Lang::Cpp,
|
||||
] {
|
||||
match classify_container_op("__index_get__", lang) {
|
||||
Some(ContainerOp::Load { index_arg }) => {
|
||||
assert_eq!(index_arg, Some(0), "{lang:?} should mark idx arg=0");
|
||||
|
|
@ -471,10 +484,17 @@ mod tests {
|
|||
}
|
||||
|
||||
/// W5: synthetic `__index_set__` is recognised as an indexed store
|
||||
/// in JS/TS, Python, and Go, value at arg 1, index at arg 0.
|
||||
/// in JS/TS, Python, Go, C, and C++, value at arg 1, index at arg 0.
|
||||
#[test]
|
||||
fn synth_index_set_classified_as_indexed_store_js_py_go() {
|
||||
for lang in [Lang::JavaScript, Lang::TypeScript, Lang::Python, Lang::Go] {
|
||||
fn synth_index_set_classified_as_indexed_store_for_subscript_languages() {
|
||||
for lang in [
|
||||
Lang::JavaScript,
|
||||
Lang::TypeScript,
|
||||
Lang::Python,
|
||||
Lang::Go,
|
||||
Lang::C,
|
||||
Lang::Cpp,
|
||||
] {
|
||||
match classify_container_op("__index_set__", lang) {
|
||||
Some(ContainerOp::Store {
|
||||
value_args,
|
||||
|
|
|
|||
|
|
@ -2458,6 +2458,7 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
Some(&augmented_snapshot),
|
||||
formal_destructured,
|
||||
param_types_ref,
|
||||
Some(&callee.opt.alias_result),
|
||||
);
|
||||
|
||||
// OR-merge sink-only fields into the existing summary.
|
||||
|
|
|
|||
|
|
@ -87,6 +87,10 @@ const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r"
|
|||
/// Returns `false` if the needle is a non-metachar literal or cannot be
|
||||
/// extracted; classification then falls through to the broader checks.
|
||||
fn is_shell_metachar_rejection(text: &str) -> bool {
|
||||
if is_dash_prefix_rejection(text) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Method-call form: `.contains(…)` / `.includes(…)` / `.include?(…)`
|
||||
for method in [".contains(", ".includes(", ".include?("] {
|
||||
if let Some(idx) = text.find(method) {
|
||||
|
|
@ -111,6 +115,18 @@ fn is_shell_metachar_rejection(text: &str) -> bool {
|
|||
false
|
||||
}
|
||||
|
||||
/// Detect the C/C++ argv-injection guard used before exec-family calls.
///
/// `host[0] == '-'` means the true branch rejects an argv element that would
/// be interpreted as an option by ssh/git/etc., while the false branch is
/// safe for shell/argv execution.
///
/// Whitespace in `text` is ignored. Both operand orders are accepted, with
/// either a char (`'-'`) or a string (`"-"`) literal. In every form the
/// compared operand must be a `[0]` subscript: `'-' == flag` compares a whole
/// value rather than a first character, so it is not a dash-prefix guard and
/// returns `false`.
fn is_dash_prefix_rejection(text: &str) -> bool {
    const DASH_LITERALS: [&str; 2] = ["'-'", "\"-\""];

    let compact: String = text.chars().filter(|c| !c.is_whitespace()).collect();
    DASH_LITERALS.iter().any(|literal| {
        // Forward form: `<expr>[0]=='-'`.
        if compact.contains(format!("[0]=={literal}").as_str()) {
            return true;
        }
        // Reversed form: `'-'==<expr>[0]`. Requiring the right-hand operand to
        // end in `[0]` keeps unrelated comparisons against a dash literal from
        // being credited as a prefix check.
        let reversed = format!("{literal}==");
        compact
            .match_indices(reversed.as_str())
            .any(|(start, matched)| {
                let rest = &compact[start + matched.len()..];
                dash_guard_operand(rest).ends_with("[0]")
            })
    })
}

/// Return the leading operand of `rest`: after skipping any opening
/// parentheses, the longest prefix made of identifier characters, `.`, `$`,
/// `->` and subscript brackets.
fn dash_guard_operand(rest: &str) -> &str {
    let rest = rest.trim_start_matches('(');
    let bytes = rest.as_bytes();
    let mut end = 0;
    // Only ASCII bytes are consumed, so `end` always lands on a char boundary.
    while let Some(&byte) = bytes.get(end) {
        if byte == b'-' && bytes.get(end + 1) == Some(&b'>') {
            end += 2;
        } else if byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'.' | b'[' | b']' | b'$')
        {
            end += 1;
        } else {
            break;
        }
    }
    &rest[..end]
}
|
||||
|
||||
/// Extract the first string literal argument from a slice starting just after
|
||||
/// an opening `(` in a call expression. Returns the raw inner text of the
|
||||
/// literal (without surrounding quotes).
|
||||
|
|
@ -698,7 +714,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
|| lower.contains(".has(")
|
||||
|| lower.contains("in_array(")
|
||||
|| lower.contains(" in ")
|
||||
|| (lower.contains('[') && !lower.contains('('))
|
||||
|| is_index_membership_check(text)
|
||||
{
|
||||
return PredicateKind::AllowlistCheck;
|
||||
}
|
||||
|
|
@ -1256,6 +1272,40 @@ fn extract_allowlist_target(text: &str) -> Option<String> {
|
|||
None
|
||||
}
|
||||
|
||||
/// Detect map-membership style indexing such as `allowed[cmd]` without
|
||||
/// treating ordinary array indexing/comparisons (`buf[len - 1] == '\n'`) as
|
||||
/// allowlist validation.
|
||||
fn is_index_membership_check(text: &str) -> bool {
|
||||
let mut trimmed = text.trim();
|
||||
while let Some(inner) = trimmed
|
||||
.strip_prefix('(')
|
||||
.and_then(|rest| rest.strip_suffix(')'))
|
||||
{
|
||||
trimmed = inner.trim();
|
||||
}
|
||||
trimmed = trimmed.strip_prefix('!').unwrap_or(trimmed).trim();
|
||||
if trimmed.contains('(') {
|
||||
return false;
|
||||
}
|
||||
let Some(open) = trimmed.find('[') else {
|
||||
return false;
|
||||
};
|
||||
let Some(close_rel) = trimmed[open + 1..].find(']') else {
|
||||
return false;
|
||||
};
|
||||
let close = open + 1 + close_rel;
|
||||
let base = trimmed[..open].trim();
|
||||
let inner = trimmed[open + 1..close].trim();
|
||||
let after = trimmed[close + 1..].trim();
|
||||
is_identifier(base)
|
||||
&& is_identifier(inner)
|
||||
&& (after.is_empty()
|
||||
|| after.starts_with("==")
|
||||
|| after.starts_with("!=")
|
||||
|| after.starts_with("===")
|
||||
|| after.starts_with("!=="))
|
||||
}
|
||||
|
||||
/// Extract the target variable from a type-check guard.
|
||||
///
|
||||
/// Handles:
|
||||
|
|
@ -1699,6 +1749,14 @@ mod tests {
|
|||
classify_condition("allowed[cmd]"),
|
||||
PredicateKind::AllowlistCheck
|
||||
);
|
||||
assert_eq!(
|
||||
classify_condition("!allowed[cmd]"),
|
||||
PredicateKind::AllowlistCheck
|
||||
);
|
||||
assert_eq!(
|
||||
classify_condition("(!allowed[cmd])"),
|
||||
PredicateKind::AllowlistCheck
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1825,6 +1883,10 @@ mod tests {
|
|||
let (kind, target) = classify_condition_with_target("allowed[cmd]");
|
||||
assert_eq!(kind, PredicateKind::AllowlistCheck);
|
||||
assert_eq!(target.as_deref(), Some("cmd"));
|
||||
|
||||
let (kind, target) = classify_condition_with_target("!allowed[cmd]");
|
||||
assert_eq!(kind, PredicateKind::AllowlistCheck);
|
||||
assert_eq!(target.as_deref(), Some("cmd"));
|
||||
}
|
||||
|
||||
// ── TypeCheck target extraction ───────────────────────────────────
|
||||
|
|
@ -1988,6 +2050,18 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn classify_dash_prefix_rejection_for_argv_injection() {
    // Both operand orders of the dash-prefix guard must classify as a
    // shell-metachar validation.
    for condition in ["ssh_host[0] == '-'", "\"-\" == argv0[0]"] {
        assert_eq!(
            classify_condition(condition),
            PredicateKind::ShellMetaValidated
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn classify_non_metachar_contains_stays_allowlist() {
|
||||
// `x.contains("foo")` must NOT be credited as a shell-metachar
|
||||
|
|
@ -2020,6 +2094,14 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn classify_indexed_char_comparison_as_comparison() {
    // An indexed character comparison is a plain comparison, not an
    // allowlist lookup.
    let kind = classify_condition("len && url_buf[len - 1] == '\\n'");
    assert_eq!(kind, PredicateKind::Comparison);
}
|
||||
|
||||
#[test]
|
||||
fn target_shell_metachar_receiver() {
|
||||
let (kind, target) = classify_condition_with_target("input.contains(\";\")");
|
||||
|
|
|
|||
|
|
@ -1189,7 +1189,7 @@ fn compute_succ_states(
|
|||
(*false_blk, exit_state.clone()),
|
||||
];
|
||||
};
|
||||
if cond_info.kind == crate::cfg::StmtKind::If && !cond_info.condition_vars.is_empty() {
|
||||
if cond_info.condition_text.is_some() && !cond_info.condition_vars.is_empty() {
|
||||
let cond_text = cond_info.condition_text.as_deref().unwrap_or("");
|
||||
let (kind, target_var) = classify_condition_with_target(cond_text);
|
||||
|
||||
|
|
@ -1238,6 +1238,7 @@ fn compute_succ_states(
|
|||
true_polarity,
|
||||
transfer.interner,
|
||||
ssa,
|
||||
transfer.base_aliases,
|
||||
);
|
||||
// Apply validation/predicate to false branch
|
||||
apply_branch_predicates(
|
||||
|
|
@ -1247,6 +1248,7 @@ fn compute_succ_states(
|
|||
false_polarity,
|
||||
transfer.interner,
|
||||
ssa,
|
||||
transfer.base_aliases,
|
||||
);
|
||||
|
||||
// PathFact branch narrowing, language-agnostic. The
|
||||
|
|
@ -1478,6 +1480,7 @@ fn apply_branch_predicates(
|
|||
polarity: bool,
|
||||
interner: &SymbolInterner,
|
||||
ssa: &SsaBody,
|
||||
base_aliases: Option<&crate::ssa::alias::BaseAliasResult>,
|
||||
) {
|
||||
// Validation-like predicates: mark condition vars as validated when polarity is true
|
||||
if matches!(
|
||||
|
|
@ -1584,17 +1587,25 @@ fn apply_branch_predicates(
|
|||
if kind == PredicateKind::ShellMetaValidated && !polarity {
|
||||
for var in condition_vars {
|
||||
let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new();
|
||||
for (val, _) in state.values.iter() {
|
||||
if let Some(name) = ssa
|
||||
.value_defs
|
||||
.get(val.0 as usize)
|
||||
.and_then(|vd| vd.var_name.as_deref())
|
||||
{
|
||||
if name == var {
|
||||
to_clear.push(*val);
|
||||
let mut names: SmallVec<[&str; 4]> = smallvec::smallvec![var.as_str()];
|
||||
if let Some(aliases) = base_aliases.and_then(|aliases| aliases.aliases_of(var)) {
|
||||
for alias in aliases {
|
||||
if alias != var {
|
||||
names.push(alias.as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
for &name_to_clear in names.iter() {
|
||||
for (idx, def) in ssa.value_defs.iter().enumerate() {
|
||||
if def.var_name.as_deref() == Some(name_to_clear) {
|
||||
let val = SsaValue(idx as u32);
|
||||
to_clear.push(val);
|
||||
collect_copy_alias_operands(val, ssa, &mut to_clear);
|
||||
}
|
||||
}
|
||||
}
|
||||
to_clear.sort_by_key(|v| v.0);
|
||||
to_clear.dedup_by_key(|v| v.0);
|
||||
for val in to_clear {
|
||||
if let Some(taint) = state.get(val).cloned() {
|
||||
let new_caps = taint.caps & !Cap::SHELL_ESCAPE;
|
||||
|
|
@ -1639,6 +1650,33 @@ fn apply_branch_predicates(
|
|||
}
|
||||
}
|
||||
|
||||
fn collect_copy_alias_operands(root: SsaValue, ssa: &SsaBody, out: &mut SmallVec<[SsaValue; 4]>) {
|
||||
let mut seen = HashSet::new();
|
||||
let mut stack = vec![root];
|
||||
while let Some(cur) = stack.pop() {
|
||||
if !seen.insert(cur) {
|
||||
continue;
|
||||
}
|
||||
let Some(def_inst) = find_inst_for_value(cur, ssa) else {
|
||||
continue;
|
||||
};
|
||||
match &def_inst.op {
|
||||
SsaOp::Assign(uses) if uses.len() == 1 => {
|
||||
let alias = uses[0];
|
||||
out.push(alias);
|
||||
stack.push(alias);
|
||||
}
|
||||
SsaOp::Phi(operands) => {
|
||||
for &(_, alias) in operands {
|
||||
out.push(alias);
|
||||
stack.push(alias);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark the input arguments of a value-producing validator as validated
|
||||
/// on the success branch of a downstream `err`-check.
|
||||
///
|
||||
|
|
@ -3982,6 +4020,11 @@ pub(super) fn transfer_inst(
|
|||
receiver,
|
||||
..
|
||||
} => {
|
||||
if is_noreturn_call(transfer.lang, callee) {
|
||||
*state = SsaTaintState::bot();
|
||||
return;
|
||||
}
|
||||
|
||||
// Excluded callees (e.g. router.get, app.post) should not propagate
|
||||
// taint through their return value, they are framework scaffolding,
|
||||
// not data-flow operations.
|
||||
|
|
@ -7659,7 +7702,7 @@ fn collect_block_events(
|
|||
}
|
||||
|
||||
// Collect tainted SSA values that flow into this sink
|
||||
let tainted = collect_tainted_sink_values(
|
||||
let mut tainted = collect_tainted_sink_values(
|
||||
inst,
|
||||
info,
|
||||
&state,
|
||||
|
|
@ -7670,6 +7713,7 @@ fn collect_block_events(
|
|||
positions_override,
|
||||
destination_override,
|
||||
);
|
||||
refine_exec_argv_array_shell_taint(inst, transfer.lang, &state, ssa, &mut tainted);
|
||||
if tainted.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -7722,6 +7766,117 @@ fn collect_block_events(
|
|||
}
|
||||
}
|
||||
|
||||
fn refine_exec_argv_array_shell_taint(
|
||||
inst: &SsaInst,
|
||||
lang: Lang,
|
||||
state: &SsaTaintState,
|
||||
ssa: &SsaBody,
|
||||
tainted: &mut Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>,
|
||||
) {
|
||||
if !matches!(lang, Lang::C | Lang::Cpp) {
|
||||
return;
|
||||
}
|
||||
let SsaOp::Call { callee, args, .. } = &inst.op else {
|
||||
return;
|
||||
};
|
||||
let method = crate::labels::bare_method_name(callee);
|
||||
if !matches!(method, "execv" | "execve" | "execvp" | "execvpe") {
|
||||
return;
|
||||
}
|
||||
let Some(argv_values) = args.get(1) else {
|
||||
return;
|
||||
};
|
||||
if argv_values.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
for (value, caps, origins) in tainted.iter_mut() {
|
||||
if !argv_values.iter().any(|argv| argv == value) {
|
||||
continue;
|
||||
}
|
||||
let Some((argv_caps, argv_origins)) =
|
||||
exec_argv_non_executable_shell_taint(*value, inst.value, state, ssa)
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
*caps = (*caps & !Cap::SHELL_ESCAPE) | argv_caps;
|
||||
if argv_caps.contains(Cap::SHELL_ESCAPE) {
|
||||
*origins = argv_origins;
|
||||
}
|
||||
}
|
||||
|
||||
tainted.retain(|(_, caps, _)| caps.contains(Cap::SHELL_ESCAPE));
|
||||
}
|
||||
|
||||
fn exec_argv_non_executable_shell_taint(
|
||||
argv: SsaValue,
|
||||
sink_value: SsaValue,
|
||||
state: &SsaTaintState,
|
||||
ssa: &SsaBody,
|
||||
) -> Option<(Cap, SmallVec<[TaintOrigin; 2]>)> {
|
||||
let mut stores: Vec<(u32, SmallVec<[SsaValue; 2]>)> = Vec::new();
|
||||
for block in &ssa.blocks {
|
||||
for candidate in block.phis.iter().chain(block.body.iter()) {
|
||||
if candidate.value.0 >= sink_value.0 {
|
||||
continue;
|
||||
}
|
||||
let SsaOp::Call {
|
||||
callee,
|
||||
args,
|
||||
receiver: Some(receiver),
|
||||
..
|
||||
} = &candidate.op
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
if callee != "__index_set__" || *receiver != argv {
|
||||
continue;
|
||||
}
|
||||
stores.push((candidate.value.0, args.get(1).cloned().unwrap_or_default()));
|
||||
}
|
||||
}
|
||||
if stores.is_empty() {
|
||||
return None;
|
||||
}
|
||||
stores.sort_by_key(|(value, _)| *value);
|
||||
|
||||
let mut caps = Cap::empty();
|
||||
let mut origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
|
||||
for (_, values) in stores.into_iter().skip(1) {
|
||||
for value in values {
|
||||
let Some(taint) = state.get(value) else {
|
||||
continue;
|
||||
};
|
||||
if !taint.caps.contains(Cap::SHELL_ESCAPE) {
|
||||
continue;
|
||||
}
|
||||
let non_env_origins: SmallVec<[TaintOrigin; 2]> = taint
|
||||
.origins
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|origin| origin.source_kind != SourceKind::EnvironmentConfig)
|
||||
.collect();
|
||||
if non_env_origins.is_empty() {
|
||||
continue;
|
||||
}
|
||||
caps |= Cap::SHELL_ESCAPE;
|
||||
for origin in non_env_origins {
|
||||
push_origin_bounded(&mut origins, origin);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some((caps, origins))
|
||||
}
|
||||
|
||||
fn is_noreturn_call(lang: Lang, callee: &str) -> bool {
|
||||
if !matches!(lang, Lang::C | Lang::Cpp) {
|
||||
return false;
|
||||
}
|
||||
let method = crate::labels::bare_method_name(callee);
|
||||
matches!(method, "exit" | "_Exit" | "quick_exit" | "abort")
|
||||
}
|
||||
|
||||
// ── Primary sink-site attribution ───────────────────────────────────────
|
||||
|
||||
/// Decide whether a [`SinkSite`] should be promoted into a caller-side
|
||||
|
|
@ -8293,7 +8448,6 @@ fn try_container_propagation(
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if val_caps.is_empty() {
|
||||
return true; // Container op handled, but no taint to propagate
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@ pub fn extract_ssa_func_summary(
|
|||
None,
|
||||
formal_destructured_fields,
|
||||
param_types,
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -121,6 +122,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
// SQL_QUERY caps were invisible to the param-1 probe). `None` for
|
||||
// legacy / test paths preserves prior behaviour.
|
||||
param_types: Option<&[Option<TypeKind>]>,
|
||||
base_aliases: Option<&crate::ssa::alias::BaseAliasResult>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
// Pre-compute type facts on the un-optimised SSA body so the per-param
|
||||
// probe can resolve sinks that depend on receiver-type inference.
|
||||
|
|
@ -135,6 +137,8 @@ pub fn extract_ssa_func_summary_full(
|
|||
analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt)
|
||||
});
|
||||
let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref();
|
||||
let probe_const_values = crate::ssa::const_prop::const_propagate(ssa).values;
|
||||
let probe_points_to = crate::ssa::heap::analyze_points_to(ssa, cfg, Some(lang));
|
||||
use crate::summary::SinkSite;
|
||||
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
|
||||
|
||||
|
|
@ -232,6 +236,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
Vec<ReturnBlockObs>,
|
||||
) {
|
||||
let seed_ref = if seed.is_empty() { None } else { Some(&seed) };
|
||||
let dynamic_pts = std::cell::RefCell::new(std::collections::HashMap::new());
|
||||
let transfer = SsaTaintTransfer {
|
||||
lang,
|
||||
namespace,
|
||||
|
|
@ -244,19 +249,19 @@ pub fn extract_ssa_func_summary_full(
|
|||
global_seed: seed_ref,
|
||||
param_seed: None,
|
||||
receiver_seed: None,
|
||||
const_values: None,
|
||||
const_values: Some(&probe_const_values),
|
||||
type_facts: local_type_facts_ref,
|
||||
xml_parser_config: None,
|
||||
xpath_config: None,
|
||||
ssa_summaries,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
base_aliases,
|
||||
callee_bodies: None,
|
||||
inline_cache: None,
|
||||
context_depth: 0,
|
||||
callback_bindings: None,
|
||||
points_to: None,
|
||||
dynamic_pts: None,
|
||||
points_to: Some(&probe_points_to),
|
||||
dynamic_pts: Some(&dynamic_pts),
|
||||
import_bindings: None,
|
||||
promisify_aliases: None,
|
||||
module_aliases,
|
||||
|
|
@ -824,7 +829,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
xpath_config: None,
|
||||
ssa_summaries,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
base_aliases,
|
||||
callee_bodies: None,
|
||||
inline_cache: None,
|
||||
context_depth: 0,
|
||||
|
|
|
|||
|
|
@ -1578,6 +1578,101 @@ fn c_source_to_sink() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn c_fgets_condition_to_execvp_argv_fires() {
|
||||
let src = br#"#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
int main(void) {
|
||||
char url_buf[256];
|
||||
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
|
||||
const char *args[3];
|
||||
args[0] = "ssh";
|
||||
args[1] = url_buf;
|
||||
args[2] = 0;
|
||||
return execvp(args[0], (char *const *)args);
|
||||
}
|
||||
"#;
|
||||
let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
|
||||
let file_cfg = parse_lang(src, "c", lang);
|
||||
let findings = analyse_file(
|
||||
&file_cfg,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"test.c",
|
||||
&[],
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
findings
|
||||
.iter()
|
||||
.any(|f| f.source_kind == crate::labels::SourceKind::UserInput),
|
||||
"C: fgets stdin should reach execvp argv, got {findings:#?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn c_execvp_ignores_env_config_executable_path() {
|
||||
let src = br#"#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
int main(void) {
|
||||
const char *ssh = getenv("GIT_SSH");
|
||||
const char *args[2];
|
||||
args[0] = ssh;
|
||||
args[1] = 0;
|
||||
return execvp(args[0], (char *const *)args);
|
||||
}
|
||||
"#;
|
||||
let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
|
||||
let file_cfg = parse_lang(src, "c", lang);
|
||||
let findings = analyse_file(
|
||||
&file_cfg,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"test.c",
|
||||
&[],
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
findings.is_empty(),
|
||||
"C: env-config executable path should not be treated as argv injection"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn c_dash_prefix_guard_suppresses_execvp_argv_injection() {
|
||||
let src = br#"#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
int main(void) {
|
||||
char url_buf[256];
|
||||
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
|
||||
char *ssh_host = url_buf;
|
||||
if (ssh_host[0] == '-') return 1;
|
||||
const char *args[3];
|
||||
args[0] = "ssh";
|
||||
args[1] = ssh_host;
|
||||
args[2] = 0;
|
||||
return execvp(args[0], (char *const *)args);
|
||||
}
|
||||
"#;
|
||||
let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
|
||||
let file_cfg = parse_lang(src, "c", lang);
|
||||
let findings = analyse_file(
|
||||
&file_cfg,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"test.c",
|
||||
&[],
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
findings.is_empty(),
|
||||
"C: dash-prefix rejection should clear argv-injection taint, got {findings:#?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cpp_source_to_sink() {
|
||||
let src = b"void main() {\n char* x = getenv(\"SECRET\");\n system(x);\n}\n";
|
||||
|
|
@ -4548,6 +4643,248 @@ fn ssa_summary_param_to_sink() {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn c_summary_param_to_execvp_argv_sink() {
|
||||
use crate::state::symbol::SymbolInterner;
|
||||
|
||||
let src = br#"#include <unistd.h>
|
||||
int do_ssh_connect(char *url) {
|
||||
const char *ssh;
|
||||
char *ssh_host = url;
|
||||
const char *port = 0;
|
||||
get_host_and_port_min(&ssh_host, &port);
|
||||
if (!port) port = "22";
|
||||
ssh = getenv("GIT_SSH");
|
||||
if (!ssh) ssh = "ssh";
|
||||
const char *args[8];
|
||||
int nargs = 0;
|
||||
args[nargs++] = ssh;
|
||||
if (port) {
|
||||
args[nargs++] = "-p";
|
||||
args[nargs++] = port;
|
||||
}
|
||||
args[nargs++] = ssh_host;
|
||||
args[nargs++] = "git-upload-pack";
|
||||
args[nargs++] = 0;
|
||||
return execvp(args[0], (char *const *)args);
|
||||
}
|
||||
"#;
|
||||
let file_cfg = parse_lang(
|
||||
src,
|
||||
"c",
|
||||
tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
|
||||
);
|
||||
for body in &file_cfg.bodies {
|
||||
if body.meta.name.as_deref() != Some("do_ssh_connect") {
|
||||
continue;
|
||||
}
|
||||
let interner = SymbolInterner::from_cfg(&body.graph);
|
||||
let ssa = crate::ssa::lower_to_ssa_with_params(
|
||||
&body.graph,
|
||||
body.entry,
|
||||
Some("do_ssh_connect"),
|
||||
false,
|
||||
&body.meta.params,
|
||||
)
|
||||
.expect("C function should lower to SSA");
|
||||
let param_count = body.meta.params.len();
|
||||
let summary = ssa_transfer::extract_ssa_func_summary(
|
||||
&ssa,
|
||||
&body.graph,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"test.c",
|
||||
&interner,
|
||||
param_count,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
summary
|
||||
.param_to_sink_caps()
|
||||
.iter()
|
||||
.any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE)),
|
||||
"C summary should record url param reaching execvp argv, got {:?}",
|
||||
summary.param_to_sink_caps()
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
panic!("do_ssh_connect function not found");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn c_summary_dash_prefix_guard_suppresses_execvp_argv_sink() {
|
||||
use crate::state::symbol::SymbolInterner;
|
||||
|
||||
let src = br#"#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
int do_ssh_connect(char *url) {
|
||||
const char *ssh;
|
||||
char *ssh_host = url;
|
||||
const char *port = 0;
|
||||
if (!port) port = "22";
|
||||
if (ssh_host[0] == '-') {
|
||||
fprintf(stderr, "strange hostname '%s' blocked\n", ssh_host);
|
||||
exit(1);
|
||||
}
|
||||
ssh = getenv("GIT_SSH");
|
||||
if (!ssh) ssh = "ssh";
|
||||
const char *args[8];
|
||||
int nargs = 0;
|
||||
args[nargs++] = ssh;
|
||||
if (port) {
|
||||
args[nargs++] = "-p";
|
||||
args[nargs++] = port;
|
||||
}
|
||||
args[nargs++] = ssh_host;
|
||||
args[nargs++] = "git-upload-pack";
|
||||
args[nargs++] = 0;
|
||||
return execvp(args[0], (char *const *)args);
|
||||
}
|
||||
"#;
|
||||
let file_cfg = parse_lang(
|
||||
src,
|
||||
"c",
|
||||
tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
|
||||
);
|
||||
for body in &file_cfg.bodies {
|
||||
if body.meta.name.as_deref() != Some("do_ssh_connect") {
|
||||
continue;
|
||||
}
|
||||
let interner = SymbolInterner::from_cfg(&body.graph);
|
||||
let ssa = crate::ssa::lower_to_ssa_with_params(
|
||||
&body.graph,
|
||||
body.entry,
|
||||
Some("do_ssh_connect"),
|
||||
false,
|
||||
&body.meta.params,
|
||||
)
|
||||
.expect("C function should lower to SSA");
|
||||
let summary = ssa_transfer::extract_ssa_func_summary(
|
||||
&ssa,
|
||||
&body.graph,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"test.c",
|
||||
&interner,
|
||||
body.meta.params.len(),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary
|
||||
.param_to_sink_caps()
|
||||
.iter()
|
||||
.any(|(idx, caps)| *idx == 0 && caps.contains(Cap::SHELL_ESCAPE)),
|
||||
"dash-prefix guard should suppress argv-injection summary, got {:?}",
|
||||
summary.param_to_sink_caps()
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
panic!("do_ssh_connect function not found");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn c_fgets_reaches_execvp_argv_through_summary() {
|
||||
let src = br#"#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
int do_ssh_connect(char *url) {
|
||||
char *ssh_host = url;
|
||||
const char *args[3];
|
||||
args[0] = "ssh";
|
||||
args[1] = ssh_host;
|
||||
args[2] = 0;
|
||||
return execvp(args[0], (char *const *)args);
|
||||
}
|
||||
int main(void) {
|
||||
char url_buf[256];
|
||||
if (!fgets(url_buf, sizeof url_buf, stdin)) return 1;
|
||||
return do_ssh_connect(url_buf);
|
||||
}
|
||||
"#;
|
||||
let file_cfg = parse_lang(
|
||||
src,
|
||||
"c",
|
||||
tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
|
||||
);
|
||||
let findings = analyse_file(
|
||||
&file_cfg,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"test.c",
|
||||
&[],
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
findings
|
||||
.iter()
|
||||
.any(|f| f.source_kind == crate::labels::SourceKind::UserInput),
|
||||
"C: fgets source should flow through do_ssh_connect summary, got {findings:#?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cve_2017_1000117_vulnerable_fixture_fires() {
|
||||
let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/vulnerable.c");
|
||||
let file_cfg = parse_lang(
|
||||
src,
|
||||
"c",
|
||||
tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
|
||||
);
|
||||
let findings = analyse_file(
|
||||
&file_cfg,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"vulnerable.c",
|
||||
&[],
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
findings
|
||||
.iter()
|
||||
.any(|f| f.source_kind == crate::labels::SourceKind::UserInput),
|
||||
"CVE-2017-1000117 vulnerable fixture should fire, got {findings:#?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cve_2017_1000117_patched_fixture_suppresses_dash_guard() {
|
||||
let src = include_bytes!("../../tests/benchmark/cve_corpus/c/CVE-2017-1000117/patched.c");
|
||||
let file_cfg = parse_lang(
|
||||
src,
|
||||
"c",
|
||||
tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
|
||||
);
|
||||
let findings = analyse_file(
|
||||
&file_cfg,
|
||||
&file_cfg.summaries,
|
||||
None,
|
||||
Lang::C,
|
||||
"patched.c",
|
||||
&[],
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
findings
|
||||
.iter()
|
||||
.all(|f| f.source_kind != crate::labels::SourceKind::UserInput),
|
||||
"CVE-2017-1000117 patched fixture should suppress argv injection, got {findings:#?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ssa_cross_function_taint_with_sanitizer_wrapper() {
|
||||
// Cross-function: caller passes tainted data through sanitizer wrapper
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
# Benchmark Results
|
||||
|
||||
Current baseline (2026-05-02):
|
||||
Current baseline (2026-05-26):
|
||||
|
||||
| Metric | File-level | Rule-level | CI floor |
|
||||
|-----------|------------|------------|----------|
|
||||
| Precision | 1.000 | 1.000 | 0.861 |
|
||||
| Recall | 1.000 | 1.000 | 0.944 |
|
||||
| F1 | 1.000 | 1.000 | 0.901 |
|
||||
| Recall | 0.996 | 0.996 | 0.944 |
|
||||
| F1 | 0.998 | 0.998 | 0.901 |
|
||||
|
||||
Corpus: 507 cases across 10 languages, 504 evaluated (3 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
|
||||
Corpus: 565 cases across 10 languages, 564 evaluated (1 disabled). Per-run JSON lands in `tests/benchmark/results/` (`latest.json` plus dated snapshots). See `README.md` for what the scoring modes mean and how to run a subset.
|
||||
|
||||
The corpus is mostly synthetic 8-20 line fixtures, one vulnerability or one safe pattern per file. A smaller real-CVE replay set under `cve_corpus/` covers 30 published advisories across all 10 languages. Both contribute to the headline numbers.
|
||||
|
||||
|
|
@ -53,14 +53,14 @@ Real disclosed CVEs reduced to minimal reproducers, vulnerable + patched pair pe
|
|||
| CVE-2024-32884 | Rust | gitoxide | Apache-2.0 OR MIT | CMDI | detected |
|
||||
| CVE-2025-53549 | Rust | matrix-rust-sdk | Apache-2.0 | SQL Injection | detected |
|
||||
| CVE-2016-3714 | C | ImageMagick (ImageTragick) | ImageMagick License | CMDI | detected |
|
||||
| CVE-2017-1000117 | C | git (ssh:// argv injection)| GPL-2.0 | cmdi (argv-inj) | deferred |
|
||||
| CVE-2017-1000117 | C | git (ssh:// argv injection)| GPL-2.0 | cmdi (argv-inj) | detected |
|
||||
| CVE-2019-18634 | C | sudo (pwfeedback) | ISC | memory_safety | detected |
|
||||
| CVE-2019-13132 | C++ | ZeroMQ libzmq | MPL-2.0 | memory_safety | detected |
|
||||
| CVE-2022-1941 | C++ | Protocol Buffers | BSD-3-Clause | memory_safety | detected |
|
||||
| CVE-2026-25544 | TypeScript | Payload (Drizzle adapter) | MIT | sql_injection | detected |
|
||||
| CVE-2026-42353 | JavaScript | i18next-http-middleware | MIT | path_traversal | detected |
|
||||
|
||||
Deferred entries are real bugs Nyx can't yet detect. The fixture stays committed with `disabled: true` in ground truth so the gap remains visible.
|
||||
No real-CVE entries are currently deferred. If a future real-CVE fixture exposes a detector gap, keep it committed with `disabled: true` in ground truth so the gap remains visible.
|
||||
|
||||
### How CVEs get picked
|
||||
|
||||
|
|
@ -83,7 +83,8 @@ Most recent first. Metrics are rule-level on the corpus size at that point.
|
|||
|
||||
| Date | Change | Corpus | P | R | F1 |
|
||||
|------------|------------------------------------------------------------------------------|--------|-------|-------|-------|
|
||||
| 2026-05-26 | Benchmark docs corrected for CVE-2026-25544: the Payload Drizzle SQL injection fixture is enabled and detected in `ground_truth.json`; only CVE-2017-1000117 remains deferred in the real-CVE table | 565 | 1.000 | 1.000 | 1.000 |
|
||||
| 2026-05-26 | C argv-injection taint now propagates through execvp argv arrays while recognising the upstream `ssh_host[0] == '-'` dash-prefix rejection and ignoring env-derived executable-path argv elements; CVE-2017-1000117 re-enabled and detected, patched counterpart stays clean | 565 | 1.000 | 0.996 | 0.998 |
|
||||
| 2026-05-26 | Benchmark docs corrected for CVE-2026-25544: the Payload Drizzle SQL injection fixture is enabled and detected in `ground_truth.json` | 565 | 1.000 | 1.000 | 1.000 |
|
||||
| 2026-05-04 | C cvehunt session-0014: CVE-2017-1000117 (git ssh:// hostname-as-argv injection) added in corpus disabled — three-layer C engine gap: (a) array-element taint propagation through `args[i] = ssh_host;` writes, (b) missing `c.cmdi.exec*` AST patterns in `src/patterns/c.rs`, (c) sanitizer recognition of the upstream `if (ssh_host[0] == '-') die(...)` dash-prefix guard | 565 | 1.000 | 1.000 | 1.000 |
|
||||
| 2026-05-04 | JS/TS array-method validator-callback narrowing (`try_array_method_validator_callback_narrowing` in `src/taint/ssa_transfer/mod.rs`) — `<arr>.filter(<isSafeXxx>)` / `.find` / `.findLast` strips `Cap::all()` from the call result when the callback resolves to a `BooleanTrueIsValid` validator; CVE-2026-42353 (i18next-http-middleware path traversal) re-enabled in ground truth, deferred queue cleared | 563 | 1.000 | 1.000 | 1.000 |
|
||||
| 2026-05-04 | JS/TS ternary-RHS source-classification fix in `src/cfg/conditions.rs::lower_ternary_branch` (segment-strip first_member_label on the branch AST) — `let arr = cond ? req.query.lng : "";` now propagates taint through the diamond's join phi instead of lowering both branches to labelless Assign-with-empty-uses; CVE-2026-42353 (i18next-http-middleware path traversal / SSRF) added in corpus disabled — needs Array.prototype.filter(known_validator_callback) precision bridge | 561 | 1.000 | 1.000 | 1.000 |
|
||||
|
|
|
|||
|
|
@ -5359,7 +5359,8 @@
|
|||
"taint-unsanitised-flow"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [
|
||||
"c.cmdi.execvp"
|
||||
"c.cmdi.execvp",
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "HIGH",
|
||||
|
|
@ -6078,7 +6079,8 @@
|
|||
"taint-unsanitised-flow"
|
||||
],
|
||||
"allowed_alternative_rule_ids": [
|
||||
"cpp.cmdi.execvp"
|
||||
"cpp.cmdi.execvp",
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"forbidden_rule_ids": [],
|
||||
"expected_severity": "HIGH",
|
||||
|
|
@ -11829,14 +11831,14 @@
|
|||
"expected_category": "Security",
|
||||
"expected_sink_lines": [
|
||||
[
|
||||
87,
|
||||
87
|
||||
95,
|
||||
95
|
||||
]
|
||||
],
|
||||
"expected_source_lines": [
|
||||
[
|
||||
92,
|
||||
92
|
||||
95,
|
||||
95
|
||||
]
|
||||
],
|
||||
"tags": [
|
||||
|
|
@ -11845,8 +11847,7 @@
|
|||
"argv-injection",
|
||||
"cmdi"
|
||||
],
|
||||
"disabled": true,
|
||||
"disabled_reason": "C taint engine does not propagate taint through C array-element writes (`args[i] = ssh_host;`) and has no `c.cmdi.exec*` AST pattern; even if such a pattern were added it would also fire on the patched fixture (precision miss) because the CVE is sanitised by a pre-call dash-prefix guard the engine does not classify as a validator. Three-layer deep fix tracked in CVE_DEFERRED.md.",
|
||||
"disabled": false,
|
||||
"notes": "CVE-2017-1000117 (git ssh:// argv injection): pre-2.7.6 git accepted `ssh://-oProxyCommand=...@host/repo` URLs and pushed the URL host as an argv element to ssh, where a leading dash was treated as an option flag. GPL-2.0"
|
||||
},
|
||||
{
|
||||
|
|
@ -11877,8 +11878,7 @@
|
|||
"patched",
|
||||
"negative"
|
||||
],
|
||||
"disabled": true,
|
||||
"disabled_reason": "Paired with cve-c-2017-1000117-vulnerable; precision side requires sanitizer recognition of the upstream `if (ssh_host[0] == '-') die(...)` guard so that adding any `c.cmdi.execvp` AST pattern would not also fire on the patched fixture.",
|
||||
"disabled": false,
|
||||
"notes": "CVE-2017-1000117 patched counterpart: dash-prefix gate added before argv assembly; regression guard that Nyx does not refire on the fix once the deferral lands"
|
||||
},
|
||||
{
|
||||
|
|
@ -17800,4 +17800,4 @@
|
|||
"notes": "Patched form of `sanitizeValue` from `@payloadcms/drizzle@v3.73.0` (MIT). Enabled after validated-flow propagation landed."
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"benchmark_version": "1.0",
|
||||
"timestamp": "2026-05-11T15:19:43Z",
|
||||
"timestamp": "2026-05-26T16:09:13Z",
|
||||
"scanner_version": "0.7.0",
|
||||
"scanner_config": {
|
||||
"analysis_mode": "Full",
|
||||
|
|
@ -9,10 +9,10 @@
|
|||
"state_analysis_enabled": true,
|
||||
"worker_threads": 1
|
||||
},
|
||||
"ground_truth_hash": "sha256:00a4629e50841ab26c7ba947adfdab43b909d72d7a0885d604e702cc56552eb4",
|
||||
"ground_truth_hash": "sha256:4ec1e5ec0d72129f458db49b8aab8579a03e704ed6fe6e67ef45038924868420",
|
||||
"corpus_size": 565,
|
||||
"cases_run": 562,
|
||||
"cases_skipped": 3,
|
||||
"cases_run": 564,
|
||||
"cases_skipped": 1,
|
||||
"outcomes": [
|
||||
{
|
||||
"case_id": "c-buf-001",
|
||||
|
|
@ -151,11 +151,11 @@
|
|||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"taint-unsanitised-flow (source 5:18)"
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"taint-unsanitised-flow (source 5:18)"
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
|
|
@ -680,11 +680,11 @@
|
|||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"taint-unsanitised-flow (source 5:18)"
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"taint-unsanitised-flow (source 5:18)"
|
||||
"cfg-unguarded-sink"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
|
|
@ -1126,6 +1126,40 @@
|
|||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-c-2017-1000117-patched",
|
||||
"file": "cve_corpus/c/CVE-2017-1000117/patched.c",
|
||||
"language": "c",
|
||||
"vuln_class": "safe",
|
||||
"is_vulnerable": false,
|
||||
"outcome_file_level": "TN",
|
||||
"outcome_rule_level": "TN",
|
||||
"outcome_location_level": null,
|
||||
"matched_rule_ids": [],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [],
|
||||
"security_finding_count": 0,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-c-2017-1000117-vulnerable",
|
||||
"file": "cve_corpus/c/CVE-2017-1000117/vulnerable.c",
|
||||
"language": "c",
|
||||
"vuln_class": "cmdi",
|
||||
"is_vulnerable": true,
|
||||
"outcome_file_level": "TP",
|
||||
"outcome_rule_level": "TP",
|
||||
"outcome_location_level": "TP",
|
||||
"matched_rule_ids": [
|
||||
"taint-unsanitised-flow (source 95:12)"
|
||||
],
|
||||
"unexpected_rule_ids": [],
|
||||
"all_finding_ids": [
|
||||
"taint-unsanitised-flow (source 95:12)"
|
||||
],
|
||||
"security_finding_count": 1,
|
||||
"non_security_finding_count": 0
|
||||
},
|
||||
{
|
||||
"case_id": "cve-c-2019-18634-patched",
|
||||
"file": "cve_corpus/c/CVE-2019-18634/patched.c",
|
||||
|
|
@ -10041,29 +10075,29 @@
|
|||
}
|
||||
],
|
||||
"aggregate_file_level": {
|
||||
"tp": 274,
|
||||
"tp": 275,
|
||||
"fp": 0,
|
||||
"fn_": 1,
|
||||
"tn": 287,
|
||||
"tn": 288,
|
||||
"precision": 1.0,
|
||||
"recall": 0.9963636363636363,
|
||||
"f1": 0.9981785063752276
|
||||
"recall": 0.9963768115942029,
|
||||
"f1": 0.9981851179673321
|
||||
},
|
||||
"aggregate_rule_level": {
|
||||
"tp": 274,
|
||||
"tp": 275,
|
||||
"fp": 0,
|
||||
"fn_": 1,
|
||||
"tn": 287,
|
||||
"tn": 288,
|
||||
"precision": 1.0,
|
||||
"recall": 0.9963636363636363,
|
||||
"f1": 0.9981785063752276
|
||||
"recall": 0.9963768115942029,
|
||||
"f1": 0.9981851179673321
|
||||
},
|
||||
"by_language": {
|
||||
"c": {
|
||||
"tp": 17,
|
||||
"tp": 18,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 17,
|
||||
"tn": 18,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
|
|
@ -10170,7 +10204,7 @@
|
|||
"f1": 1.0
|
||||
},
|
||||
"cmdi": {
|
||||
"tp": 58,
|
||||
"tp": 59,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 0,
|
||||
|
|
@ -10290,7 +10324,7 @@
|
|||
"tp": 0,
|
||||
"fp": 0,
|
||||
"fn_": 0,
|
||||
"tn": 284,
|
||||
"tn": 285,
|
||||
"precision": 1.0,
|
||||
"recall": 1.0,
|
||||
"f1": 1.0
|
||||
|
|
@ -10343,31 +10377,31 @@
|
|||
},
|
||||
"by_confidence": {
|
||||
">=High": {
|
||||
"tp": 85,
|
||||
"fp": 114,
|
||||
"fn_": 190,
|
||||
"tn": 173,
|
||||
"precision": 0.4271356783919598,
|
||||
"recall": 0.3090909090909091,
|
||||
"f1": 0.3586497890295359
|
||||
"tp": 81,
|
||||
"fp": 118,
|
||||
"fn_": 195,
|
||||
"tn": 170,
|
||||
"precision": 0.40703517587939697,
|
||||
"recall": 0.29347826086956524,
|
||||
"f1": 0.3410526315789474
|
||||
},
|
||||
">=Low": {
|
||||
"tp": 85,
|
||||
"fp": 142,
|
||||
"fn_": 190,
|
||||
"tn": 145,
|
||||
"precision": 0.3744493392070485,
|
||||
"recall": 0.3090909090909091,
|
||||
"f1": 0.33864541832669326
|
||||
"tp": 81,
|
||||
"fp": 147,
|
||||
"fn_": 195,
|
||||
"tn": 141,
|
||||
"precision": 0.35526315789473684,
|
||||
"recall": 0.29347826086956524,
|
||||
"f1": 0.3214285714285714
|
||||
},
|
||||
">=Medium": {
|
||||
"tp": 85,
|
||||
"fp": 133,
|
||||
"fn_": 190,
|
||||
"tn": 154,
|
||||
"precision": 0.38990825688073394,
|
||||
"recall": 0.3090909090909091,
|
||||
"f1": 0.3448275862068966
|
||||
"tp": 81,
|
||||
"fp": 139,
|
||||
"fn_": 195,
|
||||
"tn": 149,
|
||||
"precision": 0.36818181818181817,
|
||||
"recall": 0.29347826086956524,
|
||||
"f1": 0.3266129032258065
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// Phase 14 — Micronaut `@Controller`, benign.
|
||||
// Micronaut `@Controller`, benign.
|
||||
//
|
||||
// Same shape as the vuln but echoes a constant string instead of
|
||||
// concatenating the path variable into a shell command.
|
||||
|
|
|
|||
|
|
@ -1,17 +0,0 @@
|
|||
// Phase 14 fixture stub — minimal Micronaut `@Controller`.
|
||||
// Lives in `io.micronaut.http.annotation` so the fixture's
|
||||
// `import io.micronaut.http.annotation.Controller;` compiles under
|
||||
// plain javac (no Micronaut Maven dep required).
|
||||
|
||||
package io.micronaut.http.annotation;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface Controller {
|
||||
String value() default "";
|
||||
}
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
// Phase 14 fixture stub — minimal Micronaut `@Get`.
|
||||
|
||||
package io.micronaut.http.annotation;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.METHOD)
|
||||
public @interface Get {
|
||||
String value() default "";
|
||||
}
|
||||
|
|
@ -1,8 +1,9 @@
|
|||
// Phase 14 — Micronaut `@Controller`, vulnerable.
|
||||
// Micronaut `@Controller`, vulnerable.
|
||||
//
|
||||
// `@Controller("/run")` on the class + `@Get("/{id}")` on the handler
|
||||
// matches the Phase 14 [`JavaShape::MicronautRoute`]. The harness
|
||||
// invokes `show(payload)` via reflection.
|
||||
// matches `JavaShape::MicronautRoute`. The harness keeps the real
|
||||
// Micronaut annotations on the classpath and replays the route through
|
||||
// those annotations.
|
||||
|
||||
import io.micronaut.http.annotation.Controller;
|
||||
import io.micronaut.http.annotation.Get;
|
||||
|
|
|
|||
|
|
@ -14,5 +14,10 @@
|
|||
<artifactId>micronaut-http</artifactId>
|
||||
<version>4.4.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.micronaut</groupId>
|
||||
<artifactId>micronaut-core</artifactId>
|
||||
<version>4.4.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
// Phase 14 — Quarkus reactive route, benign.
|
||||
// Quarkus reactive route, benign.
|
||||
|
||||
// import io.quarkus.runtime.Quarkus;
|
||||
import io.quarkus.runtime.Quarkus;
|
||||
import jakarta.ws.rs.GET;
|
||||
import jakarta.ws.rs.Path;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
// Phase 14 fixture stub — minimal `@GET` Jakarta REST annotation.
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.METHOD)
|
||||
public @interface GET {
|
||||
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
// Phase 14 fixture stub — minimal `@Path` annotation (Jakarta REST).
|
||||
// Lives in the default package; the fixture imports the symbol as
|
||||
// plain `@Path` so javac is happy without a Quarkus / Jakarta REST
|
||||
// Maven dep.
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target({ElementType.TYPE, ElementType.METHOD})
|
||||
public @interface Path {
|
||||
String value() default "";
|
||||
}
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
// Phase 14 — Quarkus reactive route, vulnerable.
|
||||
//
|
||||
// `@Path("/run")` on the type + `@GET` on the handler matches the
|
||||
// Phase 14 [`JavaShape::detect`] for Quarkus. The harness invokes
|
||||
// `run(payload)` via reflection.
|
||||
// Quarkus reactive route, vulnerable. The harness keeps the real
|
||||
// Jakarta REST annotations on the classpath and replays the route
|
||||
// through those annotations.
|
||||
|
||||
// import io.quarkus.runtime.Quarkus;
|
||||
import io.quarkus.runtime.Quarkus;
|
||||
import jakarta.ws.rs.GET;
|
||||
import jakarta.ws.rs.Path;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
|
|
|
|||
|
|
@ -14,5 +14,10 @@
|
|||
<artifactId>quarkus-resteasy-reactive</artifactId>
|
||||
<version>3.8.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>jakarta.ws.rs</groupId>
|
||||
<artifactId>jakarta.ws.rs-api</artifactId>
|
||||
<version>3.1.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
|
|
|||
|
|
@ -767,6 +767,40 @@ mod phase14_shape_tests {
|
|||
assert_not_confirmed("quarkus_route", &r);
|
||||
}
|
||||
|
||||
// ── micronaut_route ──────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn micronaut_route_vuln_is_confirmed() {
|
||||
let Some(r) = run(
|
||||
"micronaut_route",
|
||||
"Vuln.java",
|
||||
"show",
|
||||
Cap::CODE_EXEC,
|
||||
21,
|
||||
EntryKind::HttpRoute,
|
||||
PayloadSlot::Param(0),
|
||||
) else {
|
||||
return;
|
||||
};
|
||||
assert_confirmed("micronaut_route", &r);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn micronaut_route_benign_not_confirmed() {
|
||||
let Some(r) = run(
|
||||
"micronaut_route",
|
||||
"Benign.java",
|
||||
"show",
|
||||
Cap::CODE_EXEC,
|
||||
18,
|
||||
EntryKind::HttpRoute,
|
||||
PayloadSlot::Param(0),
|
||||
) else {
|
||||
return;
|
||||
};
|
||||
assert_not_confirmed("micronaut_route", &r);
|
||||
}
|
||||
|
||||
// ── Phase 09 staging assertion (Spring transitive dep pick-up) ──────────
|
||||
|
||||
/// Verify the Phase 09 staging path identifies Spring when the
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue