From 727bbbde7e876e165e21c1b3cded14d612bbfd16 Mon Sep 17 00:00:00 2001 From: pitboss Date: Fri, 22 May 2026 17:17:50 -0500 Subject: [PATCH] [pitboss/grind] deferred session-0014 (20260522T163126Z-7d60) --- src/dynamic/framework/adapters/crypto_java.rs | 184 ++++++++++++++++ src/dynamic/framework/adapters/crypto_js.rs | 188 ++++++++++++++++ .../framework/adapters/crypto_python.rs | 202 ++++++++++++++++++ .../framework/adapters/data_exfil_go.rs | 170 +++++++++++++++ .../framework/adapters/data_exfil_js.rs | 192 +++++++++++++++++ .../framework/adapters/data_exfil_python.rs | 194 +++++++++++++++++ src/dynamic/framework/adapters/mod.rs | 12 ++ src/dynamic/framework/mod.rs | 24 ++- src/dynamic/framework/registry.rs | 6 + 9 files changed, 1164 insertions(+), 8 deletions(-) create mode 100644 src/dynamic/framework/adapters/crypto_java.rs create mode 100644 src/dynamic/framework/adapters/crypto_js.rs create mode 100644 src/dynamic/framework/adapters/crypto_python.rs create mode 100644 src/dynamic/framework/adapters/data_exfil_go.rs create mode 100644 src/dynamic/framework/adapters/data_exfil_js.rs create mode 100644 src/dynamic/framework/adapters/data_exfil_python.rs diff --git a/src/dynamic/framework/adapters/crypto_java.rs b/src/dynamic/framework/adapters/crypto_java.rs new file mode 100644 index 00000000..0bb53d73 --- /dev/null +++ b/src/dynamic/framework/adapters/crypto_java.rs @@ -0,0 +1,184 @@ +//! Java [`super::super::FrameworkAdapter`] matching weak-crypto +//! sink constructions (`java.util.Random.nextBytes`, +//! `MessageDigest.getInstance("MD5"|"SHA-1")`, +//! `Cipher.getInstance("DES"|"RC4"|"AES/ECB")`, +//! `KeyGenerator.getInstance("DES")`). +//! +//! Phase 11 (Track L.9). Fires when the function body invokes one +//! of the canonical Java weak-crypto entry points and the +//! surrounding source imports the matching `java.util.Random` / +//! `java.security.*` / `javax.crypto.*` module. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct CryptoJavaAdapter; + +const ADAPTER_NAME: &str = "crypto-java"; + +fn callee_is_weak_crypto(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "nextBytes" | "nextInt" | "nextLong" | "nextFloat" | "nextDouble" | "getInstance" + ) || matches!( + name, + "java.util.Random.nextBytes" + | "Random.nextBytes" + | "MessageDigest.getInstance" + | "Cipher.getInstance" + | "KeyGenerator.getInstance" + | "Mac.getInstance" + ) +} + +fn source_imports_java_crypto(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"java.util.Random", + b"java.security.MessageDigest", + b"javax.crypto.Cipher", + b"javax.crypto.KeyGenerator", + b"javax.crypto.Mac", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +/// Returns `true` when the surrounding source visibly routes the +/// crypto call through a hardened path (`SecureRandom`, +/// `MessageDigest.getInstance("SHA-256")` or stronger, +/// `Cipher.getInstance("AES/GCM/...")`). +fn source_routed_through_strong_path(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"java.security.SecureRandom", + b"SecureRandom.getInstanceStrong", + b"new SecureRandom", + b"\"SHA-256\"", + b"\"SHA-384\"", + b"\"SHA-512\"", + b"\"SHA3-256\"", + b"\"AES/GCM/", + b"\"AES/CBC/PKCS5Padding\"", + b"\"ChaCha20-Poly1305\"", + b"\"HmacSHA256\"", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for CryptoJavaAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Java + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + if source_routed_through_strong_path(file_bytes) { + return None; + } + let matches_call = super::any_callee_matches(summary, callee_is_weak_crypto); + let matches_source = source_imports_java_crypto(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_java(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_util_random_nextbytes() { + let src: &[u8] = b"import java.util.Random;\n\ + public class Vuln {\n public static byte[] run(String v) {\n Random r = new Random(0L);\n byte[] key = new byte[2];\n r.nextBytes(key);\n return key;\n }\n}\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("nextBytes")], + ..Default::default() + }; + assert!( + CryptoJavaAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn fires_on_message_digest_md5() { + let src: &[u8] = b"import java.security.MessageDigest;\n\ + public class Vuln {\n public static byte[] sign(byte[] v) throws Exception {\n MessageDigest md = MessageDigest.getInstance(\"MD5\");\n return md.digest(v);\n }\n}\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "sign".into(), + callees: vec![crate::summary::CalleeSite::bare("MessageDigest.getInstance")], + ..Default::default() + }; + assert!( + CryptoJavaAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn skips_when_source_routes_through_secure_random() { + let src: &[u8] = b"import java.util.Random;\nimport java.security.SecureRandom;\n\ + public class Vuln {\n public static byte[] run(String v) {\n if (v.contains(\"STRONG\")) { byte[] k = new byte[32]; new SecureRandom().nextBytes(k); return k; }\n Random r = new Random(0L);\n byte[] k = new byte[2];\n r.nextBytes(k);\n return k;\n }\n}\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("nextBytes")], + ..Default::default() + }; + assert!( + CryptoJavaAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn skips_plain_method() { + let src: &[u8] = b"public class Plain { public static int add(int a, int b) { return a + b; } }\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!( + CryptoJavaAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } +} diff --git a/src/dynamic/framework/adapters/crypto_js.rs b/src/dynamic/framework/adapters/crypto_js.rs new file mode 100644 index 00000000..ef8eafe6 --- /dev/null +++ b/src/dynamic/framework/adapters/crypto_js.rs @@ -0,0 +1,188 @@ +//! JavaScript [`super::super::FrameworkAdapter`] matching weak-crypto +//! sink constructions (`Math.random` for key material, +//! `crypto.createHash('md5'|'sha1')`, `crypto.createCipheriv('des'|'rc4')`). +//! +//! Phase 11 (Track L.9). Fires when the function body invokes one +//! of the canonical Node weak-crypto entry points and the +//! surrounding source imports the matching `crypto` module (or uses +//! `Math.random` for key material). + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct CryptoJsAdapter; + +const ADAPTER_NAME: &str = "crypto-js"; + +fn callee_is_weak_crypto(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "random" | "createHash" | "createCipheriv" | "createCipher" | "pseudoRandomBytes" + ) || matches!( + name, + "Math.random" + | "crypto.createHash" + | "crypto.createCipher" + | "crypto.createCipheriv" + | "crypto.pseudoRandomBytes" + ) +} + +fn source_imports_js_crypto(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"require('crypto')", + b"require(\"crypto\")", + b"from 'crypto'", + b"from \"crypto\"", + b"import crypto", + b"Math.random(", + b"createHash('md5'", + b"createHash(\"md5\"", + b"createHash('sha1'", + b"createHash(\"sha1\"", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +/// Returns `true` when the surrounding source visibly routes the +/// crypto call through a hardened path +/// (`crypto.randomBytes` / `crypto.randomUUID` / +/// `createHash('sha256'+)`, `createCipheriv('aes-256-gcm')`). +fn source_routed_through_strong_path(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"crypto.randomBytes", + b"crypto.randomUUID", + b"crypto.randomInt", + b"crypto.webcrypto.getRandomValues", + b"createHash('sha256'", + b"createHash(\"sha256\"", + b"createHash('sha384'", + b"createHash(\"sha384\"", + b"createHash('sha512'", + b"createHash(\"sha512\"", + b"createCipheriv('aes-256-gcm'", + b"createCipheriv(\"aes-256-gcm\"", + b"createCipheriv('chacha20-poly1305'", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for CryptoJsAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::JavaScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + if source_routed_through_strong_path(file_bytes) { + return None; + } + let matches_call = super::any_callee_matches(summary, callee_is_weak_crypto); + let matches_source = source_imports_js_crypto(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_js(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_math_random_key() { + let src: &[u8] = b"function run(value) { return Math.random(); }\nmodule.exports = { run };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("Math.random")], + ..Default::default() + }; + assert!( + CryptoJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn fires_on_create_hash_md5() { + let src: &[u8] = b"const crypto = require('crypto');\nfunction sign(value) { return crypto.createHash('md5').update(value).digest('hex'); }\nmodule.exports = { sign };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "sign".into(), + callees: vec![crate::summary::CalleeSite::bare("crypto.createHash")], + ..Default::default() + }; + assert!( + CryptoJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn skips_when_source_routes_through_random_bytes() { + let src: &[u8] = b"const crypto = require('crypto');\nfunction run(value) { if (value === 'STRONG') return crypto.randomBytes(32); return Math.random(); }\nmodule.exports = { run };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![ + crate::summary::CalleeSite::bare("Math.random"), + crate::summary::CalleeSite::bare("crypto.randomBytes"), + ], + ..Default::default() + }; + assert!( + CryptoJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"function add(a, b) { return a + b; }\nmodule.exports = { add };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!( + CryptoJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } +} diff --git a/src/dynamic/framework/adapters/crypto_python.rs b/src/dynamic/framework/adapters/crypto_python.rs new file mode 100644 index 00000000..f1f99e7e --- /dev/null +++ b/src/dynamic/framework/adapters/crypto_python.rs @@ -0,0 +1,202 @@ +//! Python [`super::super::FrameworkAdapter`] matching weak-crypto +//! sink constructions (`random.randint` / `random.random` for key +//! material, `hashlib.md5` / `hashlib.sha1` used without +//! `usedforsecurity=False`). +//! +//! Phase 11 (Track L.9). Fires when the function body invokes one +//! of the canonical Python weak-crypto entry points and the +//! surrounding source imports the matching stdlib module. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct CryptoPythonAdapter; + +const ADAPTER_NAME: &str = "crypto-python"; + +fn callee_is_weak_crypto(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "randint" | "random" | "uniform" | "choice" | "seed" | "md5" | "sha1" | "new" + ) || matches!( + name, + "random.randint" + | "random.random" + | "random.uniform" + | "random.choice" + | "random.seed" + | "hashlib.md5" + | "hashlib.sha1" + | "Crypto.Hash.MD5.new" + | "Crypto.Hash.SHA1.new" + ) +} + +fn source_imports_python_crypto(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"import random", + b"from random ", + b"import hashlib", + b"from hashlib ", + b"from Crypto.Hash", + b"from Cryptodome.Hash", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +/// Returns `true` when the surrounding source visibly routes the +/// crypto call through a CSPRNG / hardened path (`secrets.*`, +/// `os.urandom`, or hashlib called with `usedforsecurity=False`). +fn source_routed_through_csprng(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"secrets.token_bytes", + b"secrets.token_hex", + b"secrets.token_urlsafe", + b"secrets.randbits", + b"secrets.choice", + b"secrets.SystemRandom", + b"os.urandom(", + b"usedforsecurity=False", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for CryptoPythonAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Python + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + if source_routed_through_csprng(file_bytes) { + return None; + } + let matches_call = super::any_callee_matches(summary, callee_is_weak_crypto); + let matches_source = source_imports_python_crypto(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_python(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_random_randint() { + let src: &[u8] = b"import random\n\ + def run(value):\n return random.randint(0, 0xFFFF)\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("random.randint")], + ..Default::default() + }; + assert!( + CryptoPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn fires_on_hashlib_md5() { + let src: &[u8] = b"import hashlib\n\ + def sign(value):\n return hashlib.md5(value).hexdigest()\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "sign".into(), + callees: vec![crate::summary::CalleeSite::bare("hashlib.md5")], + ..Default::default() + }; + assert!( + CryptoPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn skips_when_source_routes_through_secrets() { + let src: &[u8] = b"import random\nimport secrets\n\ + def run(value):\n if 'STRONG' in value:\n return secrets.token_bytes(32)\n return random.randint(0, 0xFFFF)\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![ + crate::summary::CalleeSite::bare("random.randint"), + crate::summary::CalleeSite::bare("secrets.token_bytes"), + ], + ..Default::default() + }; + assert!( + CryptoPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn skips_when_md5_used_for_non_security() { + let src: &[u8] = b"import hashlib\n\ + def cache_key(value):\n return hashlib.md5(value, usedforsecurity=False).hexdigest()\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "cache_key".into(), + callees: vec![crate::summary::CalleeSite::bare("hashlib.md5")], + ..Default::default() + }; + assert!( + CryptoPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"def add(a, b):\n return a + b\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!( + CryptoPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } +} diff --git a/src/dynamic/framework/adapters/data_exfil_go.rs b/src/dynamic/framework/adapters/data_exfil_go.rs new file mode 100644 index 00000000..e5564f3f --- /dev/null +++ b/src/dynamic/framework/adapters/data_exfil_go.rs @@ -0,0 +1,170 @@ +//! Go [`super::super::FrameworkAdapter`] matching outbound-HTTP +//! sink constructions (`http.Get`, `http.Post`, `http.NewRequest`, +//! `http.DefaultClient.Do`). +//! +//! Phase 11 (Track L.9). Fires when the function body invokes one +//! of the canonical Go HTTP-client entry points and the +//! surrounding source imports `net/http`. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct DataExfilGoAdapter; + +const ADAPTER_NAME: &str = "data-exfil-go"; + +fn callee_is_outbound_http(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "Get" | "Post" | "PostForm" | "Head" | "Do" | "NewRequest" | "NewRequestWithContext" + ) || matches!( + name, + "http.Get" + | "http.Post" + | "http.PostForm" + | "http.Head" + | "http.NewRequest" + | "http.NewRequestWithContext" + | "http.DefaultClient.Do" + | "http.Client.Do" + ) +} + +fn source_imports_go_http_client(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"\"net/http\"", + b"net/http\"", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +/// Returns `true` when the surrounding source visibly routes the +/// outbound URL through a host-allowlist / network-policy gate. +fn host_routed_through_allowlist(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"ALLOWLIST", + b"allowlist", + b"AllowedHosts", + b"allowedHosts", + b"\"127.0.0.1\"", + b"\"localhost\"", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for DataExfilGoAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Go + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + if host_routed_through_allowlist(file_bytes) { + return None; + } + let matches_call = super::any_callee_matches(summary, callee_is_outbound_http); + let matches_source = source_imports_go_http_client(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_go(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_http_get() { + let src: &[u8] = b"package vuln\nimport \"net/http\"\nfunc Run(host string) {\n http.Get(\"http://\" + host + \"/exfil\")\n}\n"; + let tree = parse_go(src); + let summary = FuncSummary { + name: "Run".into(), + callees: vec![crate::summary::CalleeSite::bare("http.Get")], + ..Default::default() + }; + assert!( + DataExfilGoAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn fires_on_http_post() { + let src: &[u8] = b"package vuln\nimport (\n \"net/http\"\n \"strings\"\n)\nfunc Run(host string) {\n http.Post(\"http://\" + host + \"/exfil\", \"application/json\", strings.NewReader(\"{}\"))\n}\n"; + let tree = parse_go(src); + let summary = FuncSummary { + name: "Run".into(), + callees: vec![crate::summary::CalleeSite::bare("http.Post")], + ..Default::default() + }; + assert!( + DataExfilGoAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn skips_when_host_in_allowlist_literal() { + let src: &[u8] = b"package vuln\nimport \"net/http\"\nfunc Run(host string) {\n if host != \"127.0.0.1\" { return }\n http.Get(\"http://\" + host + \"/exfil\")\n}\n"; + let tree = parse_go(src); + let summary = FuncSummary { + name: "Run".into(), + callees: vec![crate::summary::CalleeSite::bare("http.Get")], + ..Default::default() + }; + assert!( + DataExfilGoAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"package vuln\nfunc Add(a, b int) int { return a + b }\n"; + let tree = parse_go(src); + let summary = FuncSummary { + name: "Add".into(), + ..Default::default() + }; + assert!( + DataExfilGoAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } +} diff --git a/src/dynamic/framework/adapters/data_exfil_js.rs b/src/dynamic/framework/adapters/data_exfil_js.rs new file mode 100644 index 00000000..48157c60 --- /dev/null +++ b/src/dynamic/framework/adapters/data_exfil_js.rs @@ -0,0 +1,192 @@ +//! JavaScript [`super::super::FrameworkAdapter`] matching outbound-HTTP +//! sink constructions (`http.request`, `https.request`, `fetch`, +//! `axios.{get,post,put}`, `node-fetch`). +//! +//! Phase 11 (Track L.9). Fires when the function body invokes one +//! of the canonical Node HTTP-client entry points and the +//! surrounding source imports the matching client module (or uses +//! the global `fetch` API). + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct DataExfilJsAdapter; + +const ADAPTER_NAME: &str = "data-exfil-js"; + +fn callee_is_outbound_http(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "request" | "get" | "post" | "put" | "patch" | "delete" | "fetch" | "send" + ) || matches!( + name, + "http.request" + | "https.request" + | "http.get" + | "https.get" + | "axios.get" + | "axios.post" + | "axios.put" + | "axios.patch" + | "axios.delete" + | "axios.request" + | "fetch" + ) +} + +fn source_imports_js_http_client(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"require('http')", + b"require(\"http\")", + b"require('https')", + b"require(\"https\")", + b"require('axios')", + b"require(\"axios\")", + b"require('node-fetch')", + b"require(\"node-fetch\")", + b"from 'axios'", + b"from \"axios\"", + b"from 'node-fetch'", + b"from \"node-fetch\"", + b"from 'http'", + b"from \"http\"", + b"from 'https'", + b"from \"https\"", + b"fetch(", + b"globalThis.fetch", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +/// Returns `true` when the surrounding source visibly routes the +/// outbound URL through a host-allowlist / network-policy gate. +fn host_routed_through_allowlist(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"ALLOWLIST", + b"allowlist", + b"ALLOWED_HOSTS", + b"allowedHosts", + b"['127.0.0.1'", + b"[\"127.0.0.1\"", + b"Set(['127.0.0.1'", + b"Set([\"127.0.0.1\"", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for DataExfilJsAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::JavaScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + if host_routed_through_allowlist(file_bytes) { + return None; + } + let matches_call = super::any_callee_matches(summary, callee_is_outbound_http); + let matches_source = source_imports_js_http_client(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_js(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_http_request() { + let src: &[u8] = b"const http = require('http');\nfunction run(host) { const req = http.request({ host, path: '/exfil', method: 'POST' }); req.end(); }\nmodule.exports = { run };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("http.request")], + ..Default::default() + }; + assert!( + DataExfilJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn fires_on_axios_post() { + let src: &[u8] = b"const axios = require('axios');\nasync function run(host) { await axios.post(`http://${host}/exfil`, { token: 'x' }); }\nmodule.exports = { run };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("axios.post")], + ..Default::default() + }; + assert!( + DataExfilJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn skips_when_host_routed_through_allowlist() { + let src: &[u8] = b"const http = require('http');\nconst ALLOWLIST = new Set(['127.0.0.1', 'localhost']);\nfunction run(host) { if (!ALLOWLIST.has(host)) return; const req = http.request({ host, path: '/exfil' }); req.end(); }\nmodule.exports = { run };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("http.request")], + ..Default::default() + }; + assert!( + DataExfilJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"function add(a, b) { return a + b; }\nmodule.exports = { add };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!( + DataExfilJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } +} diff --git a/src/dynamic/framework/adapters/data_exfil_python.rs b/src/dynamic/framework/adapters/data_exfil_python.rs new file mode 100644 index 00000000..25b69f11 --- /dev/null +++ b/src/dynamic/framework/adapters/data_exfil_python.rs @@ -0,0 +1,194 @@ +//! Python [`super::super::FrameworkAdapter`] matching outbound-HTTP +//! sink constructions (`urllib.request.urlopen`, `requests.{get,post,put}`, +//! `httpx.{get,post}`, `aiohttp.ClientSession.post`). +//! +//! Phase 11 (Track L.9). Fires when the function body invokes one +//! of the canonical Python HTTP-client entry points and the +//! surrounding source imports the matching client module. + +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct DataExfilPythonAdapter; + +const ADAPTER_NAME: &str = "data-exfil-python"; + +fn callee_is_outbound_http(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "urlopen" + | "get" + | "post" + | "put" + | "patch" + | "delete" + | "request" + | "Request" + | "send" + ) || matches!( + name, + "urllib.request.urlopen" + | "requests.get" + | "requests.post" + | "requests.put" + | "requests.patch" + | "requests.delete" + | "requests.request" + | "httpx.get" + | "httpx.post" + | "httpx.AsyncClient.post" + | "aiohttp.ClientSession.post" + ) +} + +fn source_imports_python_http_client(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"import urllib.request", + b"from urllib.request", + b"import requests", + b"from requests", + b"import httpx", + b"from httpx", + b"import aiohttp", + b"from aiohttp", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +/// Returns `true` when the surrounding source visibly routes the +/// outbound URL through a host-allowlist / network-policy gate. +fn host_routed_through_allowlist(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"ALLOWLIST", + b"allowlist", + b"ALLOWED_HOSTS", + b"allowed_hosts", + b"in {'127.0.0.1'", + b"in (\"127.0.0.1\"", + b"in {\"127.0.0.1\"", + b"if host == 'localhost'", + b"netloc in ", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for DataExfilPythonAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Python + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + if host_routed_through_allowlist(file_bytes) { + return None; + } + let matches_call = super::any_callee_matches(summary, callee_is_outbound_http); + let matches_source = source_imports_python_http_client(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_python(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_urlopen() { + let src: &[u8] = b"import urllib.request\n\ + def run(host):\n urllib.request.urlopen(f\"http://{host}/exfil\")\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("urllib.request.urlopen")], + ..Default::default() + }; + assert!( + DataExfilPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn fires_on_requests_post() { + let src: &[u8] = b"import requests\n\ + def run(host):\n requests.post(f\"http://{host}/exfil\", data={'token': 'x'})\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("requests.post")], + ..Default::default() + }; + assert!( + DataExfilPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_some() + ); + } + + #[test] + fn skips_when_host_routed_through_allowlist() { + let src: &[u8] = b"import requests\n\ + ALLOWLIST = {'127.0.0.1', 'localhost'}\n\ + def run(host):\n if host not in ALLOWLIST:\n return\n requests.post(f\"http://{host}/exfil\")\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("requests.post")], + ..Default::default() + }; + assert!( + DataExfilPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"def add(a, b):\n return a + b\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!( + DataExfilPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none() + ); + } +} diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index bca42eda..249a09e9 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -11,6 +11,12 @@ //! the route / framework adapters; the per-cap sink adapters live //! here so the per-language verticals can ship independently. +pub mod crypto_java; +pub mod crypto_js; +pub mod crypto_python; +pub mod data_exfil_go; +pub mod data_exfil_js; +pub mod data_exfil_python; pub mod go_chi; pub mod go_echo; pub mod go_fiber; @@ -122,6 +128,12 @@ pub mod xxe_php; pub mod xxe_python; pub mod xxe_ruby; +pub use crypto_java::CryptoJavaAdapter; +pub use crypto_js::CryptoJsAdapter; +pub use crypto_python::CryptoPythonAdapter; +pub use data_exfil_go::DataExfilGoAdapter; +pub use data_exfil_js::DataExfilJsAdapter; +pub use data_exfil_python::DataExfilPythonAdapter; pub use go_chi::GoChiAdapter; pub use go_echo::GoEchoAdapter; pub use go_fiber::GoFiberAdapter; diff --git a/src/dynamic/framework/mod.rs b/src/dynamic/framework/mod.rs index cf2cd3da..6b8fe1cf 100644 --- a/src/dynamic/framework/mod.rs +++ b/src/dynamic/framework/mod.rs @@ -279,11 +279,19 @@ mod tests { // Go: +1 (GraphqlGqlgen) 9 → 10 // Rust: +1 (GraphqlJuniper) 6 → 7 // TypeScript / C / Cpp stay unchanged. + // + // Track L.9 starter slice (Phase 11 follow-up): adds per-cap + // adapters for `Cap::CRYPTO` (Python / Java / JavaScript) + // and `Cap::DATA_EXFIL` (Python / JavaScript / Go). + // Java: +1 (CryptoJava) 18 → 19 + // Python: +2 (CryptoPython, DataExfilPython) 22 → 24 + // JavaScript: +2 (CryptoJs, DataExfilJs) 20 → 22 + // Go: +1 (DataExfilGo) 11 → 12 let java_registered = registry::adapters_for(Lang::Java); assert_eq!( java_registered.len(), - 18, - "Java must have Phase 20 baseline (14) + M.3 Quartz/Spring-middleware (2) + Flyway (1) + Liquibase (1)", + 19, + "Java must have Phase 21 baseline (18) + Track L.9 CryptoJava (1)", ); for adapter in java_registered { assert_eq!(adapter.lang(), Lang::Java); @@ -300,8 +308,8 @@ mod tests { let python_registered = registry::adapters_for(Lang::Python); assert_eq!( python_registered.len(), - 22, - "Python must have Phase 20 baseline (15) + M.3 Phase-21 (7)", + 24, + "Python must have Phase 21 baseline (22) + Track L.9 (CryptoPython, DataExfilPython)", ); for adapter in python_registered { assert_eq!(adapter.lang(), Lang::Python); @@ -318,8 +326,8 @@ mod tests { let js_registered = registry::adapters_for(Lang::JavaScript); assert_eq!( js_registered.len(), - 20, - "JavaScript must have Phase 20 baseline (12) + M.3 Phase-21 (7) + Knex (1)", + 22, + "JavaScript must have Phase 21 baseline (20) + Track L.9 (CryptoJs, DataExfilJs)", ); for adapter in js_registered { assert_eq!(adapter.lang(), Lang::JavaScript); @@ -336,8 +344,8 @@ mod tests { let go_registered = registry::adapters_for(Lang::Go); assert_eq!( go_registered.len(), - 11, - "Go must have Phase 20 baseline (9) + M.3 gqlgen (1) + golang-migrate (1)", + 12, + "Go must have Phase 21 baseline (11) + Track L.9 DataExfilGo (1)", ); for adapter in go_registered { assert_eq!(adapter.lang(), Lang::Go); diff --git a/src/dynamic/framework/registry.rs b/src/dynamic/framework/registry.rs index f6238693..4455bd31 100644 --- a/src/dynamic/framework/registry.rs +++ b/src/dynamic/framework/registry.rs @@ -57,6 +57,7 @@ static RUST: &[&dyn FrameworkAdapter] = &[ static C: &[&dyn FrameworkAdapter] = &[]; static CPP: &[&dyn FrameworkAdapter] = &[]; static JAVA: &[&dyn FrameworkAdapter] = &[ + &super::adapters::CryptoJavaAdapter, &super::adapters::HeaderJavaAdapter, &super::adapters::JavaDeserializeAdapter, &super::adapters::JavaMicronautAdapter, @@ -77,6 +78,7 @@ static JAVA: &[&dyn FrameworkAdapter] = &[ &super::adapters::XxeJavaAdapter, ]; static GO: &[&dyn FrameworkAdapter] = &[ + &super::adapters::DataExfilGoAdapter, &super::adapters::GoChiAdapter, &super::adapters::GoEchoAdapter, &super::adapters::GoFiberAdapter, @@ -104,6 +106,8 @@ static PHP: &[&dyn FrameworkAdapter] = &[ &super::adapters::XxePhpAdapter, ]; static PYTHON: &[&dyn FrameworkAdapter] = &[ + &super::adapters::CryptoPythonAdapter, + &super::adapters::DataExfilPythonAdapter, &super::adapters::GraphqlGrapheneAdapter, &super::adapters::HeaderPythonAdapter, &super::adapters::KafkaPythonAdapter, @@ -148,6 +152,8 @@ static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[ &super::adapters::TsNestAdapter, ]; static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[ + &super::adapters::CryptoJsAdapter, + &super::adapters::DataExfilJsAdapter, &super::adapters::GraphqlApolloAdapter, &super::adapters::GraphqlRelayAdapter, &super::adapters::HeaderJsAdapter,