//! External-service detection. //! //! Walks the post-pass-2 [`GlobalSummaries`] looking for callees that //! launch outbound network requests (HTTP, gRPC, SMTP, DNS) and emits //! one [`SurfaceNode::ExternalService`] per call. Detection is by //! callee leaf name + `sink_caps & SSRF` heuristic — both signals are //! consulted so a probe with no SSRF cap (DNS resolver, SMTP sender) //! still surfaces as an external service. use super::{ExternalService, ExternalServiceKind, SourceLocation, SurfaceNode}; use crate::labels::Cap; use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries}; struct ClientRule { leaf: &'static str, kind: ExternalServiceKind, label: &'static str, } const CLIENT_RULES: &[ClientRule] = &[ // HTTP ClientRule { leaf: "requests.get", kind: ExternalServiceKind::HttpApi, label: "requests (Python)" }, ClientRule { leaf: "requests.post", kind: ExternalServiceKind::HttpApi, label: "requests (Python)" }, ClientRule { leaf: "httpx.get", kind: ExternalServiceKind::HttpApi, label: "httpx (Python)" }, ClientRule { leaf: "httpx.post", kind: ExternalServiceKind::HttpApi, label: "httpx (Python)" }, ClientRule { leaf: "urllib.request.urlopen", kind: ExternalServiceKind::HttpApi, label: "urllib" }, ClientRule { leaf: "fetch", kind: ExternalServiceKind::HttpApi, label: "fetch (JS)" }, ClientRule { leaf: "axios.get", kind: ExternalServiceKind::HttpApi, label: "axios" }, ClientRule { leaf: "axios.post", kind: ExternalServiceKind::HttpApi, label: "axios" }, ClientRule { leaf: "http.request", kind: ExternalServiceKind::HttpApi, label: "node http" }, ClientRule { leaf: "got", kind: ExternalServiceKind::HttpApi, label: "got (JS)" }, ClientRule { leaf: "HttpClient.send", kind: ExternalServiceKind::HttpApi, label: "Java HttpClient" }, ClientRule { leaf: "HttpClient.execute", kind: ExternalServiceKind::HttpApi, label: "Java HttpClient" }, ClientRule { leaf: "RestTemplate.exchange", kind: ExternalServiceKind::HttpApi, label: "Spring RestTemplate" }, ClientRule { 
leaf: "RestTemplate.getForObject", kind: ExternalServiceKind::HttpApi, label: "Spring RestTemplate" }, ClientRule { leaf: "OkHttpClient.newCall", kind: ExternalServiceKind::HttpApi, label: "OkHttp" }, ClientRule { leaf: "http.Get", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" }, ClientRule { leaf: "http.Post", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" }, ClientRule { leaf: "http.NewRequest", kind: ExternalServiceKind::HttpApi, label: "net/http (Go)" }, ClientRule { leaf: "client.Do", kind: ExternalServiceKind::HttpApi, label: "go http client" }, ClientRule { leaf: "reqwest::get", kind: ExternalServiceKind::HttpApi, label: "reqwest (Rust)" }, ClientRule { leaf: "reqwest::Client", kind: ExternalServiceKind::HttpApi, label: "reqwest (Rust)" }, ClientRule { leaf: "Net::HTTP", kind: ExternalServiceKind::HttpApi, label: "Net::HTTP (Ruby)" }, ClientRule { leaf: "HTTParty.get", kind: ExternalServiceKind::HttpApi, label: "HTTParty" }, ClientRule { leaf: "Faraday", kind: ExternalServiceKind::HttpApi, label: "Faraday (Ruby)" }, ClientRule { leaf: "curl_exec", kind: ExternalServiceKind::HttpApi, label: "PHP curl" }, ClientRule { leaf: "file_get_contents", kind: ExternalServiceKind::HttpApi, label: "PHP file_get_contents" }, ClientRule { leaf: "Guzzle", kind: ExternalServiceKind::HttpApi, label: "Guzzle (PHP)" }, // Message brokers ClientRule { leaf: "kafka.send", kind: ExternalServiceKind::MessageBroker, label: "Kafka" }, ClientRule { leaf: "KafkaProducer.send", kind: ExternalServiceKind::MessageBroker, label: "Kafka" }, ClientRule { leaf: "rabbitmq.publish", kind: ExternalServiceKind::MessageBroker, label: "RabbitMQ" }, ClientRule { leaf: "amqp.publish", kind: ExternalServiceKind::MessageBroker, label: "AMQP" }, ClientRule { leaf: "sqs.send_message", kind: ExternalServiceKind::MessageBroker, label: "AWS SQS" }, ClientRule { leaf: "sns.publish", kind: ExternalServiceKind::MessageBroker, label: "AWS SNS" }, // Search indices ClientRule { leaf: 
"Elasticsearch", kind: ExternalServiceKind::SearchIndex, label: "Elasticsearch" }, ClientRule { leaf: "elasticsearch.search", kind: ExternalServiceKind::SearchIndex, label: "Elasticsearch" }, ClientRule { leaf: "OpenSearch", kind: ExternalServiceKind::SearchIndex, label: "OpenSearch" }, ClientRule { leaf: "Algolia", kind: ExternalServiceKind::SearchIndex, label: "Algolia" }, // Auth providers ClientRule { leaf: "auth0", kind: ExternalServiceKind::AuthProvider, label: "Auth0" }, ClientRule { leaf: "passport.authenticate", kind: ExternalServiceKind::AuthProvider, label: "Passport.js" }, ClientRule { leaf: "OAuth2Client", kind: ExternalServiceKind::AuthProvider, label: "OAuth2 client" }, ClientRule { leaf: "google.oauth2", kind: ExternalServiceKind::AuthProvider, label: "Google OAuth2" }, // SMTP ClientRule { leaf: "smtplib.SMTP", kind: ExternalServiceKind::HttpApi, label: "SMTP (Python)" }, ClientRule { leaf: "Mail::send", kind: ExternalServiceKind::HttpApi, label: "Laravel Mail" }, ClientRule { leaf: "ActionMailer", kind: ExternalServiceKind::HttpApi, label: "Rails ActionMailer" }, // DNS ClientRule { leaf: "socket.gethostbyname", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" }, ClientRule { leaf: "dns.lookup", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" }, ClientRule { leaf: "net.LookupIP", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" }, // Type-qualified — fires when the SSA type-fact engine resolves a // receiver to `TypeKind::HttpClient` regardless of the bare callee // name (`session = requests.Session(); session.get(url)` → // typed_call_receivers maps the `.get` ordinal to "HttpClient", so // the bound-receiver call surfaces as an outbound HTTP node even // though `requests.get` is the only direct-import rule above). 
ClientRule { leaf: "HttpClient.get", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "HttpClient.post", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "HttpClient.put", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "HttpClient.delete", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "HttpClient.patch", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "HttpClient.request", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "HttpClient.head", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "HttpClient.options", kind: ExternalServiceKind::HttpApi, label: "HTTP client" }, ClientRule { leaf: "RequestBuilder.send", kind: ExternalServiceKind::HttpApi, label: "HTTP request builder" }, ClientRule { leaf: "URL.openConnection", kind: ExternalServiceKind::HttpApi, label: "URL connection" }, ClientRule { leaf: "URL.openStream", kind: ExternalServiceKind::HttpApi, label: "URL connection" }, ]; /// Walk every function summary's callee list and emit one /// [`SurfaceNode::ExternalService`] per matched outbound-client call. /// /// When the bare callee name does not hit a rule, the type-fact engine's /// per-call `typed_call_receivers` map (read off the matching /// [`crate::summary::SsaFuncSummary`]) is consulted: a callee whose /// receiver was resolved to `TypeKind::HttpClient` / /// `TypeKind::RequestBuilder` / `TypeKind::Url` is retried under the /// type-qualified name `"{container}."`, picking up the /// bound-receiver call shapes (`client = requests.Session(); /// client.get(url)`) that the name-only matcher misses. 
pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec { let mut out: Vec = Vec::new(); let mut seen: std::collections::HashSet<(String, String)> = std::collections::HashSet::new(); for (key, summary) in summaries.iter() { let typed = summaries.get_ssa(key).map(|s| s.typed_call_receivers.as_slice()); for callee in &summary.callees { let rule = match_rule(&callee.name).or_else(|| { typed .and_then(|t| container_for_ordinal(t, callee.ordinal)) .and_then(|c| match_rule(&qualify(c, &callee.name))) }); let Some(rule) = rule else { continue }; let location = call_site_location(summary, Some(callee)); if !seen.insert((location.file.clone(), rule.label.to_string())) { continue; } out.push(SurfaceNode::ExternalService(ExternalService { location, kind: rule.kind, label: rule.label.to_string(), })); } } // Also surface any function whose own sink_caps include SSRF — the // function itself is an outbound network call site even if the // direct callee did not match the rule list. Use the function's // file as the location and synthesise a generic label. 
for (_key, summary) in summaries.iter() { if summary.sink_caps().contains(Cap::SSRF) { let loc = call_site_location(summary, None); let dedup = (loc.file.clone(), "Outbound HTTP".to_string()); if seen.insert(dedup) { out.push(SurfaceNode::ExternalService(ExternalService { location: loc, kind: ExternalServiceKind::HttpApi, label: "Outbound HTTP".to_string(), })); } } } out } fn leaf_segment(name: &str) -> &str { let after_colon = name.rsplit("::").next().unwrap_or(name); after_colon.rsplit('.').next().unwrap_or(after_colon) } fn qualify(container: &str, callee_name: &str) -> String { format!("{}.{}", container, leaf_segment(callee_name)) } fn container_for_ordinal(typed: &[(u32, String)], ordinal: u32) -> Option<&str> { typed.iter().find(|(o, _)| *o == ordinal).map(|(_, c)| c.as_str()) } fn match_rule(callee: &str) -> Option<&'static ClientRule> { let cl = callee.trim().to_ascii_lowercase(); let cl_segments = cl.replace("::", "."); CLIENT_RULES.iter().find(|r| { let rl = r.leaf.to_ascii_lowercase(); if r.leaf.contains('.') || r.leaf.contains("::") { // Qualified pattern: substring on full callee text. cl.contains(&rl) } else { // Bare leaf: whole-segment match only. Stops `prefetch` from // matching `fetch`, `Faraday` substrings, etc. cl_segments.split('.').any(|seg| seg == rl) } }) } /// Source location of an external-service call site. Reads the 1-based /// `(line, col)` recorded on the [`CalleeSite`] at CFG-build time when /// available; otherwise (sink-cap–only fallback path, or legacy summaries /// loaded from SQLite) returns the function's host file with line 0. 
fn call_site_location(summary: &FuncSummary, callee: Option<&CalleeSite>) -> SourceLocation {
    // No callee, or a callee without a recorded span, falls back to (0, 0).
    let span = callee.and_then(|site| site.span);
    let (line, col) = span.unwrap_or((0, 0));
    let file = summary.file_path.clone();
    SourceLocation { file, line, col }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::summary::CalleeSite;
    use crate::symbol::{FuncKey, Lang};

    /// A direct `requests.get` callee hits the name-only rule table.
    #[test]
    fn detects_requests_get() {
        let func = FuncSummary {
            name: String::from("fetch_user"),
            file_path: String::from("client.py"),
            lang: String::from("python"),
            param_count: 0,
            callees: vec![CalleeSite::bare(String::from("requests.get"))],
            ..Default::default()
        };
        let func_key = FuncKey::new_function(Lang::Python, "client.py", "fetch_user", None);

        let mut summaries = GlobalSummaries::new();
        summaries.insert(func_key, func);

        let nodes = detect_external_services(&summaries);
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::ExternalService(es) = &nodes[0] else {
            panic!()
        };
        assert_eq!(es.label, "requests (Python)");
    }

    /// Bare rules only match whole segments, so names that merely embed a
    /// rule's text produce no nodes.
    #[test]
    fn bare_fetch_rule_does_not_match_prefetch_or_cachekey() {
        let lookalikes = vec![
            CalleeSite::bare(String::from("prefetch")),
            CalleeSite::bare(String::from("cacheKeyFetch")),
            CalleeSite::bare(String::from("Faraday_token")),
        ];
        let func = FuncSummary {
            name: String::from("load"),
            file_path: String::from("client.js"),
            lang: String::from("javascript"),
            param_count: 0,
            callees: lookalikes,
            ..Default::default()
        };
        let func_key = FuncKey::new_function(Lang::JavaScript, "client.js", "load", None);

        let mut summaries = GlobalSummaries::new();
        summaries.insert(func_key, func);

        let nodes = detect_external_services(&summaries);
        assert!(nodes.is_empty(), "bare rules FP-matched on {nodes:?}");
    }

    #[test]
    fn typed_receiver_http_client_resolves_bound_session_get() {
        // `client = requests.Session(); client.get(url)` — `client.get` has
        // no entry in CLIENT_RULES, but the SSA summary records the receiver
        // of call ordinal 3 as "HttpClient". The detector therefore retries
        // under `HttpClient.get` and emits an HTTP external-service node.
        use crate::summary::ssa_summary::SsaFuncSummary;

        let mut bound_call = CalleeSite::bare("client.get");
        bound_call.ordinal = 3;
        bound_call.span = Some((9, 5));

        let func = FuncSummary {
            name: "fetch".into(),
            file_path: "client.py".into(),
            lang: "python".into(),
            param_count: 0,
            callees: vec![bound_call],
            ..Default::default()
        };
        let func_key = FuncKey::new_function(Lang::Python, "client.py", "fetch", None);

        let mut summaries = GlobalSummaries::new();
        summaries.insert(func_key.clone(), func);

        let mut ssa = SsaFuncSummary::default();
        ssa.typed_call_receivers.push((3, "HttpClient".into()));
        summaries.insert_ssa(func_key, ssa);

        let nodes = detect_external_services(&summaries);
        assert_eq!(nodes.len(), 1, "expected typed retry to hit; got {nodes:?}");
        let SurfaceNode::ExternalService(es) = &nodes[0] else {
            panic!()
        };
        assert_eq!(es.label, "HTTP client");
    }

    /// The bare `got` rule matches when `got` is one whole segment of a
    /// dotted callee name.
    #[test]
    fn bare_got_rule_matches_segmented_callee() {
        let func = FuncSummary {
            name: String::from("load"),
            file_path: String::from("client.js"),
            lang: String::from("javascript"),
            param_count: 0,
            callees: vec![CalleeSite::bare(String::from("got.post"))],
            ..Default::default()
        };
        let func_key = FuncKey::new_function(Lang::JavaScript, "client.js", "load", None);

        let mut summaries = GlobalSummaries::new();
        summaries.insert(func_key, func);

        let nodes = detect_external_services(&summaries);
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::ExternalService(es) = &nodes[0] else {
            panic!()
        };
        assert_eq!(es.label, "got (JS)");
    }
}