diff --git a/src/surface/datastore.rs b/src/surface/datastore.rs index b06f748b..7675db4b 100644 --- a/src/surface/datastore.rs +++ b/src/surface/datastore.rs @@ -129,17 +129,23 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec { } fn match_rule(callee: &str) -> Option<&'static DriverRule> { - let trimmed = callee.trim(); - let leaf = trimmed.rsplit("::").next().unwrap_or(trimmed); - let leaf = leaf.rsplit('.').next().unwrap_or(leaf); - DRIVER_RULES - .iter() - .find(|r| { - // Match either the full callee text or its leaf segment - // against each rule's leaf, case-insensitive. - trimmed.to_ascii_lowercase().contains(&r.leaf.to_ascii_lowercase()) - || leaf.eq_ignore_ascii_case(r.leaf) - }) + let cl = callee.trim().to_ascii_lowercase(); + // Normalize `::` → `.` so segment-split treats both as separators. + let cl_segments = cl.replace("::", "."); + DRIVER_RULES.iter().find(|r| { + let rl = r.leaf.to_ascii_lowercase(); + if r.leaf.contains('.') || r.leaf.contains("::") { + // Qualified pattern (e.g. `psycopg2.connect`, `Eloquent::find`): + // substring on the full callee text. Qualified shapes are + // unambiguous so substring is precise enough. + cl.contains(&rl) + } else { + // Bare leaf (e.g. `open`, `fetch`, `PrismaClient`): require a + // whole-segment match. Prevents `fopen` / `OpenSearch` / + // `getPrismaClient` from FP-matching short bare leaves. + cl_segments.split('.').any(|seg| seg == rl) + } + }) } /// Best-effort source location for a call site. We only have file + @@ -215,4 +221,45 @@ mod tests { let nodes = detect_data_stores(&gs); assert_eq!(nodes.len(), 1); } + + #[test] + fn bare_open_rule_does_not_match_fopen_or_opensearch() { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_callees( + "init", + "app.py", + &[ + "fopen", + "popen", + "OpenSearch", + "openssl_encrypt", + "MongoClient.openSession", + ], + ); + gs.insert(k, s); + let nodes = detect_data_stores(&gs); + assert!( + nodes.is_empty(), + "bare `open` rule should not FP on {nodes:?}", + ); + } + + #[test] + fn bare_open_rule_still_matches_real_open() { + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_callees("loader", "app.py", &["open"]); + gs.insert(k, s); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::DataStore(ds) = &nodes[0] else { + panic!() + }; + assert_eq!(ds.kind, DataStoreKind::Filesystem); + + let mut gs = GlobalSummaries::new(); + let (k, s) = summary_with_callees("loader", "app.py", &["builtins.open"]); + gs.insert(k, s); + let nodes = detect_data_stores(&gs); + assert_eq!(nodes.len(), 1); + } } diff --git a/src/surface/external.rs b/src/surface/external.rs index b619f180..6700c108 100644 --- a/src/surface/external.rs +++ b/src/surface/external.rs @@ -119,12 +119,18 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec } fn match_rule(callee: &str) -> Option<&'static ClientRule> { - let trimmed = callee.trim(); - let leaf = trimmed.rsplit("::").next().unwrap_or(trimmed); - let leaf = leaf.rsplit('.').next().unwrap_or(leaf); + let cl = callee.trim().to_ascii_lowercase(); + let cl_segments = cl.replace("::", "."); CLIENT_RULES.iter().find(|r| { - trimmed.to_ascii_lowercase().contains(&r.leaf.to_ascii_lowercase()) - || leaf.eq_ignore_ascii_case(r.leaf) + let rl = r.leaf.to_ascii_lowercase(); + if r.leaf.contains('.') || r.leaf.contains("::") { + // Qualified pattern: substring on full callee text. + cl.contains(&rl) + } else { + // Bare leaf: whole-segment match only. Stops `prefetch` from + // matching `fetch`, `Faraday` substrings, etc. + cl_segments.split('.').any(|seg| seg == rl) + } }) } @@ -162,4 +168,46 @@ mod tests { }; assert_eq!(es.label, "requests (Python)"); } + + #[test] + fn bare_fetch_rule_does_not_match_prefetch_or_cachekey() { + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::JavaScript, "client.js", "load", None); + let summary = FuncSummary { + name: "load".to_string(), + file_path: "client.js".to_string(), + lang: "javascript".to_string(), + param_count: 0, + callees: vec![ + CalleeSite::bare("prefetch".to_string()), + CalleeSite::bare("cacheKeyFetch".to_string()), + CalleeSite::bare("Faraday_token".to_string()), + ], + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_external_services(&gs); + assert!(nodes.is_empty(), "bare rules FP-matched on {nodes:?}"); + } + + #[test] + fn bare_got_rule_matches_segmented_callee() { + let mut gs = GlobalSummaries::new(); + let key = FuncKey::new_function(Lang::JavaScript, "client.js", "load", None); + let summary = FuncSummary { + name: "load".to_string(), + file_path: "client.js".to_string(), + lang: "javascript".to_string(), + param_count: 0, + callees: vec![CalleeSite::bare("got.post".to_string())], + ..Default::default() + }; + gs.insert(key, summary); + let nodes = detect_external_services(&gs); + assert_eq!(nodes.len(), 1); + let SurfaceNode::ExternalService(es) = &nodes[0] else { + panic!() + }; + assert_eq!(es.label, "got (JS)"); + } } diff --git a/src/surface/lang/java_quarkus.rs b/src/surface/lang/java_quarkus.rs index 957344b9..04ba91d8 100644 --- a/src/surface/lang/java_quarkus.rs +++ b/src/surface/lang/java_quarkus.rs @@ -3,8 +3,8 @@ //! Quarkus uses JAX-RS (`jakarta.ws.rs`) for HTTP routing on top of //! `RESTEasy Reactive` / `Quarkus REST`. The annotations are //! identical to plain JAX-RS, so this probe overlaps with -//! [`super::java_servlet`] but emits the [`Framework::JaxRs`] tag with -//! a Quarkus-specific recogniser: +//! [`super::java_servlet`] but emits the [`Framework::Quarkus`] tag +//! via a Quarkus-specific recogniser: //! //! * The class is annotated with `@ApplicationScoped`, //! `@RequestScoped`, or `@Singleton` (Quarkus DI markers); OR @@ -77,7 +77,7 @@ pub fn detect_quarkus_routes( let name = method_name(member, bytes).unwrap_or_default(); out.push(SurfaceNode::EntryPoint(EntryPoint { location: loc_for(member, &file_rel), - framework: Framework::JaxRs, + framework: Framework::Quarkus, method, route: method_path, handler_name: name, diff --git a/src/surface/lang/js_express.rs b/src/surface/lang/js_express.rs index ddf59d38..7a76d956 100644 --- a/src/surface/lang/js_express.rs +++ b/src/surface/lang/js_express.rs @@ -68,7 +68,9 @@ fn match_express_call(call: Node, bytes: &[u8], file_rel: &str) -> Option bool { } } -fn receiver_is_express(object: Node, bytes: &[u8]) -> bool { - fn name_matches(text: &str) -> bool { +fn receiver_is_express(object: Node, bytes: &[u8], has_express_witness: bool) -> bool { + fn name_matches_strong(text: &str) -> bool { let lower = text.to_ascii_lowercase(); lower == "app" - || lower == "router" || lower == "server" || lower.ends_with("_app") - || lower.ends_with("router") || lower.ends_with("api") } + fn name_matches_router(text: &str) -> bool { + let lower = text.to_ascii_lowercase(); + lower == "router" || lower.ends_with("router") + } + let check_name = |text: &str| -> bool { + // `router` / `*router` is ambiguous with koa-router; require a + // file-level `express` witness before claiming it. Strong + // shapes (`app`, `server`, `*_app`, `*api`) are Express-only + // conventions and don't need a witness. + if name_matches_strong(text) { + return true; + } + if name_matches_router(text) { + return has_express_witness; + } + false + }; match object.kind() { - "identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches), + "identifier" => object.utf8_text(bytes).ok().is_some_and(check_name), "member_expression" => object .child_by_field_name("property") .and_then(|p| p.utf8_text(bytes).ok()) - .is_some_and(name_matches), + .is_some_and(check_name), "call_expression" => { let Some(callee) = object.child_by_field_name("function") else { return false; @@ -228,4 +245,22 @@ mod tests { }; assert!(ep.auth_required); } + + #[test] + fn router_receiver_without_express_witness_does_not_match() { + // Pure koa-router file — express probe must not claim it. + let src = "const Router = require('@koa/router');\nconst router = new Router();\nrouter.get('/users', async ctx => {});\n"; + let (tree, bytes) = parse(src); + let nodes = detect_express_routes(&tree, &bytes, &PathBuf::from("server.js"), None); + assert!(nodes.is_empty(), "express probe FP'd on koa-only file: {nodes:?}"); + } + + #[test] + fn router_receiver_with_express_witness_still_matches() { + // express + Router.get is a real Express idiom — must still detect. + let src = "const express = require('express');\nconst router = express.Router();\nrouter.get('/users', (req, res) => {});\n"; + let (tree, bytes) = parse(src); + let nodes = detect_express_routes(&tree, &bytes, &PathBuf::from("server.js"), None); + assert_eq!(nodes.len(), 1); + } } diff --git a/src/surface/lang/js_koa.rs b/src/surface/lang/js_koa.rs index f1ad29f2..faf25a31 100644 --- a/src/surface/lang/js_koa.rs +++ b/src/surface/lang/js_koa.rs @@ -101,7 +101,7 @@ fn match_koa_call(call: Node, bytes: &[u8], file_rel: &str) -> Option Option Option