[pitboss] sweep after phase 22: 3 deferred items resolved

This commit is contained in:
pitboss 2026-05-15 13:52:15 -05:00
parent 2395446655
commit 66a59200ae
9 changed files with 167 additions and 40 deletions

View file

@ -129,17 +129,23 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
}
/// Finds the first [`DriverRule`] whose `leaf` pattern matches `callee`.
///
/// Matching is ASCII case-insensitive and ignores whitespace around
/// `callee`. Two rule shapes are handled differently:
///
/// * Qualified patterns (the rule's `leaf` contains `.` or `::`, e.g.
///   `psycopg2.connect`, `Eloquent::find`) match as a substring of the
///   full callee text.
/// * Bare patterns (e.g. `open`, `fetch`, `PrismaClient`) must equal one
///   whole segment of the callee, where segments are separated by `.` or
///   `::`. This keeps `fopen` / `OpenSearch` / `getPrismaClient` from
///   matching short bare leaves.
///
/// Returns `None` when no rule matches.
fn match_rule(callee: &str) -> Option<&'static DriverRule> {
    let callee_lower = callee.trim().to_ascii_lowercase();
    // Normalize `::` → `.` so a single split treats both as separators.
    let dotted = callee_lower.replace("::", ".");
    DRIVER_RULES.iter().find(|rule| {
        if rule.leaf.contains('.') || rule.leaf.contains("::") {
            // Qualified pattern: substring on the full callee text.
            callee_lower.contains(&rule.leaf.to_ascii_lowercase())
        } else {
            // Bare leaf: whole-segment comparison, no per-rule allocation.
            dotted
                .split('.')
                .any(|segment| segment.eq_ignore_ascii_case(rule.leaf))
        }
    })
}
/// Best-effort source location for a call site. We only have file +
@ -215,4 +221,45 @@ mod tests {
let nodes = detect_data_stores(&gs);
assert_eq!(nodes.len(), 1);
}
/// Callees that merely contain the text `open` (as a prefix, suffix or
/// inner fragment of a longer identifier) must not yield any data-store
/// node.
#[test]
fn bare_open_rule_does_not_match_fopen_or_opensearch() {
    let lookalikes = [
        "fopen",
        "popen",
        "OpenSearch",
        "openssl_encrypt",
        "MongoClient.openSession",
    ];
    let (key, summary) = summary_with_callees("init", "app.py", &lookalikes);
    let mut summaries = GlobalSummaries::new();
    summaries.insert(key, summary);

    let nodes = detect_data_stores(&summaries);
    assert!(
        nodes.is_empty(),
        "bare `open` rule should not FP on {nodes:?}",
    );
}
/// A genuine `open` call, bare or module-qualified, must still produce
/// exactly one data-store node; the bare form is also checked to be a
/// filesystem store.
#[test]
fn bare_open_rule_still_matches_real_open() {
    // Case 1: bare `open`.
    let (bare_key, bare_summary) = summary_with_callees("loader", "app.py", &["open"]);
    let mut bare_summaries = GlobalSummaries::new();
    bare_summaries.insert(bare_key, bare_summary);
    let bare_nodes = detect_data_stores(&bare_summaries);
    assert_eq!(bare_nodes.len(), 1);
    match &bare_nodes[0] {
        SurfaceNode::DataStore(ds) => assert_eq!(ds.kind, DataStoreKind::Filesystem),
        _ => panic!(),
    }

    // Case 2: `open` as the last segment of a qualified callee.
    let (qualified_key, qualified_summary) =
        summary_with_callees("loader", "app.py", &["builtins.open"]);
    let mut qualified_summaries = GlobalSummaries::new();
    qualified_summaries.insert(qualified_key, qualified_summary);
    let qualified_nodes = detect_data_stores(&qualified_summaries);
    assert_eq!(qualified_nodes.len(), 1);
}
}

View file

@ -119,12 +119,18 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
}
/// Finds the first [`ClientRule`] whose `leaf` pattern matches `callee`.
///
/// Matching is ASCII case-insensitive and ignores whitespace around
/// `callee`. Two rule shapes are handled differently:
///
/// * Qualified patterns (the rule's `leaf` contains `.` or `::`) match as
///   a substring of the full callee text.
/// * Bare patterns (e.g. `fetch`, `got`) must equal one whole segment of
///   the callee, where segments are separated by `.` or `::`. This stops
///   `prefetch` from matching `fetch`, and similar substring hits.
///
/// Returns `None` when no rule matches.
fn match_rule(callee: &str) -> Option<&'static ClientRule> {
    let callee_lower = callee.trim().to_ascii_lowercase();
    // Normalize `::` → `.` so a single split treats both as separators.
    let dotted = callee_lower.replace("::", ".");
    CLIENT_RULES.iter().find(|rule| {
        if rule.leaf.contains('.') || rule.leaf.contains("::") {
            // Qualified pattern: substring on the full callee text.
            callee_lower.contains(&rule.leaf.to_ascii_lowercase())
        } else {
            // Bare leaf: whole-segment comparison, no per-rule allocation.
            dotted
                .split('.')
                .any(|segment| segment.eq_ignore_ascii_case(rule.leaf))
        }
    })
}
@ -162,4 +168,46 @@ mod tests {
};
assert_eq!(es.label, "requests (Python)");
}
/// Identifiers that only embed a bare rule leaf (`fetch`, `Faraday`)
/// inside a longer name must not be reported as external services.
#[test]
fn bare_fetch_rule_does_not_match_prefetch_or_cachekey() {
    let callees = ["prefetch", "cacheKeyFetch", "Faraday_token"]
        .iter()
        .map(|name| CalleeSite::bare(name.to_string()))
        .collect();
    let summary = FuncSummary {
        name: "load".to_string(),
        file_path: "client.js".to_string(),
        lang: "javascript".to_string(),
        param_count: 0,
        callees,
        ..Default::default()
    };
    let key = FuncKey::new_function(Lang::JavaScript, "client.js", "load", None);

    let mut summaries = GlobalSummaries::new();
    summaries.insert(key, summary);

    let nodes = detect_external_services(&summaries);
    assert!(nodes.is_empty(), "bare rules FP-matched on {nodes:?}");
}
/// A bare rule leaf appearing as a whole segment of a dotted callee
/// (`got` in `got.post`) must be detected and labelled as the `got`
/// client.
#[test]
fn bare_got_rule_matches_segmented_callee() {
    let summary = FuncSummary {
        name: "load".to_string(),
        file_path: "client.js".to_string(),
        lang: "javascript".to_string(),
        param_count: 0,
        callees: vec![CalleeSite::bare("got.post".to_string())],
        ..Default::default()
    };
    let key = FuncKey::new_function(Lang::JavaScript, "client.js", "load", None);

    let mut summaries = GlobalSummaries::new();
    summaries.insert(key, summary);

    let nodes = detect_external_services(&summaries);
    assert_eq!(nodes.len(), 1);
    match &nodes[0] {
        SurfaceNode::ExternalService(es) => assert_eq!(es.label, "got (JS)"),
        _ => panic!(),
    }
}
}

View file

@ -3,8 +3,8 @@
//! Quarkus uses JAX-RS (`jakarta.ws.rs`) for HTTP routing on top of
//! `RESTEasy Reactive` / `Quarkus REST`. The annotations are
//! identical to plain JAX-RS, so this probe overlaps with
//! [`super::java_servlet`] but emits the [`Framework::Quarkus`] tag
//! via a Quarkus-specific recogniser:
//!
//! * The class is annotated with `@ApplicationScoped`,
//! `@RequestScoped`, or `@Singleton` (Quarkus DI markers); OR
@ -77,7 +77,7 @@ pub fn detect_quarkus_routes(
let name = method_name(member, bytes).unwrap_or_default();
out.push(SurfaceNode::EntryPoint(EntryPoint {
location: loc_for(member, &file_rel),
framework: Framework::JaxRs,
framework: Framework::Quarkus,
method,
route: method_path,
handler_name: name,

View file

@ -68,7 +68,9 @@ fn match_express_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<Surfac
return None;
}
let object = func.child_by_field_name("object")?;
if !receiver_is_express(object, bytes) {
let file_text = std::str::from_utf8(bytes).unwrap_or("");
let has_express_witness = file_text.contains("express");
if !receiver_is_express(object, bytes, has_express_witness) {
return None;
}
let prop = func.child_by_field_name("property")?;
@ -161,22 +163,37 @@ fn arg_is_auth_marker(node: Node, bytes: &[u8]) -> bool {
}
}
fn receiver_is_express(object: Node, bytes: &[u8]) -> bool {
fn name_matches(text: &str) -> bool {
fn receiver_is_express(object: Node, bytes: &[u8], has_express_witness: bool) -> bool {
fn name_matches_strong(text: &str) -> bool {
let lower = text.to_ascii_lowercase();
lower == "app"
|| lower == "router"
|| lower == "server"
|| lower.ends_with("_app")
|| lower.ends_with("router")
|| lower.ends_with("api")
}
fn name_matches_router(text: &str) -> bool {
let lower = text.to_ascii_lowercase();
lower == "router" || lower.ends_with("router")
}
let check_name = |text: &str| -> bool {
// `router` / `*router` is ambiguous with koa-router; require a
// file-level `express` witness before claiming it. Strong
// shapes (`app`, `server`, `*_app`, `*api`) are Express-only
// conventions and don't need a witness.
if name_matches_strong(text) {
return true;
}
if name_matches_router(text) {
return has_express_witness;
}
false
};
match object.kind() {
"identifier" => object.utf8_text(bytes).ok().is_some_and(name_matches),
"identifier" => object.utf8_text(bytes).ok().is_some_and(check_name),
"member_expression" => object
.child_by_field_name("property")
.and_then(|p| p.utf8_text(bytes).ok())
.is_some_and(name_matches),
.is_some_and(check_name),
"call_expression" => {
let Some(callee) = object.child_by_field_name("function") else {
return false;
@ -228,4 +245,22 @@ mod tests {
};
assert!(ep.auth_required);
}
/// A file that only uses koa-router (no `express` text anywhere) must
/// not be claimed by the Express probe, even though the receiver is
/// named `router`.
#[test]
fn router_receiver_without_express_witness_does_not_match() {
    let src = "const Router = require('@koa/router');\n\
               const router = new Router();\n\
               router.get('/users', async ctx => {});\n";
    let (tree, bytes) = parse(src);
    let path = PathBuf::from("server.js");
    let nodes = detect_express_routes(&tree, &bytes, &path, None);
    assert!(nodes.is_empty(), "express probe FP'd on koa-only file: {nodes:?}");
}
/// `express.Router()` followed by `router.get(...)` is a real Express
/// idiom: with `express` present in the file, the route must still be
/// detected.
#[test]
fn router_receiver_with_express_witness_still_matches() {
    let src = "const express = require('express');\n\
               const router = express.Router();\n\
               router.get('/users', (req, res) => {});\n";
    let (tree, bytes) = parse(src);
    let path = PathBuf::from("server.js");
    let nodes = detect_express_routes(&tree, &bytes, &path, None);
    assert_eq!(nodes.len(), 1);
}
}

View file

@ -101,7 +101,7 @@ fn match_koa_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNod
let handler_name = handler_function_name(*handler_node, bytes).unwrap_or_default();
Some(SurfaceNode::EntryPoint(EntryPoint {
location: loc_for(call, file_rel),
framework: Framework::Express, // koa shares the Express variant tag — Phase 22 reuses
framework: Framework::Koa,
method,
route,
handler_name,

View file

@ -87,7 +87,7 @@ fn match_laravel_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<Surfac
let auth_required = check_chained_middleware(call, bytes);
Some(SurfaceNode::EntryPoint(EntryPoint {
location: loc_for(call, file_rel),
framework: Framework::Sinatra, // PHP frameworks reuse the closest tag — Laravel folds into a generic surface entry-point
framework: Framework::Laravel,
method: *method,
route,
handler_name,

View file

@ -81,7 +81,7 @@ fn match_slim_call(call: Node, bytes: &[u8], file_rel: &str) -> Option<SurfaceNo
.unwrap_or_default();
Some(SurfaceNode::EntryPoint(EntryPoint {
location: loc_for(call, file_rel),
framework: Framework::Sinatra,
framework: Framework::Slim,
method: *method,
route,
handler_name,

View file

@ -63,10 +63,14 @@ pub enum Framework {
FastApi,
Django,
Express,
Koa,
Spring,
JaxRs,
Quarkus,
Rails,
Sinatra,
Laravel,
Slim,
Axum,
Actix,
Rocket,

View file

@ -111,11 +111,7 @@ fn js_express_fixture() {
/// The `js_koa` fixture must surface its `/users` route tagged as
/// `Framework::Koa` (the koa probe no longer reuses the Express tag).
#[test]
fn js_koa_fixture() {
    let map = build("js_koa");
    assert_entry(&map, Framework::Koa, "/users");
}
#[test]
@ -139,7 +135,7 @@ fn java_servlet_fixture() {
/// The `java_quarkus` fixture must surface `/api/hello` tagged as
/// `Framework::Quarkus` rather than the plain JAX-RS tag.
#[test]
fn java_quarkus_fixture() {
    let map = build("java_quarkus");
    assert_entry(&map, Framework::Quarkus, "/api/hello");
}
#[test]
@ -157,16 +153,13 @@ fn go_gin_fixture() {
/// The `php_laravel` fixture must surface its `/users` route tagged as
/// `Framework::Laravel` (Laravel no longer folds into the Sinatra tag).
#[test]
fn php_laravel_fixture() {
    let map = build("php_laravel");
    assert_entry(&map, Framework::Laravel, "/users");
}
/// The `php_slim` fixture must surface its `/users` route tagged as
/// `Framework::Slim` (Slim no longer folds into the Sinatra tag).
#[test]
fn php_slim_fixture() {
    let map = build("php_slim");
    assert_entry(&map, Framework::Slim, "/users");
}
#[test]