[pitboss/grind] deferred session-0003 (20260516T052512Z-20f8)

This commit is contained in:
pitboss 2026-05-16 02:26:41 -05:00
parent 282acddbbf
commit 678f0f5d48
35 changed files with 737 additions and 109 deletions

View file

@ -92,6 +92,20 @@ const DRIVER_RULES: &[DriverRule] = &[
DriverRule { leaf: "diesel::sql_query", kind: DataStoreKind::Sql, label: "Diesel" },
DriverRule { leaf: "PgConnection::establish", kind: DataStoreKind::Sql, label: "Diesel" },
// Type-qualified — fires when the SSA type-fact engine resolves a
// receiver to `TypeKind::DatabaseConnection` regardless of the bare
// callee name (e.g. `conn = psycopg2.connect(); conn.cursor()` →
// typed_call_receivers maps the `.cursor` ordinal to "DatabaseConnection").
DriverRule { leaf: "DatabaseConnection.cursor", kind: DataStoreKind::Sql, label: "Database connection" },
DriverRule { leaf: "DatabaseConnection.execute", kind: DataStoreKind::Sql, label: "Database connection" },
DriverRule { leaf: "DatabaseConnection.query", kind: DataStoreKind::Sql, label: "Database connection" },
DriverRule { leaf: "DatabaseConnection.exec", kind: DataStoreKind::Sql, label: "Database connection" },
DriverRule { leaf: "DatabaseConnection.prepare", kind: DataStoreKind::Sql, label: "Database connection" },
DriverRule { leaf: "DatabaseConnection.commit", kind: DataStoreKind::Sql, label: "Database connection" },
DriverRule { leaf: "FileHandle.read", kind: DataStoreKind::Filesystem, label: "Filesystem" },
DriverRule { leaf: "FileHandle.write", kind: DataStoreKind::Filesystem, label: "Filesystem" },
DriverRule { leaf: "FileHandle.close", kind: DataStoreKind::Filesystem, label: "Filesystem" },
// Filesystem (best-effort: language-agnostic open()-family)
DriverRule { leaf: "open", kind: DataStoreKind::Filesystem, label: "Filesystem" },
];
@ -99,15 +113,28 @@ const DRIVER_RULES: &[DriverRule] = &[
/// Walk every function summary's callee list and emit one
/// [`SurfaceNode::DataStore`] per matched driver call. De-duped on
/// `(file, line, label)`.
///
/// When the bare callee name does not hit a rule, the type-fact engine's
/// per-call `typed_call_receivers` map (read off the matching
/// [`crate::summary::SsaFuncSummary`]) is consulted: a callee whose
/// receiver was resolved to `TypeKind::DatabaseConnection` or
/// `TypeKind::FileHandle` is retried under the type-qualified name
/// `"DatabaseConnection.<method>"` / `"FileHandle.<method>"`, picking up
/// the bound-receiver call shapes (`conn.cursor()` after
/// `conn = psycopg2.connect()`) that the name-only matcher misses.
pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
let mut out: Vec<SurfaceNode> = Vec::new();
let mut seen: std::collections::HashSet<(String, u32, String)> =
std::collections::HashSet::new();
for (key, summary) in summaries.iter() {
let typed = summaries.get_ssa(key).map(|s| s.typed_call_receivers.as_slice());
for callee in &summary.callees {
let Some(rule) = match_rule(&callee.name) else {
continue;
};
let rule = match_rule(&callee.name).or_else(|| {
typed
.and_then(|t| container_for_ordinal(t, callee.ordinal))
.and_then(|c| match_rule(&qualify(c, &callee.name)))
});
let Some(rule) = rule else { continue };
let location = call_site_location(summary, callee);
let dedup = (
location.file.clone(),
@ -117,7 +144,6 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
if !seen.insert(dedup) {
continue;
}
let _ = key;
out.push(SurfaceNode::DataStore(DataStore {
location,
kind: rule.kind,
@ -128,6 +154,25 @@ pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
out
}
/// Return the trailing path segment of a callee text.
///
/// Everything up to and including the last `::` is dropped first; then,
/// within what remains, everything up to and including the last `.` is
/// dropped. A name containing neither separator is returned unchanged.
fn leaf_segment(name: &str) -> &str {
    // Strip the `::`-qualified prefix, if there is one.
    let tail = name.rsplit_once("::").map_or(name, |(_, rest)| rest);
    // Then strip the `.`-qualified prefix from the remainder.
    tail.rsplit_once('.').map_or(tail, |(_, rest)| rest)
}
/// Produce the type-qualified lookup key `"{container}.{method}"`.
///
/// `method` is the leaf segment of `callee_name`, so any receiver or
/// module prefix on the callee text is discarded before the container
/// name is prepended.
fn qualify(container: &str, callee_name: &str) -> String {
    let method = leaf_segment(callee_name);
    let mut qualified = String::with_capacity(container.len() + 1 + method.len());
    qualified.push_str(container);
    qualified.push('.');
    qualified.push_str(method);
    qualified
}
/// Look up the container recorded for a call ordinal.
///
/// `typed` is a list of `(ordinal, container)` pairs. The container of
/// the first pair whose ordinal equals `ordinal` is returned, or `None`
/// when no pair matches. A plain linear scan is used on the expectation
/// that the per-function list stays short.
fn container_for_ordinal(typed: &[(u32, String)], ordinal: u32) -> Option<&str> {
    typed
        .iter()
        .find_map(|(candidate, container)| (*candidate == ordinal).then_some(container.as_str()))
}
fn match_rule(callee: &str) -> Option<&'static DriverRule> {
let cl = callee.trim().to_ascii_lowercase();
// Normalize `::` → `.` so segment-split treats both as separators.
@ -290,4 +335,56 @@ mod tests {
let nodes = detect_data_stores(&gs);
assert_eq!(nodes.len(), 1);
}
#[test]
fn typed_receiver_database_connection_resolves_bound_cursor() {
    // Scenario: Python shaped like `conn = psycopg2.connect(); conn.cursor()`.
    // The function summary records the callee as the bare text
    // `conn.cursor` (expected not to hit a name-only rule), while the
    // SSA summary tags call ordinal 7 with the container
    // "DatabaseConnection". The detector should therefore retry under
    // `DatabaseConnection.cursor` and emit exactly one Sql node.
    use crate::summary::ssa_summary::SsaFuncSummary;

    let mut gs = GlobalSummaries::new();
    let key = FuncKey::new_function(Lang::Python, "app.py", "load", None);

    // The single call site: ordinal 7, span starting on line 4.
    let mut cursor_call = CalleeSite::bare("conn.cursor");
    cursor_call.ordinal = 7;
    cursor_call.span = Some((4, 8));

    let summary = FuncSummary {
        name: "load".into(),
        file_path: "app.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![cursor_call],
        ..Default::default()
    };

    // SSA side: receiver of ordinal 7 resolved to a database connection.
    let mut ssa = SsaFuncSummary::default();
    ssa.typed_call_receivers
        .push((7, "DatabaseConnection".into()));

    gs.insert(key.clone(), summary);
    gs.insert_ssa(key, ssa);

    let nodes = detect_data_stores(&gs);
    assert_eq!(nodes.len(), 1, "expected typed retry to hit; got {nodes:?}");
    let store = match &nodes[0] {
        SurfaceNode::DataStore(store) => store,
        _ => panic!(),
    };
    assert_eq!(store.kind, DataStoreKind::Sql);
    assert_eq!(store.label, "Database connection");
    assert_eq!(store.location.line, 4);
}
#[test]
fn typed_receiver_without_ssa_summary_falls_through() {
    // Only the plain function summary is registered; no SSA summary is
    // inserted for the key. The callee `client.cursor` is expected not
    // to hit a name-only rule, and with no `typed_call_receivers` to
    // consult the detector must return nothing rather than panic.
    let mut gs = GlobalSummaries::new();
    let (func_key, func_summary) = summary_with_callees("load", "app.py", &["client.cursor"]);
    gs.insert(func_key, func_summary);
    assert!(detect_data_stores(&gs).is_empty());
}
}

View file

@ -76,17 +76,50 @@ const CLIENT_RULES: &[ClientRule] = &[
ClientRule { leaf: "socket.gethostbyname", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" },
ClientRule { leaf: "dns.lookup", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" },
ClientRule { leaf: "net.LookupIP", kind: ExternalServiceKind::HttpApi, label: "DNS resolver" },
// Type-qualified — fires when the SSA type-fact engine resolves a
// receiver to `TypeKind::HttpClient` regardless of the bare callee
// name (`session = requests.Session(); session.get(url)` →
// typed_call_receivers maps the `.get` ordinal to "HttpClient", so
// the bound-receiver call surfaces as an outbound HTTP node even
// though `requests.get` is the only direct-import rule above).
ClientRule { leaf: "HttpClient.get", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "HttpClient.post", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "HttpClient.put", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "HttpClient.delete", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "HttpClient.patch", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "HttpClient.request", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "HttpClient.head", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "HttpClient.options", kind: ExternalServiceKind::HttpApi, label: "HTTP client" },
ClientRule { leaf: "RequestBuilder.send", kind: ExternalServiceKind::HttpApi, label: "HTTP request builder" },
ClientRule { leaf: "URL.openConnection", kind: ExternalServiceKind::HttpApi, label: "URL connection" },
ClientRule { leaf: "URL.openStream", kind: ExternalServiceKind::HttpApi, label: "URL connection" },
];
/// Walk every function summary's callee list and emit one
/// [`SurfaceNode::ExternalService`] per matched outbound-client call.
///
/// When the bare callee name does not hit a rule, the type-fact engine's
/// per-call `typed_call_receivers` map (read off the matching
/// [`crate::summary::SsaFuncSummary`]) is consulted: a callee whose
/// receiver was resolved to `TypeKind::HttpClient` /
/// `TypeKind::RequestBuilder` / `TypeKind::Url` is retried under the
/// type-qualified name `"{container}.<method>"`, picking up the
/// bound-receiver call shapes (`client = requests.Session();
/// client.get(url)`) that the name-only matcher misses.
pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
let mut out: Vec<SurfaceNode> = Vec::new();
let mut seen: std::collections::HashSet<(String, String)> =
std::collections::HashSet::new();
for (_key, summary) in summaries.iter() {
for (key, summary) in summaries.iter() {
let typed = summaries.get_ssa(key).map(|s| s.typed_call_receivers.as_slice());
for callee in &summary.callees {
let Some(rule) = match_rule(&callee.name) else {
continue;
};
let rule = match_rule(&callee.name).or_else(|| {
typed
.and_then(|t| container_for_ordinal(t, callee.ordinal))
.and_then(|c| match_rule(&qualify(c, &callee.name)))
});
let Some(rule) = rule else { continue };
let location = call_site_location(summary, Some(callee));
if !seen.insert((location.file.clone(), rule.label.to_string())) {
continue;
@ -118,6 +151,19 @@ pub fn detect_external_services(summaries: &GlobalSummaries) -> Vec<SurfaceNode>
out
}
/// Final segment of a callee text: the part after the last `::`, and
/// within that, the part after the last `.`. Names with neither
/// separator come back unchanged.
fn leaf_segment(name: &str) -> &str {
    // Skip past the last `::` (two bytes wide) when present.
    let tail = match name.rfind("::") {
        Some(idx) => &name[idx + 2..],
        None => name,
    };
    // Skip past the last `.` in what is left.
    match tail.rfind('.') {
        Some(idx) => &tail[idx + 1..],
        None => tail,
    }
}
/// Build the type-qualified name `"{container}.{method}"`, where
/// `method` is the leaf segment of `callee_name`.
fn qualify(container: &str, callee_name: &str) -> String {
    let method = leaf_segment(callee_name);
    format!("{container}.{method}")
}
/// Return the container paired with `ordinal` in `typed`, taking the
/// first matching `(ordinal, container)` entry, or `None` if no entry
/// carries that ordinal.
fn container_for_ordinal(typed: &[(u32, String)], ordinal: u32) -> Option<&str> {
    for (candidate, container) in typed {
        if *candidate == ordinal {
            return Some(container.as_str());
        }
    }
    None
}
fn match_rule(callee: &str) -> Option<&'static ClientRule> {
let cl = callee.trim().to_ascii_lowercase();
let cl_segments = cl.replace("::", ".");
@ -195,6 +241,41 @@ mod tests {
assert!(nodes.is_empty(), "bare rules FP-matched on {nodes:?}");
}
#[test]
fn typed_receiver_http_client_resolves_bound_session_get() {
    // Scenario: Python shaped like
    // `client = requests.Session(); client.get(url)`. The callee is
    // recorded as the bare text `client.get` (expected not to hit a
    // name-only rule), while the SSA summary tags call ordinal 3 with
    // the container "HttpClient". The detector should retry under
    // `HttpClient.get` and emit one external-service node.
    use crate::summary::ssa_summary::SsaFuncSummary;

    let mut gs = GlobalSummaries::new();
    let key = FuncKey::new_function(Lang::Python, "client.py", "fetch", None);

    // The single call site: ordinal 3, span (9, 5).
    let mut get_call = CalleeSite::bare("client.get");
    get_call.ordinal = 3;
    get_call.span = Some((9, 5));

    let summary = FuncSummary {
        name: "fetch".into(),
        file_path: "client.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![get_call],
        ..Default::default()
    };

    // SSA side: receiver of ordinal 3 resolved to an HTTP client.
    let mut ssa = SsaFuncSummary::default();
    ssa.typed_call_receivers.push((3, "HttpClient".into()));

    gs.insert(key.clone(), summary);
    gs.insert_ssa(key, ssa);

    let nodes = detect_external_services(&gs);
    assert_eq!(nodes.len(), 1, "expected typed retry to hit; got {nodes:?}");
    let service = match &nodes[0] {
        SurfaceNode::ExternalService(service) => service,
        _ => panic!(),
    };
    assert_eq!(service.label, "HTTP client");
}
#[test]
fn bare_got_rule_matches_segmented_callee() {
let mut gs = GlobalSummaries::new();

View file

@ -18,16 +18,7 @@ use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_MIDDLEWARES: &[&str] = &[
"AuthRequired",
"JWT",
"JWTAuth",
"Auth",
"RequireAuth",
"RequireUser",
"VerifyToken",
"BasicAuth",
];
pub use crate::auth_analysis::auth_markers::GIN_MIDDLEWARES as AUTH_MIDDLEWARES;
const VERBS: &[&str] = &[
"GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD", "Any",

View file

@ -21,12 +21,7 @@ use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_ANNOTATIONS: &[&str] = &[
"Authenticated",
"RolesAllowed",
"DenyAll",
"RequiresAuthentication",
];
pub use crate::auth_analysis::auth_markers::QUARKUS_ANNOTATIONS as AUTH_ANNOTATIONS;
const QUARKUS_DI: &[&str] = &[
"ApplicationScoped",

View file

@ -18,12 +18,7 @@ use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_ANNOTATIONS: &[&str] = &[
"RolesAllowed",
"DenyAll",
"RequiresAuthentication",
"RequiresUser",
];
pub use crate::auth_analysis::auth_markers::SERVLET_ANNOTATIONS as AUTH_ANNOTATIONS;
const SERVLET_VERBS: &[(&str, HttpMethod)] = &[
("doGet", HttpMethod::GET),

View file

@ -16,13 +16,7 @@ use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_ANNOTATIONS: &[&str] = &[
"PreAuthorize",
"PostAuthorize",
"Secured",
"RolesAllowed",
"AuthenticationPrincipal",
];
pub use crate::auth_analysis::auth_markers::SPRING_ANNOTATIONS as AUTH_ANNOTATIONS;
const MAPPING_ANNOTATIONS: &[(&str, Option<HttpMethod>)] = &[
("RequestMapping", None),

View file

@ -17,20 +17,7 @@ use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_MIDDLEWARES: &[&str] = &[
"requireAuth",
"requireUser",
"isAuthenticated",
"ensureAuthenticated",
"ensureLoggedIn",
"authenticate",
"authMiddleware",
"verifyToken",
"verifyJwt",
"checkJwt",
"passport",
"jwt",
];
pub use crate::auth_analysis::auth_markers::EXPRESS_MIDDLEWARES as AUTH_MIDDLEWARES;
const VERBS: &[&str] = &[
"get", "post", "put", "delete", "patch", "options", "head", "all",

View file

@ -15,20 +15,7 @@ use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_MIDDLEWARES: &[&str] = &[
"requireAuth",
"requireUser",
"isAuthenticated",
"ensureAuthenticated",
"authenticate",
"authMiddleware",
"verifyToken",
"verifyJwt",
"checkJwt",
"passport",
"jwt",
"koaJwt",
];
pub use crate::auth_analysis::auth_markers::KOA_MIDDLEWARES as AUTH_MIDDLEWARES;
const VERBS: &[&str] = &[
"get", "post", "put", "delete", "patch", "options", "head", "all",

View file

@ -26,15 +26,7 @@ use std::collections::HashMap;
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_DECORATORS: &[&str] = &[
"login_required",
"permission_required",
"user_passes_test",
"staff_member_required",
"csrf_protect",
"require_authenticated",
"auth_required",
];
pub use crate::auth_analysis::auth_markers::DJANGO_DECORATORS as AUTH_DECORATORS;
const CBV_BASES: &[&str] = &[
"View",

View file

@ -21,17 +21,7 @@ use tree_sitter::{Node, Tree};
/// Auth markers recognised in the decorator stack. FastAPI's primary
/// auth idiom is `Depends(...)` parameter injection, handled separately.
pub const AUTH_DECORATORS: &[&str] = &[
"login_required",
"auth_required",
"jwt_required",
"token_required",
"requires_auth",
"authenticated",
"require_auth",
"require_login",
"current_user",
];
pub use crate::auth_analysis::auth_markers::FASTAPI_DECORATORS as AUTH_DECORATORS;
/// Auth-callee names recognised inside a `Depends(...)` parameter.
const AUTH_DEPENDS_CALLEES: &[&str] = &[

View file

@ -28,15 +28,7 @@ use tree_sitter::{Node, Tree};
/// last `attribute` / `identifier` segment — so `@login_required`,
/// `@auth.login_required`, and `@flask_login.login_required` all
/// match. Match is case-insensitive on the underscored form.
pub const AUTH_DECORATORS: &[&str] = &[
"login_required",
"auth_required",
"jwt_required",
"token_required",
"requires_auth",
"authenticated",
"require_login",
];
pub use crate::auth_analysis::auth_markers::FLASK_DECORATORS as AUTH_DECORATORS;
/// Detect every Flask route in a parsed Python file.
///

View file

@ -16,14 +16,7 @@ use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};
use std::path::Path;
use tree_sitter::{Node, Tree};
pub const AUTH_EXTRACTORS: &[&str] = &[
"Identity",
"BearerAuth",
"BasicAuth",
"JwtClaims",
"Authenticated",
"User",
];
pub use crate::auth_analysis::auth_markers::ACTIX_EXTRACTORS as AUTH_EXTRACTORS;
const ROUTE_MACROS: &[(&str, Option<HttpMethod>)] = &[
("get", Some(HttpMethod::GET)),

View file

@ -25,13 +25,7 @@ const VERBS: &[(&str, HttpMethod)] = &[
("options", HttpMethod::OPTIONS),
];
pub const AUTH_EXTRACTORS: &[&str] = &[
"Extension<User",
"BearerAuth",
"RequireAuth",
"AuthenticatedUser",
"JwtClaims",
];
pub use crate::auth_analysis::auth_markers::AXUM_EXTRACTORS as AUTH_EXTRACTORS;
pub fn detect_axum_routes(
tree: &Tree,