2026-05-15 13:28:58 -05:00
|
|
|
//! Data-store detection.
|
|
|
|
|
//!
|
|
|
|
|
//! Walks the post-pass-2 [`GlobalSummaries`] looking for callees whose
|
|
|
|
|
//! name is a known database / cache / blob-store driver entry point,
|
|
|
|
|
//! and emits one [`SurfaceNode::DataStore`] per resolved store.
|
|
|
|
|
//!
|
|
|
|
|
//! The detector is name-based on purpose: the receiver's full type is
|
|
|
|
|
//! often unknown after pass 2, but the leaf name of a driver call
|
|
|
|
|
//! (`psycopg2.connect`, `mysql.createConnection`, `gorm.Open`,
|
|
|
|
|
//! `Eloquent::find`, `ActiveRecord::Base.connection`) carries enough
|
|
|
|
|
//! signal for surface-level chain composition. False positives here
//! are tolerable — the surface map is informational, not a finding
//! that fires on its own.
|
|
|
|
|
|
|
|
|
|
use super::{DataStore, DataStoreKind, SourceLocation, SurfaceNode};
|
2026-05-16 01:46:35 -05:00
|
|
|
use crate::summary::{CalleeSite, FuncSummary, GlobalSummaries};
|
2026-05-15 13:28:58 -05:00
|
|
|
|
|
|
|
|
/// One detection rule: leaf-name pattern → store kind + label. Stored
|
|
|
|
|
/// as a flat list so adding a new ORM / driver is a one-line edit.
|
|
|
|
|
struct DriverRule {
|
|
|
|
|
/// Substring to match against the callee's leaf name (case-insensitive).
|
|
|
|
|
leaf: &'static str,
|
|
|
|
|
kind: DataStoreKind,
|
|
|
|
|
/// Human-readable label attached to the emitted node. Used by the
|
|
|
|
|
/// chain composer and the `nyx surface` CLI tree.
|
|
|
|
|
label: &'static str,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const DRIVER_RULES: &[DriverRule] = &[
|
|
|
|
|
// Python — relational
|
|
|
|
|
DriverRule { leaf: "psycopg2.connect", kind: DataStoreKind::Sql, label: "PostgreSQL (psycopg2)" },
|
|
|
|
|
DriverRule { leaf: "psycopg.connect", kind: DataStoreKind::Sql, label: "PostgreSQL (psycopg3)" },
|
|
|
|
|
DriverRule { leaf: "mysql.connector.connect", kind: DataStoreKind::Sql, label: "MySQL (mysql.connector)" },
|
|
|
|
|
DriverRule { leaf: "MySQLdb.connect", kind: DataStoreKind::Sql, label: "MySQL (MySQLdb)" },
|
|
|
|
|
DriverRule { leaf: "pymysql.connect", kind: DataStoreKind::Sql, label: "MySQL (PyMySQL)" },
|
|
|
|
|
DriverRule { leaf: "sqlite3.connect", kind: DataStoreKind::Sql, label: "SQLite (sqlite3)" },
|
|
|
|
|
DriverRule { leaf: "sqlalchemy.create_engine", kind: DataStoreKind::Sql, label: "SQLAlchemy" },
|
|
|
|
|
DriverRule { leaf: "django.db.connection", kind: DataStoreKind::Sql, label: "Django ORM" },
|
|
|
|
|
// Python — kv / doc
|
|
|
|
|
DriverRule { leaf: "redis.Redis", kind: DataStoreKind::KeyValue, label: "Redis" },
|
|
|
|
|
DriverRule { leaf: "redis.from_url", kind: DataStoreKind::KeyValue, label: "Redis" },
|
|
|
|
|
DriverRule { leaf: "pymongo.MongoClient", kind: DataStoreKind::Document, label: "MongoDB" },
|
|
|
|
|
DriverRule { leaf: "boto3.client", kind: DataStoreKind::BlobStore, label: "AWS (boto3)" },
|
|
|
|
|
DriverRule { leaf: "boto3.resource", kind: DataStoreKind::BlobStore, label: "AWS (boto3)" },
|
|
|
|
|
|
|
|
|
|
// JavaScript / TypeScript — relational
|
|
|
|
|
DriverRule { leaf: "knex", kind: DataStoreKind::Sql, label: "Knex.js" },
|
|
|
|
|
DriverRule { leaf: "createConnection", kind: DataStoreKind::Sql, label: "MySQL/Postgres (mysql/pg)" },
|
|
|
|
|
DriverRule { leaf: "Sequelize", kind: DataStoreKind::Sql, label: "Sequelize" },
|
|
|
|
|
DriverRule { leaf: "TypeORM.createConnection", kind: DataStoreKind::Sql, label: "TypeORM" },
|
|
|
|
|
DriverRule { leaf: "PrismaClient", kind: DataStoreKind::Sql, label: "Prisma" },
|
|
|
|
|
DriverRule { leaf: "pool.query", kind: DataStoreKind::Sql, label: "pg/mysql pool" },
|
|
|
|
|
DriverRule { leaf: "client.query", kind: DataStoreKind::Sql, label: "pg client" },
|
|
|
|
|
DriverRule { leaf: "db.query", kind: DataStoreKind::Sql, label: "Generic SQL driver" },
|
|
|
|
|
// JS — kv / doc
|
|
|
|
|
DriverRule { leaf: "redis.createClient", kind: DataStoreKind::KeyValue, label: "Redis (node-redis)" },
|
|
|
|
|
DriverRule { leaf: "ioredis", kind: DataStoreKind::KeyValue, label: "ioredis" },
|
|
|
|
|
DriverRule { leaf: "MongoClient.connect", kind: DataStoreKind::Document, label: "MongoDB (node)" },
|
|
|
|
|
DriverRule { leaf: "AWS.S3", kind: DataStoreKind::BlobStore, label: "AWS S3" },
|
|
|
|
|
|
|
|
|
|
// Java — JDBC / Hibernate
|
|
|
|
|
DriverRule { leaf: "DriverManager.getConnection", kind: DataStoreKind::Sql, label: "JDBC" },
|
|
|
|
|
DriverRule { leaf: "JdbcTemplate", kind: DataStoreKind::Sql, label: "Spring JdbcTemplate" },
|
|
|
|
|
DriverRule { leaf: "EntityManager", kind: DataStoreKind::Sql, label: "JPA EntityManager" },
|
|
|
|
|
DriverRule { leaf: "SessionFactory.openSession", kind: DataStoreKind::Sql, label: "Hibernate" },
|
|
|
|
|
DriverRule { leaf: "Jedis", kind: DataStoreKind::KeyValue, label: "Jedis (Redis)" },
|
|
|
|
|
DriverRule { leaf: "MongoClients.create", kind: DataStoreKind::Document, label: "MongoDB (java-driver)" },
|
|
|
|
|
|
|
|
|
|
// Go — sql + ORM
|
|
|
|
|
DriverRule { leaf: "sql.Open", kind: DataStoreKind::Sql, label: "database/sql" },
|
|
|
|
|
DriverRule { leaf: "gorm.Open", kind: DataStoreKind::Sql, label: "GORM" },
|
|
|
|
|
DriverRule { leaf: "sqlx.Connect", kind: DataStoreKind::Sql, label: "sqlx" },
|
|
|
|
|
DriverRule { leaf: "sqlx.Open", kind: DataStoreKind::Sql, label: "sqlx" },
|
|
|
|
|
DriverRule { leaf: "redis.NewClient", kind: DataStoreKind::KeyValue, label: "go-redis" },
|
|
|
|
|
DriverRule { leaf: "mongo.Connect", kind: DataStoreKind::Document, label: "MongoDB (go-driver)" },
|
|
|
|
|
|
|
|
|
|
// PHP — Eloquent / PDO
|
|
|
|
|
DriverRule { leaf: "PDO", kind: DataStoreKind::Sql, label: "PDO" },
|
|
|
|
|
DriverRule { leaf: "Eloquent::find", kind: DataStoreKind::Sql, label: "Laravel Eloquent" },
|
|
|
|
|
DriverRule { leaf: "Eloquent::where", kind: DataStoreKind::Sql, label: "Laravel Eloquent" },
|
|
|
|
|
DriverRule { leaf: "DB::connection", kind: DataStoreKind::Sql, label: "Laravel DB" },
|
|
|
|
|
DriverRule { leaf: "Doctrine", kind: DataStoreKind::Sql, label: "Doctrine ORM" },
|
|
|
|
|
|
|
|
|
|
// Ruby — ActiveRecord
|
|
|
|
|
DriverRule { leaf: "ActiveRecord::Base.connection", kind: DataStoreKind::Sql, label: "ActiveRecord" },
|
|
|
|
|
DriverRule { leaf: "ActiveRecord::Base.find", kind: DataStoreKind::Sql, label: "ActiveRecord" },
|
|
|
|
|
DriverRule { leaf: ".find_by_sql", kind: DataStoreKind::Sql, label: "ActiveRecord raw SQL" },
|
|
|
|
|
|
|
|
|
|
// Rust — sqlx / diesel
|
|
|
|
|
DriverRule { leaf: "sqlx::query", kind: DataStoreKind::Sql, label: "sqlx" },
|
|
|
|
|
DriverRule { leaf: "sqlx::query_as", kind: DataStoreKind::Sql, label: "sqlx" },
|
|
|
|
|
DriverRule { leaf: "diesel::sql_query", kind: DataStoreKind::Sql, label: "Diesel" },
|
|
|
|
|
DriverRule { leaf: "PgConnection::establish", kind: DataStoreKind::Sql, label: "Diesel" },
|
|
|
|
|
|
2026-05-16 02:26:41 -05:00
|
|
|
// Type-qualified — fires when the SSA type-fact engine resolves a
|
|
|
|
|
// receiver to `TypeKind::DatabaseConnection` regardless of the bare
|
|
|
|
|
// callee name (e.g. `conn = psycopg2.connect(); conn.cursor()` →
|
|
|
|
|
// typed_call_receivers maps the `.cursor` ordinal to "DatabaseConnection").
|
|
|
|
|
DriverRule { leaf: "DatabaseConnection.cursor", kind: DataStoreKind::Sql, label: "Database connection" },
|
|
|
|
|
DriverRule { leaf: "DatabaseConnection.execute", kind: DataStoreKind::Sql, label: "Database connection" },
|
|
|
|
|
DriverRule { leaf: "DatabaseConnection.query", kind: DataStoreKind::Sql, label: "Database connection" },
|
|
|
|
|
DriverRule { leaf: "DatabaseConnection.exec", kind: DataStoreKind::Sql, label: "Database connection" },
|
|
|
|
|
DriverRule { leaf: "DatabaseConnection.prepare", kind: DataStoreKind::Sql, label: "Database connection" },
|
|
|
|
|
DriverRule { leaf: "DatabaseConnection.commit", kind: DataStoreKind::Sql, label: "Database connection" },
|
|
|
|
|
DriverRule { leaf: "FileHandle.read", kind: DataStoreKind::Filesystem, label: "Filesystem" },
|
|
|
|
|
DriverRule { leaf: "FileHandle.write", kind: DataStoreKind::Filesystem, label: "Filesystem" },
|
|
|
|
|
DriverRule { leaf: "FileHandle.close", kind: DataStoreKind::Filesystem, label: "Filesystem" },
|
|
|
|
|
|
2026-05-15 13:28:58 -05:00
|
|
|
// Filesystem (best-effort: language-agnostic open()-family)
|
|
|
|
|
DriverRule { leaf: "open", kind: DataStoreKind::Filesystem, label: "Filesystem" },
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
/// Walk every function summary's callee list and emit one
|
|
|
|
|
/// [`SurfaceNode::DataStore`] per matched driver call. De-duped on
|
|
|
|
|
/// `(file, line, label)`.
|
2026-05-16 02:26:41 -05:00
|
|
|
///
|
|
|
|
|
/// When the bare callee name does not hit a rule, the type-fact engine's
|
|
|
|
|
/// per-call `typed_call_receivers` map (read off the matching
|
|
|
|
|
/// [`crate::summary::SsaFuncSummary`]) is consulted: a callee whose
|
|
|
|
|
/// receiver was resolved to `TypeKind::DatabaseConnection` or
|
|
|
|
|
/// `TypeKind::FileHandle` is retried under the type-qualified name
|
|
|
|
|
/// `"DatabaseConnection.<method>"` / `"FileHandle.<method>"`, picking up
|
|
|
|
|
/// the bound-receiver call shapes (`conn.cursor()` after
|
|
|
|
|
/// `conn = psycopg2.connect()`) that the name-only matcher misses.
|
2026-05-15 13:28:58 -05:00
|
|
|
pub fn detect_data_stores(summaries: &GlobalSummaries) -> Vec<SurfaceNode> {
|
|
|
|
|
let mut out: Vec<SurfaceNode> = Vec::new();
|
|
|
|
|
let mut seen: std::collections::HashSet<(String, u32, String)> =
|
|
|
|
|
std::collections::HashSet::new();
|
|
|
|
|
for (key, summary) in summaries.iter() {
|
2026-05-16 02:26:41 -05:00
|
|
|
let typed = summaries.get_ssa(key).map(|s| s.typed_call_receivers.as_slice());
|
2026-05-15 13:28:58 -05:00
|
|
|
for callee in &summary.callees {
|
2026-05-16 02:26:41 -05:00
|
|
|
let rule = match_rule(&callee.name).or_else(|| {
|
|
|
|
|
typed
|
|
|
|
|
.and_then(|t| container_for_ordinal(t, callee.ordinal))
|
|
|
|
|
.and_then(|c| match_rule(&qualify(c, &callee.name)))
|
|
|
|
|
});
|
|
|
|
|
let Some(rule) = rule else { continue };
|
2026-05-16 01:46:35 -05:00
|
|
|
let location = call_site_location(summary, callee);
|
2026-05-15 13:28:58 -05:00
|
|
|
let dedup = (
|
|
|
|
|
location.file.clone(),
|
|
|
|
|
location.line,
|
|
|
|
|
rule.label.to_string(),
|
|
|
|
|
);
|
|
|
|
|
if !seen.insert(dedup) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
out.push(SurfaceNode::DataStore(DataStore {
|
|
|
|
|
location,
|
|
|
|
|
kind: rule.kind,
|
|
|
|
|
label: rule.label.to_string(),
|
|
|
|
|
}));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
out
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-16 02:26:41 -05:00
|
|
|
/// Return the trailing segment of a callee text: whatever follows the
/// last `::`, and within that, whatever follows the last `.`. A name
/// containing neither separator is returned unchanged.
fn leaf_segment(name: &str) -> &str {
    let tail = match name.rsplit_once("::") {
        Some((_, rest)) => rest,
        None => name,
    };
    match tail.rsplit_once('.') {
        Some((_, last)) => last,
        None => tail,
    }
}
|
|
|
|
|
|
|
|
|
|
/// Build a type-qualified callee name (`"{container}.{method}"`) for
|
|
|
|
|
/// retry-matching when the bare callee text did not hit any rule.
|
|
|
|
|
fn qualify(container: &str, callee_name: &str) -> String {
|
|
|
|
|
format!("{}.{}", container, leaf_segment(callee_name))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Look up the container name recorded for a call ordinal.
///
/// `typed` is a per-function list of `(ordinal, container)` pairs; it is
/// scanned front to back and the first pair whose ordinal equals
/// `ordinal` wins. Returns `None` when no pair matches. A linear scan is
/// used because these lists are short (typically zero to a few dozen
/// entries), so building a map per summary would not pay off.
fn container_for_ordinal(typed: &[(u32, String)], ordinal: u32) -> Option<&str> {
    for (candidate, container) in typed {
        if *candidate == ordinal {
            return Some(container.as_str());
        }
    }
    None
}
|
|
|
|
|
|
2026-05-15 13:28:58 -05:00
|
|
|
fn match_rule(callee: &str) -> Option<&'static DriverRule> {
|
2026-05-15 13:52:15 -05:00
|
|
|
let cl = callee.trim().to_ascii_lowercase();
|
|
|
|
|
// Normalize `::` → `.` so segment-split treats both as separators.
|
|
|
|
|
let cl_segments = cl.replace("::", ".");
|
|
|
|
|
DRIVER_RULES.iter().find(|r| {
|
|
|
|
|
let rl = r.leaf.to_ascii_lowercase();
|
|
|
|
|
if r.leaf.contains('.') || r.leaf.contains("::") {
|
|
|
|
|
// Qualified pattern (e.g. `psycopg2.connect`, `Eloquent::find`):
|
|
|
|
|
// substring on the full callee text. Qualified shapes are
|
|
|
|
|
// unambiguous so substring is precise enough.
|
|
|
|
|
cl.contains(&rl)
|
|
|
|
|
} else {
|
|
|
|
|
// Bare leaf (e.g. `open`, `fetch`, `PrismaClient`): require a
|
|
|
|
|
// whole-segment match. Prevents `fopen` / `OpenSearch` /
|
|
|
|
|
// `getPrismaClient` from FP-matching short bare leaves.
|
|
|
|
|
cl_segments.split('.').any(|seg| seg == rl)
|
|
|
|
|
}
|
|
|
|
|
})
|
2026-05-15 13:28:58 -05:00
|
|
|
}
|
|
|
|
|
|
2026-05-16 01:46:35 -05:00
|
|
|
/// Source location of a call site. Reads the 1-based `(line, col)`
|
|
|
|
|
/// recorded on the [`CalleeSite`] at CFG-build time (populated for every
|
|
|
|
|
/// summary produced after the span field landed); for legacy summaries
|
|
|
|
|
/// loaded from SQLite with no span, falls back to the function's host
|
|
|
|
|
/// file with line 0.
|
|
|
|
|
fn call_site_location(summary: &FuncSummary, callee: &CalleeSite) -> SourceLocation {
|
|
|
|
|
let (line, col) = callee.span.unwrap_or((0, 0));
|
2026-05-15 13:28:58 -05:00
|
|
|
SourceLocation {
|
|
|
|
|
file: summary.file_path.clone(),
|
2026-05-16 01:46:35 -05:00
|
|
|
line,
|
|
|
|
|
col,
|
2026-05-15 13:28:58 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::symbol::{FuncKey, Lang};

    /// Build a Python function key plus a summary whose callee list holds
    /// one `CalleeSite::bare` entry per string in `callees`.
    fn summary_with_callees(name: &str, file: &str, callees: &[&str]) -> (FuncKey, FuncSummary) {
        let summary = FuncSummary {
            name: name.to_string(),
            file_path: file.to_string(),
            lang: "python".to_string(),
            param_count: 0,
            callees: callees
                .iter()
                .map(|text| CalleeSite::bare(text.to_string()))
                .collect(),
            ..Default::default()
        };
        (FuncKey::new_function(Lang::Python, file, name, None), summary)
    }

    #[test]
    fn datastore_carries_callee_span_when_present() {
        // A callee with a populated span must surface that 1-based
        // `(line, col)` on the emitted node instead of the `(0, 0)`
        // fallback used for span-less summaries.
        let mut site = CalleeSite::bare("psycopg2.connect");
        site.span = Some((42, 13));

        let mut store = GlobalSummaries::new();
        store.insert(
            FuncKey::new_function(Lang::Python, "app.py", "init", None),
            FuncSummary {
                name: "init".into(),
                file_path: "app.py".into(),
                lang: "python".into(),
                param_count: 0,
                callees: vec![site],
                ..Default::default()
            },
        );

        let nodes = detect_data_stores(&store);
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::DataStore(ds) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ds.location.line, 42);
        assert_eq!(ds.location.col, 13);
    }

    #[test]
    fn detects_psycopg2_connect() {
        let (key, summary) = summary_with_callees("init", "app.py", &["psycopg2.connect"]);
        let mut store = GlobalSummaries::new();
        store.insert(key, summary);

        let nodes = detect_data_stores(&store);
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::DataStore(ds) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ds.kind, DataStoreKind::Sql);
        assert_eq!(ds.label, "PostgreSQL (psycopg2)");
    }

    #[test]
    fn detects_gorm_open() {
        let (key, summary) = summary_with_callees("init", "main.go", &["gorm.Open"]);
        let mut store = GlobalSummaries::new();
        store.insert(key, summary);

        let nodes = detect_data_stores(&store);
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::DataStore(ds) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ds.label, "GORM");
    }

    #[test]
    fn dedup_collapses_repeats_in_same_file() {
        let repeated = ["psycopg2.connect", "psycopg2.connect"];
        let (key, summary) = summary_with_callees("init", "app.py", &repeated);
        let mut store = GlobalSummaries::new();
        store.insert(key, summary);

        assert_eq!(detect_data_stores(&store).len(), 1);
    }

    #[test]
    fn bare_open_rule_does_not_match_fopen_or_opensearch() {
        // Every name merely contains "open"; none has it as a whole segment.
        let lookalikes = [
            "fopen",
            "popen",
            "OpenSearch",
            "openssl_encrypt",
            "MongoClient.openSession",
        ];
        let (key, summary) = summary_with_callees("init", "app.py", &lookalikes);
        let mut store = GlobalSummaries::new();
        store.insert(key, summary);

        let nodes = detect_data_stores(&store);
        assert!(
            nodes.is_empty(),
            "bare `open` rule should not FP on {nodes:?}",
        );
    }

    #[test]
    fn bare_open_rule_still_matches_real_open() {
        // Plain `open`.
        let (key, summary) = summary_with_callees("loader", "app.py", &["open"]);
        let mut store = GlobalSummaries::new();
        store.insert(key, summary);
        let nodes = detect_data_stores(&store);
        assert_eq!(nodes.len(), 1);
        let SurfaceNode::DataStore(ds) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ds.kind, DataStoreKind::Filesystem);

        // `open` as the final segment of a qualified name.
        let (key, summary) = summary_with_callees("loader", "app.py", &["builtins.open"]);
        let mut store = GlobalSummaries::new();
        store.insert(key, summary);
        let nodes = detect_data_stores(&store);
        assert_eq!(nodes.len(), 1);
    }

    #[test]
    fn typed_receiver_database_connection_resolves_bound_cursor() {
        // `conn = psycopg2.connect(); conn.cursor()`: the raw callee
        // `conn.cursor` matches no rule. With `typed_call_receivers`
        // holding `(7, "DatabaseConnection")` for the call's ordinal, the
        // detector retries as `DatabaseConnection.cursor` and must emit a
        // Sql datastore node.
        use crate::summary::ssa_summary::SsaFuncSummary;

        let key = FuncKey::new_function(Lang::Python, "app.py", "load", None);

        let mut cursor_call = CalleeSite::bare("conn.cursor");
        cursor_call.ordinal = 7;
        cursor_call.span = Some((4, 8));

        let mut store = GlobalSummaries::new();
        store.insert(
            key.clone(),
            FuncSummary {
                name: "load".into(),
                file_path: "app.py".into(),
                lang: "python".into(),
                param_count: 0,
                callees: vec![cursor_call],
                ..Default::default()
            },
        );

        let mut ssa = SsaFuncSummary::default();
        ssa.typed_call_receivers
            .push((7, "DatabaseConnection".into()));
        store.insert_ssa(key, ssa);

        let nodes = detect_data_stores(&store);
        assert_eq!(nodes.len(), 1, "expected typed retry to hit; got {nodes:?}");
        let SurfaceNode::DataStore(ds) = &nodes[0] else {
            panic!()
        };
        assert_eq!(ds.kind, DataStoreKind::Sql);
        assert_eq!(ds.label, "Database connection");
        assert_eq!(ds.location.line, 4);
    }

    #[test]
    fn typed_receiver_without_ssa_summary_falls_through() {
        // With no SsaFuncSummary inserted there is nothing to drive the
        // typed retry, and the raw `client.cursor` matches no rule, so the
        // detector must return an empty list without panicking.
        let (key, summary) = summary_with_callees("load", "app.py", &["client.cursor"]);
        let mut store = GlobalSummaries::new();
        store.insert(key, summary);

        assert!(detect_data_stores(&store).is_empty());
    }
}
|