mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
* docs: Enhance module documentation across various files for clarity and completeness * fix: Remove unnecessary blank line in build.rs for cleaner code * docs: Update documentation to improve clarity and consistency in code comments
2801 lines
111 KiB
Rust
2801 lines
111 KiB
Rust
//! Whole-program call graph built from pass-1 function summaries.
|
||
//!
|
||
//! Nodes are [`FuncKey`]s (one per function definition across all files).
|
||
//! Edges represent call-site relationships resolved after pass 1 completes.
|
||
//! Unresolved and ambiguous callees are tracked separately so they can be
|
||
//! surfaced in diagnostics without blocking analysis.
|
||
//!
|
||
//! [`CallGraphAnalysis`] computes SCCs and topological order. The scanner
|
||
//! uses topo order in pass 2 so callees are analysed before their callers,
|
||
//! and iterates over SCC groups to a fixed point for mutually recursive
|
||
//! functions.
|
||
|
||
use crate::interop::InteropEdge;
|
||
use crate::rust_resolve::RustUseMap;
|
||
use crate::summary::{CalleeQuery, CalleeResolution, GlobalSummaries};
|
||
use crate::symbol::{FuncKey, Lang};
|
||
use petgraph::graph::NodeIndex;
|
||
use petgraph::prelude::*;
|
||
use smallvec::SmallVec;
|
||
use std::collections::{BTreeMap, HashMap};
|
||
use std::path::{Path, PathBuf};
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Types
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/// Payload stored on every edge of the call graph.
#[derive(Clone, Debug)]
pub struct CallEdge {
    /// Callee text exactly as it was written at the call site
    /// (e.g. `"env::var"`).
    ///
    /// Kept for diagnostics only; this is **not** the normalized form that
    /// resolution uses as its lookup key.
    #[allow(dead_code)] // used for future diagnostics and path display
    pub call_site: String,
}
|
||
|
||
/// A callee that could not be resolved to any known function definition.
|
||
#[derive(Debug, Clone)]
|
||
pub struct UnresolvedCallee {
|
||
pub caller: FuncKey,
|
||
pub callee_name: String,
|
||
}
|
||
|
||
/// A callee that matched multiple function definitions, ambiguous.
|
||
#[derive(Debug, Clone)]
|
||
pub struct AmbiguousCallee {
|
||
pub caller: FuncKey,
|
||
pub callee_name: String,
|
||
pub candidates: Vec<FuncKey>,
|
||
}
|
||
|
||
/// The whole-program call graph.
|
||
///
|
||
/// Nodes are [`FuncKey`]s (one per function definition across all files).
|
||
/// Edges represent call-site relationships resolved after pass 1.
|
||
pub struct CallGraph {
|
||
pub graph: DiGraph<FuncKey, CallEdge>,
|
||
/// `FuncKey → NodeIndex` for quick lookup.
|
||
#[allow(dead_code)] // used for future topo-ordered analysis and call-graph queries
|
||
pub index: HashMap<FuncKey, NodeIndex>,
|
||
/// Callee strings that could not be resolved to any [`FuncKey`].
|
||
pub unresolved_not_found: Vec<UnresolvedCallee>,
|
||
/// Callee strings that matched multiple candidates.
|
||
pub unresolved_ambiguous: Vec<AmbiguousCallee>,
|
||
}
|
||
|
||
/// Result of SCC / topological analysis on the call graph.
|
||
pub struct CallGraphAnalysis {
|
||
/// Strongly connected components.
|
||
pub sccs: Vec<Vec<NodeIndex>>,
|
||
/// Maps each `NodeIndex` to its SCC index in `sccs`.
|
||
#[allow(dead_code)] // used for future topo-ordered taint propagation
|
||
pub node_to_scc: HashMap<NodeIndex, usize>,
|
||
/// SCC indices in **callee-first** (leaves-first) order.
|
||
///
|
||
/// Functions with no callees appear first; callers appear later.
|
||
/// Suitable for bottom-up taint propagation.
|
||
pub topo_scc_callee_first: Vec<usize>,
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Callee-name normalization
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/// Reduce a qualified callee name to at most its last **two** segments.
///
/// The result is a sub-slice of `raw`; nothing is allocated. The untouched
/// raw text is what gets stored on [`CallEdge::call_site`] for diagnostics;
/// this function only produces a shorter disambiguation key.
///
/// `::` separators take precedence: when `raw` contains at least one `::`,
/// segments are counted on `::` only and any `.` is left alone. Names
/// without `::` are split on `.` instead. Names with neither separator are
/// returned unchanged.
///
/// ```text
/// "std::env::var"      → "env::var"
/// "env::var"           → "env::var"
/// "pkg.mod.func"       → "mod.func"
/// "http_client.send"   → "http_client.send"
/// "send"               → "send"
/// ""                   → ""
/// ```
pub(crate) fn normalize_callee_name(raw: &str) -> &str {
    // `None` when `sep` does not occur in `text`; otherwise the tail that
    // starts right after the second-to-last `sep` (or all of `text` when
    // there is only one occurrence, i.e. two segments or fewer).
    fn keep_last_two<'a>(text: &'a str, sep: &str) -> Option<&'a str> {
        let last = text.rfind(sep)?;
        let tail = match text[..last].rfind(sep) {
            Some(previous) => &text[previous + sep.len()..],
            None => text,
        };
        Some(tail)
    }

    // Rust / C++ style qualification first, then dotted paths
    // (method calls, Python / JS modules), else the name as-is.
    keep_last_two(raw, "::")
        .or_else(|| keep_last_two(raw, "."))
        .unwrap_or(raw)
}
|
||
|
||
/// Return only the final (leaf) segment of a possibly qualified callee name.
///
/// Everything up to and including the last `::` is dropped first; from what
/// remains, everything up to and including the last `.` is dropped. This is
/// the single-segment normalization used for direct map lookups whose keys
/// are bare function names.
///
/// ```text
/// "std::env::var"  → "var"
/// "obj.method"     → "method"
/// "foo"            → "foo"
/// ```
pub(crate) fn callee_leaf_name(raw: &str) -> &str {
    let after_path = raw.rsplit_once("::").map_or(raw, |(_, tail)| tail);
    after_path
        .rsplit_once('.')
        .map_or(after_path, |(_, tail)| tail)
}
|
||
|
||
/// Return the segment that sits directly in front of the last separator, as
/// a hint for the callee's container.
///
/// `"OrderService::process"` yields `"OrderService"` and `"obj.method"`
/// yields `"obj"`. An unqualified name such as `"send"` yields the empty
/// string. The hint exists to give [`resolve_callee_key_with_container`]
/// enough context to pick the right method when two classes in the same file
/// define the same leaf name.
///
/// `::` is tried first. When `raw` contains `::`, only `::` is treated as a
/// separator, so `"a.b::c"` yields `"a.b"` and `"a::b.c"` yields `"a"`.
/// Dotted splitting applies only to names without any `::`.
pub(crate) fn callee_container_hint(raw: &str) -> &str {
    let (prefix, separator) = if let Some((head, _)) = raw.rsplit_once("::") {
        (head, "::")
    } else if let Some((head, _)) = raw.rsplit_once('.') {
        (head, ".")
    } else {
        // Unqualified name: there is no container to report.
        return "";
    };
    // Keep only the last segment of the prefix, using the same separator.
    prefix
        .rsplit_once(separator)
        .map_or(prefix, |(_, last)| last)
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Class / container → method index
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/// Per-language `(container, method_name)` → candidate [`FuncKey`] index.
|
||
///
|
||
/// Built once per call-graph construction over every merged
|
||
/// [`crate::summary::FuncSummary`]. Used by edge insertion to restrict an indirect method
|
||
/// call (`receiver.method(...)`) to only those targets whose defining
|
||
/// container matches the receiver's static type. Without a container
|
||
/// hint the index falls back to the bare-name list, matching today's
|
||
/// name-only resolution byte-for-byte.
|
||
///
|
||
/// Key design notes:
|
||
///
|
||
/// * Keys are **language-scoped**, a Java `findById` and a Python
|
||
/// `findById` never alias. Every other index in this module is also
|
||
/// language-scoped (`by_lang_name`, `by_lang_qualified`); keeping the
|
||
/// same partition here means devirtualisation's "subset of today's
|
||
/// targets" invariant is structurally preserved.
|
||
/// * The container key carries the [`FuncKey::container`] verbatim
|
||
/// (e.g. `"Repository"` or nested `"Outer::Inner"`). Empty containers
|
||
/// are not indexed in `by_container`, free top-level functions live
|
||
/// only in `by_name` and are looked up via the `None` container path.
|
||
/// * `SmallVec` inline capacity is sized for the common case (≤ 2 same-
|
||
/// container overloads, ≤ 4 same-name candidates across containers);
|
||
/// spillover allocates but keeps lookups O(1) amortised.
|
||
#[derive(Debug, Default, Clone)]
|
||
pub struct ClassMethodIndex {
|
||
/// `(lang, container, method_name)` → all candidate `FuncKey`s
|
||
/// whose defining container matches. Empty containers are not
|
||
/// indexed here; use the `None` arm of [`Self::resolve`] for those.
|
||
by_container: HashMap<(Lang, String, String), SmallVec<[FuncKey; 2]>>,
|
||
/// `(lang, method_name)` → every `FuncKey` with that leaf name in
|
||
/// the language, regardless of container. This is the fallback
|
||
/// path for calls with no resolvable receiver type and matches
|
||
/// today's name-only edge insertion.
|
||
by_name: HashMap<(Lang, String), SmallVec<[FuncKey; 4]>>,
|
||
}
|
||
|
||
impl ClassMethodIndex {
|
||
/// Build the index from a [`GlobalSummaries`] map.
|
||
///
|
||
/// Iteration is over every `FuncKey` in the map; each key is
|
||
/// inserted into `by_name` and (when its container is non-empty)
|
||
/// into `by_container`. No ordering guarantees on the candidate
|
||
/// vectors, call sites that need determinism should sort downstream.
|
||
pub fn build(summaries: &GlobalSummaries) -> Self {
|
||
let mut by_container: HashMap<(Lang, String, String), SmallVec<[FuncKey; 2]>> =
|
||
HashMap::new();
|
||
let mut by_name: HashMap<(Lang, String), SmallVec<[FuncKey; 4]>> = HashMap::new();
|
||
|
||
for (key, _) in summaries.iter() {
|
||
let name_key = (key.lang, key.name.clone());
|
||
by_name.entry(name_key).or_default().push(key.clone());
|
||
|
||
if !key.container.is_empty() {
|
||
let cont_key = (key.lang, key.container.clone(), key.name.clone());
|
||
by_container.entry(cont_key).or_default().push(key.clone());
|
||
}
|
||
}
|
||
|
||
ClassMethodIndex {
|
||
by_container,
|
||
by_name,
|
||
}
|
||
}
|
||
|
||
/// Resolve `(container, method)` to its candidate target set.
|
||
///
|
||
/// * `container = Some(c)`, return only candidates whose defining
|
||
/// container equals `c`. Empty slice when no such target exists,
|
||
/// even if a same-name function lives in another container.
|
||
/// This is the **devirtualised** path: a hard subset of `by_name`.
|
||
/// * `container = None`, return every same-name candidate in the
|
||
/// language. This is the **fallback** path used when the receiver
|
||
/// type is unknown; matches today's name-only behaviour.
|
||
///
|
||
/// The returned slice is borrowed from the index; lifetime ties to
|
||
/// `&self`. Callers may need to clone keys before mutating the
|
||
/// owning graph.
|
||
pub fn resolve(&self, lang: Lang, container: Option<&str>, method: &str) -> &[FuncKey] {
|
||
match container {
|
||
Some(c) if !c.is_empty() => self
|
||
.by_container
|
||
.get(&(lang, c.to_string(), method.to_string()))
|
||
.map(|v| v.as_slice())
|
||
.unwrap_or_default(),
|
||
_ => self
|
||
.by_name
|
||
.get(&(lang, method.to_string()))
|
||
.map(|v| v.as_slice())
|
||
.unwrap_or_default(),
|
||
}
|
||
}
|
||
|
||
/// Number of distinct `(lang, container, method)` keys. Exposed
|
||
/// for diagnostics / tests; production code uses [`Self::resolve`].
|
||
#[allow(dead_code)]
|
||
pub fn container_keys_len(&self) -> usize {
|
||
self.by_container.len()
|
||
}
|
||
|
||
/// Number of distinct `(lang, method)` keys. Exposed for
|
||
/// diagnostics / tests.
|
||
#[allow(dead_code)]
|
||
pub fn name_keys_len(&self) -> usize {
|
||
self.by_name.len()
|
||
}
|
||
}
|
||
|
||
// ── Type hierarchy index ────────────────────────────────────────────────
|
||
|
||
/// Per-language `(super_type) → sub-types` index built from every merged
|
||
/// [`crate::summary::FuncSummary::hierarchy_edges`]. Lets virtual
|
||
/// dispatch fan out to every concrete implementer's matching method.
|
||
///
|
||
/// Covers Java `extends`/`implements`, Rust `impl Trait for Type`, TS
|
||
/// `extends`/`implements`, Python `class X(Base)`, plus PHP/Ruby/C++
|
||
/// (see `crate::cfg::hierarchy`). Go's structural interfaces are
|
||
/// intentionally omitted, name-only resolution is used instead.
|
||
///
|
||
/// Container names are bare (no namespace), so cross-namespace aliases
|
||
/// may over-fan-out. That is conservative for correctness.
|
||
#[derive(Debug, Default, Clone)]
|
||
pub struct TypeHierarchyIndex {
|
||
/// `(lang, super_type)` → distinct sub-type / impl container names.
|
||
by_super: HashMap<(Lang, String), SmallVec<[String; 4]>>,
|
||
/// `(lang, sub_type)` → super-types this type extends / implements.
|
||
/// Future use for `super.method()` resolution; populated for
|
||
/// completeness today.
|
||
#[allow(dead_code)]
|
||
by_sub: HashMap<(Lang, String), SmallVec<[String; 2]>>,
|
||
}
|
||
|
||
impl TypeHierarchyIndex {
|
||
/// Build the index from every merged
|
||
/// [`crate::summary::FuncSummary::hierarchy_edges`] vector. Each
|
||
/// `(sub, super)` pair is inserted once per language; duplicates
|
||
/// across files (the same edge written into every per-file
|
||
/// summary) collapse via the membership check.
|
||
pub fn build(summaries: &GlobalSummaries) -> Self {
|
||
let mut by_super: HashMap<(Lang, String), SmallVec<[String; 4]>> = HashMap::new();
|
||
let mut by_sub: HashMap<(Lang, String), SmallVec<[String; 2]>> = HashMap::new();
|
||
|
||
for (key, summary) in summaries.iter() {
|
||
let lang = key.lang;
|
||
for (sub, sup) in &summary.hierarchy_edges {
|
||
if sub.is_empty() || sup.is_empty() {
|
||
continue;
|
||
}
|
||
let subs = by_super.entry((lang, sup.clone())).or_default();
|
||
if !subs.iter().any(|s| s == sub) {
|
||
subs.push(sub.clone());
|
||
}
|
||
let sups = by_sub.entry((lang, sub.clone())).or_default();
|
||
if !sups.iter().any(|s| s == sup) {
|
||
sups.push(sup.clone());
|
||
}
|
||
}
|
||
}
|
||
|
||
TypeHierarchyIndex { by_super, by_sub }
|
||
}
|
||
|
||
/// Return the distinct sub-type / impl container names for
|
||
/// `super_type`. Empty slice when the type has no recorded
|
||
/// subs (i.e. either it's a leaf type or no matching
|
||
/// hierarchy edges were extracted).
|
||
pub fn subs_of(&self, lang: Lang, super_type: &str) -> &[String] {
|
||
self.by_super
|
||
.get(&(lang, super_type.to_string()))
|
||
.map(|v| v.as_slice())
|
||
.unwrap_or_default()
|
||
}
|
||
|
||
/// Return the recorded super-types of `sub_type`. Empty when
|
||
/// `sub_type` has no recorded super-types in this language.
|
||
#[allow(dead_code)]
|
||
pub fn supers_of(&self, lang: Lang, sub_type: &str) -> &[String] {
|
||
self.by_sub
|
||
.get(&(lang, sub_type.to_string()))
|
||
.map(|v| v.as_slice())
|
||
.unwrap_or_default()
|
||
}
|
||
|
||
/// Number of distinct `(lang, super_type)` keys. Exposed for
|
||
/// diagnostics / tests.
|
||
#[allow(dead_code)]
|
||
pub fn super_keys_len(&self) -> usize {
|
||
self.by_super.len()
|
||
}
|
||
|
||
/// Resolve `(container, method)` widened by hierarchy lookup,
|
||
/// returning every concrete-implementer FuncKey whose container
|
||
/// is `container` itself OR a known sub-type of `container`.
|
||
///
|
||
/// Behaviour:
|
||
/// * `container = None` → falls through to
|
||
/// [`ClassMethodIndex::resolve`]'s name-only path; the
|
||
/// hierarchy lookup is a no-op.
|
||
/// * `container = Some(c)` and `c` has no recorded sub-types →
|
||
/// identical to `ClassMethodIndex::resolve(_, Some(c), _)`.
|
||
/// * `container = Some(c)` with sub-types `s1, s2, …` → union of
|
||
/// `resolve(_, Some(c), m)` ∪ `resolve(_, Some(s1), m)` ∪
|
||
/// `resolve(_, Some(s2), m)` ∪ …. Dedup is applied.
|
||
///
|
||
/// The returned `Vec` is a fresh allocation since the union is
|
||
/// computed across multiple borrowed slices in the underlying
|
||
/// [`ClassMethodIndex`] and cannot share storage with any of them.
|
||
/// Cost: O(k · m) where k = number of sub-types and m = average
|
||
/// candidates per `(container, method)` lookup; in practice k is
|
||
/// in the single digits.
|
||
pub fn resolve_with_hierarchy(
|
||
&self,
|
||
method_index: &ClassMethodIndex,
|
||
lang: Lang,
|
||
container: Option<&str>,
|
||
method: &str,
|
||
) -> Vec<FuncKey> {
|
||
let Some(c) = container.filter(|s| !s.is_empty()) else {
|
||
return method_index.resolve(lang, None, method).to_vec();
|
||
};
|
||
let mut out: Vec<FuncKey> = Vec::new();
|
||
let push_unique = |dst: &mut Vec<FuncKey>, src: &[FuncKey]| {
|
||
for k in src {
|
||
if !dst.iter().any(|e| e == k) {
|
||
dst.push(k.clone());
|
||
}
|
||
}
|
||
};
|
||
// Direct container match first.
|
||
push_unique(&mut out, method_index.resolve(lang, Some(c), method));
|
||
// Each known sub-type of `c`.
|
||
for sub in self.subs_of(lang, c) {
|
||
push_unique(
|
||
&mut out,
|
||
method_index.resolve(lang, Some(sub.as_str()), method),
|
||
);
|
||
}
|
||
out
|
||
}
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Call-graph construction
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/// Build the whole-program call graph from merged summaries.
|
||
///
|
||
/// Resolution strategy:
|
||
/// 1. Extract leaf name for `resolve_callee_key` lookup
|
||
/// 2. Same-language, arity-filtered, namespace-disambiguated lookup
|
||
/// 3. On ambiguity: use two-segment qualified name to narrow candidates
|
||
/// 4. Interop edges (explicit cross-language bridges)
|
||
///
|
||
/// Typed-call devirtualisation: when the caller's SSA summary carries
|
||
/// a typed container for a call ordinal, that site is first resolved
|
||
/// via [`ClassMethodIndex`] restricted to the receiver type. Exact
|
||
/// match → edge; multi-candidate → fed back through
|
||
/// `CalleeQuery.receiver_type`; zero match → name-only fallback.
|
||
///
|
||
/// Unresolved and ambiguous callees are recorded for diagnostics but
|
||
/// do **not** create edges.
|
||
pub fn build_call_graph(summaries: &GlobalSummaries, interop_edges: &[InteropEdge]) -> CallGraph {
|
||
let mut graph = DiGraph::new();
|
||
let mut index = HashMap::new();
|
||
|
||
// 1. Create one node per FuncKey.
|
||
for (key, _) in summaries.iter() {
|
||
let idx = graph.add_node(key.clone());
|
||
index.insert(key.clone(), idx);
|
||
}
|
||
|
||
// build a single `(lang, container, name) → candidates`
|
||
// index from the merged summaries. Used below to devirtualise
|
||
// every method-call edge whose receiver has a recoverable type
|
||
// fact. Cost is one allocation per FuncKey across the program;
|
||
// amortised against the per-call-site savings, this is a clear
|
||
// win on codebases with many same-name methods.
|
||
let method_index = ClassMethodIndex::build(summaries);
|
||
|
||
// build a sibling `(lang, super_type) → sub_types` index
|
||
// from every merged summary's `hierarchy_edges`. Consumed below
|
||
// to fan out method-call edges to all known concrete
|
||
// implementers when a receiver's static type is a super-class /
|
||
// trait / interface. Empty for languages without an extractor
|
||
// (Go, C) and for files with no inheritance / impl declarations.
|
||
let hierarchy = TypeHierarchyIndex::build(summaries);
|
||
|
||
let mut unresolved_not_found = Vec::new();
|
||
let mut unresolved_ambiguous = Vec::new();
|
||
|
||
// 2. Resolve callees and add edges.
|
||
for (caller_key, summary) in summaries.iter() {
|
||
let caller_node = index[caller_key];
|
||
|
||
// Rebuild the caller's `use` map once per function rather than per
|
||
// call site. Non-Rust callers always get `None`.
|
||
let rust_use_map: Option<RustUseMap> = if caller_key.lang == Lang::Rust {
|
||
match (&summary.rust_use_map, &summary.rust_wildcards) {
|
||
(None, None) => None,
|
||
(a, w) => Some(RustUseMap {
|
||
aliases: a.clone().unwrap_or_default(),
|
||
wildcards: w.clone().unwrap_or_default(),
|
||
}),
|
||
}
|
||
} else {
|
||
None
|
||
};
|
||
|
||
// per-caller `(call_ordinal → container_name)` map
|
||
// pulled from the caller's SSA summary, when one exists.
|
||
// Empty when the caller has no SSA summary (zero-param trivial
|
||
// bodies skip extraction unless they had typed receivers) or
|
||
// when no method call inside the caller had a recoverable
|
||
// receiver type. Empty maps mean today's resolution path
|
||
// applies unchanged for every site in this caller.
|
||
let typed_receivers: HashMap<u32, &str> = summaries
|
||
.get_ssa(caller_key)
|
||
.map(|ssa| {
|
||
ssa.typed_call_receivers
|
||
.iter()
|
||
.map(|(ord, c)| (*ord, c.as_str()))
|
||
.collect()
|
||
})
|
||
.unwrap_or_default();
|
||
|
||
for site in &summary.callees {
|
||
let raw_callee = site.name.as_str();
|
||
// Use leaf name for the initial lookup (FuncKey.name is always leaf).
|
||
let leaf = callee_leaf_name(raw_callee);
|
||
// Two-segment form for diagnostics / fallback disambiguation.
|
||
let qualified = normalize_callee_name(raw_callee);
|
||
// Structured arity carried per call site, used to disambiguate
|
||
// same-name/different-arity overloads during resolution.
|
||
let arity_hint: Option<usize> = site.arity;
|
||
|
||
// Devirtualisation: for method calls whose SSA summary
|
||
// recorded a typed container, resolve via ClassMethodIndex
|
||
// first. Single match → direct edge; multi → fall through
|
||
// with `receiver_type` set; zero → name-only fallback so
|
||
// misclassified receivers never silently drop edges.
|
||
let typed_container: Option<&str> = if site.receiver.is_some() {
|
||
typed_receivers.get(&site.ordinal).copied()
|
||
} else {
|
||
None
|
||
};
|
||
|
||
if let Some(container) = typed_container {
|
||
// Resolve the typed container plus every known
|
||
// sub-type / impl, so a super-class / trait / interface
|
||
// receiver fans out to every concrete implementer.
|
||
// No hierarchy entry → direct-container lookup.
|
||
let widened: Vec<FuncKey> = hierarchy.resolve_with_hierarchy(
|
||
&method_index,
|
||
caller_key.lang,
|
||
Some(container),
|
||
leaf,
|
||
);
|
||
let arity_filtered: Vec<&FuncKey> = widened
|
||
.iter()
|
||
.filter(|k| match arity_hint {
|
||
Some(a) => k.arity == Some(a),
|
||
None => true,
|
||
})
|
||
.collect();
|
||
if arity_filtered.len() == 1 {
|
||
if let Some(&target_node) = index.get(arity_filtered[0]) {
|
||
graph.add_edge(
|
||
caller_node,
|
||
target_node,
|
||
CallEdge {
|
||
call_site: raw_callee.to_string(),
|
||
},
|
||
);
|
||
}
|
||
continue;
|
||
}
|
||
// multiple arity-filtered candidates means
|
||
// genuine virtual dispatch through a super-type, fan
|
||
// out to *every* implementer. This widens edges
|
||
// (correctly: the call genuinely may target any
|
||
// implementer at runtime) so SCC sizes may grow on
|
||
// codebases with deep inheritance hierarchies.
|
||
//
|
||
// Authoritative narrowing via `resolve_callee` only
|
||
// applies when the typed container is a *concrete*
|
||
// class (sub-types empty); we detect this by checking
|
||
// whether the direct method_index lookup would yield
|
||
// every arity-filtered candidate. If hierarchy
|
||
// expansion produced extra candidates, fan out.
|
||
let direct_matches: Vec<&FuncKey> = method_index
|
||
.resolve(caller_key.lang, Some(container), leaf)
|
||
.iter()
|
||
.filter(|k| match arity_hint {
|
||
Some(a) => k.arity == Some(a),
|
||
None => true,
|
||
})
|
||
.collect();
|
||
if !arity_filtered.is_empty() && arity_filtered.len() > direct_matches.len() {
|
||
// Hierarchy fan-out path: add an edge per
|
||
// implementer. Continue past the
|
||
// `resolve_callee` block so we don't double-add.
|
||
for &target_key in &arity_filtered {
|
||
if let Some(&target_node) = index.get(target_key) {
|
||
graph.add_edge(
|
||
caller_node,
|
||
target_node,
|
||
CallEdge {
|
||
call_site: raw_callee.to_string(),
|
||
},
|
||
);
|
||
}
|
||
}
|
||
continue;
|
||
}
|
||
// Either zero matches (fall through to legacy path) or
|
||
// multiple matches on the direct container, let
|
||
// `resolve_callee` apply its authoritative
|
||
// receiver_type filter + tie-breakers.
|
||
if !arity_filtered.is_empty() {
|
||
let caller_container: Option<&str> = if caller_key.container.is_empty() {
|
||
None
|
||
} else {
|
||
Some(caller_key.container.as_str())
|
||
};
|
||
let resolution = summaries.resolve_callee(&CalleeQuery {
|
||
name: leaf,
|
||
caller_lang: caller_key.lang,
|
||
caller_namespace: &caller_key.namespace,
|
||
caller_container,
|
||
receiver_type: Some(container),
|
||
namespace_qualifier: site.qualifier.as_deref(),
|
||
receiver_var: site.receiver.as_deref(),
|
||
arity: arity_hint,
|
||
});
|
||
if let CalleeResolution::Resolved(key) = resolution
|
||
&& let Some(&target_node) = index.get(&key)
|
||
{
|
||
graph.add_edge(
|
||
caller_node,
|
||
target_node,
|
||
CallEdge {
|
||
call_site: raw_callee.to_string(),
|
||
},
|
||
);
|
||
continue;
|
||
}
|
||
// Authoritative receiver_type miss with multiple
|
||
// bare candidates: fall through to today's path.
|
||
}
|
||
}
|
||
|
||
// Rust callers with a module-qualified call (no receiver) go
|
||
// through the `use`-map aware resolver first. When the call has
|
||
// a structured receiver it is a method call, the qualifier is
|
||
// an impl/trait name, not a module path, so we fall back to the
|
||
// structured resolver. All other languages skip the use-map
|
||
// branch entirely.
|
||
let use_rust_path = caller_key.lang == Lang::Rust && site.receiver.is_none();
|
||
let resolution = if use_rust_path {
|
||
summaries.resolve_callee_key_rust(
|
||
leaf,
|
||
site.qualifier.as_deref(),
|
||
arity_hint,
|
||
&caller_key.namespace,
|
||
rust_use_map.as_ref(),
|
||
)
|
||
} else {
|
||
// Non-Rust, or Rust method call with a receiver: route
|
||
// through the qualified-first resolver. We deliberately
|
||
// categorize each hint so the resolver can apply the right
|
||
// policy:
|
||
//
|
||
// * `namespace_qualifier`, structured module/namespace
|
||
// prefix (`env` in `env::var`, `http` in `http.Get`).
|
||
// * `receiver_var`, syntactic receiver variable (e.g.
|
||
// `obj` in `obj.method`); used only as a last tie-break.
|
||
// * `caller_container`, caller's own class/impl, so bare
|
||
// `foo()` inside a method resolves to the same class.
|
||
//
|
||
// The raw text-parsed container (legacy
|
||
// `callee_container_hint`) is only consulted when the
|
||
// structured `CalleeSite` fields are absent (e.g. old
|
||
// summaries loaded from SQLite without `qualifier`).
|
||
let parsed_container = {
|
||
let raw = callee_container_hint(raw_callee);
|
||
if raw.is_empty() {
|
||
None
|
||
} else {
|
||
Some(raw.to_string())
|
||
}
|
||
};
|
||
let namespace_qualifier = site.qualifier.clone().or_else(|| {
|
||
if site.receiver.is_none() {
|
||
parsed_container.clone()
|
||
} else {
|
||
None
|
||
}
|
||
});
|
||
let receiver_var = site.receiver.clone();
|
||
let caller_container: Option<&str> = if caller_key.container.is_empty() {
|
||
None
|
||
} else {
|
||
Some(caller_key.container.as_str())
|
||
};
|
||
summaries.resolve_callee(&CalleeQuery {
|
||
name: leaf,
|
||
caller_lang: caller_key.lang,
|
||
caller_namespace: &caller_key.namespace,
|
||
caller_container,
|
||
receiver_type: None,
|
||
namespace_qualifier: namespace_qualifier.as_deref(),
|
||
receiver_var: receiver_var.as_deref(),
|
||
arity: arity_hint,
|
||
})
|
||
};
|
||
|
||
match resolution {
|
||
CalleeResolution::Resolved(target_key) => {
|
||
if let Some(&target_node) = index.get(&target_key) {
|
||
graph.add_edge(
|
||
caller_node,
|
||
target_node,
|
||
CallEdge {
|
||
call_site: raw_callee.to_string(),
|
||
},
|
||
);
|
||
}
|
||
}
|
||
CalleeResolution::NotFound => {
|
||
// Try interop edges before recording as not-found.
|
||
if let Some(target_key) =
|
||
resolve_via_interop(raw_callee, caller_key, interop_edges)
|
||
&& let Some(&target_node) = index.get(&target_key)
|
||
{
|
||
graph.add_edge(
|
||
caller_node,
|
||
target_node,
|
||
CallEdge {
|
||
call_site: raw_callee.to_string(),
|
||
},
|
||
);
|
||
continue;
|
||
}
|
||
unresolved_not_found.push(UnresolvedCallee {
|
||
caller: caller_key.clone(),
|
||
callee_name: raw_callee.to_string(),
|
||
});
|
||
}
|
||
CalleeResolution::Ambiguous(candidates) => {
|
||
// Use the two-segment qualified name to narrow ambiguous candidates.
|
||
// If the callee was qualified (e.g. "env::var"), prefer candidates
|
||
// whose namespace contains the qualifier prefix.
|
||
if qualified != leaf {
|
||
let qualifier =
|
||
&qualified[..qualified.len() - leaf.len()].trim_end_matches([':', '.']);
|
||
let narrowed: Vec<_> = candidates
|
||
.iter()
|
||
.filter(|k| k.namespace.contains(qualifier))
|
||
.cloned()
|
||
.collect();
|
||
if narrowed.len() == 1
|
||
&& let Some(&target_node) = index.get(&narrowed[0])
|
||
{
|
||
graph.add_edge(
|
||
caller_node,
|
||
target_node,
|
||
CallEdge {
|
||
call_site: raw_callee.to_string(),
|
||
},
|
||
);
|
||
continue;
|
||
}
|
||
}
|
||
unresolved_ambiguous.push(AmbiguousCallee {
|
||
caller: caller_key.clone(),
|
||
callee_name: raw_callee.to_string(),
|
||
candidates,
|
||
});
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
CallGraph {
|
||
graph,
|
||
index,
|
||
unresolved_not_found,
|
||
unresolved_ambiguous,
|
||
}
|
||
}
|
||
|
||
/// Check interop edges for a matching cross-language bridge.
|
||
fn resolve_via_interop(
|
||
raw_callee: &str,
|
||
caller_key: &FuncKey,
|
||
interop_edges: &[InteropEdge],
|
||
) -> Option<FuncKey> {
|
||
for edge in interop_edges {
|
||
if edge.from.caller_lang == caller_key.lang
|
||
&& edge.from.caller_namespace == caller_key.namespace
|
||
&& edge.from.callee_symbol == raw_callee
|
||
&& (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_key.name)
|
||
{
|
||
return Some(edge.to.clone());
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// SCC / topological analysis
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/// Compute SCC decomposition and topological ordering of the call graph.
|
||
///
|
||
/// `petgraph::algo::tarjan_scc` returns SCCs in *reverse* topological order
|
||
/// of the condensation DAG, i.e. leaf SCCs (no outgoing cross-SCC edges)
|
||
/// come **first**. That is exactly the **callee-first** order suitable for
|
||
/// bottom-up taint propagation.
|
||
pub fn analyse(cg: &CallGraph) -> CallGraphAnalysis {
|
||
let sccs = petgraph::algo::tarjan_scc(&cg.graph);
|
||
|
||
let mut node_to_scc = HashMap::with_capacity(cg.graph.node_count());
|
||
for (scc_idx, scc) in sccs.iter().enumerate() {
|
||
for &node in scc {
|
||
node_to_scc.insert(node, scc_idx);
|
||
}
|
||
}
|
||
|
||
// tarjan_scc already gives callee-first ordering.
|
||
let topo_scc_callee_first: Vec<usize> = (0..sccs.len()).collect();
|
||
|
||
CallGraphAnalysis {
|
||
sccs,
|
||
node_to_scc,
|
||
topo_scc_callee_first,
|
||
}
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// File-level batch ordering
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
/// Files that share one topological position, plus two flags describing the
/// SCCs that contributed to the batch.
///
/// `has_mutual_recursion` is set when a contributing SCC has more than one
/// node; it triggers the SCC fixed-point loop in `run_topo_batches`.
/// `cross_file` is the tighter signal used by joint fixed-point convergence:
/// it implies the recursion involves at least one cross-file call edge, so
/// the inline cache and per-iteration findings need joint convergence and
/// not only summary convergence.
pub struct FileBatch<'a> {
    /// Files analysed together in this batch.
    pub files: Vec<&'a PathBuf>,
    /// True when any contributing SCC contains mutual recursion (len > 1).
    pub has_mutual_recursion: bool,
    /// True when at least one contributing SCC has nodes in more than one
    /// distinct file (namespace). When set, the SCC iteration loop should
    /// include the cross-file inline cache fingerprint in its convergence
    /// check.
    ///
    /// `cross_file` implies `has_mutual_recursion`: a cross-file SCC must be
    /// recursive, otherwise its files would topo-sort linearly and would not
    /// be batched together.
    pub cross_file: bool,
}
|
||
|
||
/// Returns `true` when the given SCC has nodes belonging to more than one
|
||
/// distinct namespace (file). Used to flag cross-file SCCs that need the
|
||
/// cross-file joint fixed-point treatment.
|
||
///
|
||
/// Single-node SCCs always return `false`. Multi-node SCCs whose nodes
|
||
/// all belong to the same namespace return `false`.
|
||
/// Reverse-edge traversal: return every [`FuncKey`] that has a call
|
||
/// edge *into* `callee`. Used by the Phase-B worklist to compute
|
||
/// which callers need re-analysis after a callee's summary has
|
||
/// changed.
|
||
///
|
||
/// Returns an empty vector when the callee is unknown to the call
|
||
/// graph (e.g. summary was never produced, or the key was synthesised
|
||
/// post-build).
|
||
///
|
||
/// Cost: O(in_degree) via petgraph's `Incoming` neighbours iterator;
|
||
/// no allocation beyond the returned `Vec`.
|
||
pub fn callers_of(cg: &CallGraph, callee: &FuncKey) -> Vec<FuncKey> {
|
||
let Some(&node) = cg.index.get(callee) else {
|
||
return Vec::new();
|
||
};
|
||
cg.graph
|
||
.neighbors_directed(node, petgraph::Direction::Incoming)
|
||
.map(|caller_node| cg.graph[caller_node].clone())
|
||
.collect()
|
||
}
|
||
|
||
/// Compute the set of file namespaces that must be re-analysed when a
|
||
/// given set of callee [`FuncKey`]s have had their summaries refined.
|
||
///
|
||
/// Fans out from each changed callee to its callers via
|
||
/// [`callers_of`], then projects onto `FuncKey::namespace`. The
|
||
/// result is a `HashSet<String>` suitable for membership checks while
|
||
/// filtering the batch's file list.
|
||
///
|
||
/// A changed callee's *own* namespace is also included, if the
|
||
/// callee's summary was refined, the file it lives in may itself
|
||
/// have been a caller (intra-file recursion) or may carry sibling
|
||
/// functions whose analysis should be re-run alongside the callee
|
||
/// for consistency.
|
||
///
|
||
/// Deterministic: returns a [`std::collections::HashSet`] so iteration
|
||
/// order is not guaranteed, but membership is deterministic. Callers
|
||
/// that need ordered output should collect and sort.
|
||
pub fn namespaces_for_callers(
|
||
cg: &CallGraph,
|
||
changed: &std::collections::HashSet<FuncKey>,
|
||
) -> std::collections::HashSet<String> {
|
||
let mut result = std::collections::HashSet::new();
|
||
for key in changed {
|
||
result.insert(key.namespace.clone());
|
||
for caller in callers_of(cg, key) {
|
||
result.insert(caller.namespace);
|
||
}
|
||
}
|
||
result
|
||
}
|
||
|
||
pub fn scc_spans_files(cg: &CallGraph, scc: &[NodeIndex]) -> bool {
|
||
if scc.len() < 2 {
|
||
return false;
|
||
}
|
||
let mut iter = scc.iter();
|
||
let first_ns = iter.next().map(|n| cg.graph[*n].namespace.as_str());
|
||
let Some(first_ns) = first_ns else {
|
||
return false;
|
||
};
|
||
iter.any(|n| cg.graph[*n].namespace.as_str() != first_ns)
|
||
}
|
||
|
||
/// Like [`scc_file_batches`] but annotates each batch with whether any
|
||
/// contributing SCC has mutual recursion (`len > 1`).
|
||
///
|
||
/// Returns `(ordered_batches, orphan_files)`.
|
||
pub fn scc_file_batches_with_metadata<'a>(
|
||
cg: &CallGraph,
|
||
analysis: &CallGraphAnalysis,
|
||
all_files: &'a [PathBuf],
|
||
root: &Path,
|
||
) -> (Vec<FileBatch<'a>>, Vec<&'a PathBuf>) {
|
||
let root_str = root.to_string_lossy();
|
||
|
||
// 1. Map relative-path → &PathBuf for each file in all_files.
|
||
let mut rel_to_path: HashMap<String, &'a PathBuf> = HashMap::with_capacity(all_files.len());
|
||
for p in all_files {
|
||
let abs = p.to_string_lossy();
|
||
let rel = crate::symbol::normalize_namespace(&abs, Some(&root_str));
|
||
rel_to_path.insert(rel, p);
|
||
}
|
||
|
||
// 2. Build file relative-path → (min topo index, has_mutual_recursion, cross_file).
|
||
// `cross_file` is set whenever the file participates in an SCC whose
|
||
// nodes span more than one namespace, the cross-file signal.
|
||
let mut file_topo: HashMap<&str, (usize, bool, bool)> = HashMap::new();
|
||
for (topo_pos, &scc_idx) in analysis.topo_scc_callee_first.iter().enumerate() {
|
||
let scc_recursive = analysis.sccs[scc_idx].len() > 1;
|
||
let scc_cross_file = scc_spans_files(cg, &analysis.sccs[scc_idx]);
|
||
for &node in &analysis.sccs[scc_idx] {
|
||
let ns = &cg.graph[node].namespace;
|
||
file_topo
|
||
.entry(ns.as_str())
|
||
.and_modify(|(min_pos, recursive, cross_file)| {
|
||
if topo_pos < *min_pos {
|
||
*min_pos = topo_pos;
|
||
}
|
||
*recursive |= scc_recursive;
|
||
*cross_file |= scc_cross_file;
|
||
})
|
||
.or_insert((topo_pos, scc_recursive, scc_cross_file));
|
||
}
|
||
}
|
||
|
||
// 3. Group files by min topo index, preserving order via BTreeMap.
|
||
// Track mutual-recursion and cross-file flags per group.
|
||
let mut topo_groups: BTreeMap<usize, (Vec<&'a PathBuf>, bool, bool)> = BTreeMap::new();
|
||
let mut orphans: Vec<&'a PathBuf> = Vec::new();
|
||
|
||
for p in all_files {
|
||
let abs = p.to_string_lossy();
|
||
let rel = crate::symbol::normalize_namespace(&abs, Some(&root_str));
|
||
if let Some(&(topo_pos, recursive, cross_file)) = file_topo.get(rel.as_str()) {
|
||
let entry = topo_groups
|
||
.entry(topo_pos)
|
||
.or_insert_with(|| (Vec::new(), false, false));
|
||
entry.0.push(p);
|
||
entry.1 |= recursive;
|
||
entry.2 |= cross_file;
|
||
} else {
|
||
orphans.push(p);
|
||
}
|
||
}
|
||
|
||
let batches: Vec<FileBatch<'a>> = topo_groups
|
||
.into_values()
|
||
.map(|(files, has_mutual_recursion, cross_file)| FileBatch {
|
||
files,
|
||
has_mutual_recursion,
|
||
cross_file,
|
||
})
|
||
.collect();
|
||
(batches, orphans)
|
||
}
|
||
|
||
/// Map SCC topological order to an ordered sequence of file-path batches.
|
||
///
|
||
/// Uses **min** topo index: a file is placed in the earliest batch where any
|
||
/// of its functions appear. This ensures leaf callees are available as early
|
||
/// as possible for files that depend on them. Caller functions in the same
|
||
/// file that happen to be in a later SCC are no worse off than the current
|
||
/// fully-parallel approach, they simply don't yet benefit from ordering,
|
||
/// but nothing is lost.
|
||
///
|
||
/// Returns `(ordered_batches, orphan_files)` where orphan_files are paths
|
||
/// from `all_files` that have no functions in the call graph.
|
||
#[allow(dead_code)] // kept for tests; production callers use scc_file_batches_with_metadata
|
||
pub fn scc_file_batches<'a>(
|
||
cg: &CallGraph,
|
||
analysis: &CallGraphAnalysis,
|
||
all_files: &'a [PathBuf],
|
||
root: &Path,
|
||
) -> (Vec<Vec<&'a PathBuf>>, Vec<&'a PathBuf>) {
|
||
let root_str = root.to_string_lossy();
|
||
|
||
// 1. Map relative-path → &PathBuf for each file in all_files.
|
||
let mut rel_to_path: HashMap<String, &'a PathBuf> = HashMap::with_capacity(all_files.len());
|
||
for p in all_files {
|
||
let abs = p.to_string_lossy();
|
||
let rel = crate::symbol::normalize_namespace(&abs, Some(&root_str));
|
||
rel_to_path.insert(rel, p);
|
||
}
|
||
|
||
// 2. Build file relative-path → min topo index.
|
||
let mut file_min_topo: HashMap<&str, usize> = HashMap::new();
|
||
for (topo_pos, &scc_idx) in analysis.topo_scc_callee_first.iter().enumerate() {
|
||
for &node in &analysis.sccs[scc_idx] {
|
||
let ns = &cg.graph[node].namespace;
|
||
file_min_topo.entry(ns.as_str()).or_insert(topo_pos);
|
||
}
|
||
}
|
||
|
||
// 3. Group files by min topo index, preserving order via BTreeMap.
|
||
let mut topo_groups: BTreeMap<usize, Vec<&'a PathBuf>> = BTreeMap::new();
|
||
let mut orphans: Vec<&'a PathBuf> = Vec::new();
|
||
|
||
for p in all_files {
|
||
let abs = p.to_string_lossy();
|
||
let rel = crate::symbol::normalize_namespace(&abs, Some(&root_str));
|
||
if let Some(&topo_pos) = file_min_topo.get(rel.as_str()) {
|
||
topo_groups.entry(topo_pos).or_default().push(p);
|
||
} else {
|
||
orphans.push(p);
|
||
}
|
||
}
|
||
|
||
let batches: Vec<Vec<&'a PathBuf>> = topo_groups.into_values().collect();
|
||
(batches, orphans)
|
||
}
|
||
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
// Tests
|
||
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use crate::interop::CallSiteKey;
|
||
use crate::summary::{CalleeSite, FuncSummary, merge_summaries};
|
||
use crate::symbol::Lang;
|
||
|
||
/// Helper to create a minimal FuncSummary.
|
||
fn make_summary(
|
||
name: &str,
|
||
file_path: &str,
|
||
lang: &str,
|
||
param_count: usize,
|
||
callees: Vec<&str>,
|
||
) -> FuncSummary {
|
||
FuncSummary {
|
||
name: name.into(),
|
||
file_path: file_path.into(),
|
||
lang: lang.into(),
|
||
param_count,
|
||
param_names: vec![],
|
||
source_caps: 0,
|
||
sanitizer_caps: 0,
|
||
sink_caps: 0,
|
||
propagating_params: vec![],
|
||
propagates_taint: false,
|
||
tainted_sink_params: vec![],
|
||
callees: callees
|
||
.into_iter()
|
||
.map(crate::summary::CalleeSite::bare)
|
||
.collect(),
|
||
..Default::default()
|
||
}
|
||
}
|
||
|
||
// ── normalize_callee_name (two-segment) ─────────────────────────────
|
||
|
||
#[test]
fn normalize_callee_two_segment() {
    // Two-segment normalization keeps exactly one level of qualification.
    // Each entry is (raw callee, expected normalized form).
    let cases = [
        ("env::var", "env::var"),
        ("std::env::var", "env::var"),
        ("std::process::Command", "process::Command"),
        ("a::b::c", "b::c"),
        ("obj.method", "obj.method"),
        ("pkg.mod.func", "mod.func"),
        ("http_client.send", "http_client.send"),
        ("send", "send"),
        ("foo", "foo"),
        ("", ""),
    ];
    for (raw, expected) in cases {
        assert_eq!(normalize_callee_name(raw), expected);
    }
}
|
||
|
||
// ── callee_leaf_name (single-segment, backward compat) ───────────────
|
||
|
||
#[test]
fn callee_leaf_basic() {
    // Each entry is (raw callee, expected last segment).
    let cases = [
        ("env::var", "var"),
        ("std::process::Command", "Command"),
        ("obj.method", "method"),
        ("pkg.mod.func", "func"),
        ("foo", "foo"),
        ("", ""),
    ];
    for (raw, expected) in cases {
        assert_eq!(callee_leaf_name(raw), expected);
    }
}
|
||
|
||
// ── same name, different Rust modules ────────────────────────────────
|
||
|
||
#[test]
fn same_name_different_rust_modules() {
    let summaries = vec![
        make_summary("helper", "src/a.rs", "rust", 0, vec![]),
        make_summary("helper", "src/b.rs", "rust", 0, vec![]),
        make_summary("caller", "src/a.rs", "rust", 0, vec!["helper"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    // Two `helper` definitions plus the caller.
    assert_eq!(cg.graph.node_count(), 3);

    let rust_key = |namespace: &str, name: &str| FuncKey {
        lang: Lang::Rust,
        namespace: namespace.into(),
        name: name.into(),
        arity: Some(0),
        ..Default::default()
    };
    let caller_node = cg.index[&rust_key("src/a.rs", "caller")];
    let helper_a_node = cg.index[&rust_key("src/a.rs", "helper")];

    // The caller lives in src/a.rs, so `helper` must bind to the src/a.rs
    // definition: exactly one caller → helper_a edge.
    let edges_to_helper_a = cg
        .graph
        .edges(caller_node)
        .filter(|edge| edge.target() == helper_a_node)
        .count();
    assert_eq!(edges_to_helper_a, 1);
    assert!(cg.unresolved_not_found.is_empty());
    assert!(cg.unresolved_ambiguous.is_empty());
}
|
||
|
||
// ── same name, Python vs Rust ────────────────────────────────────────
|
||
|
||
#[test]
fn same_name_python_and_rust() {
    // `foo` is defined in both Python and Rust; the Python caller must only
    // ever see the Python definition.
    let summaries = vec![
        make_summary("foo", "handler.py", "python", 0, vec![]),
        make_summary("foo", "handler.rs", "rust", 0, vec![]),
        make_summary("main", "app.py", "python", 0, vec!["foo"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    assert_eq!(cg.graph.node_count(), 3);

    let python_key = |namespace: &str, name: &str| FuncKey {
        lang: Lang::Python,
        namespace: namespace.into(),
        name: name.into(),
        arity: Some(0),
        ..Default::default()
    };
    let caller_node = cg.index[&python_key("app.py", "main")];
    let py_foo_node = cg.index[&python_key("handler.py", "foo")];

    // The single outgoing edge targets Python `foo`, not Rust `foo`.
    let targets: Vec<_> = cg
        .graph
        .edges(caller_node)
        .map(|edge| edge.target())
        .collect();
    assert_eq!(targets, vec![py_foo_node]);
}
|
||
|
||
// ── arity differences → separate nodes ───────────────────────────────
|
||
|
||
#[test]
fn arity_differences_separate_nodes() {
    // Same name and file; only the parameter count differs.
    let one_param = make_summary("helper", "lib.rs", "rust", 1, vec![]);
    let two_params = make_summary("helper", "lib.rs", "rust", 2, vec![]);

    let gs = merge_summaries(vec![one_param, two_params], None);
    let cg = build_call_graph(&gs, &[]);

    // Different arity → different FuncKey → two distinct nodes.
    assert_eq!(cg.graph.node_count(), 2);

    let helper_key = |arity| FuncKey {
        lang: Lang::Rust,
        namespace: "lib.rs".into(),
        name: "helper".into(),
        arity: Some(arity),
        ..Default::default()
    };
    assert!(cg.index.contains_key(&helper_key(1)));
    assert!(cg.index.contains_key(&helper_key(2)));
}
|
||
|
||
// ── recursive SCC detection ──────────────────────────────────────────
|
||
|
||
#[test]
fn recursive_scc_detection() {
    // `a` and `b` call each other.
    let summaries = vec![
        make_summary("a", "lib.rs", "rust", 0, vec!["b"]),
        make_summary("b", "lib.rs", "rust", 0, vec!["a"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    assert_eq!(cg.graph.edge_count(), 2); // a→b and b→a

    let analysis = analyse(&cg);

    let scc_of = |name: &str| {
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: name.into(),
            arity: Some(0),
            ..Default::default()
        };
        analysis.node_to_scc[&cg.index[&key]]
    };

    // Both functions must land in one shared, two-node SCC.
    let scc_a = scc_of("a");
    let scc_b = scc_of("b");
    assert_eq!(scc_a, scc_b);
    assert_eq!(analysis.sccs[scc_a].len(), 2);
}
|
||
|
||
// ── unresolved callee → recorded as not found ────────────────────────
|
||
|
||
#[test]
fn unresolved_callee_recorded_as_not_found() {
    // The only callee has no definition anywhere in the summaries.
    let summaries = vec![make_summary("caller", "lib.rs", "rust", 0, vec!["nonexistent"])];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    assert_eq!(cg.graph.edge_count(), 0);

    let not_found = &cg.unresolved_not_found;
    assert_eq!(not_found.len(), 1);
    assert_eq!(not_found[0].callee_name, "nonexistent");
    assert!(cg.unresolved_ambiguous.is_empty());
}
|
||
|
||
// ── ambiguous callee → recorded as ambiguous ─────────────────────────
|
||
|
||
#[test]
fn ambiguous_callee_recorded() {
    let summaries = vec![
        // Two `helper` functions living in different namespaces...
        make_summary("helper", "a.rs", "rust", 0, vec![]),
        make_summary("helper", "b.rs", "rust", 0, vec![]),
        // ...and a caller in a THIRD namespace, so neither one is preferred.
        make_summary("caller", "c.rs", "rust", 0, vec!["helper"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    assert_eq!(cg.graph.edge_count(), 0); // no edge, ambiguous
    assert!(cg.unresolved_not_found.is_empty());

    let ambiguous = &cg.unresolved_ambiguous;
    assert_eq!(ambiguous.len(), 1);
    assert_eq!(ambiguous[0].callee_name, "helper");
    assert_eq!(ambiguous[0].candidates.len(), 2);
}
|
||
|
||
// ── diamond topo order (callee-first) ────────────────────────────────
|
||
|
||
#[test]
fn diamond_topo_callee_first() {
    // Diamond: A → B, A → C, B → D, C → D
    let d = make_summary("d", "lib.rs", "rust", 0, vec![]);
    let b = make_summary("b", "lib.rs", "rust", 0, vec!["d"]);
    let c = make_summary("c", "lib.rs", "rust", 0, vec!["d"]);
    let a = make_summary("a", "lib.rs", "rust", 0, vec!["b", "c"]);

    let gs = merge_summaries(vec![a, b, c, d], None);
    let cg = build_call_graph(&gs, &[]);
    assert_eq!(cg.graph.node_count(), 4);

    let analysis = analyse(&cg);

    // Position of a function's SCC within the callee-first ordering.
    let topo_pos = |name: &str| {
        let key = FuncKey {
            lang: Lang::Rust,
            namespace: "lib.rs".into(),
            name: name.into(),
            arity: Some(0),
            ..Default::default()
        };
        let scc = analysis.node_to_scc[&cg.index[&key]];
        analysis
            .topo_scc_callee_first
            .iter()
            .position(|&candidate| candidate == scc)
            .unwrap()
    };

    // D (leaf) must come before B and C, which must come before A (root).
    for (callee, caller) in [("d", "b"), ("d", "c"), ("b", "a"), ("c", "a")] {
        assert!(topo_pos(callee) < topo_pos(caller));
    }
}
|
||
|
||
// ── interop edge resolution ──────────────────────────────────────────
|
||
|
||
#[test]
fn interop_edge_resolution() {
    let summaries = vec![
        make_summary("process", "handler.py", "python", 0, vec!["js_func"]),
        make_summary("js_func", "util.js", "javascript", 1, vec![]),
    ];
    let gs = merge_summaries(summaries, None);

    let caller_key = FuncKey {
        lang: Lang::Python,
        namespace: "handler.py".into(),
        name: "process".into(),
        arity: Some(0),
        ..Default::default()
    };
    let target_key = FuncKey {
        lang: Lang::JavaScript,
        namespace: "util.js".into(),
        name: "js_func".into(),
        arity: Some(1),
        ..Default::default()
    };

    // A single explicit Python → JavaScript bridge for `js_func`.
    let call_site = CallSiteKey {
        caller_lang: Lang::Python,
        caller_namespace: "handler.py".into(),
        caller_func: String::new(), // wildcard
        callee_symbol: "js_func".into(),
        ordinal: 0,
    };
    let interop = vec![InteropEdge {
        from: call_site,
        to: target_key.clone(),
    }];

    let cg = build_call_graph(&gs, &interop);

    let caller_node = cg.index[&caller_key];
    let target_node = cg.index[&target_key];

    // The interop edge must produce exactly one caller → target edge.
    let bridged = cg
        .graph
        .edges(caller_node)
        .filter(|edge| edge.target() == target_node)
        .count();
    assert_eq!(bridged, 1);
    assert!(cg.unresolved_not_found.is_empty());
}
|
||
|
||
// ── namespace normalization consistency ───────────────────────────────
|
||
|
||
#[test]
fn namespace_normalization_consistency() {
    // FuncSummary::func_key with a scan root must yield the same namespace
    // string that normalize_namespace produces for the same path and root.
    let root = "/home/user/proj";
    let summary = make_summary("my_func", "/home/user/proj/src/lib.rs", "rust", 0, vec![]);

    let key = summary.func_key(Some(root));
    let expected_ns = crate::symbol::normalize_namespace(&summary.file_path, Some(root));

    assert_eq!(key.namespace, expected_ns);
    assert_eq!(key.namespace, "src/lib.rs");
}
|
||
|
||
// ── raw call_site preserved on edge ──────────────────────────────────
|
||
|
||
#[test]
fn raw_call_site_preserved_on_edge() {
    // The qualified callee "env::var" resolves to the local `var`
    // definition, yet the edge must keep the original raw text.
    let summaries = vec![
        make_summary("var", "util.rs", "rust", 0, vec![]),
        make_summary("main", "util.rs", "rust", 0, vec!["env::var"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let main_key = FuncKey {
        lang: Lang::Rust,
        namespace: "util.rs".into(),
        name: "main".into(),
        arity: Some(0),
        ..Default::default()
    };
    let main_node = cg.index[&main_key];

    let outgoing: Vec<_> = cg.graph.edges(main_node).collect();
    assert_eq!(outgoing.len(), 1);
    // Raw call_site preserved, not the normalized "var"
    assert_eq!(outgoing[0].weight().call_site, "env::var");
}
|
||
|
||
// ── scc_file_batches ────────────────────────────────────────────────
|
||
|
||
/// Helper: build summaries, call graph, analysis, and file batches in one go.
|
||
fn build_batches<'a>(
|
||
summaries: Vec<FuncSummary>,
|
||
all_files: &'a [PathBuf],
|
||
root: &Path,
|
||
) -> (Vec<Vec<&'a PathBuf>>, Vec<&'a PathBuf>) {
|
||
let gs = merge_summaries(summaries, Some(&root.to_string_lossy()));
|
||
let cg = build_call_graph(&gs, &[]);
|
||
let analysis = analyse(&cg);
|
||
scc_file_batches(&cg, &analysis, all_files, root)
|
||
}
|
||
|
||
#[test]
fn scc_file_batches_linear_chain() {
    // Call chain: A (a.rs) → B (b.rs) → C (c.rs)
    let root = Path::new("/proj");
    let c = make_summary("c_fn", "/proj/c.rs", "rust", 0, vec![]);
    let b = make_summary("b_fn", "/proj/b.rs", "rust", 0, vec!["c_fn"]);
    let a = make_summary("a_fn", "/proj/a.rs", "rust", 0, vec!["b_fn"]);

    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/b.rs", "/proj/c.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let (batches, orphans) = build_batches(vec![a, b, c], &files, root);

    assert!(orphans.is_empty());
    assert_eq!(batches.len(), 3, "3 files in a linear chain → 3 batches");

    // Index of the batch that holds the file whose path ends with `suffix`.
    let batch_of = |suffix: &str| {
        batches
            .iter()
            .position(|batch| {
                batch
                    .iter()
                    .any(|path| path.to_str().unwrap().ends_with(suffix))
            })
            .unwrap()
    };

    // Callee-first: c.rs, then b.rs, then a.rs.
    assert!(batch_of("c.rs") < batch_of("b.rs"));
    assert!(batch_of("b.rs") < batch_of("a.rs"));
}
|
||
|
||
#[test]
fn scc_file_batches_orphan_files() {
    let root = Path::new("/proj");
    let summaries = vec![make_summary("a_fn", "/proj/a.rs", "rust", 0, vec![])];

    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/orphan.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let (batches, orphans) = build_batches(summaries, &files, root);

    // Only orphan.rs has no function in the graph.
    assert_eq!(orphans.len(), 1);
    assert!(orphans[0].to_str().unwrap().ends_with("orphan.rs"));

    // a.rs must appear in exactly one batch.
    let total_in_batches = batches.iter().flatten().count();
    assert_eq!(total_in_batches, 1);
}
|
||
|
||
#[test]
fn scc_file_batches_multi_scc_same_file() {
    // a.rs owns both ends of a chain that passes through b.rs:
    //   leaf (a.rs) ← mid (b.rs) ← caller (a.rs)
    // Under min-topo placement a.rs is scheduled at the leaf's position,
    // i.e. ahead of b.rs.
    let root = Path::new("/proj");
    let summaries = vec![
        make_summary("leaf", "/proj/a.rs", "rust", 0, vec![]),
        make_summary("mid", "/proj/b.rs", "rust", 0, vec!["leaf"]),
        make_summary("caller", "/proj/a.rs", "rust", 0, vec!["mid"]),
    ];

    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/b.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let (batches, orphans) = build_batches(summaries, &files, root);
    assert!(orphans.is_empty());

    // Index of the batch that holds the file whose path ends with `suffix`.
    let batch_of = |suffix: &str| {
        batches
            .iter()
            .position(|batch| {
                batch
                    .iter()
                    .any(|path| path.to_str().unwrap().ends_with(suffix))
            })
            .unwrap()
    };

    // a.rs takes the earliest position of any of its functions (the leaf).
    assert!(
        batch_of("a.rs") < batch_of("b.rs"),
        "a.rs has leaf fn so should be in earlier batch than b.rs"
    );
}
|
||
|
||
#[test]
fn scc_file_batches_mutual_recursion() {
    // `ping` and `pong` call each other across two files, so they share an
    // SCC and their files must share a batch.
    let root = Path::new("/proj");
    let summaries = vec![
        make_summary("ping", "/proj/a.rs", "rust", 0, vec!["pong"]),
        make_summary("pong", "/proj/b.rs", "rust", 0, vec!["ping"]),
    ];

    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/b.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let (batches, orphans) = build_batches(summaries, &files, root);

    assert!(orphans.is_empty());
    // One SCC → one batch containing both files.
    assert_eq!(
        batches.len(),
        1,
        "mutual recursion → single SCC → single batch"
    );
    assert_eq!(batches[0].len(), 2);
}
|
||
|
||
#[test]
fn scc_file_batches_empty_graph() {
    // With no summaries at all, every file is an orphan.
    let root = Path::new("/proj");
    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/b.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let gs = merge_summaries(vec![], None);
    let cg = build_call_graph(&gs, &[]);
    let analysis = analyse(&cg);

    let (batches, orphans) = scc_file_batches(&cg, &analysis, &files, root);

    assert!(batches.is_empty(), "empty graph → no batches");
    assert_eq!(orphans.len(), 2, "all files are orphans");
}
|
||
|
||
// ── scc_file_batches_with_metadata ────────────────────────────────
|
||
|
||
/// Helper: build summaries, call graph, analysis, and metadata batches.
|
||
fn build_metadata_batches<'a>(
|
||
summaries: Vec<FuncSummary>,
|
||
all_files: &'a [PathBuf],
|
||
root: &Path,
|
||
) -> (Vec<FileBatch<'a>>, Vec<&'a PathBuf>) {
|
||
let gs = merge_summaries(summaries, Some(&root.to_string_lossy()));
|
||
let cg = build_call_graph(&gs, &[]);
|
||
let analysis = analyse(&cg);
|
||
scc_file_batches_with_metadata(&cg, &analysis, all_files, root)
|
||
}
|
||
|
||
#[test]
fn scc_file_batches_with_metadata_marks_recursive() {
    // `ping` ↔ `pong` form an SCC with len > 1, so the batch must be
    // flagged has_mutual_recursion.
    let root = Path::new("/proj");
    let summaries = vec![
        make_summary("ping", "/proj/a.rs", "rust", 0, vec!["pong"]),
        make_summary("pong", "/proj/b.rs", "rust", 0, vec!["ping"]),
    ];

    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/b.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let (batches, orphans) = build_metadata_batches(summaries, &files, root);

    assert!(orphans.is_empty());
    assert_eq!(batches.len(), 1, "mutual recursion → single batch");

    let only_batch = &batches[0];
    assert!(
        only_batch.has_mutual_recursion,
        "batch with mutual recursion should be marked"
    );
    assert_eq!(only_batch.files.len(), 2);
}
|
||
|
||
#[test]
fn scc_file_batches_with_metadata_marks_cross_file() {
    // The recursive pair lives in two different files, so the batch must
    // also be flagged cross_file.
    let root = Path::new("/proj");
    let summaries = vec![
        make_summary("ping", "/proj/a.rs", "rust", 0, vec!["pong"]),
        make_summary("pong", "/proj/b.rs", "rust", 0, vec!["ping"]),
    ];

    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/b.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let (batches, _orphans) = build_metadata_batches(summaries, &files, root);
    assert_eq!(
        batches.len(),
        1,
        "cross-file mutual recursion → single batch"
    );

    let only_batch = &batches[0];
    assert!(only_batch.has_mutual_recursion);
    assert!(
        only_batch.cross_file,
        "batch whose SCC spans two namespaces should be marked cross_file"
    );
}
|
||
|
||
#[test]
fn scc_file_batches_with_metadata_intra_file_scc_not_cross_file() {
    // Both halves of the recursive pair live in the SAME file: the batch is
    // recursive but must not be cross_file.
    let root = Path::new("/proj");
    let summaries = vec![
        make_summary("ping", "/proj/a.rs", "rust", 0, vec!["pong"]),
        make_summary("pong", "/proj/a.rs", "rust", 0, vec!["ping"]),
    ];

    let files = vec![PathBuf::from("/proj/a.rs")];

    let (batches, _orphans) = build_metadata_batches(summaries, &files, root);
    assert_eq!(batches.len(), 1);

    let only_batch = &batches[0];
    assert!(only_batch.has_mutual_recursion);
    assert!(
        !only_batch.cross_file,
        "single-file SCC must not be flagged as cross_file"
    );
}
|
||
|
||
#[test]
fn scc_spans_files_single_node() {
    // A one-node SCC can never span several files.
    let root = Path::new("/proj");
    let summaries = vec![make_summary("f", "/proj/a.rs", "rust", 0, vec![])];
    let gs = merge_summaries(summaries, Some(&root.to_string_lossy()));
    let cg = build_call_graph(&gs, &[]);
    let analysis = analyse(&cg);

    assert!(analysis.sccs.iter().all(|scc| !scc_spans_files(&cg, scc)));
}
|
||
|
||
#[test]
fn scc_file_batches_with_metadata_singleton_not_recursive() {
    // A plain a → b → c chain has only singleton SCCs, so no batch may be
    // flagged has_mutual_recursion.
    let root = Path::new("/proj");
    let c = make_summary("c_fn", "/proj/c.rs", "rust", 0, vec![]);
    let b = make_summary("b_fn", "/proj/b.rs", "rust", 0, vec!["c_fn"]);
    let a = make_summary("a_fn", "/proj/a.rs", "rust", 0, vec!["b_fn"]);

    let files: Vec<PathBuf> = ["/proj/a.rs", "/proj/b.rs", "/proj/c.rs"]
        .into_iter()
        .map(PathBuf::from)
        .collect();

    let (batches, orphans) = build_metadata_batches(vec![a, b, c], &files, root);

    assert!(orphans.is_empty());
    assert_eq!(batches.len(), 3, "3 files in linear chain → 3 batches");
    batches.iter().enumerate().for_each(|(i, batch)| {
        assert!(
            !batch.has_mutual_recursion,
            "batch {i} should not be marked as recursive"
        );
    });
}
|
||
|
||
// ── qualified disambiguation resolves ambiguous common names ──────
|
||
|
||
#[test]
fn qualified_callee_disambiguates_ambiguous() {
    let summaries = vec![
        // Two `send` functions in different namespaces.
        make_summary("send", "src/http.rs", "rust", 0, vec![]),
        make_summary("send", "src/mail.rs", "rust", 0, vec![]),
        // The caller sits in a third namespace and calls "http::send": the
        // leaf `send` alone is ambiguous, but the `http` qualifier should
        // select src/http.rs.
        make_summary("caller", "src/main.rs", "rust", 0, vec!["http::send"]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let rust_key = |namespace: &str, name: &str| FuncKey {
        lang: Lang::Rust,
        namespace: namespace.into(),
        name: name.into(),
        arity: Some(0),
        ..Default::default()
    };
    let caller_node = cg.index[&rust_key("src/main.rs", "caller")];
    let send_http_node = cg.index[&rust_key("src/http.rs", "send")];

    // "http::send" must resolve to src/http.rs::send and nothing else.
    let outgoing: Vec<_> = cg.graph.edges(caller_node).collect();
    assert_eq!(
        outgoing.len(),
        1,
        "qualified name should resolve the ambiguity"
    );
    assert_eq!(outgoing[0].target(), send_http_node);
    assert!(cg.unresolved_ambiguous.is_empty());
}
|
||
|
||
/// Counterpart to the qualified case: the caller writes a bare `send`, so
/// no edge is created and one ambiguous entry is recorded.
#[test]
fn unqualified_callee_stays_ambiguous() {
    let http_send = make_summary("send", "src/http.rs", "rust", 0, vec![]);
    let mail_send = make_summary("send", "src/mail.rs", "rust", 0, vec![]);
    // No qualifier on the callee, so nothing can pick between the two.
    let call_site = make_summary("caller", "src/main.rs", "rust", 0, vec!["send"]);

    let summaries = merge_summaries(vec![http_send, mail_send, call_site], None);
    let graph = build_call_graph(&summaries, &[]);

    let from = graph.index[&FuncKey {
        name: "caller".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    }];

    // The bare name stays unresolved: zero edges, one ambiguous record.
    let outgoing: Vec<_> = graph.graph.edges(from).collect();
    assert_eq!(outgoing.len(), 0, "unqualified name should remain ambiguous");
    assert_eq!(graph.unresolved_ambiguous.len(), 1);
}
|
||
|
||
/// Regression guard: a bare callee name with exactly one definition must
/// still resolve to an edge, with nothing left unresolved.
#[test]
fn simple_unqualified_resolves_as_before() {
    let callee = make_summary("helper", "src/lib.rs", "rust", 0, vec![]);
    let call_site = make_summary("caller", "src/lib.rs", "rust", 0, vec!["helper"]);

    let summaries = merge_summaries(vec![callee, call_site], None);
    let graph = build_call_graph(&summaries, &[]);

    assert_eq!(graph.graph.edge_count(), 1);
    assert!(graph.unresolved_not_found.is_empty());
    assert!(graph.unresolved_ambiguous.is_empty());
}
|
||
|
||
// ── structured-metadata disambiguation (callee metadata) ─────────────
|
||
|
||
/// Helper: build a summary whose callees carry structured CalleeSite
|
||
/// metadata, used by the tests below to exercise arity / receiver /
|
||
/// qualifier propagation into resolution.
|
||
fn summary_with_sites(
|
||
name: &str,
|
||
file_path: &str,
|
||
lang: &str,
|
||
param_count: usize,
|
||
sites: Vec<CalleeSite>,
|
||
) -> FuncSummary {
|
||
FuncSummary {
|
||
name: name.into(),
|
||
file_path: file_path.into(),
|
||
lang: lang.into(),
|
||
param_count,
|
||
param_names: vec![],
|
||
source_caps: 0,
|
||
sanitizer_caps: 0,
|
||
sink_caps: 0,
|
||
propagating_params: vec![],
|
||
propagates_taint: false,
|
||
tainted_sink_params: vec![],
|
||
callees: sites,
|
||
..Default::default()
|
||
}
|
||
}
|
||
|
||
/// The arity carried by a structured `CalleeSite` must be enough to choose
/// between two same-name overloads that live in the same namespace.
#[test]
fn arity_hint_disambiguates_same_name_overloads() {
    // Two `encode` overloads in one file: 1-arg and 2-arg.
    let one_arg = make_summary("encode", "src/codec.rs", "rust", 1, vec![]);
    let two_arg = make_summary("encode", "src/codec.rs", "rust", 2, vec![]);
    // The caller is in a different file, so its namespace cannot narrow the
    // candidates; the per-call-site arity is the only signal.
    let call = CalleeSite {
        arity: Some(2),
        name: "encode".into(),
        ..Default::default()
    };
    let driver = summary_with_sites("driver", "src/main.rs", "rust", 0, vec![call]);

    let summaries = merge_summaries(vec![one_arg, two_arg, driver], None);
    let graph = build_call_graph(&summaries, &[]);

    let from = graph.index[&FuncKey {
        name: "driver".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    }];
    let expected_target = graph.index[&FuncKey {
        name: "encode".into(),
        namespace: "src/codec.rs".into(),
        lang: Lang::Rust,
        arity: Some(2),
        ..Default::default()
    }];

    let outgoing: Vec<_> = graph.graph.edges(from).collect();
    assert_eq!(outgoing.len(), 1, "arity hint should pick the 2-arg overload");
    assert_eq!(outgoing[0].target(), expected_target);
    assert!(graph.unresolved_ambiguous.is_empty());
}
|
||
|
||
/// Negative control for the arity-hint test: with a bare callee entry (no
/// arity recorded) the two `encode` overloads cannot be told apart, so no
/// edge is added and one ambiguous entry is recorded.
#[test]
fn no_arity_hint_stays_ambiguous() {
    let one_arg = make_summary("encode", "src/codec.rs", "rust", 1, vec![]);
    let two_arg = make_summary("encode", "src/codec.rs", "rust", 2, vec![]);
    // Legacy-style call site: name only, no structured metadata.
    let bare_call = CalleeSite::bare("encode");
    let driver = summary_with_sites("driver", "src/main.rs", "rust", 0, vec![bare_call]);

    let summaries = merge_summaries(vec![one_arg, two_arg, driver], None);
    let graph = build_call_graph(&summaries, &[]);

    assert_eq!(graph.graph.edge_count(), 0, "no arity hint → ambiguous");
    assert_eq!(graph.unresolved_ambiguous.len(), 1);
}
|
||
|
||
/// When two containers in the same file define a method with the same
/// name, the structured `receiver` field on the call site must route the
/// edge to the matching container.
#[test]
fn receiver_field_disambiguates_methods() {
    // `process` is defined on both `OrderService` and `UserService`.
    let order_process = FuncSummary {
        container: "OrderService".into(),
        ..make_summary("process", "src/app.rs", "rust", 1, vec![])
    };
    let user_process = FuncSummary {
        container: "UserService".into(),
        ..make_summary("process", "src/app.rs", "rust", 1, vec![])
    };

    // The caller is in another file and names the receiver through the
    // structured field instead of embedding it in the callee string.
    let call = CalleeSite {
        receiver: Some("OrderService".into()),
        arity: Some(1),
        name: "process".into(),
        ..Default::default()
    };
    let entry = summary_with_sites("main", "src/main.rs", "rust", 0, vec![call]);

    let summaries = merge_summaries(vec![order_process, user_process, entry], None);
    let graph = build_call_graph(&summaries, &[]);

    let from = graph.index[&FuncKey {
        name: "main".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    }];
    let expected_target = graph.index[&FuncKey {
        name: "process".into(),
        container: "OrderService".into(),
        namespace: "src/app.rs".into(),
        lang: Lang::Rust,
        arity: Some(1),
        ..Default::default()
    }];

    let outgoing: Vec<_> = graph.graph.edges(from).collect();
    assert_eq!(
        outgoing.len(),
        1,
        "structured receiver should route to OrderService::process"
    );
    assert_eq!(outgoing[0].target(), expected_target);
}
|
||
|
||
/// For a non-method call such as `env::var`, the `qualifier` field holds
/// the `env` part directly. With two same-named `var` functions defined,
/// the edge must land on the one in `src/env.rs`.
#[test]
fn qualifier_field_disambiguates_non_method_calls() {
    let env_var = make_summary("var", "src/env.rs", "rust", 1, vec![]);
    // Second `var` with the same arity, present so the name alone is not
    // enough to choose.
    let local_var = make_summary("var", "src/locals.rs", "rust", 1, vec![]);

    let call = CalleeSite {
        qualifier: Some("env".into()),
        arity: Some(1),
        name: "env::var".into(),
        ..Default::default()
    };
    let entry = summary_with_sites("main", "src/main.rs", "rust", 0, vec![call]);

    let summaries = merge_summaries(vec![env_var, local_var, entry], None);
    let graph = build_call_graph(&summaries, &[]);

    let from = graph.index[&FuncKey {
        name: "main".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    }];
    let expected_target = graph.index[&FuncKey {
        name: "var".into(),
        namespace: "src/env.rs".into(),
        lang: Lang::Rust,
        arity: Some(1),
        ..Default::default()
    }];

    let outgoing: Vec<_> = graph.graph.edges(from).collect();
    assert_eq!(outgoing.len(), 1);
    assert_eq!(
        outgoing[0].target(),
        expected_target,
        "qualifier `env` should select src/env.rs::var"
    );
}
|
||
|
||
/// Callee entries that carry only a name (the form produced when an old
/// `Vec<String>` row is lifted) must keep resolving for unambiguous
/// callers.
#[test]
fn legacy_string_callees_still_resolve() {
    let callee = make_summary("helper", "src/lib.rs", "rust", 0, vec![]);
    // `make_summary` yields `CalleeSite::bare` entries, i.e. call sites
    // without arity or receiver metadata.
    let entry = make_summary("main", "src/lib.rs", "rust", 0, vec!["helper"]);

    let summaries = merge_summaries(vec![callee, entry], None);
    let graph = build_call_graph(&summaries, &[]);

    assert_eq!(graph.graph.edge_count(), 1);
    assert!(graph.unresolved_not_found.is_empty());
    assert!(graph.unresolved_ambiguous.is_empty());
}
|
||
|
||
// ── ClassMethodIndex ────────────────────────────────────────────────
|
||
|
||
/// Helper: `(name, container)` pairs in the same file. Builds two
|
||
/// summaries with the same leaf name on different containers so the
|
||
/// container-keyed map has a non-trivial discriminator to preserve.
|
||
fn make_method_summary(
|
||
name: &str,
|
||
container: &str,
|
||
file_path: &str,
|
||
lang: &str,
|
||
param_count: usize,
|
||
) -> FuncSummary {
|
||
let mut s = make_summary(name, file_path, lang, param_count, vec![]);
|
||
s.container = container.into();
|
||
s
|
||
}
|
||
|
||
/// `findById` exists on two containers in two files. A container-keyed
/// lookup must return only the matching definition, while a lookup without
/// a container must return both.
#[test]
fn class_method_index_disambiguates_same_name_across_containers() {
    let on_repository = make_method_summary("findById", "Repository", "src/repo.rs", "rust", 1);
    let on_cache = make_method_summary("findById", "Cache", "src/cache.rs", "rust", 1);

    let summaries = merge_summaries(vec![on_repository, on_cache], None);
    let index = ClassMethodIndex::build(&summaries);

    // Container-keyed lookups: one hit each, on the requested container.
    let repository_matches = index.resolve(Lang::Rust, Some("Repository"), "findById");
    assert_eq!(
        repository_matches.len(),
        1,
        "Repository::findById should resolve to exactly one target"
    );
    assert_eq!(repository_matches[0].container, "Repository");

    let cache_matches = index.resolve(Lang::Rust, Some("Cache"), "findById");
    assert_eq!(cache_matches.len(), 1);
    assert_eq!(cache_matches[0].container, "Cache");

    // Name-only lookup is the fallback path and keeps every candidate.
    let name_only_matches = index.resolve(Lang::Rust, None, "findById");
    assert_eq!(
        name_only_matches.len(),
        2,
        "bare-name lookup should keep both same-name candidates"
    );
}
|
||
|
||
/// A `None` container and an empty-string container must both fall back to
/// the name-only lookup; a real container name narrows to that container's
/// method.
#[test]
fn class_method_index_falls_back_to_name_when_container_unknown() {
    let method = make_method_summary("process", "OrderService", "src/svc.rs", "rust", 1);
    let free_fn = make_summary("process", "src/util.rs", "rust", 1, vec![]);

    let summaries = merge_summaries(vec![method, free_fn], None);
    let index = ClassMethodIndex::build(&summaries);

    // No container → name-only list containing both definitions.
    let without_container = index.resolve(Lang::Rust, None, "process");
    assert_eq!(without_container.len(), 2);

    // "" is treated the same as `None`: it is not a container key.
    let with_empty_container = index.resolve(Lang::Rust, Some(""), "process");
    assert_eq!(with_empty_container.len(), 2);

    // A real container name yields only the method; the free function has
    // an empty container and is excluded.
    let with_container = index.resolve(Lang::Rust, Some("OrderService"), "process");
    assert_eq!(with_container.len(), 1);
    assert_eq!(with_container[0].container, "OrderService");
}
|
||
|
||
/// Lookups that match nothing must come back empty: wrong method name,
/// wrong container, or an unknown name with no container.
#[test]
fn class_method_index_empty_for_unknown_method() {
    let only_method = make_method_summary("findById", "Repository", "src/repo.rs", "rust", 1);
    let summaries = merge_summaries(vec![only_method], None);
    let index = ClassMethodIndex::build(&summaries);

    // Correct container, method name that does not exist.
    let wrong_method = index.resolve(Lang::Rust, Some("Repository"), "findByName");
    assert!(wrong_method.is_empty());

    // Correct method, container that does not define it. Supplying a
    // container must not fall back to the name-only list; that is the
    // devirtualisation guarantee.
    let wrong_container = index.resolve(Lang::Rust, Some("OtherClass"), "findById");
    assert!(wrong_container.is_empty());

    // Unknown name, no container.
    let unknown_name = index.resolve(Lang::Rust, None, "doesNotExist");
    assert!(unknown_name.is_empty());
}
|
||
|
||
/// The same `(container, name)` pair defined in Java and in TypeScript
/// must be kept apart: each language-scoped lookup returns only its own
/// definition.
#[test]
fn class_method_index_partitions_by_language() {
    let in_java = make_method_summary("findById", "Repository", "Repo.java", "java", 1);
    let in_typescript = make_method_summary("findById", "Repository", "repo.ts", "typescript", 1);

    let summaries = merge_summaries(vec![in_java, in_typescript], None);
    let index = ClassMethodIndex::build(&summaries);

    let java_matches = index.resolve(Lang::Java, Some("Repository"), "findById");
    assert_eq!(java_matches.len(), 1);
    assert_eq!(java_matches[0].lang, Lang::Java);

    let typescript_matches = index.resolve(Lang::TypeScript, Some("Repository"), "findById");
    assert_eq!(typescript_matches.len(), 1);
    assert_eq!(typescript_matches[0].lang, Lang::TypeScript);
}
|
||
|
||
/// Two arity overloads of `Codec::encode` share one `(container, name)`
/// key, so the lookup returns both; narrowing by arity is left to the
/// caller of the index.
#[test]
fn class_method_index_handles_arity_overloads() {
    let unary = make_method_summary("encode", "Codec", "src/codec.rs", "rust", 1);
    let binary = make_method_summary("encode", "Codec", "src/codec.rs", "rust", 2);

    let summaries = merge_summaries(vec![unary, binary], None);
    let index = ClassMethodIndex::build(&summaries);

    let matches = index.resolve(Lang::Rust, Some("Codec"), "encode");
    assert_eq!(
        matches.len(),
        2,
        "arity overloads should both appear under the same container key"
    );
}
|
||
|
||
// ── devirtualised edge insertion via typed_call_receivers ──
|
||
|
||
/// `findById` is defined on `Repository` and on `Cache`, in different
/// files. The caller's SSA summary records receiver type `"Repository"`
/// for call ordinal 0, so the graph must contain exactly one edge from the
/// caller, pointing at `Repository::findById`, and no ambiguous entry.
#[test]
fn typed_call_receivers_devirtualises_method_call() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let on_repository = make_method_summary("findById", "Repository", "src/repo.rs", "rust", 1);
    let on_cache = make_method_summary("findById", "Cache", "src/cache.rs", "rust", 1);

    // One method call at ordinal 0; the receiver is the variable `repo`.
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("repo".into()),
        arity: Some(1),
        name: "findById".into(),
        ..Default::default()
    };
    let lookup_fn = summary_with_sites("lookup", "src/main.rs", "rust", 0, vec![call]);

    let mut summaries = merge_summaries(vec![on_repository, on_cache, lookup_fn], None);

    // Attach the type fact `(ordinal 0 → "Repository")` to the caller.
    let caller_key = FuncKey {
        name: "lookup".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    };
    let type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Repository"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), type_facts);

    let graph = build_call_graph(&summaries, &[]);

    let from = graph.index[&caller_key];
    let expected_target = graph.index[&FuncKey {
        name: "findById".into(),
        container: "Repository".into(),
        namespace: "src/repo.rs".into(),
        lang: Lang::Rust,
        arity: Some(1),
        ..Default::default()
    }];

    let edges: Vec<_> = graph.graph.edges(from).collect();
    assert_eq!(
        edges.len(),
        1,
        "typed receiver should resolve to exactly one target; got {edges:?}"
    );
    assert_eq!(
        edges[0].target(),
        expected_target,
        "edge must point to Repository::findById, not Cache::findById"
    );
    assert!(graph.unresolved_ambiguous.is_empty());
}
|
||
|
||
/// Negative control: the recorded receiver type names a container that
/// does not define the method. The edge must not be dropped; name-only
/// resolution still has to produce it, so a stale or wrong type fact
/// cannot cause a regression.
#[test]
fn typed_call_receivers_falls_through_on_zero_match() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    // The only `process` lives on `Worker`; `Other` has no such method.
    let worker_process = make_method_summary("process", "Worker", "src/worker.rs", "rust", 1);
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("worker".into()),
        arity: Some(1),
        name: "process".into(),
        ..Default::default()
    };
    let drive_fn = summary_with_sites("drive", "src/main.rs", "rust", 0, vec![call]);

    let mut summaries = merge_summaries(vec![worker_process, drive_fn], None);

    let caller_key = FuncKey {
        name: "drive".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    };
    // Deliberately wrong type fact: `Other::process` does not exist.
    let wrong_type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Other"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), wrong_type_facts);

    let cg = build_call_graph(&summaries, &[]);

    let from = cg.index[&caller_key];
    let edges: Vec<_> = cg.graph.edges(from).collect();
    // Name-only resolution finds the single `process` candidate
    // (Worker::process); the typed-receiver miss must not suppress it.
    assert_eq!(
        edges.len(),
        1,
        "stale/wrong type fact must fall through to today's resolution; \
         got {edges:?} (cf. ambiguous: {:?})",
        cg.unresolved_ambiguous,
    );
}
|
||
|
||
// ── TypeHierarchyIndex ───────────────────────────────────
|
||
|
||
/// Helper: build a hierarchy index from a list of
|
||
/// `(lang, sub, super)` edges by injecting them onto a single
|
||
/// per-file FuncSummary. Mirrors the production path:
|
||
/// `merge_summaries` would receive these via
|
||
/// `FuncSummary::hierarchy_edges`.
|
||
fn hierarchy_from_edges(edges: Vec<(Lang, &str, &str)>) -> TypeHierarchyIndex {
|
||
let mut summary = make_summary("dummy", "dummy.rs", "rust", 0, vec![]);
|
||
// The lang on the FuncSummary is per-edge, so we group by
|
||
// language and produce one summary per language.
|
||
let mut by_lang: std::collections::HashMap<Lang, Vec<(String, String)>> =
|
||
std::collections::HashMap::new();
|
||
for (lang, sub, sup) in edges {
|
||
by_lang
|
||
.entry(lang)
|
||
.or_default()
|
||
.push((sub.to_string(), sup.to_string()));
|
||
}
|
||
let _ = &mut summary; // silence the dummy
|
||
let mut all: Vec<FuncSummary> = Vec::new();
|
||
for (lang, edges) in by_lang {
|
||
let slug = match lang {
|
||
Lang::Rust => "rust",
|
||
Lang::Java => "java",
|
||
Lang::Python => "python",
|
||
Lang::TypeScript => "typescript",
|
||
Lang::JavaScript => "javascript",
|
||
Lang::Go => "go",
|
||
Lang::Php => "php",
|
||
Lang::Ruby => "ruby",
|
||
Lang::C => "c",
|
||
Lang::Cpp => "cpp",
|
||
};
|
||
let mut s = make_summary("dummy", "dummy.x", slug, 0, vec![]);
|
||
s.hierarchy_edges = edges;
|
||
all.push(s);
|
||
}
|
||
let gs = merge_summaries(all, None);
|
||
TypeHierarchyIndex::build(&gs)
|
||
}
|
||
|
||
/// B-1: round trip. An index built from three Java edges must answer
/// `subs_of` for each super-type, return an empty slice for an unknown
/// type, and report two distinct super keys.
#[test]
fn b1_type_hierarchy_index_round_trip() {
    let hierarchy = hierarchy_from_edges(vec![
        (Lang::Java, "UserRepo", "Repository"),
        (Lang::Java, "CacheRepo", "Repository"),
        (Lang::Java, "Derived", "Base"),
    ]);

    // Sort before comparing so the check does not depend on result order.
    let mut repository_subs: Vec<&str> = hierarchy
        .subs_of(Lang::Java, "Repository")
        .iter()
        .map(String::as_str)
        .collect();
    repository_subs.sort_unstable();
    assert_eq!(repository_subs, vec!["CacheRepo", "UserRepo"]);

    assert_eq!(
        hierarchy.subs_of(Lang::Java, "Base"),
        &["Derived".to_string()]
    );
    assert_eq!(hierarchy.subs_of(Lang::Java, "Unknown"), &[] as &[String]);
    assert_eq!(hierarchy.super_keys_len(), 2);
}
|
||
|
||
/// B-2: Java interface dispatch. A call to `findById` on a receiver typed
/// as `Repository` must produce edges to the `findById` of every recorded
/// implementer (`UserRepo` and `CacheRepo`), and exactly two edges.
#[test]
fn b2_java_interface_dispatch_fans_out_to_all_impls() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let user_impl = make_method_summary("findById", "UserRepo", "src/UserRepo.java", "java", 1);
    let cache_impl = make_method_summary("findById", "CacheRepo", "src/CacheRepo.java", "java", 1);
    // Placeholder summary on the interface; it carries the implements edges.
    let interface_marker = FuncSummary {
        hierarchy_edges: vec![
            (String::from("UserRepo"), String::from("Repository")),
            (String::from("CacheRepo"), String::from("Repository")),
        ],
        ..make_method_summary(
            "__placeholder",
            "Repository",
            "src/Repository.java",
            "java",
            0,
        )
    };
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("r".into()),
        arity: Some(1),
        name: "findById".into(),
        ..Default::default()
    };
    let lookup_fn = summary_with_sites("lookup", "src/main.java", "java", 0, vec![call]);

    let mut summaries =
        merge_summaries(vec![user_impl, cache_impl, interface_marker, lookup_fn], None);
    let caller_key = FuncKey {
        name: "lookup".into(),
        namespace: "src/main.java".into(),
        lang: Lang::Java,
        arity: Some(0),
        ..Default::default()
    };
    let type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Repository"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), type_facts);

    let graph = build_call_graph(&summaries, &[]);
    let from = graph.index[&caller_key];
    let targets: Vec<&FuncKey> = graph
        .graph
        .edges(from)
        .map(|edge| &graph.graph[edge.target()])
        .collect();
    let reached: Vec<&str> = targets.iter().map(|key| key.container.as_str()).collect();
    assert!(
        reached.contains(&"UserRepo") && reached.contains(&"CacheRepo"),
        "B-2: edges must reach BOTH UserRepo::findById and CacheRepo::findById; got {targets:?}"
    );
    assert_eq!(targets.len(), 2, "B-2: exactly two fan-out edges expected");
}
|
||
|
||
/// B-3: Java `extends`. With `Derived` recorded as a sub-type of `Base`, a
/// call to `foo` on a receiver typed `Base` must reach both `Base::foo`
/// and `Derived::foo`. Kept separate from the interface case so class
/// inheritance fan-out is pinned on its own.
#[test]
fn b3_java_extends_fans_out_to_subclass() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let base_foo = make_method_summary("foo", "Base", "src/Base.java", "java", 0);
    // The subclass method also carries the `Derived → Base` edge.
    let derived_foo = FuncSummary {
        hierarchy_edges: vec![(String::from("Derived"), String::from("Base"))],
        ..make_method_summary("foo", "Derived", "src/Derived.java", "java", 0)
    };
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("b".into()),
        arity: Some(0),
        name: "foo".into(),
        ..Default::default()
    };
    let go_fn = summary_with_sites("go", "src/main.java", "java", 0, vec![call]);

    let mut summaries = merge_summaries(vec![base_foo, derived_foo, go_fn], None);
    let caller_key = FuncKey {
        name: "go".into(),
        namespace: "src/main.java".into(),
        lang: Lang::Java,
        arity: Some(0),
        ..Default::default()
    };
    let type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Base"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), type_facts);

    let graph = build_call_graph(&summaries, &[]);
    let from = graph.index[&caller_key];
    let targets: Vec<&FuncKey> = graph
        .graph
        .edges(from)
        .map(|edge| &graph.graph[edge.target()])
        .collect();
    let reached: Vec<&str> = targets.iter().map(|key| key.container.as_str()).collect();
    assert!(
        reached.contains(&"Base"),
        "B-3: edge must reach Base::foo; got {targets:?}"
    );
    assert!(
        reached.contains(&"Derived"),
        "B-3: edge must reach Derived::foo; got {targets:?}"
    );
}
|
||
|
||
/// B-4: Rust trait dispatch. A call to `find` on a receiver typed as the
/// trait `Repo` must reach the `find` of every recorded implementer
/// (`UserRepo` and `CacheRepo`).
#[test]
fn b4_rust_trait_dispatch_fans_out_to_impls() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let user_impl = make_method_summary("find", "UserRepo", "src/user_repo.rs", "rust", 1);
    let cache_impl = make_method_summary("find", "CacheRepo", "src/cache_repo.rs", "rust", 1);
    // Summary on the trait itself; it exists to carry the hierarchy edges.
    let trait_marker = FuncSummary {
        hierarchy_edges: vec![
            (String::from("UserRepo"), String::from("Repo")),
            (String::from("CacheRepo"), String::from("Repo")),
        ],
        ..make_method_summary("__h", "Repo", "src/repo.rs", "rust", 0)
    };
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("r".into()),
        arity: Some(1),
        name: "find".into(),
        ..Default::default()
    };
    let lookup_fn = summary_with_sites("lookup", "src/main.rs", "rust", 0, vec![call]);

    let mut summaries = merge_summaries(vec![user_impl, cache_impl, trait_marker, lookup_fn], None);
    let caller_key = FuncKey {
        name: "lookup".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    };
    let type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Repo"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), type_facts);

    let graph = build_call_graph(&summaries, &[]);
    let from = graph.index[&caller_key];
    let targets: Vec<&FuncKey> = graph
        .graph
        .edges(from)
        .map(|edge| &graph.graph[edge.target()])
        .collect();
    let reached: Vec<&str> = targets.iter().map(|key| key.container.as_str()).collect();
    assert!(
        reached.contains(&"UserRepo") && reached.contains(&"CacheRepo"),
        "B-4: edges must fan out to both impls; got {targets:?}"
    );
}
|
||
|
||
/// B-7: empty hierarchy. No summary records any hierarchy edge, so a
/// receiver typed `Repository` must resolve to a single edge on the
/// `Repository` container, as a direct container lookup would.
#[test]
fn b7_empty_hierarchy_falls_back_to_single_container() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let on_repository = make_method_summary("findById", "Repository", "src/repo.rs", "rust", 1);
    let on_cache = make_method_summary("findById", "Cache", "src/cache.rs", "rust", 1);
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("repo".into()),
        arity: Some(1),
        name: "findById".into(),
        ..Default::default()
    };
    let lookup_fn = summary_with_sites("lookup", "src/main.rs", "rust", 0, vec![call]);

    // `hierarchy_edges` is left unset on every summary: `Repository` has
    // no sub-types to widen to.
    let mut summaries = merge_summaries(vec![on_repository, on_cache, lookup_fn], None);
    let caller_key = FuncKey {
        name: "lookup".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    };
    let type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Repository"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), type_facts);

    let graph = build_call_graph(&summaries, &[]);
    let from = graph.index[&caller_key];
    let reached: Vec<&FuncKey> = graph
        .graph
        .edges(from)
        .map(|edge| &graph.graph[edge.target()])
        .collect();
    assert_eq!(reached.len(), 1, "B-7: empty hierarchy → single edge");
    assert_eq!(reached[0].container, "Repository");
}
|
||
|
||
/// B-8: concrete sub-type. The receiver is typed as `UserRepo` (a
/// sub-type), not as the super-type `Repo`, so no hierarchy expansion may
/// happen: exactly one edge, to `UserRepo::findById`.
#[test]
fn b8_concrete_subtype_does_not_widen() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let user_impl = make_method_summary("findById", "UserRepo", "src/user_repo.rs", "rust", 1);
    let cache_impl = make_method_summary("findById", "CacheRepo", "src/cache_repo.rs", "rust", 1);
    let trait_marker = FuncSummary {
        hierarchy_edges: vec![
            (String::from("UserRepo"), String::from("Repo")),
            (String::from("CacheRepo"), String::from("Repo")),
        ],
        ..make_method_summary("__h", "Repo", "src/repo.rs", "rust", 0)
    };
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("r".into()),
        arity: Some(1),
        name: "findById".into(),
        ..Default::default()
    };
    let lookup_fn = summary_with_sites("lookup", "src/main.rs", "rust", 0, vec![call]);

    let mut summaries = merge_summaries(vec![user_impl, cache_impl, trait_marker, lookup_fn], None);
    let caller_key = FuncKey {
        name: "lookup".into(),
        namespace: "src/main.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    };
    // The recorded receiver type is the concrete `UserRepo`. No edge names
    // `UserRepo` as a super-type, so there is nothing to expand to and
    // only `UserRepo::findById` should be reached.
    let type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("UserRepo"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), type_facts);

    let graph = build_call_graph(&summaries, &[]);
    let from = graph.index[&caller_key];
    let targets: Vec<&FuncKey> = graph
        .graph
        .edges(from)
        .map(|edge| &graph.graph[edge.target()])
        .collect();
    assert_eq!(
        targets.len(),
        1,
        "B-8: concrete sub-type must not fan out; got {targets:?}"
    );
    assert_eq!(targets[0].container, "UserRepo");
}
|
||
|
||
/// B-9: diamond / duplicated edges. The same `A → Iface` and `B → Iface`
/// edges are emitted by two different summaries. The per-call-site result
/// must still reach each implementer at most once, and must reach both.
#[test]
fn b9_diamond_dedup_one_edge_per_funckey() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let impl_a = make_method_summary("doIt", "A", "src/A.java", "java", 0);
    let impl_b = make_method_summary("doIt", "B", "src/B.java", "java", 0);
    // Two separate files both declare that A and B extend Iface, so the
    // hierarchy edges arrive duplicated and have to be deduplicated.
    let first_emitter = FuncSummary {
        hierarchy_edges: vec![
            (String::from("A"), String::from("Iface")),
            (String::from("B"), String::from("Iface")),
        ],
        ..make_method_summary("__h", "Iface", "src/I1.java", "java", 0)
    };
    let second_emitter = FuncSummary {
        hierarchy_edges: vec![
            (String::from("A"), String::from("Iface")),
            (String::from("B"), String::from("Iface")),
        ],
        ..make_method_summary("__h2", "Iface", "src/I2.java", "java", 0)
    };
    let call = CalleeSite {
        ordinal: 0,
        receiver: Some("x".into()),
        arity: Some(0),
        name: "doIt".into(),
        ..Default::default()
    };
    let go_fn = summary_with_sites("go", "src/main.java", "java", 0, vec![call]);

    let mut summaries = merge_summaries(
        vec![impl_a, impl_b, first_emitter, second_emitter, go_fn],
        None,
    );
    let caller_key = FuncKey {
        name: "go".into(),
        namespace: "src/main.java".into(),
        lang: Lang::Java,
        arity: Some(0),
        ..Default::default()
    };
    let type_facts = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Iface"))],
        ..Default::default()
    };
    summaries.insert_ssa(caller_key.clone(), type_facts);

    let graph = build_call_graph(&summaries, &[]);
    let from = graph.index[&caller_key];
    let targets: Vec<&FuncKey> = graph
        .graph
        .edges(from)
        .map(|edge| &graph.graph[edge.target()])
        .collect();
    // If any implementer were edged twice, the set of containers would be
    // smaller than the list of targets.
    let distinct_containers: std::collections::HashSet<&str> =
        targets.iter().map(|key| key.container.as_str()).collect();
    assert_eq!(
        distinct_containers.len(),
        targets.len(),
        "B-9: dedup must give one edge per FuncKey; got {targets:?}"
    );
    assert!(distinct_containers.contains("A") && distinct_containers.contains("B"));
}
|
||
|
||
/// B-13 (stale hierarchy edge): `hierarchy_edges` names `Derived` as a
/// sub-type of `Base`, but no `Derived::foo` summary exists. Building
/// the call graph must not panic, and the caller must still receive an
/// edge to the `Base::foo` that is present.
#[test]
fn b13_stale_subtype_no_panic() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    // Only `Base::foo` is ever defined.
    let base_foo = make_method_summary("foo", "Base", "src/Base.java", "java", 0);

    // Carrier summary whose hierarchy edge points at the undefined `Derived`.
    let mut stale_hierarchy = make_method_summary("__h", "X", "src/X.java", "java", 0);
    stale_hierarchy.hierarchy_edges = vec![(String::from("Derived"), String::from("Base"))];

    // Single method call `b.foo()` at ordinal 0.
    let method_call = CalleeSite {
        receiver: Some("b".into()),
        name: "foo".into(),
        arity: Some(0),
        ordinal: 0,
        ..Default::default()
    };
    let entry = summary_with_sites("go", "src/main.java", "java", 0, vec![method_call]);

    let mut summaries = merge_summaries(vec![base_foo, stale_hierarchy, entry], None);

    let entry_key = FuncKey {
        name: "go".into(),
        namespace: "src/main.java".into(),
        lang: Lang::Java,
        arity: Some(0),
        ..Default::default()
    };
    // Type the receiver of call ordinal 0 as `Base`.
    let receiver_types = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("Base"))],
        ..Default::default()
    };
    summaries.insert_ssa(entry_key.clone(), receiver_types);

    // The build itself is part of the check: it must complete without panicking.
    let cg = build_call_graph(&summaries, &[]);
    let entry_node = cg.index[&entry_key];

    let mut targets: Vec<&FuncKey> = Vec::new();
    for edge in cg.graph.edges(entry_node) {
        targets.push(&cg.graph[edge.target()]);
    }

    let reaches_base_foo = targets
        .iter()
        .any(|key| key.container == "Base" && key.name == "foo");
    assert!(
        reaches_base_foo,
        "B-13: stale Derived must not block Base::foo edge; got {targets:?}"
    );
}
|
||
|
||
/// A call site without a receiver is a free-function call and must never
/// go down the devirtualisation path, even when a bogus
/// `typed_call_receivers` entry carries the same ordinal.
/// Regression guard: the existing namespace + use-map resolution remains
/// responsible for free-function calls.
#[test]
fn typed_call_receivers_skips_free_function_sites() {
    use crate::summary::ssa_summary::SsaFuncSummary;

    let callee = make_summary("helper", "src/lib.rs", "rust", 0, Vec::new());

    // `receiver: None` is what marks this site as a free-function call.
    let free_call = CalleeSite {
        receiver: None,
        name: "helper".into(),
        arity: Some(0),
        ordinal: 0,
        ..Default::default()
    };
    let entry = summary_with_sites("main", "src/lib.rs", "rust", 0, vec![free_call]);

    let mut summaries = merge_summaries(vec![callee, entry], None);

    let entry_key = FuncKey {
        name: "main".into(),
        namespace: "src/lib.rs".into(),
        lang: Lang::Rust,
        arity: Some(0),
        ..Default::default()
    };
    // The entry below matches ordinal 0, but the site has no receiver,
    // so it MUST be ignored.
    let bogus_receivers = SsaFuncSummary {
        typed_call_receivers: vec![(0, String::from("FakeContainer"))],
        ..Default::default()
    };
    summaries.insert_ssa(entry_key.clone(), bogus_receivers);

    let cg = build_call_graph(&summaries, &[]);

    // Standard same-namespace resolution still finds `helper`.
    assert_eq!(cg.graph.edge_count(), 1);
    assert!(cg.unresolved_not_found.is_empty());
    assert!(cg.unresolved_ambiguous.is_empty());
}
|
||
}
|