mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
Performance and precision pass (#64)
This commit is contained in:
parent
c7c5e0f3a1
commit
fb698d2c27
97 changed files with 9932 additions and 517 deletions
|
|
@ -21,6 +21,7 @@ pub mod ssa_summary;
|
|||
use crate::labels::Cap;
|
||||
use crate::summary::ssa_summary::SsaFuncSummary;
|
||||
use crate::symbol::{FuncKey, FuncKind, Lang, normalize_namespace};
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
|
@ -517,15 +518,20 @@ impl<'a> CalleeQuery<'a> {
|
|||
/// for same-language resolution in the taint engine.
|
||||
#[derive(Default)]
|
||||
pub struct GlobalSummaries {
|
||||
by_key: HashMap<FuncKey, FuncSummary>,
|
||||
/// FxHashMap (rustc_hash) replaces stdlib SipHash. FuncKey carries 3
|
||||
/// String fields, so any HashMap operation walks ≥30 bytes through the
|
||||
/// hasher; FxHash is ~5x faster than SipHash on this workload. Seed
|
||||
/// is fixed (no DoS hardening), which is fine for an in-process index
|
||||
/// keyed by static program-derived names.
|
||||
by_key: FxHashMap<FuncKey, FuncSummary>,
|
||||
/// Bare leaf-name index, kept for compatibility with callers that only
|
||||
/// see an unqualified call string. A single name may map to many keys
|
||||
/// across containers / files / arities.
|
||||
by_lang_name: HashMap<(Lang, String), Vec<FuncKey>>,
|
||||
by_lang_name: FxHashMap<(Lang, String), Vec<FuncKey>>,
|
||||
/// Container-qualified index: keyed on `"{container}::{name}"` (or just
|
||||
/// `name` for free functions). Used to resolve calls when the call-site
|
||||
/// can supply a receiver / container hint (e.g. `OrderService::process`).
|
||||
by_lang_qualified: HashMap<(Lang, String), Vec<FuncKey>>,
|
||||
by_lang_qualified: FxHashMap<(Lang, String), Vec<FuncKey>>,
|
||||
/// Rust-only secondary index keyed on `(module_path, name)`.
|
||||
///
|
||||
/// Populated whenever a Rust [`FuncSummary`] is inserted with a
|
||||
|
|
@ -533,7 +539,7 @@ pub struct GlobalSummaries {
|
|||
/// candidates by their crate-relative module rather than their
|
||||
/// filesystem path. Same name / module / arity overloads land on the
|
||||
/// same vector, arity narrowing happens at resolution time.
|
||||
by_rust_module: HashMap<(String, String), Vec<FuncKey>>,
|
||||
by_rust_module: FxHashMap<(String, String), Vec<FuncKey>>,
|
||||
/// Precise SSA-derived per-parameter summaries, keyed by `FuncKey`.
|
||||
/// These take precedence over `FuncSummary` during callee resolution.
|
||||
ssa_by_key: HashMap<FuncKey, SsaFuncSummary>,
|
||||
|
|
@ -546,6 +552,18 @@ pub struct GlobalSummaries {
|
|||
/// pass 1 and consumed by
|
||||
/// [`crate::auth_analysis::run_auth_analysis`] during pass 2.
|
||||
auth_by_key: HashMap<FuncKey, crate::auth_analysis::model::AuthCheckSummary>,
|
||||
/// Per-Python-file router declarations + `include_router` edges,
|
||||
/// keyed by `module_id_for_storage(file_path)` (basename without
|
||||
/// `.py`, or `parent_dir::__init__` for `__init__.py`). Populated
|
||||
/// during pass 1 and consumed by
|
||||
/// [`Self::resolve_cross_file_router_deps`] at pass 2 entry to lift
|
||||
/// FastAPI router-level `dependencies=[Security(...)]` declared in a
|
||||
/// parent file (`__init__.py` calling
|
||||
/// `<parent>.include_router(<child>.router, ...)`) onto the bare
|
||||
/// child router declared in another file — closing the airflow
|
||||
/// execution-API auth-recognition gap on routes attached to bare
|
||||
/// child routers.
|
||||
router_facts_by_module: HashMap<String, crate::auth_analysis::router_facts::PerFileRouterFacts>,
|
||||
/// Type hierarchy index for runtime virtual-dispatch fan-out.
|
||||
///
|
||||
/// Installed by [`Self::install_hierarchy`] after pass 1 from the
|
||||
|
|
@ -856,6 +874,11 @@ impl GlobalSummaries {
|
|||
for (key, auth_sum) in other.auth_by_key {
|
||||
self.auth_by_key.insert(key, auth_sum);
|
||||
}
|
||||
// Router facts: last-writer-wins per (module_id) key. Re-analysing
|
||||
// a file produces a fresh snapshot of its router declarations + edges.
|
||||
for (module_id, facts) in other.router_facts_by_module {
|
||||
self.router_facts_by_module.insert(module_id, facts);
|
||||
}
|
||||
// Hierarchy index: invalidate after a merge so the next consumer
|
||||
// sees a freshly-built view that includes `other`'s edges. The
|
||||
// alternative, point-merging two indexes, is racy when the
|
||||
|
|
@ -991,6 +1014,80 @@ impl GlobalSummaries {
|
|||
self.auth_by_key.len()
|
||||
}
|
||||
|
||||
/// Insert a per-file `PerFileRouterFacts` snapshot. Last-writer-wins
|
||||
/// per `module_id` key — re-analysing a file produces a fresh
|
||||
/// snapshot of its router declarations and `include_router` edges.
|
||||
pub fn insert_router_facts(
|
||||
&mut self,
|
||||
module_id: String,
|
||||
facts: crate::auth_analysis::router_facts::PerFileRouterFacts,
|
||||
) {
|
||||
self.router_facts_by_module.insert(module_id, facts);
|
||||
}
|
||||
|
||||
/// Resolve cross-file router-level deps for the file identified by
|
||||
/// `child_module_id`. Walks every other file's persisted
|
||||
/// `RouterIncludeEdge` list, finds edges whose `child_module_id`
|
||||
/// matches, and accumulates the parent file's
|
||||
/// `local_router_deps[parent_var]` against `child_var` — producing
|
||||
/// a `<child_var> → Vec<(CallSite, scoped_security)>` map ready to
|
||||
/// merge into the active file's
|
||||
/// `AuthorizationModel.cross_file_router_deps`.
|
||||
///
|
||||
/// Single-hop only. Transitive lifts (`grandparent.include_router(parent);
|
||||
/// parent.include_router(child)`) are not currently resolved — the
|
||||
/// airflow shape that motivated this fix is single-hop, and adding
|
||||
/// transitive resolution is a follow-up that would also need to
|
||||
/// model the bare-identifier `outer.include_router(inner_router)`
|
||||
/// case which the extractor presently skips.
|
||||
///
|
||||
/// Returns an empty map when `child_module_id` matches no edges or
|
||||
/// when the index is empty.
|
||||
pub fn resolve_cross_file_router_deps(
|
||||
&self,
|
||||
child_module_id: &str,
|
||||
) -> HashMap<String, Vec<(crate::auth_analysis::model::CallSite, bool)>> {
|
||||
let mut out: HashMap<String, Vec<(crate::auth_analysis::model::CallSite, bool)>> =
|
||||
HashMap::new();
|
||||
if self.router_facts_by_module.is_empty() {
|
||||
return out;
|
||||
}
|
||||
for facts in self.router_facts_by_module.values() {
|
||||
for edge in &facts.include_router_edges {
|
||||
if edge.child_module_id != child_module_id {
|
||||
continue;
|
||||
}
|
||||
// Look up the parent's deps in the SAME file's
|
||||
// local_router_deps map (parent declarations and the
|
||||
// include_router edge live in the same file).
|
||||
let Some(parent_deps) = facts.local_router_deps.get(&edge.parent_var) else {
|
||||
continue;
|
||||
};
|
||||
if parent_deps.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let entry = out.entry(edge.child_var.clone()).or_default();
|
||||
for dep in parent_deps {
|
||||
// Dedup by (callee name, scoped flag) so multiple
|
||||
// parents declaring the same dep don't double-fire.
|
||||
let already = entry
|
||||
.iter()
|
||||
.any(|(call, scoped)| call.name == dep.0.name && *scoped == dep.1);
|
||||
if !already {
|
||||
entry.push(dep.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Count of files that contributed router facts to the index.
|
||||
/// Exposed for `tracing::debug!` observability.
|
||||
pub fn router_facts_len(&self) -> usize {
|
||||
self.router_facts_by_module.len()
|
||||
}
|
||||
|
||||
/// Insert a cross-file callee body.
|
||||
///
|
||||
/// See [`insert_ssa`](Self::insert_ssa) for the identity-safety rule.
|
||||
|
|
@ -1050,7 +1147,10 @@ impl GlobalSummaries {
|
|||
|
||||
#[allow(dead_code)] // used by tests and future call-graph consumers
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.by_key.is_empty() && self.ssa_by_key.is_empty() && self.auth_by_key.is_empty()
|
||||
self.by_key.is_empty()
|
||||
&& self.ssa_by_key.is_empty()
|
||||
&& self.auth_by_key.is_empty()
|
||||
&& self.router_facts_by_module.is_empty()
|
||||
}
|
||||
|
||||
/// Iterate over all (key, summary) pairs.
|
||||
|
|
@ -1582,6 +1682,7 @@ impl std::fmt::Debug for GlobalSummaries {
|
|||
.field("ssa_len", &self.ssa_by_key.len())
|
||||
.field("bodies_len", &self.bodies_by_key.len())
|
||||
.field("auth_len", &self.auth_by_key.len())
|
||||
.field("router_facts_len", &self.router_facts_by_module.len())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3851,6 +3851,126 @@ fn cross_file_devirt_does_not_union_unrelated_findbyids() {
|
|||
assert_eq!(cache_sum.tainted_sink_params, vec![0]);
|
||||
}
|
||||
|
||||
/// Happy path for cross-file router-dep resolution: a parent
/// `__init__.py` module declares a scoped `Security(...)` dep on
/// `authenticated_router` and includes two child routers through
/// `<parent>.include_router(<child_module>.<child_var>, ...)`.
/// Resolving for either child module must yield a
/// `<child_var> → Vec<(CallSite, scoped)>` map carrying that dep, and
/// a module no edge points at must resolve to an empty map.
#[test]
fn resolve_cross_file_router_deps_lifts_parent_security_dep_onto_child_router() {
    use crate::auth_analysis::model::CallSite;
    use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge};

    // Parent facts: one scoped `require_auth` dep on
    // `authenticated_router`, plus an include edge per child module.
    let require_auth = CallSite {
        name: "require_auth".into(),
        args: Vec::new(),
        span: (0, 0),
        args_value_refs: Vec::new(),
    };
    let mut init_facts = PerFileRouterFacts::default();
    init_facts
        .local_router_deps
        .insert("authenticated_router".into(), vec![(require_auth, true)]);
    for child_module in ["task_instances", "dag_runs"] {
        init_facts.include_router_edges.push(RouterIncludeEdge {
            parent_var: "authenticated_router".into(),
            child_module_id: child_module.into(),
            child_var: "router".into(),
        });
    }

    let mut summaries = GlobalSummaries::new();
    summaries.insert_router_facts("routes::__init__".into(), init_facts);
    // The child file itself only declares a bare router, so its own
    // facts are empty; everything it gets must come from the parent.
    summaries.insert_router_facts("task_instances".into(), PerFileRouterFacts::default());

    // task_instances: exactly one lifted dep under `router`, with the
    // callee name and scoped flag intact.
    let task_lift = summaries.resolve_cross_file_router_deps("task_instances");
    let task_deps = task_lift.get("router").expect("router child resolved");
    assert_eq!(task_deps.len(), 1);
    let (lifted_call, lifted_scoped) = &task_deps[0];
    assert_eq!(lifted_call.name, "require_auth");
    assert!(*lifted_scoped, "scoped flag preserved");

    // dag_runs shares the parent, so it receives the same single dep.
    let dag_lift = summaries.resolve_cross_file_router_deps("dag_runs");
    assert_eq!(dag_lift.get("router").map(|deps| deps.len()), Some(1));

    // A module that no edge targets gets nothing.
    assert!(
        summaries
            .resolve_cross_file_router_deps("nonexistent")
            .is_empty()
    );
}
|
||||
|
||||
/// An include edge whose parent var has no recorded deps must not
/// produce any output: here `ghost_router` is referenced by the edge
/// but has no entry in the parent's `local_router_deps`.
#[test]
fn resolve_cross_file_router_deps_skips_edges_with_no_parent_deps() {
    use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge};

    // Edge only — no matching `local_router_deps` entry is registered.
    let dangling_edge = RouterIncludeEdge {
        parent_var: "ghost_router".into(),
        child_module_id: "child".into(),
        child_var: "router".into(),
    };
    let mut parent_facts = PerFileRouterFacts::default();
    parent_facts.include_router_edges.push(dangling_edge);

    let mut summaries = GlobalSummaries::new();
    summaries.insert_router_facts("parent".into(), parent_facts);

    assert!(summaries.resolve_cross_file_router_deps("child").is_empty());
}
|
||||
|
||||
/// Two different parent files include the same child router and each
/// declares the identical `require_auth` (scoped) dep on its own
/// parent var. The resolver must collapse them to a single entry,
/// de-duplicating on (callee.name, scoped).
#[test]
fn resolve_cross_file_router_deps_dedups_duplicate_parent_deps() {
    use crate::auth_analysis::model::CallSite;
    use crate::auth_analysis::router_facts::{PerFileRouterFacts, RouterIncludeEdge};

    let require_auth = CallSite {
        name: "require_auth".into(),
        args: Vec::new(),
        span: (0, 0),
        args_value_refs: Vec::new(),
    };

    // Builds the facts of one parent file: `router_var` carries the
    // scoped `require_auth` dep and includes `child.router`.
    let parent_including_child = |router_var: &str| {
        let mut facts = PerFileRouterFacts::default();
        facts
            .local_router_deps
            .insert(router_var.into(), vec![(require_auth.clone(), true)]);
        facts.include_router_edges.push(RouterIncludeEdge {
            parent_var: router_var.into(),
            child_module_id: "child".into(),
            child_var: "router".into(),
        });
        facts
    };

    let mut summaries = GlobalSummaries::new();
    // Parent A and parent B: same dep, different files and parent vars.
    summaries.insert_router_facts("parent_a".into(), parent_including_child("router_a"));
    summaries.insert_router_facts("parent_b".into(), parent_including_child("router_b"));

    let lifted = summaries.resolve_cross_file_router_deps("child");
    let router_deps = lifted.get("router").expect("router resolved");
    assert_eq!(router_deps.len(), 1, "duplicate (callee, scoped) deduplicated");
}
|
||||
|
||||
// ── the analysis ────────────────────
|
||||
//
|
||||
// `GlobalSummaries::resolve_callee_widened` is the runtime counterpart of
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue