mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
278 lines
11 KiB
Rust
278 lines
11 KiB
Rust
use super::config::AuthAnalysisRules;
|
|
use super::model::{AuthorizationModel, CallSite};
|
|
use crate::utils::project::{FrameworkContext, rust_file_imports_web_framework};
|
|
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
use tree_sitter::Tree;
|
|
|
|
pub mod actix_web;
|
|
pub mod axum;
|
|
pub mod common;
|
|
pub mod django;
|
|
pub mod echo;
|
|
pub mod express;
|
|
pub mod fastify;
|
|
pub mod flask;
|
|
pub mod gin;
|
|
pub mod koa;
|
|
pub mod rails;
|
|
pub mod rocket;
|
|
pub mod sinatra;
|
|
pub mod spring;
|
|
|
|
pub trait AuthExtractor {
|
|
fn supports(&self, lang: &str, framework_ctx: Option<&FrameworkContext>) -> bool;
|
|
|
|
/// Returns true when this extractor expects the orchestrator to
|
|
/// have already populated `model.units` with one
|
|
/// `AnalysisUnitKind::Function` entry per top-level function /
|
|
/// method via [`common::collect_top_level_units`]. Defaults to
|
|
/// `true`; framework extractors that build their own unit set
|
|
/// (Spring, Rails) override to `false` so the orchestrator skips
|
|
/// the shared collection pass when only those extractors match.
|
|
fn requires_top_level_units(&self) -> bool {
|
|
true
|
|
}
|
|
|
|
fn extract(
|
|
&self,
|
|
tree: &Tree,
|
|
bytes: &[u8],
|
|
path: &Path,
|
|
rules: &AuthAnalysisRules,
|
|
model: &mut AuthorizationModel,
|
|
);
|
|
}
|
|
|
|
pub fn extract_authorization_model(
|
|
lang: &str,
|
|
framework_ctx: Option<&FrameworkContext>,
|
|
tree: &Tree,
|
|
bytes: &[u8],
|
|
path: &Path,
|
|
rules: &AuthAnalysisRules,
|
|
cross_file_router_deps: Option<&HashMap<String, Vec<(CallSite, bool)>>>,
|
|
) -> AuthorizationModel {
|
|
let extractors: [&dyn AuthExtractor; 13] = [
|
|
&express::ExpressExtractor,
|
|
&koa::KoaExtractor,
|
|
&fastify::FastifyExtractor,
|
|
&gin::GinExtractor,
|
|
&echo::EchoExtractor,
|
|
&flask::FlaskExtractor,
|
|
&django::DjangoExtractor,
|
|
&spring::SpringExtractor,
|
|
&rails::RailsExtractor,
|
|
&sinatra::SinatraExtractor,
|
|
&axum::AxumExtractor,
|
|
&actix_web::ActixWebExtractor,
|
|
&rocket::RocketExtractor,
|
|
];
|
|
let mut model = AuthorizationModel {
|
|
lang: lang.to_string(),
|
|
..Default::default()
|
|
};
|
|
// Pre-populate the cross-file router-dep map BEFORE extractors run.
|
|
// FlaskExtractor reads `model.cross_file_router_deps` and merges the
|
|
// resolved deps into its local router-deps map at extraction time,
|
|
// so per-route auth attribution sees both the local-file
|
|
// `dependencies=[Security(...)]` declarations and the cross-file
|
|
// lift from `<parent>.include_router(<this_file>.<router>, ...)`
|
|
// edges visible elsewhere in the project. Empty / `None` for every
|
|
// non-Python language and for files with no matching child edges.
|
|
if let Some(deps) = cross_file_router_deps {
|
|
model.cross_file_router_deps = deps.clone();
|
|
}
|
|
|
|
// **Hoist `collect_top_level_units` out of the per-extractor loop.**
|
|
// For multi-extractor languages (Go: gin+echo, JS/TS: express+koa+
|
|
// fastify, Python: flask+django, Rust: axum+actix_web+rocket, Ruby:
|
|
// sinatra) the legacy code re-walked the entire AST and rebuilt the
|
|
// `Function`-kind unit set per extractor (then deduped by span).
|
|
// `collect_top_level_units` was the dominant cost in
|
|
// `extract_authorization_model` (46% of total wall-clock on the
|
|
// mattermost/server/channels/app subtree, 2026-05-04 profile).
|
|
//
|
|
// After the hoist each extractor receives a `&mut model` that
|
|
// already carries the shared unit set; framework-specific work
|
|
// (route detection, middleware injection, typed-extractor guards)
|
|
// augments and promotes those units in place via the existing
|
|
// `attach_route_handler` "promote-or-create" path.
|
|
//
|
|
// Spring + Rails build their own unit set (`maybe_collect_controller`
|
|
// / Rails' `collect_nodes`), so they opt out via
|
|
// `requires_top_level_units = false`; the shared pass runs only
|
|
// when at least one matching extractor needs it.
|
|
let any_requires_units = extractors
|
|
.iter()
|
|
.any(|e| e.supports(lang, framework_ctx) && e.requires_top_level_units());
|
|
if any_requires_units {
|
|
common::collect_top_level_units(tree.root_node(), bytes, rules, &mut model);
|
|
}
|
|
|
|
for extractor in extractors {
|
|
if extractor.supports(lang, framework_ctx) {
|
|
extractor.extract(tree, bytes, path, rules, &mut model);
|
|
}
|
|
}
|
|
|
|
// Per-language web-framework signal used to gate the param-name arm
|
|
// of `unit_has_user_input_evidence`. Combines the project-root
|
|
// manifest detection (`framework_ctx`) with a per-file `use`/`import`
|
|
// check, so a single file in a workspace whose root manifest does
|
|
// not name a web framework can still opt back in by directly
|
|
// importing one (e.g. `crates/collab/src/rpc.rs` in zed: workspace
|
|
// root has no axum, but the file uses `axum::Router`).
|
|
//
|
|
// Three-valued: `Some(true)` keeps step 3 firing, `Some(false)`
|
|
// suppresses it, `None` means no detection ran ─ behavior unchanged.
|
|
model.lang_web_framework_signal = compute_web_framework_signal(lang, framework_ctx, bytes);
|
|
|
|
// **Dedup units by span across extractors.** Multiple extractors
|
|
// (e.g. Flask + Django on a Python file) each call
|
|
// `collect_top_level_units`, producing one unit per top-level
|
|
// function. When one extractor also recognises a route on that
|
|
// function and promotes its copy to `RouteHandler` (with injected
|
|
// middleware auth checks), the *other* extractor's untouched
|
|
// `Function` copy still runs through `check_ownership_gaps` and
|
|
// emits the FP from a unit that never saw the middleware-derived
|
|
// auth check.
|
|
//
|
|
// This step keeps a single canonical unit per source span,
|
|
// preferring `RouteHandler` over `Function`, merging auth_checks
|
|
// and folding operation lists conservatively. Route registrations
|
|
// are remapped to the surviving unit index.
|
|
deduplicate_units_by_span(&mut model);
|
|
|
|
model
|
|
}
|
|
|
|
/// Compute the per-file web-framework signal used to gate the
|
|
/// param-name arm of `unit_has_user_input_evidence`.
|
|
///
|
|
/// Currently emits a non-`None` value only for Rust files. The Rust
|
|
/// auth analysis is the single biggest source of internal-helper FPs
|
|
/// in non-web crates (zed's GUI / editor crates); the other languages
|
|
/// have their own handler-classification policies that already filter
|
|
/// effectively, so they keep their existing behavior (None →
|
|
/// fall-through to the param-name heuristic) until each is validated.
|
|
///
|
|
/// Three-valued semantics:
|
|
/// * `Some(true)` ─ project root manifest names a Rust web framework
|
|
/// (axum / actix_web / rocket), OR the file directly imports one.
|
|
/// Param-name evidence stays on.
|
|
/// * `Some(false)` ─ project root manifest was inspected (Cargo.toml
|
|
/// exists) and named no Rust web framework, AND the file does not
|
|
/// directly import one. Param-name evidence is suppressed: the
|
|
/// project has no HTTP boundary in Rust.
|
|
/// * `None` ─ no detection ran (no `framework_ctx`, no Cargo.toml
|
|
/// inspected). Behavior unchanged.
|
|
fn compute_web_framework_signal(
|
|
lang: &str,
|
|
framework_ctx: Option<&FrameworkContext>,
|
|
bytes: &[u8],
|
|
) -> Option<bool> {
|
|
if !matches!(lang, "rust" | "rs") {
|
|
return None;
|
|
}
|
|
let project_signal = framework_ctx.and_then(|ctx| ctx.lang_has_web_framework("rust"));
|
|
if project_signal == Some(true) {
|
|
return Some(true);
|
|
}
|
|
// Project says "no Rust framework" or never inspected. Consult the
|
|
// file's own imports as a per-file fallback; if the file uses an
|
|
// axum / actix_web / rocket symbol directly, treat it as a handler
|
|
// file even when the workspace-root Cargo.toml does not list the
|
|
// crate. (Real example: zed's `crates/collab/src/rpc.rs` imports
|
|
// axum but the workspace root Cargo.toml does not.)
|
|
if rust_file_imports_web_framework(bytes) {
|
|
return Some(true);
|
|
}
|
|
// No file-level evidence either. Only flip to `Some(false)` if a
|
|
// Cargo.toml manifest was actually inspected — single-file scans
|
|
// without project context get `None` and preserve prior behavior.
|
|
project_signal
|
|
}
|
|
|
|
fn deduplicate_units_by_span(model: &mut AuthorizationModel) {
|
|
use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind};
|
|
use std::collections::HashMap;
|
|
|
|
// First pass: choose a winner for each span, prefer the
|
|
// first-seen `RouteHandler` over any `Function` copy.
|
|
let mut winner_by_span: HashMap<(usize, usize), usize> = HashMap::new();
|
|
for (idx, unit) in model.units.iter().enumerate() {
|
|
let key = unit.span;
|
|
match winner_by_span.get(&key) {
|
|
None => {
|
|
winner_by_span.insert(key, idx);
|
|
}
|
|
Some(&existing) => {
|
|
let prev_kind = model.units[existing].kind;
|
|
if prev_kind != AnalysisUnitKind::RouteHandler
|
|
&& unit.kind == AnalysisUnitKind::RouteHandler
|
|
{
|
|
winner_by_span.insert(key, idx);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Second pass: drain auth_checks from losers so we can append them
|
|
// to the winners after the layout collapses.
|
|
let mut moved_checks: Vec<Vec<crate::auth_analysis::model::AuthCheck>> =
|
|
Vec::with_capacity(model.units.len());
|
|
for old_idx in 0..model.units.len() {
|
|
let span = model.units[old_idx].span;
|
|
let winner = *winner_by_span.get(&span).unwrap_or(&old_idx);
|
|
if winner == old_idx {
|
|
moved_checks.push(Vec::new());
|
|
} else {
|
|
moved_checks.push(std::mem::take(&mut model.units[old_idx].auth_checks));
|
|
}
|
|
}
|
|
|
|
// Third pass: emit surviving units (clone the winners) and build
|
|
// the old-idx → new-idx remap.
|
|
let mut new_idx_for_old: HashMap<usize, usize> = HashMap::new();
|
|
let mut surviving: Vec<AnalysisUnit> = Vec::with_capacity(winner_by_span.len());
|
|
for old_idx in 0..model.units.len() {
|
|
let span = model.units[old_idx].span;
|
|
let winner = *winner_by_span.get(&span).unwrap_or(&old_idx);
|
|
if winner == old_idx {
|
|
new_idx_for_old.insert(old_idx, surviving.len());
|
|
surviving.push(model.units[old_idx].clone());
|
|
}
|
|
}
|
|
|
|
// Fourth pass: drain loser auth_checks into their winners, deduping
|
|
// by (span, callee). Operations are not merged: both extractor
|
|
// passes recompute the same operation list from the AST, so the
|
|
// winner already carries the canonical set.
|
|
for (old_idx, checks) in moved_checks.iter_mut().enumerate() {
|
|
let span = model.units[old_idx].span;
|
|
let winner = *winner_by_span.get(&span).unwrap_or(&old_idx);
|
|
if winner == old_idx {
|
|
continue;
|
|
}
|
|
let Some(&new_winner_idx) = new_idx_for_old.get(&winner) else {
|
|
continue;
|
|
};
|
|
for check in checks.drain(..) {
|
|
let already_present = surviving[new_winner_idx]
|
|
.auth_checks
|
|
.iter()
|
|
.any(|existing| existing.span == check.span && existing.callee == check.callee);
|
|
if !already_present {
|
|
surviving[new_winner_idx].auth_checks.push(check);
|
|
}
|
|
}
|
|
}
|
|
|
|
model.units = surviving;
|
|
for route in &mut model.routes {
|
|
if let Some(&new_idx) = new_idx_for_old.get(&route.unit_idx) {
|
|
route.unit_idx = new_idx;
|
|
}
|
|
}
|
|
}
|