use super::config::AuthAnalysisRules; use super::model::{AuthorizationModel, CallSite}; use crate::utils::project::{FrameworkContext, rust_file_imports_web_framework}; use std::collections::HashMap; use std::path::Path; use tree_sitter::Tree; pub mod actix_web; pub mod axum; pub mod common; pub mod django; pub mod echo; pub mod express; pub mod fastify; pub mod flask; pub mod gin; pub mod koa; pub mod rails; pub mod rocket; pub mod sinatra; pub mod spring; pub trait AuthExtractor { fn supports(&self, lang: &str, framework_ctx: Option<&FrameworkContext>) -> bool; /// Returns true when this extractor expects the orchestrator to /// have already populated `model.units` with one /// `AnalysisUnitKind::Function` entry per top-level function / /// method via [`common::collect_top_level_units`]. Defaults to /// `true`; framework extractors that build their own unit set /// (Spring, Rails) override to `false` so the orchestrator skips /// the shared collection pass when only those extractors match. fn requires_top_level_units(&self) -> bool { true } fn extract( &self, tree: &Tree, bytes: &[u8], path: &Path, rules: &AuthAnalysisRules, model: &mut AuthorizationModel, ); } pub fn extract_authorization_model( lang: &str, framework_ctx: Option<&FrameworkContext>, tree: &Tree, bytes: &[u8], path: &Path, rules: &AuthAnalysisRules, cross_file_router_deps: Option<&HashMap>>, ) -> AuthorizationModel { let extractors: [&dyn AuthExtractor; 13] = [ &express::ExpressExtractor, &koa::KoaExtractor, &fastify::FastifyExtractor, &gin::GinExtractor, &echo::EchoExtractor, &flask::FlaskExtractor, &django::DjangoExtractor, &spring::SpringExtractor, &rails::RailsExtractor, &sinatra::SinatraExtractor, &axum::AxumExtractor, &actix_web::ActixWebExtractor, &rocket::RocketExtractor, ]; let mut model = AuthorizationModel { lang: lang.to_string(), ..Default::default() }; // Pre-populate the cross-file router-dep map BEFORE extractors run. // FlaskExtractor reads `model.cross_file_router_deps` and merges the // resolved deps into its local router-deps map at extraction time, // so per-route auth attribution sees both the local-file // `dependencies=[Security(...)]` declarations and the cross-file // lift from `.include_router(., ...)` // edges visible elsewhere in the project. Empty / `None` for every // non-Python language and for files with no matching child edges. if let Some(deps) = cross_file_router_deps { model.cross_file_router_deps = deps.clone(); } // **Hoist `collect_top_level_units` out of the per-extractor loop.** // For multi-extractor languages (Go: gin+echo, JS/TS: express+koa+ // fastify, Python: flask+django, Rust: axum+actix_web+rocket, Ruby: // sinatra) the legacy code re-walked the entire AST and rebuilt the // `Function`-kind unit set per extractor (then deduped by span). // `collect_top_level_units` was the dominant cost in // `extract_authorization_model` (46% of total wall-clock on the // mattermost/server/channels/app subtree, 2026-05-04 profile). // // After the hoist each extractor receives a `&mut model` that // already carries the shared unit set; framework-specific work // (route detection, middleware injection, typed-extractor guards) // augments and promotes those units in place via the existing // `attach_route_handler` "promote-or-create" path. // // Spring + Rails build their own unit set (`maybe_collect_controller` // / Rails' `collect_nodes`), so they opt out via // `requires_top_level_units = false`; the shared pass runs only // when at least one matching extractor needs it. let any_requires_units = extractors .iter() .any(|e| e.supports(lang, framework_ctx) && e.requires_top_level_units()); if any_requires_units { common::collect_top_level_units(tree.root_node(), bytes, rules, &mut model); } for extractor in extractors { if extractor.supports(lang, framework_ctx) { extractor.extract(tree, bytes, path, rules, &mut model); } } // Per-language web-framework signal used to gate the param-name arm // of `unit_has_user_input_evidence`. Combines the project-root // manifest detection (`framework_ctx`) with a per-file `use`/`import` // check, so a single file in a workspace whose root manifest does // not name a web framework can still opt back in by directly // importing one (e.g. `crates/collab/src/rpc.rs` in zed: workspace // root has no axum, but the file uses `axum::Router`). // // Three-valued: `Some(true)` keeps step 3 firing, `Some(false)` // suppresses it, `None` means no detection ran ─ behavior unchanged. model.lang_web_framework_signal = compute_web_framework_signal(lang, framework_ctx, bytes); // **Dedup units by span across extractors.** Multiple extractors // (e.g. Flask + Django on a Python file) each call // `collect_top_level_units`, producing one unit per top-level // function. When one extractor also recognises a route on that // function and promotes its copy to `RouteHandler` (with injected // middleware auth checks), the *other* extractor's untouched // `Function` copy still runs through `check_ownership_gaps` and // emits the FP from a unit that never saw the middleware-derived // auth check. // // This step keeps a single canonical unit per source span, // preferring `RouteHandler` over `Function`, merging auth_checks // and folding operation lists conservatively. Route registrations // are remapped to the surviving unit index. deduplicate_units_by_span(&mut model); model } /// Compute the per-file web-framework signal used to gate the /// param-name arm of `unit_has_user_input_evidence`. /// /// Currently emits a non-`None` value only for Rust files. The Rust /// auth analysis is the single biggest source of internal-helper FPs /// in non-web crates (zed's GUI / editor crates); the other languages /// have their own handler-classification policies that already filter /// effectively, so they keep their existing behavior (None → /// fall-through to the param-name heuristic) until each is validated. /// /// Three-valued semantics: /// * `Some(true)` ─ project root manifest names a Rust web framework /// (axum / actix_web / rocket), OR the file directly imports one. /// Param-name evidence stays on. /// * `Some(false)` ─ project root manifest was inspected (Cargo.toml /// exists) and named no Rust web framework, AND the file does not /// directly import one. Param-name evidence is suppressed: the /// project has no HTTP boundary in Rust. /// * `None` ─ no detection ran (no `framework_ctx`, no Cargo.toml /// inspected). Behavior unchanged. fn compute_web_framework_signal( lang: &str, framework_ctx: Option<&FrameworkContext>, bytes: &[u8], ) -> Option { if !matches!(lang, "rust" | "rs") { return None; } let project_signal = framework_ctx.and_then(|ctx| ctx.lang_has_web_framework("rust")); if project_signal == Some(true) { return Some(true); } // Project says "no Rust framework" or never inspected. Consult the // file's own imports as a per-file fallback; if the file uses an // axum / actix_web / rocket symbol directly, treat it as a handler // file even when the workspace-root Cargo.toml does not list the // crate. (Real example: zed's `crates/collab/src/rpc.rs` imports // axum but the workspace root Cargo.toml does not.) if rust_file_imports_web_framework(bytes) { return Some(true); } // No file-level evidence either. Only flip to `Some(false)` if a // Cargo.toml manifest was actually inspected — single-file scans // without project context get `None` and preserve prior behavior. project_signal } fn deduplicate_units_by_span(model: &mut AuthorizationModel) { use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind}; use std::collections::HashMap; // First pass: choose a winner for each span, prefer the // first-seen `RouteHandler` over any `Function` copy. let mut winner_by_span: HashMap<(usize, usize), usize> = HashMap::new(); for (idx, unit) in model.units.iter().enumerate() { let key = unit.span; match winner_by_span.get(&key) { None => { winner_by_span.insert(key, idx); } Some(&existing) => { let prev_kind = model.units[existing].kind; if prev_kind != AnalysisUnitKind::RouteHandler && unit.kind == AnalysisUnitKind::RouteHandler { winner_by_span.insert(key, idx); } } } } // Second pass: drain auth_checks from losers so we can append them // to the winners after the layout collapses. let mut moved_checks: Vec> = Vec::with_capacity(model.units.len()); for old_idx in 0..model.units.len() { let span = model.units[old_idx].span; let winner = *winner_by_span.get(&span).unwrap_or(&old_idx); if winner == old_idx { moved_checks.push(Vec::new()); } else { moved_checks.push(std::mem::take(&mut model.units[old_idx].auth_checks)); } } // Third pass: emit surviving units (clone the winners) and build // the old-idx → new-idx remap. let mut new_idx_for_old: HashMap = HashMap::new(); let mut surviving: Vec = Vec::with_capacity(winner_by_span.len()); for old_idx in 0..model.units.len() { let span = model.units[old_idx].span; let winner = *winner_by_span.get(&span).unwrap_or(&old_idx); if winner == old_idx { new_idx_for_old.insert(old_idx, surviving.len()); surviving.push(model.units[old_idx].clone()); } } // Fourth pass: drain loser auth_checks into their winners, deduping // by (span, callee). Operations are not merged: both extractor // passes recompute the same operation list from the AST, so the // winner already carries the canonical set. for (old_idx, checks) in moved_checks.iter_mut().enumerate() { let span = model.units[old_idx].span; let winner = *winner_by_span.get(&span).unwrap_or(&old_idx); if winner == old_idx { continue; } let Some(&new_winner_idx) = new_idx_for_old.get(&winner) else { continue; }; for check in checks.drain(..) { let already_present = surviving[new_winner_idx] .auth_checks .iter() .any(|existing| existing.span == check.span && existing.callee == check.callee); if !already_present { surviving[new_winner_idx].auth_checks.push(check); } } } model.units = surviving; for route in &mut model.routes { if let Some(&new_idx) = new_idx_for_old.get(&route.unit_idx) { route.unit_idx = new_idx; } } }