use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Framework { Express, Koa, Fastify, Gin, Echo, Flask, Django, Spring, Rails, Sinatra, Axum, ActixWeb, Rocket, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum HttpMethod { Get, Post, Put, Delete, Patch, All, Use, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AnalysisUnitKind { RouteHandler, Function, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum AuthCheckKind { LoginGuard, AdminGuard, Ownership, Membership, TokenExpiry, TokenRecipient, Other, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum OperationKind { Read, Mutation, TokenLookup, } /// Classification of a sensitive operation by the resource it targets. /// `check_ownership_gaps` only fires on the first five classes , /// `InMemoryLocal` is never authorization-relevant. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SinkClass { /// A write against a persistent datastore (SQL, ORM, or KV that /// crosses tenant boundaries). DbMutation, /// A read against a persistent datastore that may return rows /// belonging to another tenant without an explicit ownership check. DbCrossTenantRead, /// A publish / broadcast against a realtime bus (pub/sub, websocket /// channel, event stream). Always auth-relevant because receivers /// are typically scoped by tenant id. RealtimePublish, /// An outbound HTTP / RPC call whose target or payload can encode a /// tenant-scoped identifier. OutboundNetwork, /// A cache read/write whose keys routinely cross tenant boundaries /// (Redis / memcache / distributed cache client). CacheCrossTenant, /// A method call against a local, in-memory collection (HashMap, /// HashSet, Vec, …), never authorization-relevant. InMemoryLocal, } impl SinkClass { /// Does this sink class participate in the missing-ownership gate? /// Only `InMemoryLocal` is excluded; all other classes are treated /// as potential cross-tenant sinks. pub fn is_auth_relevant(&self) -> bool { !matches!(self, SinkClass::InMemoryLocal) } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ValueSourceKind { RequestParam, RequestBody, RequestQuery, Session, Identifier, MemberField, TokenField, ArrayIndex, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct ValueRef { pub source_kind: ValueSourceKind, pub name: String, pub base: Option, pub field: Option, pub index: Option, pub span: (usize, usize), } #[derive(Debug, Clone, PartialEq, Eq)] pub struct CallSite { pub name: String, pub args: Vec, pub span: (usize, usize), /// Per-positional-argument value-refs. Populated only by the /// structured `collect_call` path (the auxiliary /// `call_site_from_node` constructor leaves this empty); used to /// attribute synthesised helper-call auth checks to the concrete /// subjects passed by the caller. pub args_value_refs: Vec>, } #[derive(Debug, Clone)] pub struct AuthCheck { pub kind: AuthCheckKind, pub callee: String, pub subjects: Vec, pub span: (usize, usize), pub line: usize, pub args: Vec, pub condition_text: Option, /// True when the check was declared at the route boundary /// (decorator / middleware / dependency-injection list) rather /// than as a per-call check inside the handler body. /// /// Route-level non-login-guard checks authorize the *entire* /// handler, they gate every value the handler receives, every /// row the handler fetches, and every operation downstream. An /// in-body `auth_check_covers_subject` walk that requires a /// per-name subject match cannot model that semantics: a /// FastAPI `dependencies=[Depends(requires_access_dag(method= /// "POST", access_entity=DagAccessEntity.RUN))]` is opaque to /// the engine, the inner `requires_access_dag` call carries no /// per-arg subject ref pointing to `dag_id` or `dag.id`. The /// flag tells `auth_check_covers_subject` to short-circuit /// `true` for any non-login-guard route-level check, leaving /// only the LoginGuard / TokenExpiry / TokenRecipient kinds /// (already excluded upstream by `has_prior_subject_auth`'s /// filter) to be ignored. /// /// Set by `inject_middleware_auth` (Django, Flask, FastAPI) at /// the route-decorator entry point. Default `false` for /// in-body checks (`require_membership(user, group_id)`, /// `is_admin(user)`, etc.), those still flow through the /// per-subject coverage logic so a check on /// `community.creator_id` doesn't blanket-suppress every other /// subject in the unit. pub is_route_level: bool, } #[derive(Debug, Clone)] pub struct SensitiveOperation { pub kind: OperationKind, /// Sink classification. `None` means the operation was recorded /// for taxonomy completeness but does not match any known resource /// class, defensive, and currently unused. pub sink_class: Option, pub callee: String, pub subjects: Vec, pub span: (usize, usize), pub line: usize, pub text: String, } #[derive(Debug, Clone)] pub struct AnalysisUnit { pub kind: AnalysisUnitKind, pub name: Option, pub span: (usize, usize), pub params: Vec, pub context_inputs: Vec, pub call_sites: Vec, pub auth_checks: Vec, pub operations: Vec, pub value_refs: Vec, pub condition_texts: Vec, pub line: usize, /// Map from local variable name to the row binding it was read from. /// Populated when the extractor sees `let V = ROW.method(..)` or /// `let V = ROW.field`. Used by `auth_check_covers_subject` so a /// row-level ownership-equality check on the row implicitly covers /// downstream uses of fields read from the same row. pub row_field_vars: HashMap, /// Map from local variable name to the full member-chain expression /// it was bound from (`let community_id = req.community_id` → /// `community_id → "req.community_id"`). Distinct from /// `row_field_vars`, which records only the receiver (loses the /// field name). Powers the row-population reverse-walk's local- /// alias case: when a sink subject is a plain identifier, the /// reverse walk consults this map to also accept rows whose /// population args contain the aliased chain. pub var_alias_chain: HashMap, /// Per row-binding metadata: the `let ROW = CALL(..)` declaration /// line and the value-refs appearing in the call's arguments. /// Populated for every `let V = call(..)` shape. Powers the /// "fetch-then-authorize" exemption in `checks.rs`: if a row-fetch /// operation produces variable `V` and SOME auth check elsewhere /// in the unit names `V`, the row-fetch operation is considered /// authorized, even though the check appears textually after the /// fetch. This is the standard idiom in row-level authz code: /// fetch the row first to extract the resource id, then call /// `check__(&user, &row, ...)` to authorize it. pub row_population_data: HashMap)>, /// Variables bound to an authenticated-user value. Populated from /// `let V = require_auth(..).await?` (or any call matching the /// configured login-guard / authorization-check names) and from /// typed route-handler parameters (`CurrentUser`, `AuthUser`, …). /// Consulted by `is_actor_context_subject` so `V.id`-shaped subjects /// are treated as the caller's own id, not as a scoped foreign id. pub self_actor_vars: HashSet, /// Variables holding the authenticated actor's identifier (transitive /// copies of `V.id` / `V.user_id` / `V.uid` / `V.userId` for some /// `V ∈ self_actor_vars`). Populated when the extractor sees /// `let X = V.id` or `let X = (V.id as ..).into()` / `V.id.into()` /// shapes, anywhere a route-handler reduces the authenticated /// principal to a scalar id and reuses it as a SQL parameter. /// Consulted by `is_actor_context_subject` so subjects whose `name` /// is in this set count as actor context, not foreign scoped IDs. pub self_actor_id_vars: HashSet, /// Local variables bound (directly or transitively) to a SQL query /// whose literal text classifies as authorization-gated by /// `sql_semantics::classify_sql_query`. Includes: /// * the `let X = db.prepare(LIT)…` result var, /// * the loop var of `for ROW in X`, /// * column-binding vars `let Y = ROW.get(..)` whose receiver is /// itself in this set. /// /// `auth_check_covers_subject` walks `row_field_vars` transitively /// and treats a subject as covered when the chain terminates in /// one of these names. pub authorized_sql_vars: HashSet, /// Local variables bound (by `let`, `:=`, `var`, `const`) to a /// pure literal, string, integer, float, or boolean. These are /// developer-chosen constants and cannot be user-controlled, so /// they must never trip `.auth.missing_ownership_check` /// even when the variable name passes `is_id_like`. Closes the /// gin/context_test.go FP where `id := "id"` triggered the rule. pub const_bound_vars: HashSet, /// Function parameter names whose static type maps to a /// payload-incompatible scalar ([`crate::ssa::type_facts::TypeKind::Int`] /// or [`crate::ssa::type_facts::TypeKind::Bool`]). Populated /// per-file by `apply_typed_bounded_params` using the /// SSA-derived `VarTypes` map. Consulted by /// `is_typed_bounded_subject` so parameters like Spring `Long /// userId`, Axum `Path`, or FastAPI `user_id: int` are not /// classified as scoped-identifier subjects even when their name /// passes `is_id_like`, the framework guarantees the value is a /// number that cannot carry a SQL/file/shell payload. pub typed_bounded_vars: HashSet, /// per-DTO-extractor parameter, the field names whose /// declared type is a payload-incompatible scalar. Map key is the /// parameter name (e.g. `dto`), value is the list of field names /// (e.g. `["age", "count"]`). Populated by /// `apply_typed_bounded_params` only when the parameter /// itself was recognised as a typed extractor, bare parameters /// with no framework gate never lift their fields. pub typed_bounded_dto_fields: HashMap>, /// Per-unit dynamic session-base text set, supplementing the /// hard-coded list in `is_self_scoped_session_base`. Populated by /// the extractor when a parameter's static type signals a known /// auth-context shape, e.g. TRPC's `Options { ctx: { user: /// NonNullable } }` adds `.user` so /// downstream `ctx.user.id` accesses count as actor context. Each /// entry is the dotted base text (e.g. `"ctx.user"`, /// `"opts.ctx.user"`) that should match a subject's `base` when /// the subject's `field` is an id-like field name. Distinct from /// `self_actor_vars` (single-segment locals) because TRPC /// destructures route through a base chain, not a top-level /// binding. pub self_scoped_session_bases: HashSet, /// True when this JS/TS unit is the body of a NextAuth options /// factory: its function body contains an object literal with a /// `callbacks: { ... }` property whose nested entries name at /// least one NextAuth canonical callback (`signIn` / `session` / /// `jwt` / `redirect` / `authorize` / `authorized`). Set by /// `build_function_unit_with_meta` when the file structures the /// options as `export const X = (...) => ({ callbacks: { ... } })` /// (cal.com's `getOptions` shape) rather than the flat /// `export const authOptions = { callbacks: { ... } }` shape. /// Operations inside the inner callback bodies still get /// accumulated under the outer factory unit (the unit-creation /// pass does not descend into object-literal method shorthands), /// so the outer unit is the only place the auth analyser can /// recognise the identity-resolution context. Consulted by /// `is_nextauth_callback_unit` so the missing-ownership check /// suppresses operations inside the factory. pub is_nextauth_options_factory: bool, } /// Per-function summary of which positional parameters are /// auth-checked inside the function body. When a caller invokes this /// function with `subject` at position K, and the summary says param /// K has an auth check of kind `kind`, the caller's subject is /// considered covered as if it were checked at the call site. /// /// Serialises as a `Vec<(usize, AuthCheckKind)>` so same-shape on-disk /// rows survive across HashMap iteration-order changes; the in-memory /// type stays a HashMap for point-lookup efficiency. #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct AuthCheckSummary { #[serde( serialize_with = "serialize_param_auth_kinds", deserialize_with = "deserialize_param_auth_kinds" )] pub param_auth_kinds: HashMap, } fn serialize_param_auth_kinds( map: &HashMap, serializer: S, ) -> Result where S: serde::Serializer, { use serde::ser::SerializeSeq; let mut entries: Vec<(usize, AuthCheckKind)> = map.iter().map(|(idx, kind)| (*idx, *kind)).collect(); entries.sort_by_key(|(idx, _)| *idx); let mut seq = serializer.serialize_seq(Some(entries.len()))?; for entry in entries { seq.serialize_element(&entry)?; } seq.end() } fn deserialize_param_auth_kinds<'de, D>( deserializer: D, ) -> Result, D::Error> where D: serde::Deserializer<'de>, { let entries: Vec<(usize, AuthCheckKind)> = Vec::deserialize(deserializer)?; Ok(entries.into_iter().collect()) } #[derive(Debug, Clone)] pub struct RouteRegistration { pub framework: Framework, pub method: HttpMethod, pub path: String, pub middleware: Vec, pub handler_span: (usize, usize), pub handler_params: Vec, pub file: PathBuf, pub line: usize, pub unit_idx: usize, pub middleware_calls: Vec, } #[derive(Debug, Clone, Default)] pub struct AuthorizationModel { pub routes: Vec, pub units: Vec, /// Per-language web-framework presence signal used to gate the /// `is_external_input_param_name` arm of `unit_has_user_input_evidence`. /// /// `None` means detection did not run (single-file unit-test paths, /// languages without a framework gate yet). `Some(true)` means the /// project manifest or the file's imports name a web framework that /// matches this language ─ helper functions are plausibly reachable /// from a route handler, so the param-name heuristic stays on. /// `Some(false)` means detection ran and named no matching framework /// ─ the file lives in a project with no HTTP boundary, so internal /// helper params named `*_id` / `req` / `payload` are not user input. /// /// Currently set only for Rust by `extract_authorization_model`. pub lang_web_framework_signal: Option, /// Source language of the file the model was built from. Used by /// `unit_has_user_input_evidence` to apply per-language narrowing /// of the framework-request-name allow-list. Empty string when no /// language was supplied (single-file unit-test paths). pub lang: String, /// Cross-file router-dependency lift, keyed by **local** router /// variable name. Pre-populated by the orchestrator before /// extractors run, sourced from `GlobalSummaries.router_facts_by_module` /// for every project file whose `.include_router(.)` /// edge targets a router in the current file. FlaskExtractor merges /// these in alongside locally-declared `dependencies=[...]` so routes /// attached to a bare child router still inherit the parent's /// `Security(...)` / `Depends(...)` deps. Empty when no cross-file /// resolution applies (most files) or when global summaries are not /// available (unit-test / single-file scan paths). pub cross_file_router_deps: HashMap>, } impl AuthorizationModel { pub fn extend(&mut self, other: AuthorizationModel) { let unit_offset = self.units.len(); self.units.extend(other.units); self.routes .extend(other.routes.into_iter().map(|mut route| { route.unit_idx += unit_offset; route })); // Take the strongest signal across extractor outputs: `Some(true)` // wins over `Some(false)` wins over `None`. In practice every // extractor for a given file sees the same `framework_ctx + bytes` // so they all derive identical signals; this is just a defensive // merge. self.lang_web_framework_signal = max_signal( self.lang_web_framework_signal, other.lang_web_framework_signal, ); } } fn max_signal(a: Option, b: Option) -> Option { match (a, b) { (Some(true), _) | (_, Some(true)) => Some(true), (Some(false), _) | (_, Some(false)) => Some(false), _ => None, } }