use crate::auth_analysis::config::{AuthAnalysisRules, canonical_name, matches_name, strip_quotes}; use crate::auth_analysis::model::{ AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite, Framework, HttpMethod, OperationKind, RouteRegistration, SensitiveOperation, SinkClass, ValueRef, ValueSourceKind, }; use crate::labels::bare_method_name; use std::collections::{HashMap, HashSet}; use std::path::Path; use tree_sitter::Node; pub fn collect_top_level_units( root: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, model: &mut AuthorizationModel, ) { let file_meta = FileMeta::scan(root, bytes); for idx in 0..root.named_child_count() { let Some(child) = root.named_child(idx as u32) else { continue; }; collect_top_level_from_node(child, bytes, rules, model, &file_meta); } } fn collect_top_level_from_node( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, model: &mut AuthorizationModel, file_meta: &FileMeta, ) { match node.kind() { "function_declaration" | "function_definition" | "method_declaration" | "function_item" | "method" | "singleton_method" => { model.units.push(build_function_unit_with_meta( node, AnalysisUnitKind::Function, function_name(node, bytes), bytes, rules, Some(file_meta), )); } "decorated_definition" if decorated_definition_child(node) .is_some_and(|definition| definition.kind() == "function_definition") => { // Celery / Airflow / DRF background-task decorators // (`@instrumented_task`, `@shared_task`, `@app.task`, // `@celery.task`, `@beat.shared_task`, `@periodic_task`, // `@receiver`) mark a function as an internal scheduled // job, not a user-reachable handler. Any id-shaped // parameter name (`uuid: str`, `release_id: int`, // `voucher_code_ids: list[int]`) refers to an // internally-generated identifier, by construction the // task is invoked from `task.delay(...)` in already-auth- // checked code, never from an HTTP request directly. // // Skipping the unit at extract time stops the ownership / // token-override / partial-batch-authorization rules from // examining its operations. Real route handlers go // through the framework extractors (Flask / // FastAPI / Django / DRF) and re-add a `RouteHandler` // unit with auth_checks injected from the route // decorator, so this skip never hides a real handler. if python_decorated_definition_is_background_task(node, bytes) { return; } model.units.push(build_function_unit_with_meta( node, AnalysisUnitKind::Function, function_name(node, bytes), bytes, rules, Some(file_meta), )); } "lexical_declaration" | "variable_declaration" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if child.kind() == "variable_declarator" && let Some(unit) = function_unit_from_var_declarator(child, bytes, rules, Some(file_meta)) { model.units.push(unit); } } } "export_statement" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if child.is_named() { collect_top_level_from_node(child, bytes, rules, model, file_meta); } } } "program" | "source_file" | "module" | "class_declaration" | "class_body" | "body_statement" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; collect_top_level_from_node(child, bytes, rules, model, file_meta); } } // Ruby `class Foo; ... end`. Gate method descent through the // visibility / callback-target filter so private helpers and // `before_action :foo`-style callback targets are not emitted // as `Function` units (the upstream cause of // `rb.auth.missing_ownership_check` FPs on `set_X` row-fetch // helpers in mastodon / diaspora controllers). Non-method // class-body children (nested `class` / `module` / // `singleton_method`) still recurse normally. "class" => { let body = node.child_by_field_name("body"); let visibility = body .map(|b| ruby_method_visibility(b, bytes)) .unwrap_or_default(); let callbacks = body .map(|b| ruby_callback_target_names(b, bytes)) .unwrap_or_default(); for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if Some(child) == body { for body_idx in 0..child.named_child_count() { let Some(grand) = child.named_child(body_idx as u32) else { continue; }; if grand.kind() == "method" { let name = function_name(grand, bytes).unwrap_or_default(); if !name.is_empty() && ruby_method_is_callback_or_private( &name, &visibility, &callbacks, ) { continue; } } collect_top_level_from_node(grand, bytes, rules, model, file_meta); } } else { collect_top_level_from_node(child, bytes, rules, model, file_meta); } } } _ => {} } } #[derive(Copy, Clone, Eq, PartialEq, Debug)] pub enum RubyVisibility { Public, Protected, Private, } /// Walk a Ruby class body in source order and attribute each method /// definition's visibility, mirroring Ruby's `private` / `protected` / /// `public` directive semantics. /// /// Two directive forms are recognised: /// 1. **Bare** (`private`). Tree-sitter parses these as a top-level /// `(identifier "private")` sibling. Toggles default visibility /// for every subsequent method. /// 2. **Targeted** (`private :foo, :bar`). Parsed as /// `(call method:identifier arguments:argument_list ...)`. /// Explicitly marks the named methods; does not change default. pub fn ruby_method_visibility( body: Node<'_>, bytes: &[u8], ) -> std::collections::HashMap { use crate::auth_analysis::config::matches_name; use std::collections::HashMap; let mut map: HashMap = HashMap::new(); let mut current = RubyVisibility::Public; for child in named_children(body) { match child.kind() { "identifier" => { if let Some(vis) = ruby_visibility_for_directive(text(child, bytes).trim()) { current = vis; } } "call" => { let callee_full = call_name(child, bytes); let callee = bare_method_name(&callee_full); let Some(target_vis) = ruby_visibility_for_directive(callee) else { continue; }; let arguments = child.child_by_field_name("arguments"); let args: Vec> = arguments .map(|node| named_children(node)) .unwrap_or_default(); if args.is_empty() { current = target_vis; continue; } let mut targeted_any = false; for arg in args { for name in ruby_symbol_names(arg, bytes) { if name.is_empty() { continue; } map.insert(name, target_vis); targeted_any = true; } if arg.kind() == "method" && let Some(name_node) = arg.child_by_field_name("name") { let name = text(name_node, bytes); if !name.is_empty() { map.insert(name, target_vis); targeted_any = true; } } } if !targeted_any { current = target_vis; } let _ = matches_name; } "method" => { if let Some(name_node) = child.child_by_field_name("name") { let name = text(name_node, bytes); if !name.is_empty() { map.insert(name, current); } } } _ => {} } } map } fn ruby_visibility_for_directive(name: &str) -> Option { match name { "private" => Some(RubyVisibility::Private), "protected" => Some(RubyVisibility::Protected), "public" => Some(RubyVisibility::Public), _ => None, } } /// Collect names of methods registered as Rails filter callbacks /// (`before_action`, `after_action`, `around_action`, with their /// `prepend_*` / `append_*` / `skip_*` siblings, plus the legacy /// `*_filter` aliases). Such methods may be public but are invoked /// only as part of an action's request cycle, never as standalone /// routes — so emitting them as units produces spurious /// `missing_ownership_check` flags on the helper body's row fetches. pub fn ruby_callback_target_names( body: Node<'_>, bytes: &[u8], ) -> std::collections::HashSet { use std::collections::HashSet; let mut targets: HashSet = HashSet::new(); for child in named_children(body) { if child.kind() != "call" { continue; } let callee_full = call_name(child, bytes); let callee = bare_method_name(&callee_full); if !ruby_is_filter_callback_directive(callee) { continue; } let Some(arguments) = child.child_by_field_name("arguments") else { continue; }; for arg in named_children(arguments) { if arg.kind() == "pair" { continue; } for name in ruby_symbol_names(arg, bytes) { if name.is_empty() { continue; } targets.insert(name); } } } targets } fn ruby_is_filter_callback_directive(name: &str) -> bool { matches!( name, "before_action" | "after_action" | "around_action" | "prepend_before_action" | "prepend_after_action" | "prepend_around_action" | "append_before_action" | "append_after_action" | "append_around_action" | "skip_before_action" | "skip_after_action" | "skip_around_action" | "before_filter" | "after_filter" | "around_filter" | "prepend_before_filter" | "prepend_after_filter" | "prepend_around_filter" | "append_before_filter" | "append_after_filter" | "append_around_filter" | "skip_before_filter" | "skip_after_filter" | "skip_around_filter" ) } fn ruby_symbol_names(node: Node<'_>, bytes: &[u8]) -> Vec { match node.kind() { "simple_symbol" | "hash_key_symbol" | "identifier" | "string" => { vec![ strip_quotes(&text(node, bytes)) .trim_start_matches(':') .to_string(), ] } "array" => named_children(node) .into_iter() .flat_map(|child| ruby_symbol_names(child, bytes)) .collect(), _ => Vec::new(), } } pub fn ruby_method_is_callback_or_private( name: &str, visibility: &std::collections::HashMap, callbacks: &std::collections::HashSet, ) -> bool { let vis = visibility .get(name) .copied() .unwrap_or(RubyVisibility::Public); if vis != RubyVisibility::Public { return true; } callbacks.contains(name) } fn function_unit_from_var_declarator( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, file_meta: Option<&FileMeta>, ) -> Option { let value = node.child_by_field_name("value")?; if !is_function_like(value) { return None; } let name = node .child_by_field_name("name") .map(|n| text(n, bytes)) .filter(|s| !s.is_empty()); Some(build_function_unit_with_meta( value, AnalysisUnitKind::Function, name, bytes, rules, file_meta, )) } pub struct ResolvedHandler { pub unit_idx: usize, pub span: (usize, usize), pub params: Vec, pub line: usize, } pub fn visit_named_nodes(node: Node<'_>, visit: &mut impl FnMut(Node<'_>)) { visit(node); for child in named_children(node) { visit_named_nodes(child, visit); } } pub fn attach_route_handler( root: Node<'_>, handler_expr: Node<'_>, route_name: String, bytes: &[u8], rules: &AuthAnalysisRules, model: &mut AuthorizationModel, ) -> Option { let handler_node = resolve_handler_node(root, handler_expr, bytes)?; // `attach_route_handler` is called by route-aware extractors (express, // koa, fastify, axum, …) which already hold the file root. Build // the FileMeta once here so the JS/TS TRPC pre-scan only walks the // top-level decl set per file (instead of per route). let file_meta = FileMeta::scan(root, bytes); let line = handler_node.start_position().row + 1; let handler_span = span(handler_node); let definition = function_definition_node(handler_node); // Route-handler-aware param list: includes id-like Python typed // params (`dag_id: str`, `dag_run_id: str`) that // `collect_param_names`'s default branch filters out for internal // helpers. `inject_middleware_auth` clones this list into the // synthetic-subject set on each middleware-injected auth check so // `auth_check_covers_subject` matches the operation subjects // produced by the handler body (e.g. `filter_by(dag_id=dag_id, // run_id=dag_run_id)`). let route_handler_params = function_params_route_handler(definition, bytes); // **Promote-or-create.** Most route-aware extractors invoke // `collect_top_level_units` first, which already produced a // [`AnalysisUnitKind::Function`] unit covering this same span. // Pushing a brand-new RouteHandler unit duplicates the analysis // surface, `check_ownership_gaps` then evaluates the operation // twice and emits the FP from the (un-injected) Function unit even // when the RouteHandler unit's middleware-derived auth check // suppresses it. Promoting the existing unit keeps the model // single-tenanted per handler so downstream auth-check injection // (FastAPI `dependencies=[Depends(...)]`, Express middleware, ...) // lands on the unit that's evaluated. if let Some((idx, existing)) = model .units .iter_mut() .enumerate() .find(|(_, u)| u.kind == AnalysisUnitKind::Function && u.span == handler_span) { existing.kind = AnalysisUnitKind::RouteHandler; existing.name = Some(route_name); existing.params = route_handler_params.clone(); return Some(ResolvedHandler { unit_idx: idx, span: handler_span, params: route_handler_params, line, }); } let unit_idx = model.units.len(); let mut unit = build_function_unit_with_meta( handler_node, AnalysisUnitKind::RouteHandler, Some(route_name), bytes, rules, Some(&file_meta), ); unit.params = route_handler_params.clone(); model.units.push(unit); Some(ResolvedHandler { unit_idx, span: handler_span, params: route_handler_params, line, }) } /// Per-file metadata gathered once at the top of /// [`collect_top_level_units`] / [`attach_route_handler`] and passed /// down through unit construction. Currently carries the set of TS /// type-alias names whose body references a TRPC-marker type; future /// fields can be added without changing the per-unit signature. #[derive(Default, Debug, Clone)] pub struct FileMeta { pub trpc_alias_names: HashSet, } impl FileMeta { pub fn scan(root: Node<'_>, bytes: &[u8]) -> Self { let mut trpc_alias_names = HashSet::new(); scan_trpc_aliases_visit(root, bytes, &mut trpc_alias_names); Self { trpc_alias_names } } } pub fn push_route_registration( model: &mut AuthorizationModel, framework: Framework, method: HttpMethod, path: String, file: &Path, handler: ResolvedHandler, middleware_calls: Vec, ) { model.routes.push(RouteRegistration { framework, method, path, middleware: middleware_names(&middleware_calls), handler_span: handler.span, handler_params: handler.params, file: file.to_path_buf(), line: handler.line, unit_idx: handler.unit_idx, middleware_calls, }); } pub fn middleware_names(middleware_calls: &[CallSite]) -> Vec { middleware_calls .iter() .map(|call| call.name.clone()) .collect() } pub fn resolve_handler_node<'tree>( root: Node<'tree>, handler_expr: Node<'tree>, bytes: &[u8], ) -> Option> { if is_function_like(handler_expr) { return Some(handler_expr); } if !is_handler_reference(handler_expr) { return None; } let candidate = callee_name(handler_expr, bytes); let name = candidate.rsplit('.').next().unwrap_or(&candidate); if name.is_empty() { return None; } find_top_level_function_node(root, name, bytes) } fn find_top_level_function_node<'tree>( root: Node<'tree>, name: &str, bytes: &[u8], ) -> Option> { for idx in 0..root.named_child_count() { let Some(child) = root.named_child(idx as u32) else { continue; }; if let Some(found) = find_top_level_function_node_in_child(child, name, bytes) { return Some(found); } } None } fn find_top_level_function_node_in_child<'tree>( node: Node<'tree>, name: &str, bytes: &[u8], ) -> Option> { match node.kind() { "function_declaration" | "function_definition" | "method_declaration" => { if function_name(node, bytes).as_deref() == Some(name) { Some(node) } else { None } } "function_item" => { if function_name(node, bytes).as_deref() == Some(name) { Some(node) } else { None } } "decorated_definition" => { let definition = decorated_definition_child(node)?; if definition.kind() == "function_definition" && function_name(node, bytes).as_deref() == Some(name) { Some(node) } else { None } } "lexical_declaration" | "variable_declaration" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if child.kind() != "variable_declarator" { continue; } let Some(var_name) = child.child_by_field_name("name") else { continue; }; if text(var_name, bytes) != name { continue; } let Some(value) = child.child_by_field_name("value") else { continue; }; if is_function_like(value) { return Some(value); } } None } "export_statement" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if child.is_named() && let Some(found) = find_top_level_function_node_in_child(child, name, bytes) { return Some(found); } } None } "program" | "source_file" | "class_declaration" | "class_body" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if let Some(found) = find_top_level_function_node_in_child(child, name, bytes) { return Some(found); } } None } _ => None, } } pub fn build_function_unit( node: Node<'_>, kind: AnalysisUnitKind, name: Option, bytes: &[u8], rules: &AuthAnalysisRules, ) -> AnalysisUnit { build_function_unit_with_meta(node, kind, name, bytes, rules, None) } /// Internal variant of [`build_function_unit`] that accepts a /// pre-computed file-level [`FileMeta`]. When `file_meta` is /// `Some`, its `trpc_alias_names` set is copied into `UnitState` /// once per unit so the per-parameter pre-pass doesn't re-scan the /// source-file root. Pre-built `FileMeta` is required to keep /// `tests/hostile_input_tests::many_small_functions_do_not_explode` /// inside its 15s budget on N×N files. pub fn build_function_unit_with_meta( node: Node<'_>, kind: AnalysisUnitKind, name: Option, bytes: &[u8], rules: &AuthAnalysisRules, file_meta: Option<&FileMeta>, ) -> AnalysisUnit { let definition = function_definition_node(node); let params = function_params(definition, bytes); // Structurally-typed bounded params: walk the parameter list and // mark any param whose type annotation resolves to an integer or // boolean scalar (`int`, `bool`, `Optional[int]`, `list[int]`, // `Iterable[int]`, …) as typed-bounded. Mirrors the SSA-derived // `apply_typed_bounded_params` lift but runs even when the SSA // var_types map isn't supplied (internal helpers analysed without // a CFG, ad-hoc unit lookups, …). Without this, a Python helper // signature like `get_release_project_new_group_count(environment_ids: // list[int], project_ids: list[int])` would drop into the // ownership rule because the param names match `is_id_like` even // though the static type proves the values are bounded numerics // that can't carry a SQL/file/shell payload. let preseeded_bounded = python_int_bounded_typed_params(definition, bytes); let line = node.start_position().row + 1; let mut state = UnitState::default(); // Seed Go's method-receiver name (`func (c *Cache) ...` → `c`) into // `non_sink_vars` so calls of the form `c.foo(..)` / // `c.field.foo(..)` route through the in-memory-local sink class // and skip the verb-name fallback. These are intra-struct // dispatches; without type tracking, the auth analyser cannot tell // a `*Cache` field-call from a `*sql.DB` call by name alone, so we // err on the safe side per the deferred memo // (`project_realrepo_hugo.md`). Only Go's `method_declaration` // exposes a `receiver` field, Rust/Java instance methods route // through `self`/`this` keywords and are unaffected. if let Some(receiver_name) = method_receiver_name(definition, bytes) { state.non_sink_vars.insert(receiver_name); } if let Some(meta) = file_meta { state.trpc_alias_names = meta.trpc_alias_names.clone(); } collect_unit_state(node, bytes, rules, &mut state); dedup_value_refs(&mut state.value_refs); let context_inputs: Vec = state .value_refs .iter() .filter(|value| { matches!( value.source_kind, ValueSourceKind::RequestParam | ValueSourceKind::RequestBody | ValueSourceKind::RequestQuery | ValueSourceKind::Session ) }) .cloned() .collect(); let is_nextauth_options_factory = body_returns_nextauth_options(node, bytes); AnalysisUnit { kind, name, span: span(node), params, context_inputs, call_sites: state.call_sites, auth_checks: state.auth_checks, operations: state.operations, value_refs: state.value_refs, condition_texts: state.condition_texts, line, row_field_vars: state.row_field_vars, var_alias_chain: state.var_alias_chain, row_population_data: state.row_population_data, self_actor_vars: state.self_actor_vars, self_actor_id_vars: state.self_actor_id_vars, authorized_sql_vars: state.authorized_sql_vars, const_bound_vars: state.const_bound_vars, typed_bounded_vars: preseeded_bounded, typed_bounded_dto_fields: std::collections::HashMap::new(), self_scoped_session_bases: state.self_scoped_session_bases, is_nextauth_options_factory, } } /// True when the function body at `node` is a NextAuth authority /// surface. Recognises two shapes: /// /// 1. An object literal with a `callbacks: { ... }` property whose /// nested entries name at least one canonical NextAuth callback /// (`signIn`, `session`, `jwt`, `redirect`, `authorize`, /// `authorized`). Matches the cal.com idiom /// `export const getOptions = (...) => ({ callbacks: { ... } })`. /// /// 2. An object literal whose entries name at least one distinctive /// NextAuth Adapter method (`getUserByAccount`, `linkAccount`, /// `unlinkAccount`, `createVerificationToken`, /// `useVerificationToken`, `getSessionAndUser`) AND at least one /// other canonical Adapter method. Matches the cal.com idiom /// `function CalComAdapter(prisma): Adapter { return { ... } }` /// where the returned Adapter object holds the implementation. /// /// In both shapes the inner method bodies are NOT enumerated as /// separate units (object method shorthands stay anonymous), so every /// identity-resolution operation from the inner methods accumulates /// onto the outer factory's unit. Without this flag the outer unit's /// name is `getOptions` / `CalComAdapter`, so `is_nextauth_callback_unit` /// cannot match by name and the missing-ownership rule fires on every /// identity lookup inside the surface. /// /// JS/TS-only by construction (matches `object` / `pair` / /// `method_definition` / `shorthand_property_identifier` node kinds). /// Returns false on other languages. fn body_returns_nextauth_options(node: Node<'_>, bytes: &[u8]) -> bool { fn scan(node: Node<'_>, bytes: &[u8]) -> bool { if matches!(node.kind(), "object" | "object_expression") && (object_has_nextauth_callbacks_property(node, bytes) || object_is_nextauth_adapter(node, bytes)) { return true; } for child in named_children(node) { if scan(child, bytes) { return true; } } false } scan(node, bytes) } fn object_has_nextauth_callbacks_property(node: Node<'_>, bytes: &[u8]) -> bool { for entry in named_children(node) { let Some((key_text, value_node)) = object_entry_key_value(entry, bytes) else { continue; }; if key_text != "callbacks" { continue; } if matches!(value_node.kind(), "object" | "object_expression") && object_contains_nextauth_callback_method(value_node, bytes) { return true; } } false } fn object_contains_nextauth_callback_method(node: Node<'_>, bytes: &[u8]) -> bool { for entry in named_children(node) { if entry.kind() == "method_definition" { if let Some(name_node) = entry.child_by_field_name("name") { let name = text(name_node, bytes); if is_nextauth_callback_name(&name) { return true; } } continue; } if let Some((key_text, _value_node)) = object_entry_key_value(entry, bytes) && is_nextauth_callback_name(&key_text) { return true; } } false } fn object_entry_key_value<'a>(entry: Node<'a>, bytes: &[u8]) -> Option<(String, Node<'a>)> { match entry.kind() { "pair" => { let key = entry.child_by_field_name("key")?; let value = entry.child_by_field_name("value")?; Some((object_key_text(key, bytes), value)) } "method_definition" => { let name = entry.child_by_field_name("name")?; Some((text(name, bytes), entry)) } _ => None, } } fn object_key_text(node: Node<'_>, bytes: &[u8]) -> String { match node.kind() { "property_identifier" | "identifier" | "shorthand_property_identifier" => text(node, bytes), "string" | "string_literal" => { let raw = text(node, bytes); raw.trim_matches(|c| c == '"' || c == '\'' || c == '`') .to_string() } "computed_property_name" => { if let Some(inner) = node.named_child(0) { object_key_text(inner, bytes) } else { String::new() } } _ => text(node, bytes), } } fn is_nextauth_callback_name(name: &str) -> bool { matches!( name, "signIn" | "session" | "jwt" | "redirect" | "authorize" | "authorized" ) } /// True when the object literal at `node` looks like a NextAuth /// Adapter implementation: at least one distinctive Adapter method /// name AND at least two canonical Adapter method names overall. /// The distinctive subset (`getUserByAccount`, `linkAccount`, /// `unlinkAccount`, `createVerificationToken`, `useVerificationToken`, /// `getSessionAndUser`) names operations that are unique to the /// NextAuth Adapter contract; the broader canonical set (createUser / /// getUser / getUserByEmail / updateUser / deleteUser / createSession / /// updateSession / deleteSession) overlaps with generic CRUD repos, so /// the distinctive-name witness gates the recognition. fn object_is_nextauth_adapter(node: Node<'_>, bytes: &[u8]) -> bool { let mut distinctive_seen = false; let mut total = 0_usize; for entry in named_children(node) { let Some(key_text) = adapter_object_entry_key(entry, bytes) else { continue; }; if !is_nextauth_adapter_method_name(&key_text) { continue; } total += 1; if is_nextauth_adapter_distinctive_method_name(&key_text) { distinctive_seen = true; } } distinctive_seen && total >= 2 } fn adapter_object_entry_key(entry: Node<'_>, bytes: &[u8]) -> Option { match entry.kind() { "method_definition" => entry .child_by_field_name("name") .map(|n| object_key_text(n, bytes)), "pair" => entry .child_by_field_name("key") .map(|n| object_key_text(n, bytes)), "shorthand_property_identifier" => Some(text(entry, bytes)), _ => None, } } fn is_nextauth_adapter_method_name(name: &str) -> bool { matches!( name, "createUser" | "getUser" | "getUserByEmail" | "getUserByAccount" | "updateUser" | "deleteUser" | "linkAccount" | "unlinkAccount" | "createSession" | "getSessionAndUser" | "updateSession" | "deleteSession" | "createVerificationToken" | "useVerificationToken" ) } fn is_nextauth_adapter_distinctive_method_name(name: &str) -> bool { matches!( name, "getUserByAccount" | "linkAccount" | "unlinkAccount" | "createVerificationToken" | "useVerificationToken" | "getSessionAndUser" ) } #[derive(Default)] struct UnitState { call_sites: Vec, auth_checks: Vec, operations: Vec, value_refs: Vec, condition_texts: Vec, /// Local variable names bound to a known non-sink collection /// (e.g. `HashMap::new()`, `Vec::with_capacity(_)`, `vec![]`, /// or via an explicit type annotation). Consulted by /// `collect_call` so method calls on these bindings /// (`map.insert(…)`, `set.remove(…)`) aren't classified as /// auth-relevant Read/Mutation operations. non_sink_vars: HashSet, /// Map from local variable name to the row binding it was read /// from (`let owner_id = existing.get("user_id")` → `owner_id → /// existing`). Powers A2's row-level ownership-equality check so /// downstream uses of fields from the same row are implicitly /// covered by a check on the row's owner column. row_field_vars: HashMap, /// Full chain text for `let X = BASE.FIELD` shapes (or /// transitively through method calls / try / await wrappers when /// the value resolves to a member access). Stored alongside /// `row_field_vars` so the row-population reverse-walk can match /// plain-identifier sink subjects against population args by /// their original chain text. See /// [`crate::auth_analysis::model::AnalysisUnit::var_alias_chain`]. var_alias_chain: HashMap, /// Per row-binding metadata from the `let ROW = CALL(...)` site: /// the declaration line and the set of `ValueRef`s appearing in /// the call's arguments. When an A2 AuthCheck fires against /// `ROW`, we back-date the check to this line and merge these /// argument value-refs into its subjects so the original fetch /// (e.g. `db.query_one(..., &[doc_id])`) is also covered. row_population_data: HashMap)>, /// A3: local variables bound to the authenticated actor. /// Populated from `let V = require_auth(..).await?` (or any call /// matching `rules.is_login_guard` / `rules.is_authorization_check`) /// and from typed route-handler parameters whose type names the /// authenticated user (`CurrentUser`, `AuthUser`, …). Copied onto /// `AnalysisUnit.self_actor_vars` so `checks.rs` can recognize /// `V.id` as actor context rather than a foreign scoped id. self_actor_vars: HashSet, /// Transitive copies of the authenticated actor's id field /// (`let X = V.id` / `let X = (V.id as ..).into()` / /// `let X = V.user_id` / `V.uid`). Populated by /// `collect_self_actor_id_binding`. Copied onto /// `AnalysisUnit.self_actor_id_vars` so subjects whose name appears /// here count as actor context, closes the FP where a route /// handler does `let uid = user.id; query_all(.., &[uid])` and the /// engine sees `uid` only as a plain scoped id. self_actor_id_vars: HashSet, /// B3: local variables bound (directly or transitively) to a /// SQL query whose literal text is auth-gated. Populated by /// `collect_sql_authorized_binding` and the `for ROW in X` / /// `let Y = ROW.method(..)` propagation paths inside /// `collect_row_field_binding` and `collect_for_row_binding`. authorized_sql_vars: HashSet, /// Local variables whose declaration binds them to a string, /// numeric, or boolean literal, `id := "id"` / `let id = "1"` / /// `String id = "id";`. These cannot be user-controlled and so /// must not be treated as scoped-identifier subjects by /// `is_relevant_target_subject`. Closes the gin/context_test.go /// FP where `id := "id"; c.AddParam(id, value)` triggered /// `go.auth.missing_ownership_check` because the local `id` /// matched `is_id_like` but had no actor-context exemption. const_bound_vars: HashSet, /// Dynamic per-unit session-base set lifted into the /// `AnalysisUnit` of the same name. Populated by /// [`collect_trpc_ctx_param`] when a TS parameter's type /// references a TRPC-shaped Options alias. See the field doc on /// [`crate::auth_analysis::model::AnalysisUnit::self_scoped_session_bases`]. self_scoped_session_bases: HashSet, /// File-level set of TS type-alias names whose body references a /// TRPC-marker type (`TrpcSessionUser` etc.). Populated once per /// unit at the top of [`build_function_unit`] by walking up to /// the source-file root and scanning every /// `type_alias_declaration` / `interface_declaration`. Read by /// [`collect_trpc_ctx_param`] to decide whether a parameter's /// type annotation (often just an alias name like `GetOptions`) /// resolves to a TRPC handler signature. Empty for non-TS /// languages, the scanner only matches TS-grammar node kinds. trpc_alias_names: HashSet, } fn collect_unit_state( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState, ) { match node.kind() { "call_expression" | "call" | "method_invocation" | "method_call_expression" => { collect_call(node, bytes, rules, state) } "while_statement" | "do_statement" | "while_modifier" | "until_modifier" | "while_expression" | "unless" | "unless_modifier" => { if let Some(condition) = node.child_by_field_name("condition") { collect_condition(condition, bytes, rules, state); } } "if_statement" | "elif_clause" | "if_expression" | "if" | "if_modifier" => { if let Some(condition) = node.child_by_field_name("condition") { collect_condition(condition, bytes, rules, state); } detect_ownership_equality_check(node, bytes, state); } "conditional_expression" => collect_condition(node, bytes, rules, state), "let_declaration" => { collect_non_sink_binding(node, bytes, rules, state); collect_row_field_binding(node, bytes, state); collect_member_alias_binding(node, bytes, state); collect_row_population(node, bytes, state); collect_self_actor_binding(node, bytes, rules, state); collect_self_actor_id_binding(node, bytes, state); collect_sql_authorized_binding(node, bytes, rules, state); propagate_sql_authorized_through_field_read(node, bytes, state); collect_const_string_binding(node, bytes, state); } // JS/TS `variable_declarator` inside `lexical_declaration` // (`const X = ...`, `let X = ...`), exposes `name` + `value` // fields. Run the same self-actor / self-actor-id binding // recognition as the Rust `let_declaration` arm above so the // session-self-actor copy chain (`const session = await // getServerSession(...)`; `const userId = session.user.id`) // populates `self_actor_vars` / `self_actor_id_vars`. "variable_declarator" => { collect_self_actor_binding(node, bytes, rules, state); collect_self_actor_id_binding(node, bytes, state); collect_const_string_binding(node, bytes, state); // JS/TS row-fetch declarators (`const webhook = await // repo.findById(id)`) need row-population recognition so // the post-fetch ownership-equality detector can attribute // back to the row's let line. `collect_row_population` // accepts the `name` field used by `variable_declarator`. collect_row_population(node, bytes, state); } // Go `id := "id"` / Python `id = "id"` / Java `String id = "id";` / // Ruby `id = "id"`, language-specific binding nodes that the // let_declaration arm above doesn't catch. Const-only, never // marks self_actor / row_field / sql vars (those need richer // right-hand-side analysis already provided by the // let_declaration arm). "short_var_declaration" | "const_declaration" | "var_declaration" | "var_spec" | "lexical_declaration" | "local_variable_declaration" | "assignment" | "assignment_expression" | "augmented_assignment" | "expression_statement" => { collect_const_string_binding(node, bytes, state); // Ruby `@issue = Issue.find(params[:id])` is the canonical // controller idiom: instance-variable assignment whose RHS // is a row-fetch call. The let_declaration arm above // doesn't fire for this kind, so register the row // population separately. `collect_row_population` reads // either `pattern`/`value` or `left`/`right`, so it works // unchanged for Ruby `assignment` once the LHS recognises // `instance_variable`. if matches!(node.kind(), "assignment" | "assignment_expression") { collect_row_population(node, bytes, state); // Python `verified_ids = set()` / // `cache: dict[str,int] = {}` and JS analogues bind a // local non-sink container. `collect_non_sink_binding` // accepts both `pattern`/`value` and `left`/`right` // field names so the same recognition path covers // these assignment-node shapes. collect_non_sink_binding(node, bytes, rules, state); } } "for_expression" => { collect_for_row_binding(node, bytes, state); } "parameter" => { collect_typed_extractor_self_actor(node, bytes, state); } // TS `required_parameter` / `optional_parameter`, the analogous // arm to Rust's `parameter`. Recognise TRPC-shaped Options // params (`{ ctx, input }: GetOptions`) and add the destructured // ctx-base to `self_scoped_session_bases` so downstream // `ctx.user.id` accesses count as actor context. "required_parameter" | "optional_parameter" => { collect_trpc_ctx_param(node, bytes, state); } _ => {} } // O(1) per-node shallow value-ref emission, then descend. // // Pre-fix this site called `extract_value_refs(node, bytes)` which walks // node's entire subtree. Combined with the recursion below — which // visits every descendant and re-runs the same call at each level — the // total work was O(N * subtree_size) ≈ O(N²) per function body. On // mm/channels/app the inner-walk dominated `build_function_unit_with_meta` // and its descendants (~17%+15%+11% of total wall-clock split across // `build_function_unit_with_meta`, `collect_unit_state`, and // `extract_value_refs` in the post-shared-model profile, 2026-05-04). // // The recursion below already visits every descendant once. Emitting a // shallow value-ref per node — only the ref the node itself represents — // produces the same SET of value-refs after `dedup_value_refs` runs in // `build_function_unit_with_meta`, because every ref-emitting kind // (member chain, subscript, accessor call, identifier) is reachable as a // single node visit. Public callers of `extract_value_refs` (e.g. // `collect_call`, `collect_condition`, assignment-side extraction) keep // the deep walk: they intentionally want refs from the full subtree // rooted at the argument they pass. append_shallow_value_ref(node, bytes, &mut state.value_refs); for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; collect_unit_state(child, bytes, rules, state); } } /// Per-node value-ref emission used inside `collect_unit_state`'s tree walk. /// /// Returns the value-ref the node itself represents (a member chain, a /// subscript, an accessor call's chain, or an identifier-like leaf), without /// descending into descendants. The caller's existing AST recursion handles /// children; relying on that recursion turns the previously O(N²) per-body /// walk into O(N). fn append_shallow_value_ref(node: Node<'_>, bytes: &[u8], refs: &mut Vec) { match node.kind() { "member_expression" | "attribute" | "selector_expression" | "field_expression" | "field_access" => { if let Some(value) = member_value_ref(node, bytes) { refs.push(value); } } "subscript_expression" | "subscript" | "element_reference" | "index_expression" => { if let Some(value) = subscript_value_ref(node, bytes) { refs.push(value); } } "call_expression" | "call" | "method_invocation" | "method_call_expression" => { // Accessor-call chains (`cache.get(key)`, `req.params.id`) absorb // into a single chain ValueRef; non-accessor calls return None // here and rely on recursion to visit `function` + arg children // so each leaf identifier emits its own ref. if let Some(value) = call_value_ref(node, bytes) { refs.push(value); } } // Bare identifier and Ruby `@foo` / `@@foo` / `$foo` leaves: emit a // single Identifier-kind ValueRef. Mirrors `extract_value_refs`'s // identifier arm so `dedup_value_refs` collapses any cross-path // duplicates against existing emissions from sibling deep walks // (e.g. `collect_condition`'s `extract_value_refs(condition)`). "identifier" | "instance_variable" | "class_variable" | "global_variable" => { refs.push(ValueRef { source_kind: ValueSourceKind::Identifier, name: text(node, bytes), base: None, field: None, index: None, span: span(node), }); } _ => {} } } fn collect_call(node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState) { let callee = call_name(node, bytes); if callee.is_empty() { return; } let args = node .child_by_field_name("arguments") .map(named_children) .unwrap_or_default(); let mut subjects: Vec = call_receiver_subjects(node, bytes); subjects.extend( args.iter() .flat_map(|arg| extract_value_refs(*arg, bytes)) .collect::>(), ); let line = node.start_position().row + 1; let string_args: Vec = args.iter().map(|arg| text(*arg, bytes)).collect(); let args_value_refs: Vec> = args .iter() .map(|arg| extract_value_refs(*arg, bytes)) .collect(); let node_text = text(node, bytes); state.call_sites.push(CallSite { name: callee.clone(), args: string_args.clone(), span: span(node), args_value_refs, }); if rules.is_authorization_check(&callee) { state.auth_checks.push(AuthCheck { kind: classify_auth_check(&callee, rules), callee: callee.clone(), subjects: subjects.clone(), span: span(node), line, args: string_args, condition_text: None, is_route_level: false, }); } // Split classification into OperationKind (what verb?) and // SinkClass (what resource?). The sink class drives the // ownership gate; OperationKind is kept for partial-batch / stale- // session checks that care about read-vs-mutation semantics. let (op_kind, sink_class) = if rules.is_token_lookup_call(&callee, &node_text) { (Some(OperationKind::TokenLookup), None) } else if let Some(class) = rules.classify_sink_class(&callee, &state.non_sink_vars) { let op = match class { SinkClass::DbCrossTenantRead => OperationKind::Read, // InMemoryLocal: keep the verb for telemetry but the // ownership gate ignores this class. SinkClass::InMemoryLocal => { if rules.is_mutation(&callee) { OperationKind::Mutation } else { OperationKind::Read } } // Publish / outbound / cache / DB mutation, treat as // write-shaped by default unless the callee name is a // read verb (e.g. `cache.get(tenant_id)`). _ => { if rules.is_read(&callee) && !rules.is_mutation(&callee) { OperationKind::Read } else { OperationKind::Mutation } } }; (Some(op), Some(class)) } else { (None, None) }; if let Some(kind) = op_kind { state.operations.push(SensitiveOperation { kind, sink_class, callee, subjects, span: span(node), line, text: node_text, }); } } fn collect_condition( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState, ) { let condition_text = text(node, bytes); if condition_text.is_empty() { return; } state.condition_texts.push(condition_text.clone()); let subjects = extract_value_refs(node, bytes); let line = node.start_position().row + 1; if rules.has_expiry_field(&condition_text) { state.auth_checks.push(AuthCheck { kind: AuthCheckKind::TokenExpiry, callee: "(condition)".into(), subjects: subjects.clone(), span: span(node), line, args: Vec::new(), condition_text: Some(condition_text.clone()), is_route_level: false, }); } if rules.has_recipient_field(&condition_text) { state.auth_checks.push(AuthCheck { kind: AuthCheckKind::TokenRecipient, callee: "(condition)".into(), subjects, span: span(node), line, args: Vec::new(), condition_text: Some(condition_text), is_route_level: false, }); } } /// Detect bindings that produce a known non-sink collection /// (e.g. `HashMap::new()`, `Vec::with_capacity(_)`, `vec![]`, an /// explicit type annotation like `: HashMap<_, _>`, or Python's /// bare `set()` / `dict()` / `collections.defaultdict(list)`). /// Registered variable names are consulted by `collect_call` so /// later method calls on those bindings (`map.insert(..)`, /// `set.remove(..)`, `verified_ids.update(..)`) aren't treated as /// auth-relevant Read/Mutation operations. /// /// Field names accepted: Rust `let_declaration` uses `pattern` / /// `value`; Python `assignment` and JS `assignment_expression` use /// `left` / `right`. Both shapes share the same recognition path. fn collect_non_sink_binding( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState, ) { let Some(pattern) = node .child_by_field_name("pattern") .or_else(|| node.child_by_field_name("left")) else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } if let Some(ty_node) = node.child_by_field_name("type") { let ty_text = text(ty_node, bytes); if rules.is_non_sink_receiver_type(&ty_text) { state.non_sink_vars.insert(var_name); return; } } if let Some(value) = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("right")) && value_is_non_sink_constructor(value, bytes, rules) { state.non_sink_vars.insert(var_name); } } fn first_identifier_name(node: Node<'_>, bytes: &[u8]) -> Option { if matches!( node.kind(), "identifier" | "shorthand_property_identifier_pattern" // Ruby `@foo` instance vars and `@@foo` class vars: // Rails controllers populate the row via `@issue = // Issue.find(...)`, so the row var is the *full* `@issue` // text, chain_root in checks.rs strips on `.` only, so an // auth check on `@issue.visible?` resolves to root `@issue`, // matching the row var. | "instance_variable" | "class_variable" // Ruby globals `$foo` are unusual but match the same // handler-state idiom, kept symmetric with @-vars. | "global_variable" ) { let value = text(node, bytes); if !value.is_empty() { return Some(value); } } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if let Some(found) = first_identifier_name(child, bytes) { return Some(found); } } None } fn value_is_non_sink_constructor(node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules) -> bool { match node.kind() { "call_expression" | "call" | "method_invocation" | "method_call_expression" => { let callee = call_name(node, bytes); rules.is_non_sink_constructor_callee(&callee) } "macro_invocation" => { let name = node .child_by_field_name("macro") .map(|m| text(m, bytes)) .unwrap_or_default(); let last = name.rsplit("::").next().unwrap_or(&name); matches!(last, "vec" | "smallvec") } "try_expression" | "await_expression" | "reference_expression" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if value_is_non_sink_constructor(child, bytes, rules) { return true; } } false } _ => false, } } /// Track `let V = ROW.method(..)` or `let V = ROW.field` so later /// row-level ownership-equality checks on `V` (or on another var read /// from the same `ROW`) can be attributed back to `ROW`. See /// `detect_ownership_equality_check` for the consumer. fn collect_row_field_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { let Some(pattern) = node.child_by_field_name("pattern") else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(value) = node.child_by_field_name("value") else { return; }; let Some(row_name) = extract_row_receiver_name(value, bytes) else { return; }; state.row_field_vars.insert(var_name, row_name); } /// Track `let X = BASE.FIELD` (or `BASE.FIELD?` / `(BASE.FIELD).await`) /// so a downstream sink whose subject is the bare identifier `X` can be /// matched against row-population args that recorded the original /// chain text. Distinct from `collect_row_field_binding`, which only /// records the receiver name (loses the field). /// /// Only fires when the value resolves to a member-access node and the /// resulting chain has at least two segments (`req.community_id`, /// `data.user.id`, …), single-ident receivers are uninteresting and a /// chain of length one would just duplicate the binding's own name. /// /// Defensive: never overwrites an existing entry, first writer wins. /// Re-binding the same local name (rare in idiomatic Rust) is treated /// as a separate variable scope; the rest of the analysis already /// works on the first binding seen during a top-down walk. fn collect_member_alias_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { let Some(pattern) = node.child_by_field_name("pattern") else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(value) = node.child_by_field_name("value") else { return; }; let target = unwrap_try_like(value); if !matches!( target.kind(), "member_expression" | "attribute" | "selector_expression" | "field_expression" | "field_access" ) { return; } let chain = member_chain(target, bytes); if chain.len() < 2 { return; } let chain_text = chain.join("."); state.var_alias_chain.entry(var_name).or_insert(chain_text); } /// Record the line and argument value-refs of a `let ROW = CALL(..)`. /// When A2 synthesises an `AuthCheck` on `ROW` later, we back-date the /// check to this line and merge the args into its subjects so the /// original fetch (e.g. `db.query_one(.., &[doc_id])`) is also covered. /// /// The recorded line is the **call**'s start line, not the /// `let_declaration`'s. These differ for multi-line bindings such as /// /// ```ignore /// let orig = // let_declaration starts here /// CommentView::read(&mut pool, comment_id, ..).await?; // call starts here /// ``` /// /// `has_row_fetch_exemption` looks for a row var "declared at this /// op's line", where `op.line` is the call site. Recording the /// let-line caused the multi-line shape to fall through the exemption ///, surfaced on lemmy's `comment/lock.rs:31`, where every fetch-then- /// check route handler that wraps the read across two lines was /// flagged despite a textual auth check on the resulting row. fn collect_row_population(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { // Most languages expose `pattern`/`value` on let / const / var // declarations. Ruby `assignment` uses `left`/`right` instead. // JS/TS `variable_declarator` uses `name`/`value`. Accept any of // them; when none is present the node isn't an RHS-bound binding // and we skip. let Some(pattern) = node .child_by_field_name("pattern") .or_else(|| node.child_by_field_name("name")) .or_else(|| node.child_by_field_name("left")) else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(value) = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("right")) else { return; }; let call_node = unwrap_try_like(value); if !matches!( call_node.kind(), "call_expression" | "call" | "method_invocation" | "method_call_expression" ) { return; } let args = call_node .child_by_field_name("arguments") .map(named_children) .unwrap_or_default(); let mut arg_refs: Vec = Vec::new(); for arg in args { arg_refs.extend(extract_value_refs(arg, bytes)); } let call_line = call_node.start_position().row + 1; state .row_population_data .insert(var_name, (call_line, arg_refs)); } /// A3: record `let V = CALL(..)` (or `.await?` / `?` / reference /// chains wrapping such a call) where `CALL` matches a configured /// login-guard or authorization-check name. `V` is then treated as the /// authenticated actor, `V.id`-shaped subjects are actor context and /// shouldn't be flagged as foreign scoped IDs. fn collect_self_actor_binding( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState, ) { // Rust `let_declaration` exposes `pattern`; JS/TS // `variable_declarator` exposes `name`. Try both so the same // recognition fires across languages. let Some(pattern) = node .child_by_field_name("pattern") .or_else(|| node.child_by_field_name("name")) else { return; }; let Some(value) = node.child_by_field_name("value") else { return; }; // Destructuring: `const { user } = ctx.session;` / // `const { user } = await getServerSession();` / // `const { id } = req.user;`. These bind LOCAL variables that are // semantically the actor (or the actor's id), and the existing // single-ident path can't see them because `first_identifier_name` // either picks the wrong key when several are destructured or // misses the session-container RHS shape entirely. if pattern.kind() == "object_pattern" { collect_destructured_self_actor_binding(pattern, value, bytes, rules, state); return; } let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } if value_is_self_actor_call(value, bytes, rules) { state.self_actor_vars.insert(var_name); } } /// Pattern is `object_pattern` (JS/TS destructure). Walk the keys and /// classify the RHS to decide what each destructured local should /// register as: /// /// * `const { user } = ctx.session` / `const { user } = await /// getServerSession()`, RHS is a session container, so a /// destructured `user` (or `currentUser`) becomes the unit's /// self-actor binding. /// * `const { id } = req.user` / `const { userId } = session.user` , /// RHS is the canonical authed-user base from /// `is_self_scoped_session_base_text`, so a destructured `id` / /// `userId` / `user_id` / `uid` becomes a self-actor-id binding. /// * `const { user } = await loginGuardCall()`, also accepted /// because `value_is_self_actor_call` already covers the /// `let user = require_auth(..)` shape; we lift that recognition /// into the destructure case so callers can extract the actor in a /// single statement. /// /// Each `pair_pattern` entry distinguishes the destructured KEY (the /// shape of the RHS source) from the bound LOCAL (what we add to the /// state set). Shorthand patterns reuse the key as the local. fn collect_destructured_self_actor_binding( pattern: Node<'_>, value: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState, ) { // Two recognition paths run in sequence: // 1. Static classify_destructure_rhs: hard-coded session-container // / self-actor-base / self-actor-call shapes. // 2. Dynamic self_scoped_session_bases lookup: if the RHS is a // chain (or bare identifier) `` and `.user` was added to // `self_scoped_session_bases` by an earlier TRPC param scan, // the destructured `user` key is the actor. Closes the // cal.com `({ ctx, input }: Options) => { const { user } = ctx; }` // shape where ctx is the TRPC-typed param. let kind = classify_destructure_rhs(value, bytes, rules); let trpc_ctx_path = lookup_trpc_ctx_destructure_match(value, bytes, state); if kind == DestructureRhsKind::None && trpc_ctx_path.is_none() { return; } for idx in 0..pattern.named_child_count() { let Some(child) = pattern.named_child(idx as u32) else { continue; }; let (key, local) = match child.kind() { // `{ user }`, key and local are the same identifier. "shorthand_property_identifier_pattern" => { let name = text(child, bytes); (name.clone(), name) } // `{ user = default }`, left is the shorthand key/local. "object_assignment_pattern" => { let Some(left) = child.child_by_field_name("left") else { continue; }; let name = if matches!( left.kind(), "identifier" | "shorthand_property_identifier_pattern" ) { text(left, bytes) } else { first_identifier_name(left, bytes).unwrap_or_default() }; (name.clone(), name) } // `{ user: localName }`, `key` and `value` fields are // distinct (key from RHS source, local in our scope). "pair_pattern" => { let key_node = child.child_by_field_name("key"); let local_node = child.child_by_field_name("value"); let (Some(k), Some(v)) = (key_node, local_node) else { continue; }; let key = text(k, bytes); let local = first_identifier_name(v, bytes).unwrap_or_default(); (key, local) } _ => continue, }; if kind != DestructureRhsKind::None { process_destructure_entry(&key, &local, kind, state); } // Dynamic-set lift: when the RHS resolves to an `` whose // `.user` was added to `self_scoped_session_bases`, the // destructured `user` key is the actor. This closes the // chained TRPC shape `({ ctx }: Options) => { const { user } // = ctx; }` where the param-level pre-pass marked `ctx.user` // earlier in the unit. if let Some(rhs_path) = trpc_ctx_path.as_deref() && key.eq_ignore_ascii_case("user") && !local.is_empty() { let _ = rhs_path; // path itself is not stored; presence is the signal state.self_actor_vars.insert(local); } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum DestructureRhsKind { /// RHS is a session container, the destructured `user` field /// resolves to the authenticated actor. Examples: `ctx.session`, /// `req.session`, `session`, `await getServerSession()`, /// `getSession()`. SessionContainer, /// RHS is the authed-user base itself (`req.user`, `session.user`, /// `ctx.session.user`). A destructured `id` field is the actor's /// own id. SelfActorBase, /// RHS is not a session/actor source, destructure is irrelevant /// for self-actor recognition. None, } /// When the destructure RHS is `` (an identifier or member /// chain), return `Some(chain_text)` if `.user` was added /// to `state.self_scoped_session_bases` by an earlier /// `collect_trpc_ctx_param` call. Used to mark the destructured /// `user` shorthand as a self-actor binding when extracting it from a /// TRPC ctx param's local, `({ ctx }: Options) => { const { user } /// = ctx; }`. fn lookup_trpc_ctx_destructure_match( node: Node<'_>, bytes: &[u8], state: &UnitState, ) -> Option { if state.self_scoped_session_bases.is_empty() { return None; } let chain_text = chain_text_from_value(node, bytes)?; if chain_text.is_empty() { return None; } let candidate = format!("{chain_text}.user"); if state.self_scoped_session_bases.contains(&candidate) { Some(chain_text) } else { None } } /// Reduce an RHS expression to its dotted chain text, walking through /// `await`/parens/non-null wrappers. Returns `None` for shapes that /// aren't a pure identifier/member-chain (e.g. a call result, a /// template literal, an object-literal expression). fn chain_text_from_value(node: Node<'_>, bytes: &[u8]) -> Option { match node.kind() { "identifier" => { let t = text(node, bytes); if t.is_empty() { None } else { Some(t) } } "field_expression" | "member_expression" | "field_access" | "scoped_identifier" => { let chain = member_chain(node, bytes); if chain.is_empty() { None } else { Some(chain.join(".")) } } "type_cast_expression" | "as_expression" | "cast_expression" | "parenthesized_expression" | "non_null_expression" | "await_expression" | "try_expression" => { let inner = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("expression")); if let Some(v) = inner && let Some(t) = chain_text_from_value(v, bytes) { return Some(t); } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if let Some(t) = chain_text_from_value(child, bytes) { return Some(t); } } None } _ => None, } } fn classify_destructure_rhs( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, ) -> DestructureRhsKind { if value_is_self_actor_call(node, bytes, rules) { return DestructureRhsKind::SessionContainer; } if value_is_session_provider_chain(node, bytes) { return DestructureRhsKind::SessionContainer; } if value_is_self_actor_base_chain(node, bytes) { return DestructureRhsKind::SelfActorBase; } DestructureRhsKind::None } fn process_destructure_entry( key: &str, local: &str, kind: DestructureRhsKind, state: &mut UnitState, ) { if key.is_empty() || local.is_empty() { return; } let key_lower = key.to_ascii_lowercase(); match kind { DestructureRhsKind::SessionContainer => { if matches!(key_lower.as_str(), "user" | "currentuser" | "current_user") { state.self_actor_vars.insert(local.to_string()); } } DestructureRhsKind::SelfActorBase => { if matches!(key_lower.as_str(), "id" | "userid" | "user_id" | "uid") { state.self_actor_id_vars.insert(local.to_string()); } } DestructureRhsKind::None => {} } } /// True when `node` (after walking through `await`/parens/non-null /// wrappers) is a session-container expression, a chain ending in /// `.session` / `.state.session` / a bare `session` identifier, or a /// call to a known session-getter (`getServerSession()`, /// `getSession()`). Distinct from `value_is_self_actor_call` which /// matches login-guard / authorization-check callees configured per /// language. fn value_is_session_provider_chain(node: Node<'_>, bytes: &[u8]) -> bool { match node.kind() { "field_expression" | "member_expression" | "field_access" | "scoped_identifier" => { let chain = member_chain(node, bytes); if chain.is_empty() { return false; } let joined = chain.join("."); // Bare session containers, `ctx.session`, `req.session`, // `request.session`, plus the Koa `ctx.state` shape. matches!( joined.as_str(), "ctx.session" | "ctx.state" | "req.session" | "request.session" | "session" ) } "identifier" => { let name = text(node, bytes); matches!(name.as_str(), "session") } // Known session-getter calls. Conservative list, only // recogniser shapes that are unambiguously session-providing // in the JS/TS ecosystem (NextAuth's `getServerSession` is the // dominant one). `auth()` and `useSession()` are deliberately // omitted because their meaning is ambiguous outside of a // server-component context and adding them risks // over-suppression in non-NextAuth code. "call_expression" | "call" => { let callee = call_name(node, bytes); let last = bare_method_name(&callee); matches!( last, "getServerSession" | "getSession" | "getServerSideSession" | "unstable_getServerSession" ) } "type_cast_expression" | "as_expression" | "cast_expression" | "parenthesized_expression" | "non_null_expression" | "await_expression" | "try_expression" => { let inner = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("expression")); if let Some(v) = inner && value_is_session_provider_chain(v, bytes) { return true; } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if value_is_session_provider_chain(child, bytes) { return true; } } false } _ => false, } } /// True when `node` is the canonical authed-user base from /// `is_self_scoped_session_base_text` (e.g. `req.user`, `session.user`, /// `ctx.session.user`). Used to recognise `const { id } = req.user` /// so the destructured `id` becomes a self-actor-id. fn value_is_self_actor_base_chain(node: Node<'_>, bytes: &[u8]) -> bool { match node.kind() { "field_expression" | "member_expression" | "field_access" | "scoped_identifier" => { let chain = member_chain(node, bytes); if chain.is_empty() { return false; } let joined = chain.join("."); is_self_scoped_session_base_text(&joined) } "type_cast_expression" | "as_expression" | "cast_expression" | "parenthesized_expression" | "non_null_expression" | "await_expression" | "try_expression" => { let inner = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("expression")); if let Some(v) = inner && value_is_self_actor_base_chain(v, bytes) { return true; } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if value_is_self_actor_base_chain(child, bytes) { return true; } } false } _ => false, } } /// Recognise variable bindings whose right-hand side is a literal /// constant, string, integer, float, or boolean. A subject backed /// by a constant binding cannot be user-controlled and so must not /// trigger `.auth.missing_ownership_check` even when the /// variable name happens to match `is_id_like` (e.g. /// `id := "id"` in a Go test fixture). /// /// Walks the binding's RHS through common wrappers /// (`parenthesized_expression`, `type_cast_expression`, /// reference/borrow expressions) before checking for a leaf literal /// kind. Conservative: any non-literal subexpression on the RHS /// (a call, identifier, field-access) skips the binding, that var /// might still hold attacker-controlled data. /// /// Handles the per-language declaration kinds wired in /// `collect_unit_state`: Go `short_var_declaration` (`x := "foo"`), /// JS `lexical_declaration` (`const x = "foo"`), Java /// `local_variable_declaration`, Rust `let_declaration`, and bare /// `assignment_expression`. fn collect_const_string_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { // `assignment` / `assignment_expression`: `x = "foo"`, populate // the LHS (`name` / `left`) when the RHS is a literal. if matches!( node.kind(), "assignment" | "assignment_expression" | "augmented_assignment" ) { let lhs = node .child_by_field_name("left") .or_else(|| node.child_by_field_name("name")) .or_else(|| node.child_by_field_name("target")); let rhs = node .child_by_field_name("right") .or_else(|| node.child_by_field_name("value")); if let (Some(lhs), Some(rhs)) = (lhs, rhs) && rhs_is_pure_literal(rhs) { for var in collect_lhs_idents(lhs, bytes) { state.const_bound_vars.insert(var); } } return; } // Go `short_var_declaration` / `var_declaration` / // `const_declaration`: `id := "id"` or `var id string = "id"`. // Tree-sitter-go uses `left:expression_list` and // `right:expression_list`. if matches!( node.kind(), "short_var_declaration" | "var_spec" | "const_spec" ) { let left = node.child_by_field_name("left").or_else(|| { // Some tree-sitter grammars expose name(s) instead of left node.child_by_field_name("name") }); let right = node.child_by_field_name("right").or_else(|| { node.child_by_field_name("value") .or_else(|| node.child_by_field_name("default")) }); if let (Some(left), Some(right)) = (left, right) { // expression_list parallel, pair LHS idents with RHS exprs. let lhs_idents = collect_lhs_idents(left, bytes); let rhs_exprs: Vec> = if right.kind() == "expression_list" { let mut cursor = right.walk(); right .children(&mut cursor) .filter(|c| !matches!(c.kind(), "," | "(" | ")")) .collect() } else { vec![right] }; for (idx, var) in lhs_idents.into_iter().enumerate() { if let Some(expr) = rhs_exprs.get(idx) && rhs_is_pure_literal(*expr) { state.const_bound_vars.insert(var); } } } return; } // `var_declaration` / `const_declaration` (Go top-level wrappers // around var_spec/const_spec): recurse into children handled above. if matches!(node.kind(), "var_declaration" | "const_declaration") { for idx in 0..node.named_child_count() { if let Some(child) = node.named_child(idx as u32) { collect_const_string_binding(child, bytes, state); } } return; } // Rust `let_declaration` / Python `expression_statement` wrapping a // top-level assignment / JS `lexical_declaration` / Java // `local_variable_declaration`, all expose the binding via // `pattern`/`name` + `value`. let pattern = node .child_by_field_name("pattern") .or_else(|| node.child_by_field_name("name")); let value = node.child_by_field_name("value"); if let (Some(pattern), Some(value)) = (pattern, value) && rhs_is_pure_literal(value) { for var in collect_lhs_idents(pattern, bytes) { state.const_bound_vars.insert(var); } return; } // JS `lexical_declaration` / Java `local_variable_declaration` / // Python `expression_statement`, the binding child is a wrapper // (`variable_declarator`). Recurse into wrappers; the // `variable_declarator` arm in `collect_unit_state` handles them. for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if matches!( child.kind(), "variable_declarator" | "init_declarator" | "var_spec" | "const_spec" | "assignment" | "assignment_expression" ) { collect_const_string_binding(child, bytes, state); } } } /// Returns true if `node` (after unwrapping common wrappers) is a /// pure literal, string, integer, float, boolean, or null. Returns /// false for any expression that could carry attacker-controlled data /// (calls, identifiers, field access, template strings with /// interpolations). fn rhs_is_pure_literal(node: Node<'_>) -> bool { // Unwrap wrappers that don't change taint provenance. let inner = match node.kind() { "parenthesized_expression" | "type_cast_expression" | "as_expression" | "cast_expression" | "reference_expression" => { let value = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("expression")); value.unwrap_or(node) } _ => node, }; matches!( inner.kind(), "string_literal" | "raw_string_literal" | "string" | "interpreted_string_literal" | "rune_literal" | "integer_literal" | "int_literal" | "float_literal" | "true" | "false" | "boolean_literal" | "nil" | "null" | "null_literal" | "none" | "character_literal" ) || (inner.kind() == "template_string" && !template_has_interpolation(inner)) || (inner.kind() == "template_literal" && !template_has_interpolation(inner)) } /// Returns true if a template literal/string contains any /// interpolation segment (which carries dynamic data). Pure /// hard-coded template strings without `${...}` are still constants. fn template_has_interpolation(node: Node<'_>) -> bool { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if matches!( child.kind(), "template_substitution" | "interpolation" | "string_interpolation" ) { return true; } } false } /// Collect identifier names from an LHS pattern: a bare `identifier`, /// a `tuple_pattern`, a Go `expression_list`, or a Rust `tuple_pattern` /// / `let_pattern`. Returns the bound variable names. Ignores /// destructured field accesses (we only track plain locals). fn collect_lhs_idents(node: Node<'_>, bytes: &[u8]) -> Vec { let mut out = Vec::new(); if node.kind() == "identifier" { out.push(text(node, bytes)); return out; } // Walk children, picking up identifiers; recurse into list/tuple // wrappers commonly seen on LHS of multi-binding declarations. for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; match child.kind() { "identifier" => out.push(text(child, bytes)), "tuple_pattern" | "expression_list" | "pattern_list" | "list_pattern" | "field_identifier" | "shorthand_field_identifier" => { out.extend(collect_lhs_idents(child, bytes)); } _ => {} } } out } /// Detect `let X = V.id` (or `(V.id as ..).into()`, `V.id.into()`, /// `V.user_id`, `V.uid`, `V.userId`) where `V` is in `self_actor_vars`. /// `X` is then a transitive copy of the authenticated actor's id and /// is recorded in `self_actor_id_vars` so subjects of that name count /// as actor context, not as foreign scoped IDs. /// /// Closes a real-repo FP cluster: route handlers idiomatically reduce /// the authed user to a scalar id and reuse it across many SQL params /// (`let uid = user.id; query_all(.., &[uid]); query_all(.., &[uid])`). /// The original `V.id`-shape recognition only covered direct subject /// expressions; this captures the common copy-and-pass shape. fn collect_self_actor_id_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { // Rust `let_declaration` exposes `pattern`; JS/TS // `variable_declarator` exposes `name`. let Some(pattern) = node .child_by_field_name("pattern") .or_else(|| node.child_by_field_name("name")) else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(value) = node.child_by_field_name("value") else { return; }; if value_is_self_actor_id_field(value, bytes, &state.self_actor_vars) || value_is_self_scoped_session_id_chain(value, bytes) { state.self_actor_id_vars.insert(var_name); } } /// Does `node` resolve to a `V.id` / `V.user_id` / `V.uid` / `V.userId` /// field access where `V` is in `actor_vars`? Walks through common /// wrappers: `try_expression`, `await_expression`, `parenthesized_expression`, /// `reference_expression`, `type_cast_expression` (`v.id as i64`), /// and `call_expression` for chained `.into()` / `.to_string()` etc. fn value_is_self_actor_id_field( node: Node<'_>, bytes: &[u8], actor_vars: &HashSet, ) -> bool { match node.kind() { "field_expression" | "member_expression" | "field_access" | "scoped_identifier" => { let receiver = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("object")); let field = node .child_by_field_name("field") .or_else(|| node.child_by_field_name("property")) .or_else(|| node.child_by_field_name("name")); let (Some(receiver), Some(field)) = (receiver, field) else { return false; }; let receiver_name = text(receiver, bytes); let field_name = text(field, bytes); actor_vars.contains(&receiver_name) && is_self_actor_id_field_name(&field_name) } "type_cast_expression" | "as_expression" | "cast_expression" | "parenthesized_expression" | "try_expression" | "await_expression" | "reference_expression" => { let value = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("expression")); if let Some(v) = value && value_is_self_actor_id_field(v, bytes, actor_vars) { return true; } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if value_is_self_actor_id_field(child, bytes, actor_vars) { return true; } } false } // `(v.id as i64).into()` / `v.id.to_string()` / `v.id.clone()` , // call on a self-actor id field still propagates self-actor-id. "call_expression" | "call" | "method_invocation" | "method_call_expression" => { let receiver = node .child_by_field_name("function") .or_else(|| node.child_by_field_name("object")); if let Some(r) = receiver { // Function field of a method call is `receiver.method` , // walk the receiver subtree for the self-actor id field. if value_is_self_actor_id_field(r, bytes, actor_vars) { return true; } // Also check the receiver of a method-style chain: // `(v.id as i64).into()`, `function` is the // `field_expression` `(...).into`, whose `value` child // is the cast expression. if let Some(inner) = r .child_by_field_name("value") .or_else(|| r.child_by_field_name("object")) && value_is_self_actor_id_field(inner, bytes, actor_vars) { return true; } } false } _ => false, } } fn is_self_actor_id_field_name(field: &str) -> bool { let lower = field.to_ascii_lowercase(); matches!( lower.as_str(), "id" | "user_id" | "userid" | "uid" | "email" | "username" | "handle" ) } /// Recognise `let X = session.user.id` (or /// `req.session.user.id` / `ctx.session.user.id` / `req.user.id` / /// `request.user.id`, etc.), a copy of the authenticated actor's /// own id field through one of the canonical session-context chains /// (the same set `is_self_scoped_session_subject` accepts at use /// time). Walks through wrappers (`await`, `?.`, parens, casts, /// trivial method chains like `.toString()`). /// /// Closes a real-repo FP cluster (cal.com Next.js handlers): the /// idiomatic shape is `if (session?.user?.id) { const userId = /// session.user.id; await repo.get(userId); }`. The use site sees /// a plain `userId` subject, so without binding-time recognition the /// classifier can't tell it's actor context. fn value_is_self_scoped_session_id_chain(node: Node<'_>, bytes: &[u8]) -> bool { match node.kind() { "field_expression" | "member_expression" | "field_access" | "scoped_identifier" => { // Build the dotted chain and reuse the same predicate the // subject classifier uses (`matches_session_context` + // self-scoped-base check). Doing it via the chain avoids // re-implementing the session-context grammar here. let chain = member_chain(node, bytes); if chain.len() < 2 { return false; } let field = chain.last().expect("len >= 2"); if !is_self_actor_id_field_name(field) { return false; } let base_chain = &chain[..chain.len() - 1]; let base = base_chain.join("."); classify_member_chain(base_chain) == ValueSourceKind::Session && is_self_scoped_session_base_text(&base) } "type_cast_expression" | "as_expression" | "cast_expression" | "parenthesized_expression" | "try_expression" | "await_expression" | "reference_expression" | "non_null_expression" => { let value = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("expression")); if let Some(v) = value && value_is_self_scoped_session_id_chain(v, bytes) { return true; } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if value_is_self_scoped_session_id_chain(child, bytes) { return true; } } false } // `(req.user.id as number).toString()` / `session.user.id.toString()` "call_expression" | "call" | "method_invocation" | "method_call_expression" => { let receiver = node .child_by_field_name("function") .or_else(|| node.child_by_field_name("object")); if let Some(r) = receiver { if value_is_self_scoped_session_id_chain(r, bytes) { return true; } if let Some(inner) = r .child_by_field_name("value") .or_else(|| r.child_by_field_name("object")) && value_is_self_scoped_session_id_chain(inner, bytes) { return true; } } false } _ => false, } } /// String-level analogue of `is_self_scoped_session_base` from /// `checks.rs`. Kept here in the extract layer to avoid a layer /// dependency; the two lists must stay in sync. fn is_self_scoped_session_base_text(base: &str) -> bool { matches!( base, "req.session.user" | "request.session.user" | "session.user" | "req.session.currentUser" | "request.session.currentUser" | "session.currentUser" | "req.user" | "request.user" | "req.currentUser" | "request.currentUser" | "ctx.session.user" | "ctx.session.currentUser" | "ctx.state.user" | "ctx.state.currentUser" ) } /// Does `node` (possibly wrapped in `?`/`.await`/`&`/`match`) resolve /// to a call whose callee matches `is_login_guard` or /// `is_authorization_check`? Used to detect `let user = /// auth::require_auth(..).await?`-style bindings, including the /// `let user = match require_auth() { Ok(u) => u, Err(_) => return ... }` /// shape used by Worker / Cloudflare-style handlers that propagate /// the auth failure response instead of using `?`. fn value_is_self_actor_call(node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules) -> bool { match node.kind() { "call_expression" | "call" | "method_invocation" | "method_call_expression" => { let callee = call_name(node, bytes); !callee.is_empty() && (rules.is_login_guard(&callee) || rules.is_authorization_check(&callee)) } "try_expression" | "await_expression" | "reference_expression" | "parenthesized_expression" | "match_expression" => { // For `match SCRUTINEE { ... }`, the scrutinee is the // call we care about, if `require_auth().await` is being // matched, the `Ok(u) => u` arm gives us a self-actor // binding even when `?` isn't usable. Walk all named // children, tree-sitter exposes both the scrutinee and // the arms. for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if value_is_self_actor_call(child, bytes, rules) { return true; } } false } _ => false, } } /// A3: typed route-handler parameters whose declared type names the /// authenticated user (e.g. `user: CurrentUser`, `admin: AdminUser`) /// count as self-actor bindings. Recognized type last-segments: /// `CurrentUser`, `SessionUser`, `AuthUser`, `AdminUser`, /// `AuthenticatedUser`, `RequireAuth`, `RequireLogin`, `Authenticated`. fn collect_typed_extractor_self_actor(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { let Some(pattern) = node.child_by_field_name("pattern") else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(ty_node) = node.child_by_field_name("type") else { return; }; let ty_text = text(ty_node, bytes); if is_self_actor_type_text(&ty_text) { state.self_actor_vars.insert(var_name); } } /// B3: detect `let X = …prepare(LIT)…` / `let X = …query(LIT)…` /// where the SQL literal classifies as authorization-gated. When /// matched: insert `X` into `state.authorized_sql_vars` and synthesise /// a `Membership` `AuthCheck` at the `let`'s line whose subjects /// include `X` and the value-refs from the SQL call's bind args /// (e.g. `user.id` in `.bind(user.id)`). Downstream uses of `X`'s /// columns are then transitively covered through `row_field_vars`. fn collect_sql_authorized_binding( node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState, ) { if rules.acl_tables.is_empty() && !sql_direct_user_id_enabled() { return; } let Some(pattern) = node.child_by_field_name("pattern") else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(value) = node.child_by_field_name("value") else { return; }; let Some((sql_call, bind_arg_refs)) = find_authorized_sql_call_in_chain(value, bytes, rules) else { return; }; state.authorized_sql_vars.insert(var_name.clone()); let mut subjects = bind_arg_refs; subjects.push(ValueRef { source_kind: ValueSourceKind::Identifier, name: var_name, base: None, field: None, index: None, span: span(node), }); let line = node.start_position().row + 1; state.auth_checks.push(AuthCheck { kind: AuthCheckKind::Membership, callee: "(sql ACL)".into(), subjects, span: span(sql_call), line, args: Vec::new(), condition_text: None, is_route_level: false, }); } /// Always true, the direct-user-id-predicate path in /// `sql_semantics::classify_sql_query` doesn't depend on the ACL /// table list, so we still want to walk `let X = …query(LIT)…` /// chains even when the user hasn't configured any ACL tables. /// Kept as a function so future tuning can disable this path. fn sql_direct_user_id_enabled() -> bool { true } /// Walk down a chain of method calls (`a.b().c().d()`) looking for a /// call whose method matches a SQL prepare/query verb and whose first /// argument is a string literal classifying as auth-gated. Returns /// the matching call node along with the value-refs collected from /// the *outer* chain's argument list (the call that bound the user /// id, e.g. `.bind(user.id)`). fn find_authorized_sql_call_in_chain<'tree>( value: Node<'tree>, bytes: &[u8], rules: &AuthAnalysisRules, ) -> Option<(Node<'tree>, Vec)> { let mut bind_arg_refs: Vec = Vec::new(); let mut cur = unwrap_try_like(value); let mut steps = 0; while steps < 16 { steps += 1; if !matches!( cur.kind(), "call_expression" | "call" | "method_invocation" | "method_call_expression" ) { return None; } // Collect any non-literal arg value-refs from this call , // these typically include the bound user id (e.g. // `.bind(user.id)` → adds `user.id` as a subject). if let Some(args_node) = cur.child_by_field_name("arguments") { for arg in named_children(args_node) { if matches!( arg.kind(), "string_literal" | "raw_string_literal" | "string" ) { continue; } bind_arg_refs.extend(extract_value_refs(arg, bytes)); } } let callee = call_name(cur, bytes); let last_segment = bare_method_name(&callee); if is_sql_prepare_method(last_segment) { // Check first arg is a string literal that classifies // as authorized. let args = cur .child_by_field_name("arguments") .map(named_children) .unwrap_or_default(); if let Some(first_arg) = args.first().copied() && let Some(literal) = collect_string_literal_text(first_arg, bytes) && crate::auth_analysis::sql_semantics::classify_sql_query( &literal, &rules.acl_tables, ) .is_some() { return Some((cur, bind_arg_refs)); } // Method matched but arg isn't a literal we recognise // as authorized, bail. return None; } // Descend through the receiver/object of this call to look // for an inner SQL prepare. let next = cur .child_by_field_name("receiver") .or_else(|| { cur.child_by_field_name("function").and_then(|fun| { fun.child_by_field_name("object") .or_else(|| fun.child_by_field_name("operand")) .or_else(|| fun.child_by_field_name("argument")) .or_else(|| fun.child_by_field_name("value")) }) }) .or_else(|| cur.child_by_field_name("object")); let next = next?; cur = unwrap_try_like(next); } None } /// Recognised SQL prepare/query method names. Matched against the /// last segment of the callee. String comparison only, we don't /// constrain the receiver to a specific type; known DB connection /// receivers are classified by the sink-class type gate, and this /// list is the orthogonal verb axis. fn is_sql_prepare_method(method: &str) -> bool { matches!( method, "prepare" | "query" | "query_one" | "query_all" | "query_as" | "query_map" | "query_row" | "query_scalar" | "fetch" | "fetch_one" | "fetch_all" | "fetch_optional" | "fetch_scalar" | "execute" | "exec" ) } /// Extract the string content from a Rust string literal node, joining /// adjacent fragments (e.g. `"a" "b"` becomes `"ab"`). Returns `None` /// when the node isn't a string literal at all. fn collect_string_literal_text(node: Node<'_>, bytes: &[u8]) -> Option { match node.kind() { "string_literal" | "raw_string_literal" => { let mut buf = String::new(); let mut found = false; for child in named_children(node) { if child.kind() == "string_content" { buf.push_str(&text(child, bytes)); found = true; } } if found { Some(buf) } else { Some(strip_quotes(&text(node, bytes))) } } "string" | "template_string" | "interpreted_string_literal" => { Some(strip_quotes(&text(node, bytes))) } _ => None, } } /// B3: `for ROW in X { … }`, when `X` (the iterator value) names a /// SQL-authorized variable, mark `ROW` authorized too AND record /// `row_field_vars[ROW] = X` so transitive subject coverage works /// for column reads inside the loop body. fn collect_for_row_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { let Some(pattern) = node.child_by_field_name("pattern") else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(value) = node.child_by_field_name("value") else { return; }; // The iterated expression is often `&X`, `X.iter()`, `X.into_iter()`, // etc. Walk through reference / common iterator-method wrappers // to recover the underlying var name. let Some(source_var) = single_iter_source_name(value, bytes) else { return; }; state .row_field_vars .insert(var_name.clone(), source_var.clone()); if state.authorized_sql_vars.contains(&source_var) { state.authorized_sql_vars.insert(var_name); } } /// Recover the source identifier under common iteration-shape /// wrappers: `X`, `&X`, `&mut X`, `X.iter()`, `X.iter_mut()`, /// `X.into_iter()`, `X.values()`, `X.keys()`. Returns `None` for /// arbitrary expressions (`fetch_rows()`, `make_iter() + 1`, …). fn single_iter_source_name(node: Node<'_>, bytes: &[u8]) -> Option { match node.kind() { "identifier" => { let value = text(node, bytes); if value.is_empty() { None } else { Some(value) } } "reference_expression" | "parenthesized_expression" => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if let Some(name) = single_iter_source_name(child, bytes) { return Some(name); } } None } "call_expression" | "call" | "method_invocation" | "method_call_expression" => { let callee = call_name(node, bytes); let last = bare_method_name(&callee); if !matches!( last, "iter" | "iter_mut" | "into_iter" | "values" | "keys" | "drain" ) { return None; } let receiver = node .child_by_field_name("receiver") .or_else(|| { node.child_by_field_name("function").and_then(|fun| { fun.child_by_field_name("object") .or_else(|| fun.child_by_field_name("operand")) .or_else(|| fun.child_by_field_name("argument")) .or_else(|| fun.child_by_field_name("value")) }) }) .or_else(|| node.child_by_field_name("object"))?; single_iter_source_name(receiver, bytes) } _ => None, } } /// B3: `let Y = ROW.method(..)` / `let Y = ROW.field` where `ROW` is /// SQL-authorized, propagate authorized status to `Y` so any /// downstream use (e.g. as a sink subject) is treated as covered. /// `row_field_vars[Y] = ROW` is already populated by /// `collect_row_field_binding`; this helper just propagates the /// authorized-vars set along that edge. fn propagate_sql_authorized_through_field_read( node: Node<'_>, bytes: &[u8], state: &mut UnitState, ) { let Some(pattern) = node.child_by_field_name("pattern") else { return; }; let Some(var_name) = first_identifier_name(pattern, bytes) else { return; }; if var_name.is_empty() { return; } let Some(value) = node.child_by_field_name("value") else { return; }; let Some(source) = extract_row_receiver_name(value, bytes) else { return; }; if state.authorized_sql_vars.contains(&source) { state.authorized_sql_vars.insert(var_name); } } /// Recognise type names that semantically mean "the authenticated /// actor" as the type of a function parameter. Used by /// `collect_typed_extractor_self_actor` to seed `self_actor_vars` so /// that downstream `V.id`-shaped subjects on a parameter of one of /// these types count as actor context, not foreign scoped IDs. /// /// The recogniser is intentionally type-only, no name heuristic on /// the variable. A handler signature /// `pub async fn handler(.., local_user_view: LocalUserView)` is /// recognised because the type name matches, not because the /// parameter is conventionally named `local_user_view`. /// /// **Two acceptance forms:** /// /// 1. *Tight exact set*, names whose entire identity is "auth /// subject": `Authenticated`, `Identity`, `Principal`. Adding new /// bare names to this set should be done sparingly; framework /// types that include `User` should go through the structural /// form instead. /// /// 2. *Structural form*, a CamelCase identifier of the shape /// `User?` where `PREFIX` is one of `Local`, /// `Current`, `Session`, `Auth`, `Authenticated`, `LoggedIn`, /// `Admin`, and `SUFFIX` (optional) is one of `View`, `Info`, /// `Context`, `Session`, `Token`. Catches `LocalUserView` /// (lemmy), `LocalUser`, `CurrentUser`, `LoggedInUser`, /// `AuthenticatedUserContext`, etc. /// /// **Deliberately *not* matched:** /// * Bare `User`, too loose; `User` parameters are very often /// deserialised payloads, not actor extractors. /// * `UserView`, `UserPreferences`, same reason; the prefix is what /// carries the auth signal, not the bare `User` segment. fn is_self_actor_type_text(ty: &str) -> bool { let trimmed = ty .trim() .trim_start_matches('&') .trim_start_matches("mut ") .trim(); let after_colons = trimmed.rsplit("::").next().unwrap_or(trimmed); let base = after_colons .split('<') .next() .unwrap_or(after_colons) .trim(); if matches!(base, "Authenticated" | "Identity" | "Principal") { return true; } matches_self_actor_user_form(base) } /// Structural form: `User?` where PREFIX is in the /// authority-prefix vocabulary and SUFFIX is in the /// auth-context-suffix vocabulary (or absent). /// /// Implementation: strip a leading PREFIX, require the remainder to /// start with `User`, and accept either an exact `User` match or a /// `User`+SUFFIX match. Case-sensitive on the segment boundaries /// because we want CamelCase types only, `localuser` wouldn't be a /// real Rust type name and matching it would create ambiguity with /// payload identifiers. fn matches_self_actor_user_form(base: &str) -> bool { const PREFIXES: &[&str] = &[ "Local", "Current", "Session", "Authenticated", "Auth", "LoggedIn", "Admin", ]; const SUFFIXES: &[&str] = &["View", "Info", "Context", "Session", "Token"]; for prefix in PREFIXES { let Some(rest) = base.strip_prefix(prefix) else { continue; }; let Some(after_user) = rest.strip_prefix("User") else { continue; }; if after_user.is_empty() { return true; } if SUFFIXES.contains(&after_user) { return true; } } false } /// Extract a single-segment receiver name for a value node of the shape /// `ROW.method(..)` or `ROW.field`. Returns `None` when the receiver /// isn't a simple identifier (e.g. deeper chains like `ctx.db.get(..)`). fn extract_row_receiver_name(node: Node<'_>, bytes: &[u8]) -> Option { let node = unwrap_try_like(node); match node.kind() { "call_expression" | "call" | "method_invocation" | "method_call_expression" => { let function = node .child_by_field_name("function") .or_else(|| node.child_by_field_name("method")); let function = function?; single_ident_receiver(function, bytes) .or_else(|| single_ident_from_call_receiver(node, bytes)) } "field_expression" | "member_expression" | "attribute" | "selector_expression" | "field_access" => single_ident_receiver(node, bytes), _ => None, } } fn single_ident_receiver(node: Node<'_>, bytes: &[u8]) -> Option { let object = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("object")) .or_else(|| node.child_by_field_name("operand")) .or_else(|| node.child_by_field_name("receiver"))?; single_ident_text(object, bytes) } fn single_ident_from_call_receiver(node: Node<'_>, bytes: &[u8]) -> Option { let receiver = node .child_by_field_name("receiver") .or_else(|| node.child_by_field_name("object"))?; single_ident_text(receiver, bytes) } fn single_ident_text(node: Node<'_>, bytes: &[u8]) -> Option { if matches!( node.kind(), "identifier" | "shorthand_property_identifier" | "field_identifier" ) { let value = text(node, bytes); if value.is_empty() { None } else { Some(value) } } else { None } } /// Strip `?` / `.await` / `&` / `&mut` wrappers from a value node, /// returning the underlying call/field expression when present. fn unwrap_try_like(node: Node<'_>) -> Node<'_> { let mut cur = node; loop { match cur.kind() { "try_expression" | "await_expression" | "reference_expression" | "parenthesized_expression" => { let Some(inner) = cur .child_by_field_name("expression") .or_else(|| cur.named_child(0)) else { return cur; }; cur = inner; } _ => return cur, } } } /// Detect the `if OWNER != SELF { return ... }` (or `==` with `else` /// early-exit) row-level ownership-equality pattern and emit a /// synthetic `AuthCheck { kind: Ownership }`. The AuthCheck is /// back-dated to the row's `let` line, and populated with the row's /// original fetch arguments as subjects, so the row-fetching call /// (e.g. `db.query_one(.., &[doc_id])`) is also covered. fn detect_ownership_equality_check(if_node: Node<'_>, bytes: &[u8], state: &mut UnitState) { let Some(condition_raw) = if_node.child_by_field_name("condition") else { return; }; let Some(consequence) = if_node.child_by_field_name("consequence") else { return; }; let alternative = if_node.child_by_field_name("alternative"); let condition = unwrap_parens_local(condition_raw); if condition.kind() != "binary_expression" { return; } let Some(operator) = binary_operator_text(condition, bytes) else { return; }; let is_ne = matches!(operator.as_str(), "!=" | "!==" | "ne"); let is_eq = matches!(operator.as_str(), "==" | "===" | "eq"); if !is_ne && !is_eq { return; } let Some((left, right)) = binary_operands(condition) else { return; }; let fail_branch = if is_ne { consequence } else if let Some(alt) = alternative { resolve_else_block(alt) } else { return; }; if !branch_has_early_exit(fail_branch, bytes) { return; } let left_refs = extract_value_refs(left, bytes); let right_refs = extract_value_refs(right, bytes); let (owner_ref, _self_ref) = match ( pick_owner_field_ref(&left_refs), pick_self_actor_ref(&right_refs), ) { (Some(o), Some(s)) => (o, s), _ => match ( pick_owner_field_ref(&right_refs), pick_self_actor_ref(&left_refs), ) { (Some(o), Some(s)) => (o, s), _ => return, }, }; let row_binding = state.row_field_vars.get(&owner_ref.name).cloned(); let if_line = if_node.start_position().row + 1; let if_span = span(if_node); let condition_text = text(condition, bytes); let (check_line, mut subjects) = match row_binding .as_ref() .and_then(|row| state.row_population_data.get(row).map(|v| (row, v))) { Some((row, (row_line, arg_refs))) => { let mut subjects = arg_refs.clone(); subjects.push(ValueRef { source_kind: ValueSourceKind::Identifier, name: row.clone(), base: None, field: None, index: None, span: if_span, }); (*row_line, subjects) } None => match row_binding.as_ref() { Some(row) => ( if_line, vec![ValueRef { source_kind: ValueSourceKind::Identifier, name: row.clone(), base: None, field: None, index: None, span: if_span, }], ), None => (if_line, Vec::new()), }, }; subjects.push(owner_ref); state.auth_checks.push(AuthCheck { kind: AuthCheckKind::Ownership, callee: "(row ownership equality)".into(), subjects, span: if_span, line: check_line, args: Vec::new(), condition_text: Some(condition_text), is_route_level: false, }); } fn unwrap_parens_local(node: Node<'_>) -> Node<'_> { if node.kind() == "parenthesized_expression" && let Some(inner) = node.named_child(0) { return unwrap_parens_local(inner); } node } fn binary_operator_text(node: Node<'_>, bytes: &[u8]) -> Option { if let Some(op) = node.child_by_field_name("operator") { let value = text(op, bytes); if !value.is_empty() { return Some(value); } } let mut cursor = node.walk(); for child in node.children(&mut cursor) { if !child.is_named() { let value = text(child, bytes); if !value.is_empty() { return Some(value); } } } None } fn binary_operands<'tree>(node: Node<'tree>) -> Option<(Node<'tree>, Node<'tree>)> { if let (Some(left), Some(right)) = ( node.child_by_field_name("left"), node.child_by_field_name("right"), ) { return Some((left, right)); } let children = named_children(node); match children.as_slice() { [left, right] => Some((*left, *right)), _ => None, } } fn resolve_else_block(alt: Node<'_>) -> Node<'_> { // Rust wraps the else branch in an `else_clause` with the block // as a named child. Other grammars differ, so we walk defensively. if alt.kind() == "else_clause" && let Some(block) = named_children(alt).into_iter().next() { return block; } alt } fn branch_has_early_exit(branch: Node<'_>, bytes: &[u8]) -> bool { named_children(branch) .into_iter() .any(|n| node_is_early_exit(n, bytes)) } fn node_is_early_exit(node: Node<'_>, bytes: &[u8]) -> bool { match node.kind() { "return_expression" | "return_statement" => true, // Throwing aborts execution flow. Common in JS/TS / Java // (`throw new ForbiddenException()`), Python (`raise ...`), // Ruby (`raise ...`). "throw_statement" | "throw_expression" | "raise_statement" => true, // A call whose callee name is in the framework denial set // (`notFound()` / `redirect()` / `abort()` / `forbidden()` / // `unauthorized()` / etc.) terminates the request. These // helpers either throw under the hood (Next.js, Flask) or // exit the process (`process.exit`, `sys.exit`). "call_expression" | "call" | "method_invocation" => is_denial_call(node, bytes), "expression_statement" => named_children(node) .into_iter() .any(|n| node_is_early_exit(n, bytes)), _ => false, } } /// Recognise calls that act as request-terminating denial helpers. /// /// The callee name is matched against a curated set of framework /// idioms. This is read in `node_is_early_exit` from inside the /// row-ownership-equality detector, where the ambient context already /// requires an `owner.field` vs. `self.id` binary comparison; the /// denial-call match is only the early-exit witness, not the auth /// signal itself. fn is_denial_call(call_node: Node<'_>, bytes: &[u8]) -> bool { let Some(callee_node) = call_node .child_by_field_name("function") .or_else(|| call_node.child_by_field_name("name")) else { return false; }; let callee_text = text(callee_node, bytes); let trimmed = callee_text.trim(); let leaf = trimmed.rsplit('.').next().unwrap_or(trimmed); let leaf = leaf.rsplit("::").next().unwrap_or(leaf); matches!( leaf, "notFound" | "redirect" | "permanentRedirect" | "unauthorized" | "forbidden" | "abort" | "halt" ) } pub(super) fn is_owner_field_subject(subject: &ValueRef) -> bool { let raw = match subject.source_kind { ValueSourceKind::ArrayIndex => subject.base.as_deref().unwrap_or(&subject.name), _ => subject .field .as_deref() .or(subject.base.as_deref()) .unwrap_or(&subject.name), }; let key = canonical_name(raw); matches!( key.as_str(), "userid" | "ownerid" | "authorid" | "createdby" | "uploaderid" | "updatedby" | "submittedby" | "assignedto" | "creatorid" | "postedby" ) } pub(super) fn is_self_actor_subject(subject: &ValueRef) -> bool { // `req.user.id`, `session.user.id`, `ctx.session.user.id`, etc. if subject.source_kind == ValueSourceKind::Session && subject .base .as_deref() .is_some_and(is_self_session_base_local) { return true; } // Plain member chains that name the caller directly: `user.id`, // `current_user.id`, `actor.id`. A3 widens this set via // `self_actor_vars`. let Some(field) = subject.field.as_deref() else { return false; }; if !field.eq_ignore_ascii_case("id") { return false; } let Some(base) = subject.base.as_deref() else { return false; }; let last = base.rsplit('.').next().unwrap_or(base); matches!( last, "user" | "current_user" | "currentUser" | "actor" | "current_actor" ) } fn is_self_session_base_local(base: &str) -> bool { matches!( base, "req.session.user" | "request.session.user" | "session.user" | "req.session.currentUser" | "request.session.currentUser" | "session.currentUser" | "req.user" | "request.user" | "req.currentUser" | "request.currentUser" | "ctx.session.user" | "ctx.session.currentUser" | "ctx.state.user" | "ctx.state.currentUser" ) } fn pick_owner_field_ref(refs: &[ValueRef]) -> Option { refs.iter().find(|v| is_owner_field_subject(v)).cloned() } fn pick_self_actor_ref(refs: &[ValueRef]) -> Option { refs.iter().find(|v| is_self_actor_subject(v)).cloned() } fn classify_auth_check(callee: &str, rules: &AuthAnalysisRules) -> AuthCheckKind { if rules.is_admin_guard(callee, &[]) || matches_name(callee, "isAdmin") { AuthCheckKind::AdminGuard } else if rules.is_login_guard(callee) { AuthCheckKind::LoginGuard } else if matches_name(callee, "checkMembership") || matches_name(callee, "hasWorkspaceMembership") || matches_name(callee, "isMember") || matches_name(callee, "requireMembership") || matches_name(callee, "check_membership") || matches_name(callee, "has_membership") || matches_name(callee, "has_membership?") || matches_name(callee, "require_membership") || matches_name(callee, "ensure_membership") || matches_name(callee, "member_of?") || matches_name(callee, "member?") { AuthCheckKind::Membership } else if matches_name(callee, "checkOwnership") || matches_name(callee, "isOwner") || matches_name(callee, "requireOwnership") || matches_name(callee, "check_ownership") || matches_name(callee, "has_ownership") || matches_name(callee, "require_ownership") || matches_name(callee, "ensure_ownership") || matches_name(callee, "is_owner") || matches_name(callee, "owner?") || matches_name(callee, "owns?") { AuthCheckKind::Ownership } else { AuthCheckKind::Other } } pub fn function_name(node: Node<'_>, bytes: &[u8]) -> Option { function_definition_node(node) .child_by_field_name("name") .map(|name| text(name, bytes)) .filter(|name| !name.is_empty()) } /// True when a Python `decorated_definition` node carries a /// background-task / event-handler decorator. Recognised markers /// (matched against the bare callee name, last segment of any /// dotted/qualified form): /// /// * Celery: `task`, `shared_task`, `periodic_task`, /// `app.task`, `celery.task`, `beat.shared_task`. /// * Airflow: `instrumented_task`. /// * Django: `receiver` (signal receiver, invoked by the framework, /// not by an HTTP request). /// /// Used by `collect_top_level_from_node` to skip pushing a /// `Function` unit for functions that cannot, by construction, be /// the entry point of a user-input flow. Real route handlers are /// added by the framework-specific route extractors (Flask / /// Django / Spring / FastAPI / …) which re-build the unit with /// `RouteHandler` kind and route-decorator-derived auth checks. fn python_decorated_definition_is_background_task(node: Node<'_>, bytes: &[u8]) -> bool { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if child.kind() != "decorator" { continue; } let Some(inner) = child.named_child(0) else { continue; }; let callee_text = match inner.kind() { "call" => { let Some(function) = inner.child_by_field_name("function") else { continue; }; text(function, bytes) } "identifier" | "attribute" | "scoped_identifier" => text(inner, bytes), _ => continue, }; let last = callee_text.rsplit('.').next().unwrap_or(&callee_text); if matches!( last, "task" | "shared_task" | "periodic_task" | "instrumented_task" | "receiver" ) { return true; } } false } fn function_params(node: Node<'_>, bytes: &[u8]) -> Vec { let Some(params_node) = node.child_by_field_name("parameters") else { return Vec::new(); }; let mut params = Vec::new(); collect_param_names(params_node, bytes, false, &mut params); params } /// Variant of `function_params` that always includes id-like typed /// Python params (`dag_id: str`, `dag_run_id: str`). Used by /// `attach_route_handler` to populate `unit.params` for RouteHandler /// units so middleware-injected auth checks (FastAPI /// `dependencies=[Depends(...)]`, Flask `@requires_role(...)`, etc.) /// can synthesise subjects that cover every handler input, including /// the id-shaped ones that are *the* primary user-controlled data on /// REST routes. /// /// The id-like filter in `collect_param_names` exists to keep /// internal helper signatures (`def f(release_id: int, project: /// Project)`) from passing `unit_has_user_input_evidence`'s param /// heuristic, which would over-fire `missing_ownership_check`. Route /// handlers don't need that filter, they pass the precondition gate /// via `kind == RouteHandler`, and missing the id-like params from /// `unit.params` actively breaks the middleware-injection coverage /// path. pub fn function_params_route_handler(node: Node<'_>, bytes: &[u8]) -> Vec { let Some(params_node) = node.child_by_field_name("parameters") else { return Vec::new(); }; let mut params = Vec::new(); collect_param_names(params_node, bytes, true, &mut params); params } /// Walk a Python function-definition node's parameter list and /// collect every parameter whose static type annotation resolves to /// an integer or boolean scalar (or a generic-wrapped int such as /// `Optional[int]`, `list[int]`, `Iterable[int]`). These names are /// used to seed `AnalysisUnit::typed_bounded_vars` so the ownership /// rule's `is_typed_bounded_subject` filter recognises the bounded /// type without requiring an SSA-derived `VarTypes` map. /// /// No-op for non-Python `function_definition` nodes, only /// tree-sitter-python exposes the `typed_parameter` / /// `typed_default_parameter` shapes inspected here. Conservative: /// only int/bool/float scalars and known integer-list wrappers /// qualify; bare `str`, `bytes`, `Path`, custom DTO types, and /// `Annotated[int, Body()]` wrappers are NOT lifted because the /// presence of an HTTP-binding marker indicates the value is /// caller-controlled (the SSA pipeline handles those). fn python_int_bounded_typed_params(node: Node<'_>, bytes: &[u8]) -> HashSet { let mut out: HashSet = HashSet::new(); let Some(params_node) = node.child_by_field_name("parameters") else { return out; }; for idx in 0..params_node.named_child_count() { let Some(child) = params_node.named_child(idx as u32) else { continue; }; if !matches!(child.kind(), "typed_parameter" | "typed_default_parameter") { continue; } let mut name: Option = None; let mut type_text: Option = None; for inner_idx in 0..child.named_child_count() { let Some(inner) = child.named_child(inner_idx as u32) else { continue; }; if inner.kind() == "identifier" && name.is_none() { let n = text(inner, bytes); if !n.is_empty() { name = Some(n); } } else if inner.kind() == "type" { type_text = Some(text(inner, bytes)); } } if let (Some(n), Some(t)) = (name, type_text) && python_type_text_is_integer_bounded(&t) { out.insert(n); } } out } /// Conservative recogniser for Python type annotations that bound a /// value to an integer or boolean scalar. Accepts: /// * Bare `int`, `bool`, `float`. /// * Common generic wrappers whose element type is one of those: /// `Optional[int]`, `Union[int, None]`, `list[int]`, `List[int]`, /// `tuple[int, ...]`, `Sequence[int]`, `Iterable[int]`, /// `set[int]`, `frozenset[int]`, `dict[int, ...]` (key only). /// /// `Annotated[int, ...]` is intentionally rejected, the FastAPI / /// Pydantic binding marker indicates the value is caller-controlled. fn python_type_text_is_integer_bounded(text: &str) -> bool { let trimmed = text.trim(); // Accept `T | None` (PEP 604) by recursing on each branch. if trimmed.contains('|') { return trimmed .split('|') .map(str::trim) .all(|alt| alt == "None" || python_type_text_is_integer_bounded(alt)); } if matches!(trimmed, "int" | "bool" | "float") { return true; } let Some((head, rest)) = trimmed.split_once('[') else { return false; }; if !rest.ends_with(']') { return false; } let inner = &rest[..rest.len() - 1]; let head_trim = head.trim(); // `Annotated[int, Body()]` etc. is a binding marker, refuse. if matches!(head_trim, "Annotated" | "typing.Annotated") { return false; } let inner_first = inner.split(',').next().unwrap_or(inner).trim(); matches!( head_trim, "Optional" | "typing.Optional" | "Union" | "typing.Union" | "list" | "List" | "typing.List" | "tuple" | "Tuple" | "typing.Tuple" | "set" | "Set" | "typing.Set" | "frozenset" | "Frozenset" | "Sequence" | "typing.Sequence" | "Iterable" | "typing.Iterable" | "Iterator" | "typing.Iterator" | "Collection" | "typing.Collection" | "dict" | "Dict" | "typing.Dict" | "Mapping" | "typing.Mapping" ) && python_type_text_is_integer_bounded(inner_first) } /// Walk the tree starting at `node` and gather TS type-alias / /// interface names whose body references a TRPC-marker type /// (`TrpcSessionUser`, `TRPCContext`, …). Recurses only through /// container kinds that legitimately host top-level type aliases /// (`program` / `module` / `export_statement` / namespace bodies); /// stops at function or class bodies to avoid an O(units × tree) /// blowup on files with many small functions. /// /// No-op for non-TS files, the matched node kinds only exist in /// the TS grammar. Used by [`FileMeta::scan`] (called once per file /// in `collect_top_level_units` / `attach_route_handler`) to amortise /// the alias scan across all units in the same source file. fn scan_trpc_aliases_visit(node: Node<'_>, bytes: &[u8], out: &mut HashSet) { match node.kind() { "type_alias_declaration" | "interface_declaration" => { let body = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("body")); if let Some(body) = body { let body_text = text(body, bytes); if body_text_references_trpc_marker(&body_text) && let Some(name_node) = node.child_by_field_name("name") { let name = text(name_node, bytes); if !name.is_empty() { out.insert(name); } } } return; } // Recurse only through container kinds that legitimately host // top-level type aliases. Skipping into function bodies / // class bodies / call arguments avoids an O(unit × tree) // blowup when `build_function_unit` triggers this scan once // per unit on files with thousands of small functions // (`tests/hostile_input_tests::many_small_functions_do_not_explode`). "program" | "source_file" | "module" | "export_statement" | "namespace_declaration" | "module_declaration" | "internal_module" | "ambient_declaration" | "lexical_declaration" | "variable_declaration" | "statement_block" => {} _ => return, } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; scan_trpc_aliases_visit(child, bytes, out); } } fn body_text_references_trpc_marker(body_text: &str) -> bool { body_text.contains("TrpcSessionUser") || body_text.contains("TRPCContext") || body_text.contains("ProtectedTRPCContext") || body_text.contains("TrpcContext") } /// Recognise a TS `required_parameter` / `optional_parameter` whose /// type annotation refers to a TRPC-shaped Options alias (or /// inlines `TrpcSessionUser` directly), and add the destructured / /// declared `ctx`-base to `self_scoped_session_bases` so subjects /// rooted at `ctx.user.` count as actor context downstream. /// /// Three pattern shapes are handled: /// 1. Destructured shorthand: `({ ctx, input }: GetOptions)` → /// add `"ctx.user"`. /// 2. Destructured rename: `({ ctx: c, input }: GetOptions)` → /// add `"c.user"`. /// 3. Plain identifier: `(opts: GetOptions)` → add `"opts.ctx.user"`. /// /// The rule is principled: we only fire when the param's type either /// IS one of the file-level TRPC aliases (`state.trpc_alias_names`, /// populated by [`scan_trpc_aliases_from_node_root`]) or its annotation /// text inlines `TrpcSessionUser` directly. Bare `ctx.user` is never /// added to the static session-base list, that would over-suppress /// in non-TRPC code. Instead, the dynamic per-unit set /// `self_scoped_session_bases` carries the lift. fn collect_trpc_ctx_param(node: Node<'_>, bytes: &[u8], state: &mut UnitState) { let Some(pattern) = node.child_by_field_name("pattern") else { return; }; let Some(ty_node) = node.child_by_field_name("type") else { return; }; let ty_text = text(ty_node, bytes); if !type_text_is_trpc_options(&ty_text, &state.trpc_alias_names) { return; } if pattern.kind() == "object_pattern" { for idx in 0..pattern.named_child_count() { let Some(child) = pattern.named_child(idx as u32) else { continue; }; match child.kind() { "shorthand_property_identifier_pattern" => { let name = text(child, bytes); if name.eq_ignore_ascii_case("ctx") { state .self_scoped_session_bases .insert(format!("{name}.user")); } } "object_assignment_pattern" => { if let Some(left) = child.child_by_field_name("left") { let name = if matches!( left.kind(), "identifier" | "shorthand_property_identifier_pattern" ) { text(left, bytes) } else { first_identifier_name(left, bytes).unwrap_or_default() }; if name.eq_ignore_ascii_case("ctx") { state .self_scoped_session_bases .insert(format!("{name}.user")); } } } "pair_pattern" => { let key_node = child.child_by_field_name("key"); let local_node = child.child_by_field_name("value"); if let (Some(k), Some(v)) = (key_node, local_node) { let key = text(k, bytes); let local = first_identifier_name(v, bytes).unwrap_or_default(); if !local.is_empty() && key.eq_ignore_ascii_case("ctx") { state .self_scoped_session_bases .insert(format!("{local}.user")); } } } _ => {} } } return; } if let Some(name) = first_identifier_name(pattern, bytes) && !name.is_empty() { state .self_scoped_session_bases .insert(format!("{name}.ctx.user")); } } /// True when the type-annotation text identifies a TRPC-shaped Options /// type: it contains `TrpcSessionUser` directly (inline object type /// literal), or it references one of the file-level TRPC alias names /// from the pre-scan. fn type_text_is_trpc_options(ty_text: &str, trpc_alias_names: &HashSet) -> bool { if body_text_references_trpc_marker(ty_text) { return true; } let trimmed = ty_text.trim_start_matches(':').trim(); if trimmed.is_empty() { return false; } // Match the leading identifier of the type (dropping any generic // suffix `<...>`). This covers `GetOptions` and // `NonNullable` shapes alike. let head = trimmed.split('<').next().unwrap_or(trimmed).trim(); if trpc_alias_names.contains(head) { return true; } // Also accept the bare alias name appearing anywhere in the // annotation text, handles `Promise` and other // wrappers without enumerating every shape. Word-boundary check // avoids matching aliases that are substrings of longer // identifiers. for alias in trpc_alias_names { if alias.is_empty() { continue; } if let Some(idx) = ty_text.find(alias.as_str()) { let before_ok = idx == 0 || !ty_text.as_bytes()[idx - 1].is_ascii_alphanumeric() && ty_text.as_bytes()[idx - 1] != b'_'; let end = idx + alias.len(); let after_ok = end >= ty_text.len() || !ty_text.as_bytes()[end].is_ascii_alphanumeric() && ty_text.as_bytes()[end] != b'_'; if before_ok && after_ok { return true; } } } false } /// Extract the receiver-variable name from a Go `method_declaration` /// (`func (c *Cache) ...` → `Some("c")`). Returns `None` for any node /// that doesn't expose a `receiver` field (Rust `function_item`, /// Java `method_declaration`, JS arrow-functions, …). /// /// Tree-sitter-go shape: `method_declaration` has a `receiver` field /// whose value is a `parameter_list` containing a single /// `parameter_declaration` with a `name` field (identifier) and a /// `type` field (often `pointer_type`). We only need the name. pub fn method_receiver_name(node: Node<'_>, bytes: &[u8]) -> Option { let receiver = node.child_by_field_name("receiver")?; extract_receiver_param_name(receiver, bytes) } fn extract_receiver_param_name(node: Node<'_>, bytes: &[u8]) -> Option { if let Some(name_node) = node.child_by_field_name("name") { let name = text(name_node, bytes); if !name.is_empty() { return Some(name); } } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if let Some(found) = extract_receiver_param_name(child, bytes) { return Some(found); } } None } fn collect_param_names( node: Node<'_>, bytes: &[u8], include_id_like_typed: bool, out: &mut Vec, ) { match node.kind() { "identifier" | "property_identifier" | "shorthand_property_identifier_pattern" => { let name = text(node, bytes); if !name.is_empty() && !out.contains(&name) { out.push(name); } } // Go `parameter_declaration` / `variadic_parameter_declaration`: // tree-sitter-go shape exposes `name` (one or more identifiers) // and `type` (the param's static type) as named fields. C/C++ // also use `parameter_declaration` but with a `declarator` // field instead of `name`, so the `name`-field gate // distinguishes Go from C/C++ shapes without language plumbing. // // Two engine improvements at this site, both Go-specific: // // 1. Drop the entire param when its type is a known // non-user-input stdlib type. The dominant case is // `ctx context.Context`, the canonical first param of // nearly every Go function (cancellation / deadline / // value-bag, NOT an HTTP request). Without this gate the // bare param name `ctx` matches the framework-request-name // allow-list in `is_external_input_param_name`, opening // `unit_has_user_input_evidence` on every internal helper. // 2. Descend only into the `name` field so type-segment // identifiers don't pollute the param-name set. Without // this scope, `info *PackageInfo` contributes both `info` // and `PackageInfo` to `unit.params`; `path *Path` would // contribute `path` and `Path`, etc. Mirrors the Rust // `parameter` arm below. // // Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~1900 // `go.auth.missing_ownership_check` findings on backend // helpers whose only "user-input evidence" was the ubiquitous // `ctx context.Context` first param. "parameter_declaration" | "variadic_parameter_declaration" if node.child_by_field_name("name").is_some() => { let type_node = node.child_by_field_name("type"); if let Some(t) = type_node && is_go_non_user_input_type(t, bytes) { return; } // Mirror of the Python `typed_parameter` filter (see // `is_python_id_like_typed_param` arm above): for non-route // units, an id-like Go param whose declared type is a // bounded primitive scalar (`int64`, `uint32`, `string`, // `bool`, `byte`, `rune`, `float64`, …) is a caller-passed // scope identifier, not user-controlled HTTP input. Real // Go HTTP handlers always carry a framework-request-typed // param (`*http.Request`, `*gin.Context`, `echo.Context`, // `*fiber.Ctx`, `*context.APIContext`, …) and are // recognised by the per-framework route extractors which // call `function_params_route_handler` // (`include_id_like_typed = true`) — those bypass this // filter so id-shaped path params survive on real routes. // // Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~957 // `go.auth.missing_ownership_check` findings on backend // helpers like // `func GetRunByRepoAndID(ctx context.Context, // repoID, runID int64)`, // `func DeleteRunner(ctx context.Context, id int64)`, // and the entire `models/...` DAO layer where the // ownership check sits in the calling route handler. // Same shape over-fires on minio's `cmd/iam-*-store` // helpers and would on every Go ORM/DAO codebase. let type_is_bounded_scalar = type_node .map(|t| is_go_bounded_scalar_type(t, bytes)) .unwrap_or(false); let mut cursor = node.walk(); for child in node.children_by_field_name("name", &mut cursor) { if child.kind() == "identifier" { let name = text(child, bytes); if name.is_empty() || out.contains(&name) { continue; } if !include_id_like_typed && type_is_bounded_scalar && is_go_id_like_typed_param(&name) { continue; } out.push(name); } } } // Rust `parameter` node: descend ONLY into the `pattern` field so // type-segment identifiers don't pollute the param-name set. // Without this scope, `dst: &std::path::Path` contributes `std`, // `path`, and `Path` to `unit.params`, and `path` then matches // the framework-request-name allow-list in // `is_external_input_param_name`, gating // `unit_has_user_input_evidence` open on internal helpers whose // real params (`dst`, `tasks`, `index_base_map_size`) carry no // user-facing shape. Cluster surfaced from // meilisearch/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks // where `dst: &std::path::Path` made every `db.delete(task.uid)` // call inside the snapshot cleanup loop fire // `missing_ownership_check`. Same shape would over-fire for // `req: &Request<...>` / `ctx: &Context` / similar typed // helpers. "parameter" => { if let Some(pattern) = node.child_by_field_name("pattern") { collect_param_names(pattern, bytes, include_id_like_typed, out); return; } // Fallback (no `pattern` field): descend into named children // generically, mirroring the default arm. for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; collect_param_names(child, bytes, include_id_like_typed, out); } } "default_parameter" | "typed_parameter" | "typed_default_parameter" => { // tree-sitter-python's `typed_parameter` rule does not // expose a `name` field (the identifier is the wrapper's // first child, with the type expression as a sibling). We // fall back to the first `identifier` child when // `child_by_field_name("name")` returns None so typed // Python params (`connection_id: str`, // `organization_id: int`, …) actually flow into // `unit.params` instead of being silently dropped. Without // this, route-aware extractors (Flask + FastAPI) couldn't // see a typed handler's path params and the FastAPI // dependency-injection recogniser had no subject to // synthesise its auth check against. Languages whose // grammar carries a `name` field (TypeScript // `required_parameter`, …) still take the explicit field // path. // // Note: Restricting this fallback to non-id-like names // (so internal helpers with `release_id: int`, // `organization_id: int`, etc. don't pass // `unit_has_user_input_evidence`) would avoid the helper // FP regression observed on sentry. The principled // long-term fix is cross-file type-flow so subjects like // `project.id` (where `project: Project`) are recognised // as typed-bounded everywhere they're used. Until that // lands, we accept the cluster, handlers go through the // route extractors, and route-decorator-derived auth // checks suppress them. if let Some(name) = node.child_by_field_name("name") { collect_param_names(name, bytes, include_id_like_typed, out); return; } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if child.kind() == "identifier" { let name_text = text(child, bytes); // Conservative for non-route-handler units: only // push the name when it is NOT id-like. This is a // stopgap until cross-file type-flow lets us // suppress `obj.id` subjects on typed-object args; // without it, exposing typed helpers like // `def f(release_id: int, project: Project) -> ...` // over-fires `missing_ownership_check` because the // engine sees `project.id` as a foreign scoped id. // Route handlers (`include_id_like_typed = true`) // bypass this filter, id-like params on a REST // route are *the* primary user input, and the // RouteHandler kind already passes // `unit_has_user_input_evidence` unconditionally, // so including them in `unit.params` doesn't // affect that gate but does let // `inject_middleware_auth` synthesise auth-check // subjects that match the operation subjects (the // FastAPI `dependencies=[Depends(...)]` coverage // path that was previously empty for handlers like // `def get_dag_run(dag_id: str, dag_run_id: str, // session)`). let is_id_like = is_python_id_like_typed_param(&name_text); if !name_text.is_empty() && !out.contains(&name_text) && (include_id_like_typed || !is_id_like) { out.push(name_text); } return; } } } _ => { for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; collect_param_names(child, bytes, include_id_like_typed, out); } } } } /// Recognise Go parameter types that are categorically not user-input /// bearing. Used by the Go arm of [`collect_param_names`] to drop the /// param entirely (rather than push its name into `unit.params` and /// trip the framework-request-name allow-list in /// `is_external_input_param_name`). /// /// Conservative: only matches the stdlib `context.Context` / /// `context.CancelFunc` interface idioms. These are the dominant /// cluster ─ ~1900 findings on `/Users/elipeter/oss/gitea` ─ and there /// is no shape under which they carry user input. /// /// Implementation note: tree-sitter-go's `qualified_type` exposes /// `package` (identifier) and `name` (type_identifier) as named fields. /// Pointer-wrapping is rare for these (they're already interfaces) but /// is handled defensively by descending through `pointer_type`. fn is_go_non_user_input_type(type_node: Node<'_>, bytes: &[u8]) -> bool { let mut node = type_node; // Strip a single layer of pointer indirection if present. if node.kind() == "pointer_type" { if let Some(inner) = node.child_by_field_name("type") { node = inner; } else if let Some(inner) = node.named_child(0) { node = inner; } } if node.kind() != "qualified_type" { return false; } let pkg = node .child_by_field_name("package") .map(|n| text(n, bytes)) .unwrap_or_default(); let name = node .child_by_field_name("name") .map(|n| text(n, bytes)) .unwrap_or_default(); matches!( (pkg.as_str(), name.as_str()), ("context", "Context") | ("context", "CancelFunc") ) } /// Ascii-lowered id-shape predicate used by the Python typed-param /// fallback in `collect_param_names`. Mirrors /// `auth_analysis::checks::is_id_like_name` (cannot share that fn /// directly without a cross-module dep), both must move in lockstep /// so the precondition gate and the param-extraction filter agree on /// what counts as id-like. fn is_python_id_like_typed_param(name: &str) -> bool { let lower = name.to_ascii_lowercase(); lower == "id" || lower.ends_with("id") || lower.ends_with("_id") || lower.ends_with("ids") } /// Same shape predicate used by the Go typed-param fallback in /// `collect_param_names`. Kept separate from the Python helper so the /// two recognisers can diverge if/when language-specific spellings /// emerge; the current vocabulary is the same canonical id-suffix /// set as `auth_analysis::checks::is_id_like_name`. fn is_go_id_like_typed_param(name: &str) -> bool { let lower = name.to_ascii_lowercase(); lower == "id" || lower.ends_with("id") || lower.ends_with("_id") || lower.ends_with("ids") } /// True iff `type_node` names a Go bounded primitive scalar: /// integer (`int*` / `uint*` / `byte` / `rune` / `uintptr`), floating /// point (`float32` / `float64`), `bool`, or `string`. Used by the /// Go arm of `collect_param_names` to recognise the /// "id-like name + scalar type" DAO-helper shape and refuse to lift /// such params into `unit.params` for non-route units. /// /// Conservative scope: only bare `type_identifier` matches. Pointer /// types (`*Foo`), generic types (`Map[K, V]`), qualified types /// (`pkg.Type`), and slice/array types (`[]T`) are framework or /// payload shapes, NOT bounded primitives, so they're left alone and /// the param keeps its name. This keeps real handler shapes that /// happen to spell an id-like name on a complex type (`req /// *RequestWithID`) from being silently dropped. fn is_go_bounded_scalar_type(type_node: Node<'_>, bytes: &[u8]) -> bool { if type_node.kind() != "type_identifier" { return false; } matches!( text(type_node, bytes).as_str(), "int" | "int8" | "int16" | "int32" | "int64" | "uint" | "uint8" | "uint16" | "uint32" | "uint64" | "uintptr" | "byte" | "rune" | "float32" | "float64" | "bool" | "string" ) } pub fn is_function_like(node: Node<'_>) -> bool { matches!( node.kind(), "function_declaration" | "function_expression" | "arrow_function" | "function_definition" | "method_declaration" | "function_item" | "closure_expression" | "func_literal" | "decorated_definition" | "method" | "singleton_method" | "block" | "do_block" ) } pub fn is_handler_reference(node: Node<'_>) -> bool { is_function_like(node) || matches!( node.kind(), "identifier" | "member_expression" | "attribute" | "selector_expression" | "field_expression" | "scoped_identifier" | "field_access" | "constant" | "scope_resolution" ) } pub fn call_site_from_node(node: Node<'_>, bytes: &[u8]) -> CallSite { if matches!( node.kind(), "call_expression" | "call" | "method_invocation" | "method_call_expression" ) { let name = call_name(node, bytes); let arg_nodes = node .child_by_field_name("arguments") .map(named_children) .unwrap_or_default(); let args = arg_nodes.iter().map(|arg| text(*arg, bytes)).collect(); let args_value_refs = arg_nodes .iter() .map(|arg| extract_value_refs(*arg, bytes)) .collect(); CallSite { name, args, span: span(node), args_value_refs, } } else { CallSite { name: text(node, bytes), args: Vec::new(), span: span(node), args_value_refs: Vec::new(), } } } pub fn call_sites_from_value(node: Node<'_>, bytes: &[u8]) -> Vec { if matches!(node.kind(), "array" | "list" | "tuple") { named_children(node) .into_iter() .map(|child| call_site_from_node(child, bytes)) .filter(|call| !call.name.is_empty()) .collect() } else { let call = call_site_from_node(node, bytes); if call.name.is_empty() { Vec::new() } else { vec![call] } } } pub fn auth_check_from_call_site( call: &CallSite, line: usize, rules: &AuthAnalysisRules, ) -> Option { let kind = if rules.is_admin_guard(&call.name, &call.args) { AuthCheckKind::AdminGuard } else if rules.is_login_guard(&call.name) { AuthCheckKind::LoginGuard } else if rules.is_authorization_check(&call.name) { classify_auth_check(&call.name, rules) } else { return None; }; Some(AuthCheck { kind, callee: call.name.clone(), subjects: Vec::new(), span: call.span, line, args: call.args.clone(), condition_text: None, is_route_level: false, }) } pub fn extract_value_refs(node: Node<'_>, bytes: &[u8]) -> Vec { match node.kind() { "member_expression" | "attribute" | "selector_expression" | "field_expression" | "field_access" => member_value_ref(node, bytes).into_iter().collect(), "subscript_expression" | "subscript" | "element_reference" | "index_expression" => { subscript_value_ref(node, bytes).into_iter().collect() } "call_expression" | "call" | "method_invocation" | "method_call_expression" => { call_value_ref(node, bytes) .map(|value| vec![value]) .unwrap_or_else(|| { let mut refs = Vec::new(); for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; refs.extend(extract_value_refs(child, bytes)); } refs }) } "identifier" // Ruby `@foo` instance variables and `@@foo` class variables are // leaves with no named children, so the catch-all recurse arm // would yield an empty subject set. Surface them as Identifier // value-refs so receiver-side ownership checks (`@issue.visible?`) // produce a subject that the row-fetch exemption can match. | "instance_variable" | "class_variable" | "global_variable" => vec![ValueRef { source_kind: ValueSourceKind::Identifier, name: text(node, bytes), base: None, field: None, index: None, span: span(node), }], // Keyword / named arguments: `Model.objects.filter(organization_id=org.id)`. // Tree-sitter exposes a `name` child (the schema column / parameter // name) and a `value` child (the actual expression). The default // recurse-all-children arm would surface `organization_id` as a // bare-identifier subject, which `is_id_like_name` then flags as // a scoped-identifier user-input. But the kwarg key is the // ORM/RPC schema field name, fixed at call time, never // attacker-controlled. Only the value carries a subject. // // Covers Python `keyword_argument`, JavaScript / TypeScript // `pair` (object property syntax used as kwargs in client libs // like prisma's `where: { id: foo }` is handled separately), // Ruby `pair` (hash kwargs in `Model.where(field: value)`), Go // composite-literal element keys, PHP / C# named arguments. "keyword_argument" | "keyword_arg" | "named_argument" | "named_arg" => { if let Some(value) = node .child_by_field_name("value") .or_else(|| node.child_by_field_name("argument")) { extract_value_refs(value, bytes) } else { Vec::new() } } _ => { let mut refs = Vec::new(); for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; refs.extend(extract_value_refs(child, bytes)); } refs } } } fn call_value_ref(node: Node<'_>, bytes: &[u8]) -> Option { let callee = call_name(node, bytes); let args = node .child_by_field_name("arguments") .map(named_children) .unwrap_or_default(); let chain = member_chain(node, bytes); if let Some(value) = accessor_call_value_ref(node, &callee, &chain, &args, bytes) { return Some(value); } if !args.is_empty() { return None; } if chain.is_empty() { return None; } let name = chain.join("."); let field = chain.last().cloned(); let base = if chain.len() > 1 { Some(chain[..chain.len() - 1].join(".")) } else { None }; Some(ValueRef { source_kind: classify_member_chain(&chain), name, base, field, index: None, span: span(node), }) } fn member_value_ref(node: Node<'_>, bytes: &[u8]) -> Option { let chain = member_chain(node, bytes); if chain.is_empty() { return None; } let name = chain.join("."); let field = chain.last().cloned(); let base = if chain.len() > 1 { Some(chain[..chain.len() - 1].join(".")) } else { None }; let source_kind = classify_member_chain(&chain); Some(ValueRef { source_kind, name, base, field, index: None, span: span(node), }) } fn classify_member_chain(chain: &[String]) -> ValueSourceKind { if matches_request_param(chain) { ValueSourceKind::RequestParam } else if matches_request_body(chain) { ValueSourceKind::RequestBody } else if matches_request_query(chain) { ValueSourceKind::RequestQuery } else if matches_session_context(chain) { ValueSourceKind::Session } else if chain.first().is_some_and(|segment| { matches!( segment.to_ascii_lowercase().as_str(), "invitation" | "token" | "invite" ) }) { ValueSourceKind::TokenField } else { ValueSourceKind::MemberField } } fn matches_request_param(chain: &[String]) -> bool { let lower = lower_segments(chain); (lower.first().is_some_and(|segment| segment == "params")) || (lower.len() >= 2 && lower[0] == "self" && lower[1] == "params") || (lower.len() >= 3 && matches!(lower[0].as_str(), "req" | "request") && lower[1] == "params") || (lower.len() >= 3 && lower[0] == "ctx" && lower[1] == "params") } fn matches_request_body(chain: &[String]) -> bool { let lower = lower_segments(chain); (lower.len() >= 3 && matches!(lower[0].as_str(), "req" | "request") && lower[1] == "body") || (lower.len() >= 3 && matches!(lower[0].as_str(), "req" | "request") && matches!( lower[1].as_str(), "form" | "json" | "values" | "post" | "data" )) || (lower.len() >= 4 && lower[0] == "ctx" && lower[1] == "request" && lower[2] == "body") || (lower.len() >= 3 && lower[0] == "ctx" && lower[1] == "body") } fn matches_request_query(chain: &[String]) -> bool { let lower = lower_segments(chain); (lower.len() >= 3 && matches!(lower[0].as_str(), "req" | "request") && lower[1] == "query") || (lower.len() >= 3 && matches!(lower[0].as_str(), "req" | "request") && matches!(lower[1].as_str(), "args" | "get")) || (lower.len() >= 3 && lower[0] == "ctx" && lower[1] == "query") || (lower.len() >= 4 && lower[0] == "ctx" && lower[1] == "request" && lower[2] == "query") } fn matches_session_context(chain: &[String]) -> bool { let lower = lower_segments(chain); // Bare `session` is overloaded: in JS/TS it routinely means // NextAuth/express-session and `session.user.id` is auth context; // in Python `session.commit()`, `session.add(..)`, `session.scalar(..)` // are SQLAlchemy ORM calls which have nothing to do with // authentication. When the chain starts with bare `session`, // refuse to classify it as auth context if the next segment is a // canonical SQLAlchemy / SQLAlchemy-style ORM method name , // those are read/write verbs and never identity accessors. Any // other field-style accessor (`session.user`, `session.user_id`, // `session.workspace_id`, `session.role`) stays a Session-context // chain so the stale-authorization / ownership rules still see // session-backed foreign ids. Bare `session` with no following // segment is ambiguous and refused. // Chain length 1 (`session` alone, as the receiver of a subscript // like `session[:user_id]`) stays auth context, the session // ambiguity only kicks in when there's a follow-up segment that // can be inspected. Length 2 with a known ORM verb (`session.commit`, // `session.add`) is denylisted; any other follow-up segment // (`session.user`, `session.workspace_id`, `session.role`) keeps // its Session classification. Length 3+ chains with `session` at // the root always stay auth (they describe a session-stored // member or sub-member). let bare_session_chain_is_auth = lower.first().is_some_and(|segment| segment == "session") && (lower.len() == 1 || lower.len() >= 3 || !is_orm_session_verb(&lower[1])); let unambiguous_chain_root = lower.first().is_some_and(|segment| { matches!( segment.as_str(), "current_user" | "current_account" | "current_member" | "securitycontext" | "principal" | "authentication" ) }); bare_session_chain_is_auth || unambiguous_chain_root || (lower.len() >= 2 && matches!(lower[0].as_str(), "req" | "request") && matches!(lower[1].as_str(), "session" | "user" | "currentuser")) || (lower.len() >= 3 && lower[0] == "self" && matches!(lower[1].as_str(), "request" | "session" | "current_user") && matches!(lower[2].as_str(), "session" | "user" | "currentuser")) || (lower.len() >= 3 && lower[0] == "ctx" && matches!(lower[1].as_str(), "session" | "state")) } /// Denylist of SQLAlchemy / generic ORM session verbs. The Python /// pytest-fixture idiom (`session: Session = sqlalchemy_session()`) /// drives every test method through `session.commit()` / /// `session.add(...)` / `session.scalar(...)`; classifying any of /// those calls as auth Session context would falsely qualify /// thousands of test methods as receiving user input. Only verbs /// that name a SQL/transaction operation are listed, identity- /// looking field accessors (`user`, `user_id`, `role`, /// `workspace_id`, `project_id`, ...) all pass through and remain /// auth Session. fn is_orm_session_verb(segment: &str) -> bool { matches!( segment, "commit" | "rollback" | "flush" | "refresh" | "merge" | "expunge" | "expunge_all" | "close" | "begin" | "begin_nested" | "query" | "scalar" | "scalars" | "execute" | "exec" | "exec_driver_sql" | "add" | "add_all" | "delete" | "bulk_save_objects" | "bulk_insert_mappings" | "bulk_update_mappings" | "configure" | "info" ) } fn subscript_value_ref(node: Node<'_>, bytes: &[u8]) -> Option { let object = node .child_by_field_name("object") .or_else(|| node.child_by_field_name("value")) .or_else(|| node.child_by_field_name("operand")); let index = node .child_by_field_name("index") .or_else(|| node.child_by_field_name("subscript")); let (object, index) = if let (Some(object), Some(index)) = (object, index) { (object, index) } else { let children = named_children(node); match children.as_slice() { [object, index, ..] => (*object, *index), _ => return None, } }; let base_chain = member_chain(object, bytes); let base = if base_chain.is_empty() { text(object, bytes) } else { base_chain.join(".") }; let index_text = text(index, bytes); let field = Some(strip_quotes(&index_text)); let source_kind = if base_chain.is_empty() { ValueSourceKind::ArrayIndex } else { match classify_member_chain(&base_chain) { ValueSourceKind::MemberField => ValueSourceKind::ArrayIndex, other => other, } }; Some(ValueRef { source_kind, name: if source_kind == ValueSourceKind::ArrayIndex { format!("{base}[{index_text}]") } else { format!("{base}.{}", strip_quotes(&index_text)) }, base: Some(base), field, index: Some(index_text), span: span(node), }) } pub fn member_chain(node: Node<'_>, bytes: &[u8]) -> Vec { if node.kind() == "call" { // Ruby-style call: explicit receiver field + method/name field. if let Some(receiver) = node.child_by_field_name("receiver") { let mut chain = member_chain(receiver, bytes); let method = node .child_by_field_name("method") .or_else(|| node.child_by_field_name("name")) .map(|method| text(method, bytes)) .unwrap_or_default(); if !method.is_empty() { chain.push(method); } return chain; } // Python-style call: callable expression in the `function` field. // Recursing into it lets chained shapes like // `select(X).filter_by(...)` produce `["select()", "filter_by"]` // — the parent attribute branch appends `()` when its `object` // is a call, marking the intermediate-call shape so that // `receiver_is_chained_call` detects it. Closes airflow-style // SQLAlchemy queryset-builder chains that previously reduced to // bare `["filter_by"]`. if let Some(function) = node.child_by_field_name("function") { return member_chain(function, bytes); } // Bare-method fallback for parser shapes that expose method/name // without a receiver (Ruby implicit-self calls, etc.). let method = node .child_by_field_name("method") .or_else(|| node.child_by_field_name("name")) .map(|method| text(method, bytes)) .unwrap_or_default(); if !method.is_empty() { return vec![method]; } return Vec::new(); } if node.kind() == "method_invocation" || node.kind() == "method_call_expression" { let mut chain = node .child_by_field_name("object") .or_else(|| node.child_by_field_name("receiver")) .map(|object| member_chain(object, bytes)) .unwrap_or_default(); let method = node .child_by_field_name("name") .or_else(|| node.child_by_field_name("method")) .map(|method| text(method, bytes)) .unwrap_or_default(); if !method.is_empty() { chain.push(method); } return chain; } if node.kind() == "scope_resolution" { let mut chain = Vec::new(); if let Some(scope) = node.child_by_field_name("scope") { chain.extend(member_chain(scope, bytes)); } if let Some(name) = node.child_by_field_name("name") { let value = text(name, bytes); if !value.is_empty() { chain.push(value); } } return chain; } if node.kind() == "scoped_identifier" { let mut chain = Vec::new(); if let Some(path) = node.child_by_field_name("path") { chain.extend(member_chain(path, bytes)); } if let Some(name) = node.child_by_field_name("name") { let value = text(name, bytes); if !value.is_empty() { chain.push(value); } } return chain; } if !matches!( node.kind(), "member_expression" | "attribute" | "selector_expression" | "field_expression" | "field_access" ) { let value = text(node, bytes); return if value.is_empty() { Vec::new() } else { vec![value] }; } let mut chain = Vec::new(); if let Some(object) = node .child_by_field_name("object") .or_else(|| node.child_by_field_name("value")) .or_else(|| node.child_by_field_name("operand")) .or_else(|| node.child_by_field_name("argument")) { let object_is_call = matches!( object.kind(), "call" | "call_expression" | "method_invocation" | "method_call_expression" ); let mut sub = member_chain(object, bytes); // Mark intermediate-call segments with `()` so a downstream // chain like `select(X).filter_by(...)` becomes // `["select()", "filter_by"]` rather than `["select", "filter_by"]`. // `receiver_is_chained_call` consults the `(` to detect the // opaque-builder receiver. if object_is_call && sub.last().map(|s| !s.ends_with(')')).unwrap_or(false) && let Some(last) = sub.last_mut() { last.push_str("()"); } chain.extend(sub); } if let Some(property) = node .child_by_field_name("property") .or_else(|| node.child_by_field_name("attribute")) .or_else(|| node.child_by_field_name("field")) .or_else(|| node.child_by_field_name("name")) { let property_text = text(property, bytes); if !property_text.is_empty() { chain.push(property_text); } } chain } pub fn callee_name(node: Node<'_>, bytes: &[u8]) -> String { match node.kind() { "identifier" | "property_identifier" | "constant" | "field_identifier" => text(node, bytes), "member_expression" | "attribute" | "selector_expression" | "field_expression" | "scoped_identifier" | "field_access" | "scope_resolution" | "call" | "method_invocation" | "method_call_expression" => member_chain(node, bytes).join("."), _ => text(node, bytes), } } pub fn call_name(node: Node<'_>, bytes: &[u8]) -> String { if !matches!( node.kind(), "call_expression" | "call" | "method_invocation" | "method_call_expression" ) { return callee_name(node, bytes); } if let Some(function) = node.child_by_field_name("function") { return callee_name(function, bytes); } let method = node .child_by_field_name("method") .or_else(|| node.child_by_field_name("name")) .map(|child| text(child, bytes)) .unwrap_or_default(); let receiver = node .child_by_field_name("receiver") .or_else(|| node.child_by_field_name("object")) .or_else(|| node.child_by_field_name("scope")) .or_else(|| node.child_by_field_name("argument")) .map(|child| member_chain(child, bytes).join(".")) .filter(|value| !value.is_empty()); match (receiver, method.is_empty()) { (Some(receiver), false) => format!("{receiver}.{method}"), (_, false) => method, _ => text(node, bytes), } } pub fn member_target(node: Node<'_>, bytes: &[u8]) -> Option<(String, String)> { let object = node .child_by_field_name("object") .or_else(|| node.child_by_field_name("operand")) .or_else(|| node.child_by_field_name("value")) .or_else(|| node.child_by_field_name("receiver")) .or_else(|| node.child_by_field_name("argument"))?; let property = node .child_by_field_name("property") .or_else(|| node.child_by_field_name("field")) .or_else(|| node.child_by_field_name("attribute")) .or_else(|| node.child_by_field_name("name"))?; Some((text(object, bytes), text(property, bytes))) } pub fn http_method_from_name(name: &str) -> Option { match name.to_ascii_lowercase().as_str() { "get" => Some(HttpMethod::Get), "post" => Some(HttpMethod::Post), "put" => Some(HttpMethod::Put), "delete" => Some(HttpMethod::Delete), "patch" => Some(HttpMethod::Patch), "all" | "any" => Some(HttpMethod::All), "use" => Some(HttpMethod::Use), _ => None, } } pub fn join_route_paths(prefix: &str, route: &str) -> String { match (prefix.trim_end_matches('/'), route.trim_start_matches('/')) { ("", "") => "/".to_string(), ("", route) => format!("/{route}"), (prefix, "") => prefix.to_string(), (prefix, route) => format!("{prefix}/{route}"), } } fn call_receiver_subjects(node: Node<'_>, bytes: &[u8]) -> Vec { let mut subjects = Vec::new(); if let Some(receiver) = node .child_by_field_name("receiver") .or_else(|| node.child_by_field_name("object")) .or_else(|| node.child_by_field_name("argument")) .or_else(|| { node.child_by_field_name("function").and_then(|function| { function .child_by_field_name("object") .or_else(|| function.child_by_field_name("operand")) .or_else(|| function.child_by_field_name("argument")) }) }) { subjects.extend(extract_value_refs(receiver, bytes)); } subjects } pub fn string_literal_value(node: Node<'_>, bytes: &[u8]) -> Option { match node.kind() { "string" | "template_string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => Some(strip_quotes(&text(node, bytes))), _ => None, } } pub fn object_property_value<'tree>( node: Node<'tree>, bytes: &[u8], names: &[&str], ) -> Option> { if node.kind() != "object" { return None; } for child in named_children(node) { match child.kind() { "pair" => { let Some(key) = child.child_by_field_name("key") else { continue; }; let key_name = strip_quotes(&text(key, bytes)); if names.iter().any(|name| *name == key_name) { return child.child_by_field_name("value"); } } "shorthand_property_identifier" | "identifier" => { let key_name = text(child, bytes); if names.iter().any(|name| *name == key_name) { return Some(child); } } _ => {} } } None } pub fn decorated_definition_child(node: Node<'_>) -> Option> { node.child_by_field_name("definition") } pub fn function_definition_node(node: Node<'_>) -> Node<'_> { decorated_definition_child(node).unwrap_or(node) } pub fn named_children(node: Node<'_>) -> Vec> { let mut children = Vec::new(); for idx in 0..node.named_child_count() { if let Some(child) = node.named_child(idx as u32) { children.push(child); } } children } pub fn text(node: Node<'_>, bytes: &[u8]) -> String { node.utf8_text(bytes).unwrap_or("").to_string() } pub fn span(node: Node<'_>) -> (usize, usize) { (node.start_byte(), node.end_byte()) } fn dedup_value_refs(values: &mut Vec) { let mut deduped = Vec::new(); for value in values.drain(..) { if !deduped .iter() .any(|existing: &ValueRef| existing.name == value.name && existing.span == value.span) { deduped.push(value); } } *values = deduped; } fn lower_segments(chain: &[String]) -> Vec { chain .iter() .map(|segment| segment.to_ascii_lowercase()) .collect() } fn accessor_call_value_ref( node: Node<'_>, callee: &str, chain: &[String], args: &[Node<'_>], bytes: &[u8], ) -> Option { let method = bare_method_name(callee); let field = args .first() .and_then(|arg| string_literal_value(*arg, bytes)); let source_kind = match method { "Param" | "PathParam" => Some(ValueSourceKind::RequestParam), "Query" | "QueryParam" | "DefaultQuery" | "getParameter" | "getQueryString" => { Some(ValueSourceKind::RequestQuery) } "PostForm" | "FormValue" | "DefaultPostForm" => Some(ValueSourceKind::RequestBody), "Get" | "GetString" | "MustGet" | "getAttribute" => Some(ValueSourceKind::Session), _ if chain.first().is_some_and(|segment| { matches!( segment.to_ascii_lowercase().as_str(), "invitation" | "token" | "invite" ) }) && method.starts_with("get") && method.len() > 3 => { Some(ValueSourceKind::TokenField) } _ => None, }?; let normalized_field = field .or_else(|| { if source_kind == ValueSourceKind::TokenField && method.starts_with("get") { Some(method[3..].to_string()) } else { None } }) .map(|field| { let mut chars = field.chars(); let Some(first) = chars.next() else { return field; }; format!("{}{}", first.to_ascii_lowercase(), chars.as_str()) }) .filter(|field| !field.is_empty()); let base = match source_kind { ValueSourceKind::Session => Some("session".to_string()), _ if chain.len() > 1 => Some(chain[..chain.len() - 1].join(".")), _ => chain.first().cloned(), }; let name = if let Some(field) = normalized_field.as_deref() { match base.as_deref() { Some(base) if !base.is_empty() => format!("{base}.{field}"), _ => field.to_string(), } } else { callee.to_string() }; Some(ValueRef { source_kind, name, base, field: normalized_field, index: None, span: span(node), }) } #[cfg(test)] mod tests { use super::{is_owner_field_subject, is_self_actor_subject, is_self_actor_type_text}; use crate::auth_analysis::model::{ValueRef, ValueSourceKind}; #[test] fn is_self_actor_type_text_matches_known_wrappers() { // Tight exact set: bare names whose entire identity is "auth subject". assert!(is_self_actor_type_text("Authenticated")); assert!(is_self_actor_type_text("Identity")); assert!(is_self_actor_type_text("Principal")); // Structural form: User. assert!(is_self_actor_type_text("CurrentUser")); assert!(is_self_actor_type_text("SessionUser")); assert!(is_self_actor_type_text("AuthUser")); assert!(is_self_actor_type_text("AdminUser")); assert!(is_self_actor_type_text("AuthenticatedUser")); // Lemmy: LocalUserView (the real-repo motivation for the // structural recogniser). assert!(is_self_actor_type_text("LocalUserView")); assert!(is_self_actor_type_text("LocalUser")); assert!(is_self_actor_type_text("LoggedInUser")); assert!(is_self_actor_type_text("CurrentUserContext")); assert!(is_self_actor_type_text("AuthenticatedUserSession")); assert!(is_self_actor_type_text("SessionUserToken")); assert!(is_self_actor_type_text("AdminUserInfo")); // Qualified paths resolve to last segment. assert!(is_self_actor_type_text("crate::auth::CurrentUser")); assert!(is_self_actor_type_text("crate::user::LocalUserView")); assert!(is_self_actor_type_text("&CurrentUser")); assert!(is_self_actor_type_text("&mut AuthUser")); // Generic wrappers: match on the base segment. assert!(is_self_actor_type_text("CurrentUser")); assert!(is_self_actor_type_text("LocalUserView")); // Non-matches. // Bare `User`, too loose; commonly a deserialised payload type. assert!(!is_self_actor_type_text("User")); assert!(!is_self_actor_type_text("UserPreferences")); // `UserView` lacks an authority-prefix segment and stays a // payload-shaped name. assert!(!is_self_actor_type_text("UserView")); // No prefix vocabulary match, still rejected. assert!(!is_self_actor_type_text("PaymentUser")); // Wrong suffix vocabulary. assert!(!is_self_actor_type_text("CurrentUserPreferences")); // Framework extractors / unrelated types. assert!(!is_self_actor_type_text("Db")); assert!(!is_self_actor_type_text("Path<(i64,)>")); assert!(!is_self_actor_type_text("Json")); // `RequireAuth` / `RequireLogin` were dropped from the exact // set: they aren't `User`-bearing types and aren't // semantically the auth subject, they're guard markers. The // route-aware `axum::classify_guard_type` still treats them // as a login guard via the looser substring match. assert!(!is_self_actor_type_text("RequireAuth")); assert!(!is_self_actor_type_text("RequireLogin")); } fn ident(name: &str) -> ValueRef { ValueRef { source_kind: ValueSourceKind::Identifier, name: name.to_string(), base: None, field: None, index: None, span: (0, 0), } } fn member(base: &str, field: &str) -> ValueRef { ValueRef { source_kind: ValueSourceKind::MemberField, name: format!("{base}.{field}"), base: Some(base.to_string()), field: Some(field.to_string()), index: None, span: (0, 0), } } fn session(base: &str, field: &str) -> ValueRef { ValueRef { source_kind: ValueSourceKind::Session, name: format!("{base}.{field}"), base: Some(base.to_string()), field: Some(field.to_string()), index: None, span: (0, 0), } } #[test] fn is_owner_field_subject_matches_known_column_names() { assert!(is_owner_field_subject(&ident("owner_id"))); assert!(is_owner_field_subject(&ident("user_id"))); assert!(is_owner_field_subject(&ident("author_id"))); assert!(is_owner_field_subject(&ident("created_by"))); assert!(is_owner_field_subject(&member("row", "owner_id"))); assert!(!is_owner_field_subject(&ident("group_id"))); assert!(!is_owner_field_subject(&ident("doc_id"))); assert!(!is_owner_field_subject(&ident("user"))); } #[test] fn is_self_actor_subject_matches_known_self_shapes() { assert!(is_self_actor_subject(&member("user", "id"))); assert!(is_self_actor_subject(&member("current_user", "id"))); assert!(is_self_actor_subject(&session("req.user", "id"))); assert!(is_self_actor_subject(&session("ctx.session.user", "id"))); // Wrong field. assert!(!is_self_actor_subject(&member("user", "workspace_id"))); // Unknown base. assert!(!is_self_actor_subject(&member("target", "id"))); // Plain identifier, no base. assert!(!is_self_actor_subject(&ident("user_id"))); } #[test] fn type_text_is_trpc_options_matches_alias_and_inline_marker() { use super::type_text_is_trpc_options; use std::collections::HashSet; let mut aliases = HashSet::new(); aliases.insert("GetOptions".to_string()); aliases.insert("UpdateOptions".to_string()); // Inline `TrpcSessionUser` marker, accepted regardless of alias set. assert!(type_text_is_trpc_options( ": { ctx: { user: NonNullable } }", &aliases )); assert!(type_text_is_trpc_options( ": { user: TrpcSessionUser }", &HashSet::new() )); // Plain alias name match. assert!(type_text_is_trpc_options(": GetOptions", &aliases)); assert!(type_text_is_trpc_options("GetOptions", &aliases)); // Generic-wrapped alias. assert!(type_text_is_trpc_options(": Promise", &aliases)); assert!(type_text_is_trpc_options( ": NonNullable", &aliases )); // Negatives: alias not in set, no inline marker. assert!(!type_text_is_trpc_options(": OtherOptions", &aliases)); assert!(!type_text_is_trpc_options(": Promise", &aliases)); assert!(!type_text_is_trpc_options(": SomeRandomType", &aliases)); // Substring of a longer identifier must NOT match. assert!(!type_text_is_trpc_options(": MyGetOptionsX", &aliases)); } #[test] fn body_text_references_trpc_marker_recognises_known_markers() { use super::body_text_references_trpc_marker as bm; assert!(bm("type X = { user: NonNullable }")); assert!(bm("interface Ctx extends TRPCContext { ... }")); assert!(bm("type Ctx = ProtectedTRPCContext")); assert!(bm("export type Y = { ctx: TrpcContext }")); // Negatives. assert!(!bm("type X = { user: User }")); assert!(!bm("type X = SessionContext")); assert!(!bm("type X = { foo: SomeContext }")); } /// Pin the string-level analogue used by /// `value_is_self_scoped_session_id_chain`: it must accept the /// same set of session-scoped bases that `checks.rs:: /// is_self_scoped_session_base` accepts. When you add a new base /// to one, add it to the other and update both tests. #[test] fn is_self_scoped_session_base_text_matches_known_session_bases() { use super::is_self_scoped_session_base_text as bt; // Express / passport idioms. assert!(bt("req.user")); assert!(bt("request.user")); assert!(bt("req.session.user")); assert!(bt("req.session.currentUser")); // Bare session.user (Next.js / NextAuth idiom). assert!(bt("session.user")); assert!(bt("session.currentUser")); // Koa ctx.state / ctx.session. assert!(bt("ctx.session.user")); assert!(bt("ctx.state.user")); // Negatives, bases that are NOT canonical authed-user roots. assert!(!bt("req.body")); assert!(!bt("req.params")); assert!(!bt("ctx.user")); assert!(!bt("data.user")); assert!(!bt("user")); } /// Pins the bare-`session` chain narrowing: ORM session verbs /// (`commit` / `add` / `scalar` / `execute` / ...) are denylisted ///, they do not contribute auth Session evidence even though the /// chain root is the literal name `session`. Any other field- /// shaped second segment (`user`, `user_id`, `workspace_id`, /// `project_id`, `role`) keeps its Session classification so the /// stale-authorization / missing-ownership rules still see /// session-backed foreign ids. Closes the airflow pytest cluster /// where `session.commit()` made `unit_has_user_input_evidence` /// return true on test methods with no actual user input, while /// preserving the gin/rails/rocket stale-session fixtures whose /// session chains use foreign-id field accessors. #[test] fn matches_session_context_denylists_orm_session_verbs() { use super::matches_session_context as msc; let v = |chain: &[&str]| chain.iter().map(|s| s.to_string()).collect::>(); // Bare `session.`, auth context. assert!(msc(&v(&["session", "user"]))); assert!(msc(&v(&["session", "user_id"]))); assert!(msc(&v(&["session", "id"]))); assert!(msc(&v(&["session", "uid"]))); assert!(msc(&v(&["session", "email"]))); assert!(msc(&v(&["session", "currentUser"]))); // Foreign-id fields stored on the session, must remain auth // Session for the stale-authorization rule (gin/rails/rocket // fixtures). assert!(msc(&v(&["session", "workspace_id"]))); assert!(msc(&v(&["session", "project_id"]))); assert!(msc(&v(&["session", "role"]))); assert!(msc(&v(&["session", "currentWorkspaceID"]))); // SQLAlchemy verbs, NOT auth context. assert!(!msc(&v(&["session", "commit"]))); assert!(!msc(&v(&["session", "rollback"]))); assert!(!msc(&v(&["session", "scalar"]))); assert!(!msc(&v(&["session", "scalars"]))); assert!(!msc(&v(&["session", "add"]))); assert!(!msc(&v(&["session", "delete"]))); assert!(!msc(&v(&["session", "execute"]))); assert!(!msc(&v(&["session", "flush"]))); assert!(!msc(&v(&["session", "query"]))); assert!(!msc(&v(&["session", "merge"]))); assert!(!msc(&v(&["session", "refresh"]))); assert!(!msc(&v(&["session", "close"]))); // Bare `session` alone (length 1) stays auth, covers // subscript shapes like `session[:workspace_id]` whose object // is just the bare `session` identifier. assert!(msc(&v(&["session"]))); // `req.session.user`, unchanged: explicit auth-session base. assert!(msc(&v(&["req", "session", "user"]))); // `request.session`, unchanged: req/request-prefixed arm // recognises `session` regardless of any subsequent segment. assert!(msc(&v(&["request", "session"]))); // `current_user.`, unambiguous chain root, fires regardless. assert!(msc(&v(&["current_user", "id"]))); assert!(msc(&v(&["current_user", "preferences"]))); } /// Rust `parameter` nodes carry both a `pattern` field (the /// binding) and a `type` field (the annotation). Until the /// `parameter` arm in `collect_param_names`, the recursive default /// arm collected identifiers from the `type` subtree as well , /// turning `dst: &std::path::Path` into the param name set /// `["dst", "std", "path", "Path"]`. `path` then matched the /// framework-request-name allow-list in `is_external_input_param_name`, /// gating `unit_has_user_input_evidence` open on internal helpers /// that take a filesystem-path argument and re-firing /// `missing_ownership_check` at every id-shaped operation /// downstream. The arm restricts descent to the `pattern` field /// for Rust parameters so only true binding names reach /// `unit.params`. Real-repo motivation: /// meilisearch/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks /// (`dst: &std::path::Path` made every `db.delete(task.uid)` call /// fire missing-ownership-check). Same shape would also fire for /// Rust functions taking `req: &Request<...>`, /// `ctx: &Context`, etc., where the type tail matches the /// framework name list but the binding is unrelated. #[test] fn collect_param_names_rust_skips_type_segment_idents() { use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE)) .unwrap(); let src = b"unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path, sz: usize) {}"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = tree .root_node() .child(0) .expect("source_file should have a function"); let params = function_params(func, src); assert_eq!( params, vec!["tasks".to_string(), "dst".to_string(), "sz".to_string()], "type-segment idents (`std`, `path`, `Path`) must NOT pollute the param-name set" ); } #[test] fn collect_param_names_rust_handles_request_typed_params() { // `req: &Request`, `Request` and `Body` lowercase to // `request` and `body`, both in the framework name list. The // binding `req` is the only legitimate param name. use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE)) .unwrap(); let src = b"fn handle(req: &Request, state: AppState) -> Response { todo!() }"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = tree.root_node().child(0).expect("function"); let params = function_params(func, src); assert_eq!( params, vec!["req".to_string(), "state".to_string()], "type idents `Request`/`Body`/`Response`/`AppState` must not leak as params" ); } #[test] fn collect_param_names_rust_destructured_pattern_picks_up_bindings() { // Tuple-pattern binding: `((a, b)): (u32, u32)` should yield // both bound names from the pattern subtree, but NOT the type // segment `u32`. use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE)) .unwrap(); let src = b"fn split((a, b): (u32, u32)) {}"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = tree.root_node().child(0).expect("function"); let params = function_params(func, src); assert!(params.contains(&"a".to_string()), "got {:?}", params); assert!(params.contains(&"b".to_string()), "got {:?}", params); assert!(!params.contains(&"u32".to_string()), "got {:?}", params); } /// Go's stdlib `context.Context` is the canonical first-param of /// most functions but is NOT user input ─ it carries deadline / /// cancellation / value-bag, never an HTTP request. The Go arm of /// `collect_param_names` drops the param entirely when its type is /// `context.Context` so the bare name `ctx` doesn't trip the /// framework-request-name allow-list. /// /// Real-repo motivation: /// `/Users/elipeter/oss/gitea/services/packages/packages.go::AddFileToExistingPackage` /// and ~1900 sibling helpers passed /// `unit_has_user_input_evidence` solely on this param. #[test] fn collect_param_names_go_drops_context_context_param() { use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE)) .unwrap(); let src = b"package x\nfunc GetPackage(ctx context.Context, info *PackageInfo) {}\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = (0..tree.root_node().named_child_count()) .filter_map(|i| tree.root_node().named_child(i as u32)) .find(|n| n.kind() == "function_declaration") .expect("file should have a function_declaration"); let params = function_params(func, src); assert!( !params.contains(&"ctx".to_string()), "ctx context.Context must be dropped: got {:?}", params ); assert!( !params.contains(&"context".to_string()) && !params.contains(&"Context".to_string()), "type-segment idents must not leak: got {:?}", params ); assert!( params.contains(&"info".to_string()), "non-context typed params keep their name: got {:?}", params ); assert!( !params.contains(&"PackageInfo".to_string()), "type-segment idents must not leak from non-context params either: got {:?}", params ); } /// Per-framework `*context.APIContext` (gitea), `*gin.Context`, /// `iris.Context`, `*fiber.Ctx` and similar ARE user input ─ the /// type-aware filter must NOT drop these. The non-stdlib package /// name distinguishes them from the stdlib `context.Context`. #[test] fn collect_param_names_go_keeps_framework_context_param() { use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE)) .unwrap(); let src = b"package x\nfunc Handle(ctx *context.APIContext) {}\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = (0..tree.root_node().named_child_count()) .filter_map(|i| tree.root_node().named_child(i as u32)) .find(|n| n.kind() == "function_declaration") .expect("file should have a function_declaration"); let params = function_params(func, src); assert!( params.contains(&"ctx".to_string()), "framework-bearing ctx must survive: got {:?}", params ); } /// Multiple-name single-type Go declarations (`a, b int`) must /// surface every name. #[test] fn collect_param_names_go_multi_name_param_decl() { use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE)) .unwrap(); let src = b"package x\nfunc Add(a, b int, ctx context.Context) {}\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = (0..tree.root_node().named_child_count()) .filter_map(|i| tree.root_node().named_child(i as u32)) .find(|n| n.kind() == "function_declaration") .expect("file should have a function_declaration"); let params = function_params(func, src); assert!(params.contains(&"a".to_string()), "got {:?}", params); assert!(params.contains(&"b".to_string()), "got {:?}", params); assert!(!params.contains(&"ctx".to_string()), "got {:?}", params); assert!(!params.contains(&"int".to_string()), "got {:?}", params); } /// DAO-helper shape (`func GetRunByRepoAndID(ctx context.Context, /// repoID, runID int64)`): id-like names with bounded primitive /// scalar types are caller-passed scope identifiers, NOT user /// input. For non-route units (`function_params`, /// `include_id_like_typed = false`), they must NOT lift into /// `unit.params` — that would gate `unit_has_user_input_evidence` /// open on every internal Go ORM helper and over-fire /// `go.auth.missing_ownership_check`. /// /// Real-repo trigger: /// `/Users/elipeter/oss/gitea/models/actions/run_job.go:: /// GetRunByRepoAndID` and ~957 sibling helpers across gitea's /// `models/...` DAO layer. Same shape over-fires on minio's /// `cmd/iam-*-store` and is the canonical Go ORM helper signature. #[test] fn collect_param_names_go_drops_id_like_scalar_params_for_dao_helper() { use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE)) .unwrap(); let src = b"package x\nfunc GetRunByRepoAndID(ctx context.Context, repoID, runID int64) {}\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = (0..tree.root_node().named_child_count()) .filter_map(|i| tree.root_node().named_child(i as u32)) .find(|n| n.kind() == "function_declaration") .expect("file should have a function_declaration"); let params = function_params(func, src); assert!( !params.contains(&"ctx".to_string()), "context.Context dropped: got {:?}", params ); assert!( !params.contains(&"repoID".to_string()), "id-like scalar param dropped for DAO helper: got {:?}", params ); assert!( !params.contains(&"runID".to_string()), "id-like scalar param dropped for DAO helper: got {:?}", params ); assert!( params.is_empty(), "no params survive on DAO-shape helper: got {:?}", params ); } /// Conservative scope: only **bounded primitive scalar** types /// trigger the id-like drop. Pointer / struct / slice types are /// payload shapes that may or may not be user-controlled — leave /// them alone so non-DAO helpers retain their evidence. #[test] fn collect_param_names_go_keeps_id_like_pointer_struct_param() { use super::function_params; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE)) .unwrap(); // `runnerID *Runner` — id-like name, but the type is a pointer // (payload shape), so the param name must survive. let src = b"package x\nfunc UpdateRunner(ctx context.Context, runnerID *Runner) {}\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = (0..tree.root_node().named_child_count()) .filter_map(|i| tree.root_node().named_child(i as u32)) .find(|n| n.kind() == "function_declaration") .expect("file should have a function_declaration"); let params = function_params(func, src); assert!( params.contains(&"runnerID".to_string()), "id-like pointer param survives: got {:?}", params ); } /// Route handlers go through `function_params_route_handler` /// (`include_id_like_typed = true`) — the id-like-scalar filter /// must NOT trip there. Path-param-on-REST-route is *the* /// primary user input and middleware-injected auth checks rely on /// these names being present in `unit.params`. #[test] fn collect_param_names_go_route_handler_keeps_id_like_scalar_params() { use super::function_params_route_handler; let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE)) .unwrap(); let src = b"package x\nfunc GetRepo(ctx context.Context, repoID int64) {}\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); let func = (0..tree.root_node().named_child_count()) .filter_map(|i| tree.root_node().named_child(i as u32)) .find(|n| n.kind() == "function_declaration") .expect("file should have a function_declaration"); let params = function_params_route_handler(func, src); assert!( params.contains(&"repoID".to_string()), "id-like scalar param kept for route handler: got {:?}", params ); } /// Pin `member_chain` output for the SQLAlchemy queryset chain /// `select(C).filter_by(id=x)`. Pre-fix, Python `call` nodes use a /// `function` field (not `receiver`/`method`) so the recursive call /// arm returned an empty Vec, reducing the chain to bare /// `["filter_by"]`. The fix: (1) traverse `function` field in the /// `call` arm; (2) the parent attribute branch appends `()` to last /// segment when its `object` is a call. Together they produce /// `["select()", "filter_by"]` so `receiver_is_chained_call` detects /// the intermediate-call shape. #[test] fn member_chain_python_select_filter_by_chain_marks_intermediate_call() { use super::{callee_name, member_chain}; use tree_sitter::{Node, Parser}; let mut parser = Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE)) .unwrap(); let src = b"x = select(C).filter_by(id=u)\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); fn find_outer_call<'a>(node: Node<'a>) -> Option> { if node.kind() == "call" && let Some(function) = node.child_by_field_name("function") && function.kind() == "attribute" { return Some(node); } for i in 0..node.named_child_count() { if let Some(child) = node.named_child(i as u32) && let Some(found) = find_outer_call(child) { return Some(found); } } None } let outer_call = find_outer_call(tree.root_node()) .expect("expected outer call node `select(C).filter_by(id=u)`"); assert_eq!( member_chain(outer_call, src), vec!["select()".to_string(), "filter_by".to_string()], "Python chained call must produce `[select(), filter_by]` so receiver_is_chained_call detects the intermediate-call shape", ); assert_eq!( callee_name(outer_call, src), "select().filter_by".to_string(), "callee_name joins the chain with `.`", ); } /// Regression guard: simple Python `obj.method(arg)` callees keep /// their previous `member_chain` output (`["obj", "method"]`). The /// `function`-field traversal must not pollute non-chained shapes. #[test] fn member_chain_python_simple_attribute_call_unchanged() { use super::callee_name; use tree_sitter::{Node, Parser}; let mut parser = Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE)) .unwrap(); let src = b"x = obj.method(a)\n"; let tree = parser.parse(src.as_slice(), None).unwrap(); fn find_call<'a>(node: Node<'a>) -> Option> { if node.kind() == "call" { return Some(node); } for i in 0..node.named_child_count() { if let Some(child) = node.named_child(i as u32) && let Some(found) = find_call(child) { return Some(found); } } None } let call_node = find_call(tree.root_node()).expect("expected `obj.method(a)` call"); assert_eq!( callee_name(call_node, src), "obj.method".to_string(), "simple attribute call must not pick up `()` markers", ); } mod ruby_visibility_and_callbacks { use super::super::{ RubyVisibility, ruby_callback_target_names, ruby_method_is_callback_or_private, ruby_method_visibility, }; use tree_sitter::{Node, Parser, Tree}; fn parse(src: &str) -> (Tree, Vec) { let mut parser = Parser::new(); parser .set_language(&tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE)) .unwrap(); let bytes = src.as_bytes().to_vec(); let tree = parser.parse(bytes.as_slice(), None).expect("parse"); (tree, bytes) } fn find_class_body<'a>(node: Node<'a>) -> Option> { if node.kind() == "class" { return node.child_by_field_name("body"); } for idx in 0..node.named_child_count() { let Some(child) = node.named_child(idx as u32) else { continue; }; if let Some(body) = find_class_body(child) { return Some(body); } } None } #[test] fn bare_private_directive_marks_subsequent_methods_private() { let src = "class C\n def public_a; end\n private\n def helper_b; end\n def helper_c; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let vis = ruby_method_visibility(body, &bytes); assert_eq!(vis.get("public_a").copied(), Some(RubyVisibility::Public)); assert_eq!(vis.get("helper_b").copied(), Some(RubyVisibility::Private)); assert_eq!(vis.get("helper_c").copied(), Some(RubyVisibility::Private)); } #[test] fn targeted_private_marks_only_named_methods() { let src = "class C\n def a; end\n def b; end\n def c; end\n private :a, :c\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let vis = ruby_method_visibility(body, &bytes); assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private)); assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public)); assert_eq!(vis.get("c").copied(), Some(RubyVisibility::Private)); } #[test] fn public_directive_re_opens_visibility() { let src = "class C\n private\n def a; end\n public\n def b; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let vis = ruby_method_visibility(body, &bytes); assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private)); assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public)); } #[test] fn protected_directive_recognised() { let src = "class C\n protected\n def helper; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let vis = ruby_method_visibility(body, &bytes); assert_eq!(vis.get("helper").copied(), Some(RubyVisibility::Protected)); } #[test] fn before_action_collects_callback_target_names() { let src = "class C\n before_action :set_account\n before_action :set_user, only: [:show, :update]\n def show; end\n def set_account; end\n def set_user; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let callbacks = ruby_callback_target_names(body, &bytes); assert!(callbacks.contains("set_account")); assert!(callbacks.contains("set_user")); // `only:` / `except:` keys must not pollute the target set. assert!(!callbacks.contains("show")); assert!(!callbacks.contains("update")); assert!(!callbacks.contains("only")); } #[test] fn before_action_block_form_yields_no_targets() { // Block form `before_action do ... end` carries no symbol arg. let src = "class C\n before_action do\n require_login\n end\n def show; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let callbacks = ruby_callback_target_names(body, &bytes); assert!(callbacks.is_empty(), "got {:?}", callbacks); } #[test] fn skip_before_action_target_collected() { let src = "class C\n skip_before_action :authenticate_user!, only: [:index]\n def index; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let callbacks = ruby_callback_target_names(body, &bytes); assert!(callbacks.contains("authenticate_user!")); } #[test] fn legacy_before_filter_alias_collected() { let src = "class C\n before_filter :legacy_helper\n def legacy_helper; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let callbacks = ruby_callback_target_names(body, &bytes); assert!(callbacks.contains("legacy_helper")); } #[test] fn callback_target_or_private_predicate_combines_layers() { // Private method → suppressed. // Public callback target → suppressed. // Public non-callback method → kept. let src = "class C\n before_action :set_account\n def show; end\n def set_account; end\n private\n def helper; end\nend\n"; let (tree, bytes) = parse(src); let body = find_class_body(tree.root_node()).expect("body"); let visibility = ruby_method_visibility(body, &bytes); let callbacks = ruby_callback_target_names(body, &bytes); assert!(!ruby_method_is_callback_or_private( "show", &visibility, &callbacks )); assert!(ruby_method_is_callback_or_private( "set_account", &visibility, &callbacks )); assert!(ruby_method_is_callback_or_private( "helper", &visibility, &callbacks )); } } #[test] fn trpc_options_destructure_param_seeds_self_scoped_session_base() { // Cal.com-shaped TRPC handler: parameter is a destructured // options alias whose `ctx` field's nested type literal // references `TrpcSessionUser`. `FileMeta::scan` adds // `GetOptions` to `trpc_alias_names` (body-text marker hit); // `collect_trpc_ctx_param` then fires on the // `required_parameter` and seeds `ctx.user` into the unit's // `self_scoped_session_bases`. let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from( tree_sitter_typescript::LANGUAGE_TYPESCRIPT, )) .unwrap(); let src = br#" type TrpcSessionUser = { id: number }; type GetOptions = { ctx: { user: NonNullable }; input: { id: number }; }; export const handleGet = async ({ ctx, input }: GetOptions) => { return prisma.booking.findFirst({ where: { id: input.id, userId: ctx.user.id } }); }; "#; let tree = parser.parse(src.as_slice(), None).unwrap(); let meta = super::FileMeta::scan(tree.root_node(), src); assert!( meta.trpc_alias_names.contains("GetOptions"), "trpc_alias_names missing GetOptions: {:?}", meta.trpc_alias_names ); let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled(); let mut model = crate::auth_analysis::model::AuthorizationModel::default(); super::collect_top_level_units(tree.root_node(), src, &rules, &mut model); let unit = model .units .iter() .find(|u| u.name.as_deref() == Some("handleGet")) .expect("handleGet unit"); assert!( unit.self_scoped_session_bases.contains("ctx.user"), "self_scoped_session_bases missing ctx.user: {:?}", unit.self_scoped_session_bases ); } /// Pin the JS/TS post-fetch ownership-equality recogniser added in /// session 0011. The `if_statement` arm of `collect_unit_state` /// must dispatch to `detect_ownership_equality_check` (previously /// only `if_expression` did), the strict `!==` operator must be /// recognised as inequality, the framework denial helper /// `notFound()` must count as an early-exit witness, and the JS/TS /// `variable_declarator` arm must populate `row_population_data` /// so the synthetic `Ownership` AuthCheck attributes back to the /// row's let line. #[test] fn detect_post_fetch_ownership_jsts_with_strict_neq_and_denial_call() { let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from( tree_sitter_typescript::LANGUAGE_TYPESCRIPT, )) .unwrap(); let src = br#" declare class Repo { findById(id: string): Promise<{ userId: number }>; } declare function getServerSession(): Promise<{ user?: { id: number } } | null>; declare function notFound(): never; export async function handleGet({ id }: { id: string }) { const session = await getServerSession(); if (!session?.user?.id) return null; const repo: Repo = new Repo(); const webhook = await repo.findById(id); if (webhook.userId !== session.user.id) { notFound(); } return webhook; } "#; let tree = parser.parse(src.as_slice(), None).unwrap(); let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled(); let mut model = crate::auth_analysis::model::AuthorizationModel::default(); super::collect_top_level_units(tree.root_node(), src, &rules, &mut model); let unit = model .units .iter() .find(|u| u.name.as_deref() == Some("handleGet")) .expect("handleGet unit"); let webhook_pop = unit .row_population_data .get("webhook") .expect("collect_row_population must populate `webhook` from variable_declarator"); // The `let webhook = await repo.findById(id)` line should // anchor at the call site, not the let line. In this fixture // both are on the same line so the back-dating is invisible // here, the assertion is that the entry exists. assert!(webhook_pop.0 > 0); let owner_check = unit .auth_checks .iter() .find(|c| matches!(c.kind, super::AuthCheckKind::Ownership)) .expect("ownership-equality detector must emit an Ownership AuthCheck"); let owner_subject = owner_check .subjects .iter() .find(|s| s.field.as_deref() == Some("userId")) .expect("Ownership AuthCheck must carry the owner field subject"); assert_eq!( owner_subject.base.as_deref(), Some("webhook"), "owner subject base must be the row var: {:?}", owner_subject ); } /// Pin the NextAuth Adapter factory recogniser added in session /// 0030. `body_returns_nextauth_options` must flip on for the /// cal.com `function CalComAdapter(client): Adapter { return { /// createUser, getUser, getUserByAccount, ... } }` shape so that /// `is_nextauth_callback_unit` suppresses the missing-ownership /// rule across the inner Adapter methods (their operations /// accumulate onto the outer factory's unit). #[test] fn nextauth_adapter_factory_flags_outer_unit() { let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from( tree_sitter_typescript::LANGUAGE_TYPESCRIPT, )) .unwrap(); let src = br#" declare const prismaClient: any; export default function CalComAdapter(client: any) { return { createUser: async (data: { email: string }) => { const user = await prismaClient.user.create({ data }); return user; }, getUser: async (id: string) => { const user = await prismaClient.user.findUnique({ where: { id } }); return user; }, async getUserByAccount(providerAccountId: { provider: string; providerAccountId: string }) { const account = await prismaClient.account.findUnique({ where: { provider_providerAccountId: providerAccountId }, select: { user: true }, }); return account?.user ?? null; }, createVerificationToken: async (data: any) => prismaClient.verificationToken.create({ data }), useVerificationToken: async (identifier: any) => prismaClient.verificationToken.delete({ where: identifier }), linkAccount: async (account: any) => prismaClient.account.create({ data: account }), unlinkAccount: async (providerAccountId: any) => prismaClient.account.delete({ where: providerAccountId }), }; } "#; let tree = parser.parse(src.as_slice(), None).unwrap(); let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled(); let mut model = crate::auth_analysis::model::AuthorizationModel::default(); super::collect_top_level_units(tree.root_node(), src, &rules, &mut model); let unit = model .units .iter() .find(|u| u.name.as_deref() == Some("CalComAdapter")) .expect("CalComAdapter unit"); assert!( unit.is_nextauth_options_factory, "Adapter factory must set is_nextauth_options_factory: \ {:?}", unit.name ); } /// Negative: a generic CRUD repo with `createUser` / `getUser` / /// `updateUser` / `deleteUser` (no Adapter-distinctive method /// names) must NOT be flagged as a NextAuth Adapter. Without the /// distinctive-name gate any plain user repo would suppress /// missing-ownership findings. #[test] fn nextauth_adapter_recogniser_rejects_generic_crud_repo() { let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter::Language::from( tree_sitter_typescript::LANGUAGE_TYPESCRIPT, )) .unwrap(); let src = br#" declare const db: any; export function makeUserRepo() { return { createUser: async (data: any) => db.user.create({ data }), getUser: async (id: string) => db.user.findUnique({ where: { id } }), updateUser: async (id: string, data: any) => db.user.update({ where: { id }, data }), deleteUser: async (id: string) => db.user.delete({ where: { id } }), }; } "#; let tree = parser.parse(src.as_slice(), None).unwrap(); let rules = crate::auth_analysis::config::AuthAnalysisRules::disabled(); let mut model = crate::auth_analysis::model::AuthorizationModel::default(); super::collect_top_level_units(tree.root_node(), src, &rules, &mut model); let unit = model .units .iter() .find(|u| u.name.as_deref() == Some("makeUserRepo")) .expect("makeUserRepo unit"); assert!( !unit.is_nextauth_options_factory, "generic CRUD repo must NOT be flagged as Adapter: {:?}", unit.name ); } }