mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
5890 lines
226 KiB
Rust
5890 lines
226 KiB
Rust
use crate::auth_analysis::config::{AuthAnalysisRules, canonical_name, matches_name, strip_quotes};
|
||
use crate::auth_analysis::model::{
|
||
AnalysisUnit, AnalysisUnitKind, AuthCheck, AuthCheckKind, AuthorizationModel, CallSite,
|
||
Framework, HttpMethod, OperationKind, RouteRegistration, SensitiveOperation, SinkClass,
|
||
ValueRef, ValueSourceKind,
|
||
};
|
||
use crate::labels::bare_method_name;
|
||
use std::collections::{HashMap, HashSet};
|
||
use std::path::Path;
|
||
use tree_sitter::Node;
|
||
|
||
pub fn collect_top_level_units(
|
||
root: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
model: &mut AuthorizationModel,
|
||
) {
|
||
let file_meta = FileMeta::scan(root, bytes);
|
||
for idx in 0..root.named_child_count() {
|
||
let Some(child) = root.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
collect_top_level_from_node(child, bytes, rules, model, &file_meta);
|
||
}
|
||
}
|
||
|
||
fn collect_top_level_from_node(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
model: &mut AuthorizationModel,
|
||
file_meta: &FileMeta,
|
||
) {
|
||
match node.kind() {
|
||
"function_declaration"
|
||
| "function_definition"
|
||
| "method_declaration"
|
||
| "function_item"
|
||
| "method"
|
||
| "singleton_method" => {
|
||
model.units.push(build_function_unit_with_meta(
|
||
node,
|
||
AnalysisUnitKind::Function,
|
||
function_name(node, bytes),
|
||
bytes,
|
||
rules,
|
||
Some(file_meta),
|
||
));
|
||
}
|
||
"decorated_definition"
|
||
if decorated_definition_child(node)
|
||
.is_some_and(|definition| definition.kind() == "function_definition") =>
|
||
{
|
||
// Celery / Airflow / DRF background-task decorators
|
||
// (`@instrumented_task`, `@shared_task`, `@app.task`,
|
||
// `@celery.task`, `@beat.shared_task`, `@periodic_task`,
|
||
// `@receiver`) mark a function as an internal scheduled
|
||
// job, not a user-reachable handler. Any id-shaped
|
||
// parameter name (`uuid: str`, `release_id: int`,
|
||
// `voucher_code_ids: list[int]`) refers to an
|
||
// internally-generated identifier, by construction the
|
||
// task is invoked from `task.delay(...)` in already-auth-
|
||
// checked code, never from an HTTP request directly.
|
||
//
|
||
// Skipping the unit at extract time stops the ownership /
|
||
// token-override / partial-batch-authorization rules from
|
||
// examining its operations. Real route handlers go
|
||
// through the framework extractors (Flask /
|
||
// FastAPI / Django / DRF) and re-add a `RouteHandler`
|
||
// unit with auth_checks injected from the route
|
||
// decorator, so this skip never hides a real handler.
|
||
if python_decorated_definition_is_background_task(node, bytes) {
|
||
return;
|
||
}
|
||
model.units.push(build_function_unit_with_meta(
|
||
node,
|
||
AnalysisUnitKind::Function,
|
||
function_name(node, bytes),
|
||
bytes,
|
||
rules,
|
||
Some(file_meta),
|
||
));
|
||
}
|
||
"lexical_declaration" | "variable_declaration" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if child.kind() == "variable_declarator"
|
||
&& let Some(unit) =
|
||
function_unit_from_var_declarator(child, bytes, rules, Some(file_meta))
|
||
{
|
||
model.units.push(unit);
|
||
}
|
||
}
|
||
}
|
||
"export_statement" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if child.is_named() {
|
||
collect_top_level_from_node(child, bytes, rules, model, file_meta);
|
||
}
|
||
}
|
||
}
|
||
"program" | "source_file" | "module" | "class_declaration" | "class_body"
|
||
| "body_statement" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
collect_top_level_from_node(child, bytes, rules, model, file_meta);
|
||
}
|
||
}
|
||
// Ruby `class Foo; ... end`. Gate method descent through the
|
||
// visibility / callback-target filter so private helpers and
|
||
// `before_action :foo`-style callback targets are not emitted
|
||
// as `Function` units (the upstream cause of
|
||
// `rb.auth.missing_ownership_check` FPs on `set_X` row-fetch
|
||
// helpers in mastodon / diaspora controllers). Non-method
|
||
// class-body children (nested `class` / `module` /
|
||
// `singleton_method`) still recurse normally.
|
||
"class" => {
|
||
let body = node.child_by_field_name("body");
|
||
let visibility = body
|
||
.map(|b| ruby_method_visibility(b, bytes))
|
||
.unwrap_or_default();
|
||
let callbacks = body
|
||
.map(|b| ruby_callback_target_names(b, bytes))
|
||
.unwrap_or_default();
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if Some(child) == body {
|
||
for body_idx in 0..child.named_child_count() {
|
||
let Some(grand) = child.named_child(body_idx as u32) else {
|
||
continue;
|
||
};
|
||
if grand.kind() == "method" {
|
||
let name = function_name(grand, bytes).unwrap_or_default();
|
||
if !name.is_empty()
|
||
&& ruby_method_is_callback_or_private(
|
||
&name,
|
||
&visibility,
|
||
&callbacks,
|
||
)
|
||
{
|
||
continue;
|
||
}
|
||
}
|
||
collect_top_level_from_node(grand, bytes, rules, model, file_meta);
|
||
}
|
||
} else {
|
||
collect_top_level_from_node(child, bytes, rules, model, file_meta);
|
||
}
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
/// Visibility level of a Ruby method, as set by the `public` /
/// `protected` / `private` directives.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RubyVisibility {
    /// Set by the `public` directive; also the level
    /// `ruby_method_visibility` starts from.
    Public,
    /// Set by the `protected` directive.
    Protected,
    /// Set by the `private` directive.
    Private,
}
|
||
|
||
/// Walk a Ruby class body in source order and attribute each method
|
||
/// definition's visibility, mirroring Ruby's `private` / `protected` /
|
||
/// `public` directive semantics.
|
||
///
|
||
/// Two directive forms are recognised:
|
||
/// 1. **Bare** (`private`). Tree-sitter parses these as a top-level
|
||
/// `(identifier "private")` sibling. Toggles default visibility
|
||
/// for every subsequent method.
|
||
/// 2. **Targeted** (`private :foo, :bar`). Parsed as
|
||
/// `(call method:identifier arguments:argument_list ...)`.
|
||
/// Explicitly marks the named methods; does not change default.
|
||
pub fn ruby_method_visibility(
|
||
body: Node<'_>,
|
||
bytes: &[u8],
|
||
) -> std::collections::HashMap<String, RubyVisibility> {
|
||
use crate::auth_analysis::config::matches_name;
|
||
use std::collections::HashMap;
|
||
|
||
let mut map: HashMap<String, RubyVisibility> = HashMap::new();
|
||
let mut current = RubyVisibility::Public;
|
||
for child in named_children(body) {
|
||
match child.kind() {
|
||
"identifier" => {
|
||
if let Some(vis) = ruby_visibility_for_directive(text(child, bytes).trim()) {
|
||
current = vis;
|
||
}
|
||
}
|
||
"call" => {
|
||
let callee_full = call_name(child, bytes);
|
||
let callee = bare_method_name(&callee_full);
|
||
let Some(target_vis) = ruby_visibility_for_directive(callee) else {
|
||
continue;
|
||
};
|
||
let arguments = child.child_by_field_name("arguments");
|
||
let args: Vec<Node<'_>> = arguments
|
||
.map(|node| named_children(node))
|
||
.unwrap_or_default();
|
||
if args.is_empty() {
|
||
current = target_vis;
|
||
continue;
|
||
}
|
||
let mut targeted_any = false;
|
||
for arg in args {
|
||
for name in ruby_symbol_names(arg, bytes) {
|
||
if name.is_empty() {
|
||
continue;
|
||
}
|
||
map.insert(name, target_vis);
|
||
targeted_any = true;
|
||
}
|
||
if arg.kind() == "method"
|
||
&& let Some(name_node) = arg.child_by_field_name("name")
|
||
{
|
||
let name = text(name_node, bytes);
|
||
if !name.is_empty() {
|
||
map.insert(name, target_vis);
|
||
targeted_any = true;
|
||
}
|
||
}
|
||
}
|
||
if !targeted_any {
|
||
current = target_vis;
|
||
}
|
||
let _ = matches_name;
|
||
}
|
||
"method" => {
|
||
if let Some(name_node) = child.child_by_field_name("name") {
|
||
let name = text(name_node, bytes);
|
||
if !name.is_empty() {
|
||
map.insert(name, current);
|
||
}
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
map
|
||
}
|
||
|
||
fn ruby_visibility_for_directive(name: &str) -> Option<RubyVisibility> {
|
||
match name {
|
||
"private" => Some(RubyVisibility::Private),
|
||
"protected" => Some(RubyVisibility::Protected),
|
||
"public" => Some(RubyVisibility::Public),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
/// Collect names of methods registered as Rails filter callbacks
|
||
/// (`before_action`, `after_action`, `around_action`, with their
|
||
/// `prepend_*` / `append_*` / `skip_*` siblings, plus the legacy
|
||
/// `*_filter` aliases). Such methods may be public but are invoked
|
||
/// only as part of an action's request cycle, never as standalone
|
||
/// routes — so emitting them as units produces spurious
|
||
/// `missing_ownership_check` flags on the helper body's row fetches.
|
||
pub fn ruby_callback_target_names(
|
||
body: Node<'_>,
|
||
bytes: &[u8],
|
||
) -> std::collections::HashSet<String> {
|
||
use std::collections::HashSet;
|
||
|
||
let mut targets: HashSet<String> = HashSet::new();
|
||
for child in named_children(body) {
|
||
if child.kind() != "call" {
|
||
continue;
|
||
}
|
||
let callee_full = call_name(child, bytes);
|
||
let callee = bare_method_name(&callee_full);
|
||
if !ruby_is_filter_callback_directive(callee) {
|
||
continue;
|
||
}
|
||
let Some(arguments) = child.child_by_field_name("arguments") else {
|
||
continue;
|
||
};
|
||
for arg in named_children(arguments) {
|
||
if arg.kind() == "pair" {
|
||
continue;
|
||
}
|
||
for name in ruby_symbol_names(arg, bytes) {
|
||
if name.is_empty() {
|
||
continue;
|
||
}
|
||
targets.insert(name);
|
||
}
|
||
}
|
||
}
|
||
targets
|
||
}
|
||
|
||
/// True when `name` is a Rails filter-callback directive: an optional
/// `prepend_` / `append_` / `skip_` prefix, then `before` / `after` /
/// `around`, then `_action` or the legacy `_filter` suffix.
fn ruby_is_filter_callback_directive(name: &str) -> bool {
    // Strip at most one modifier prefix; the remainder must be exactly
    // `<phase>_<suffix>`.
    let base = ["prepend_", "append_", "skip_"]
        .iter()
        .find_map(|prefix| name.strip_prefix(prefix))
        .unwrap_or(name);
    let Some((phase, suffix)) = base.split_once('_') else {
        return false;
    };
    matches!(phase, "before" | "after" | "around") && matches!(suffix, "action" | "filter")
}
|
||
|
||
fn ruby_symbol_names(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||
match node.kind() {
|
||
"simple_symbol" | "hash_key_symbol" | "identifier" | "string" => {
|
||
vec![
|
||
strip_quotes(&text(node, bytes))
|
||
.trim_start_matches(':')
|
||
.to_string(),
|
||
]
|
||
}
|
||
"array" => named_children(node)
|
||
.into_iter()
|
||
.flat_map(|child| ruby_symbol_names(child, bytes))
|
||
.collect(),
|
||
_ => Vec::new(),
|
||
}
|
||
}
|
||
|
||
pub fn ruby_method_is_callback_or_private(
|
||
name: &str,
|
||
visibility: &std::collections::HashMap<String, RubyVisibility>,
|
||
callbacks: &std::collections::HashSet<String>,
|
||
) -> bool {
|
||
let vis = visibility
|
||
.get(name)
|
||
.copied()
|
||
.unwrap_or(RubyVisibility::Public);
|
||
if vis != RubyVisibility::Public {
|
||
return true;
|
||
}
|
||
callbacks.contains(name)
|
||
}
|
||
|
||
fn function_unit_from_var_declarator(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
file_meta: Option<&FileMeta>,
|
||
) -> Option<AnalysisUnit> {
|
||
let value = node.child_by_field_name("value")?;
|
||
if !is_function_like(value) {
|
||
return None;
|
||
}
|
||
let name = node
|
||
.child_by_field_name("name")
|
||
.map(|n| text(n, bytes))
|
||
.filter(|s| !s.is_empty());
|
||
Some(build_function_unit_with_meta(
|
||
value,
|
||
AnalysisUnitKind::Function,
|
||
name,
|
||
bytes,
|
||
rules,
|
||
file_meta,
|
||
))
|
||
}
|
||
|
||
/// Result of attaching a route handler to the model, as returned by
/// `attach_route_handler` and consumed by `push_route_registration`.
pub struct ResolvedHandler {
    /// Index of the handler's unit within `model.units`.
    pub unit_idx: usize,
    /// Span of the resolved handler node.
    pub span: (usize, usize),
    /// Route-handler-aware parameter names of the handler.
    pub params: Vec<String>,
    /// 1-based line on which the handler node starts.
    pub line: usize,
}
|
||
|
||
pub fn visit_named_nodes(node: Node<'_>, visit: &mut impl FnMut(Node<'_>)) {
|
||
visit(node);
|
||
for child in named_children(node) {
|
||
visit_named_nodes(child, visit);
|
||
}
|
||
}
|
||
|
||
pub fn attach_route_handler(
|
||
root: Node<'_>,
|
||
handler_expr: Node<'_>,
|
||
route_name: String,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
model: &mut AuthorizationModel,
|
||
) -> Option<ResolvedHandler> {
|
||
let handler_node = resolve_handler_node(root, handler_expr, bytes)?;
|
||
// `attach_route_handler` is called by route-aware extractors (express,
|
||
// koa, fastify, axum, …) which already hold the file root. Build
|
||
// the FileMeta once here so the JS/TS TRPC pre-scan only walks the
|
||
// top-level decl set per file (instead of per route).
|
||
let file_meta = FileMeta::scan(root, bytes);
|
||
let line = handler_node.start_position().row + 1;
|
||
let handler_span = span(handler_node);
|
||
let definition = function_definition_node(handler_node);
|
||
// Route-handler-aware param list: includes id-like Python typed
|
||
// params (`dag_id: str`, `dag_run_id: str`) that
|
||
// `collect_param_names`'s default branch filters out for internal
|
||
// helpers. `inject_middleware_auth` clones this list into the
|
||
// synthetic-subject set on each middleware-injected auth check so
|
||
// `auth_check_covers_subject` matches the operation subjects
|
||
// produced by the handler body (e.g. `filter_by(dag_id=dag_id,
|
||
// run_id=dag_run_id)`).
|
||
let route_handler_params = function_params_route_handler(definition, bytes);
|
||
|
||
// **Promote-or-create.** Most route-aware extractors invoke
|
||
// `collect_top_level_units` first, which already produced a
|
||
// [`AnalysisUnitKind::Function`] unit covering this same span.
|
||
// Pushing a brand-new RouteHandler unit duplicates the analysis
|
||
// surface, `check_ownership_gaps` then evaluates the operation
|
||
// twice and emits the FP from the (un-injected) Function unit even
|
||
// when the RouteHandler unit's middleware-derived auth check
|
||
// suppresses it. Promoting the existing unit keeps the model
|
||
// single-tenanted per handler so downstream auth-check injection
|
||
// (FastAPI `dependencies=[Depends(...)]`, Express middleware, ...)
|
||
// lands on the unit that's evaluated.
|
||
if let Some((idx, existing)) = model
|
||
.units
|
||
.iter_mut()
|
||
.enumerate()
|
||
.find(|(_, u)| u.kind == AnalysisUnitKind::Function && u.span == handler_span)
|
||
{
|
||
existing.kind = AnalysisUnitKind::RouteHandler;
|
||
existing.name = Some(route_name);
|
||
existing.params = route_handler_params.clone();
|
||
return Some(ResolvedHandler {
|
||
unit_idx: idx,
|
||
span: handler_span,
|
||
params: route_handler_params,
|
||
line,
|
||
});
|
||
}
|
||
|
||
let unit_idx = model.units.len();
|
||
let mut unit = build_function_unit_with_meta(
|
||
handler_node,
|
||
AnalysisUnitKind::RouteHandler,
|
||
Some(route_name),
|
||
bytes,
|
||
rules,
|
||
Some(&file_meta),
|
||
);
|
||
unit.params = route_handler_params.clone();
|
||
model.units.push(unit);
|
||
Some(ResolvedHandler {
|
||
unit_idx,
|
||
span: handler_span,
|
||
params: route_handler_params,
|
||
line,
|
||
})
|
||
}
|
||
|
||
/// File-level facts computed by [`FileMeta::scan`] and threaded
/// through unit construction so individual units do not have to
/// rediscover them. New fields can be added here without changing the
/// per-unit function signatures.
#[derive(Debug, Clone, Default)]
pub struct FileMeta {
    /// Names of TS type aliases whose body references a TRPC-marker
    /// type.
    pub trpc_alias_names: HashSet<String>,
}
|
||
|
||
impl FileMeta {
|
||
pub fn scan(root: Node<'_>, bytes: &[u8]) -> Self {
|
||
let mut trpc_alias_names = HashSet::new();
|
||
scan_trpc_aliases_visit(root, bytes, &mut trpc_alias_names);
|
||
Self { trpc_alias_names }
|
||
}
|
||
}
|
||
|
||
pub fn push_route_registration(
|
||
model: &mut AuthorizationModel,
|
||
framework: Framework,
|
||
method: HttpMethod,
|
||
path: String,
|
||
file: &Path,
|
||
handler: ResolvedHandler,
|
||
middleware_calls: Vec<CallSite>,
|
||
) {
|
||
model.routes.push(RouteRegistration {
|
||
framework,
|
||
method,
|
||
path,
|
||
middleware: middleware_names(&middleware_calls),
|
||
handler_span: handler.span,
|
||
handler_params: handler.params,
|
||
file: file.to_path_buf(),
|
||
line: handler.line,
|
||
unit_idx: handler.unit_idx,
|
||
middleware_calls,
|
||
});
|
||
}
|
||
|
||
pub fn middleware_names(middleware_calls: &[CallSite]) -> Vec<String> {
|
||
middleware_calls
|
||
.iter()
|
||
.map(|call| call.name.clone())
|
||
.collect()
|
||
}
|
||
|
||
pub fn resolve_handler_node<'tree>(
|
||
root: Node<'tree>,
|
||
handler_expr: Node<'tree>,
|
||
bytes: &[u8],
|
||
) -> Option<Node<'tree>> {
|
||
if is_function_like(handler_expr) {
|
||
return Some(handler_expr);
|
||
}
|
||
|
||
if !is_handler_reference(handler_expr) {
|
||
return None;
|
||
}
|
||
|
||
let candidate = callee_name(handler_expr, bytes);
|
||
let name = candidate.rsplit('.').next().unwrap_or(&candidate);
|
||
if name.is_empty() {
|
||
return None;
|
||
}
|
||
find_top_level_function_node(root, name, bytes)
|
||
}
|
||
|
||
fn find_top_level_function_node<'tree>(
|
||
root: Node<'tree>,
|
||
name: &str,
|
||
bytes: &[u8],
|
||
) -> Option<Node<'tree>> {
|
||
for idx in 0..root.named_child_count() {
|
||
let Some(child) = root.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if let Some(found) = find_top_level_function_node_in_child(child, name, bytes) {
|
||
return Some(found);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
fn find_top_level_function_node_in_child<'tree>(
|
||
node: Node<'tree>,
|
||
name: &str,
|
||
bytes: &[u8],
|
||
) -> Option<Node<'tree>> {
|
||
match node.kind() {
|
||
"function_declaration" | "function_definition" | "method_declaration" => {
|
||
if function_name(node, bytes).as_deref() == Some(name) {
|
||
Some(node)
|
||
} else {
|
||
None
|
||
}
|
||
}
|
||
"function_item" => {
|
||
if function_name(node, bytes).as_deref() == Some(name) {
|
||
Some(node)
|
||
} else {
|
||
None
|
||
}
|
||
}
|
||
"decorated_definition" => {
|
||
let definition = decorated_definition_child(node)?;
|
||
if definition.kind() == "function_definition"
|
||
&& function_name(node, bytes).as_deref() == Some(name)
|
||
{
|
||
Some(node)
|
||
} else {
|
||
None
|
||
}
|
||
}
|
||
"lexical_declaration" | "variable_declaration" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if child.kind() != "variable_declarator" {
|
||
continue;
|
||
}
|
||
let Some(var_name) = child.child_by_field_name("name") else {
|
||
continue;
|
||
};
|
||
if text(var_name, bytes) != name {
|
||
continue;
|
||
}
|
||
let Some(value) = child.child_by_field_name("value") else {
|
||
continue;
|
||
};
|
||
if is_function_like(value) {
|
||
return Some(value);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
"export_statement" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if child.is_named()
|
||
&& let Some(found) = find_top_level_function_node_in_child(child, name, bytes)
|
||
{
|
||
return Some(found);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
"program" | "source_file" | "class_declaration" | "class_body" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if let Some(found) = find_top_level_function_node_in_child(child, name, bytes) {
|
||
return Some(found);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
pub fn build_function_unit(
|
||
node: Node<'_>,
|
||
kind: AnalysisUnitKind,
|
||
name: Option<String>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
) -> AnalysisUnit {
|
||
build_function_unit_with_meta(node, kind, name, bytes, rules, None)
|
||
}
|
||
|
||
/// Internal variant of [`build_function_unit`] that accepts a
|
||
/// pre-computed file-level [`FileMeta`]. When `file_meta` is
|
||
/// `Some`, its `trpc_alias_names` set is copied into `UnitState`
|
||
/// once per unit so the per-parameter pre-pass doesn't re-scan the
|
||
/// source-file root. Pre-built `FileMeta` is required to keep
|
||
/// `tests/hostile_input_tests::many_small_functions_do_not_explode`
|
||
/// inside its 15s budget on N×N files.
|
||
pub fn build_function_unit_with_meta(
|
||
node: Node<'_>,
|
||
kind: AnalysisUnitKind,
|
||
name: Option<String>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
file_meta: Option<&FileMeta>,
|
||
) -> AnalysisUnit {
|
||
let definition = function_definition_node(node);
|
||
let params = function_params(definition, bytes);
|
||
// Structurally-typed bounded params: walk the parameter list and
|
||
// mark any param whose type annotation resolves to an integer or
|
||
// boolean scalar (`int`, `bool`, `Optional[int]`, `list[int]`,
|
||
// `Iterable[int]`, …) as typed-bounded. Mirrors the SSA-derived
|
||
// `apply_typed_bounded_params` lift but runs even when the SSA
|
||
// var_types map isn't supplied (internal helpers analysed without
|
||
// a CFG, ad-hoc unit lookups, …). Without this, a Python helper
|
||
// signature like `get_release_project_new_group_count(environment_ids:
|
||
// list[int], project_ids: list[int])` would drop into the
|
||
// ownership rule because the param names match `is_id_like` even
|
||
// though the static type proves the values are bounded numerics
|
||
// that can't carry a SQL/file/shell payload.
|
||
let preseeded_bounded = python_int_bounded_typed_params(definition, bytes);
|
||
let line = node.start_position().row + 1;
|
||
let mut state = UnitState::default();
|
||
// Seed Go's method-receiver name (`func (c *Cache) ...` → `c`) into
|
||
// `non_sink_vars` so calls of the form `c.foo(..)` /
|
||
// `c.field.foo(..)` route through the in-memory-local sink class
|
||
// and skip the verb-name fallback. These are intra-struct
|
||
// dispatches; without type tracking, the auth analyser cannot tell
|
||
// a `*Cache` field-call from a `*sql.DB` call by name alone, so we
|
||
// err on the safe side per the deferred memo
|
||
// (`project_realrepo_hugo.md`). Only Go's `method_declaration`
|
||
// exposes a `receiver` field, Rust/Java instance methods route
|
||
// through `self`/`this` keywords and are unaffected.
|
||
if let Some(receiver_name) = method_receiver_name(definition, bytes) {
|
||
state.non_sink_vars.insert(receiver_name);
|
||
}
|
||
if let Some(meta) = file_meta {
|
||
state.trpc_alias_names = meta.trpc_alias_names.clone();
|
||
}
|
||
collect_unit_state(node, bytes, rules, &mut state);
|
||
dedup_value_refs(&mut state.value_refs);
|
||
let context_inputs: Vec<ValueRef> = state
|
||
.value_refs
|
||
.iter()
|
||
.filter(|value| {
|
||
matches!(
|
||
value.source_kind,
|
||
ValueSourceKind::RequestParam
|
||
| ValueSourceKind::RequestBody
|
||
| ValueSourceKind::RequestQuery
|
||
| ValueSourceKind::Session
|
||
)
|
||
})
|
||
.cloned()
|
||
.collect();
|
||
|
||
let is_nextauth_options_factory = body_returns_nextauth_options(node, bytes);
|
||
|
||
AnalysisUnit {
|
||
kind,
|
||
name,
|
||
span: span(node),
|
||
params,
|
||
context_inputs,
|
||
call_sites: state.call_sites,
|
||
auth_checks: state.auth_checks,
|
||
operations: state.operations,
|
||
value_refs: state.value_refs,
|
||
condition_texts: state.condition_texts,
|
||
line,
|
||
row_field_vars: state.row_field_vars,
|
||
var_alias_chain: state.var_alias_chain,
|
||
row_population_data: state.row_population_data,
|
||
self_actor_vars: state.self_actor_vars,
|
||
self_actor_id_vars: state.self_actor_id_vars,
|
||
authorized_sql_vars: state.authorized_sql_vars,
|
||
const_bound_vars: state.const_bound_vars,
|
||
typed_bounded_vars: preseeded_bounded,
|
||
typed_bounded_dto_fields: std::collections::HashMap::new(),
|
||
self_scoped_session_bases: state.self_scoped_session_bases,
|
||
is_nextauth_options_factory,
|
||
}
|
||
}
|
||
|
||
/// True when the function body at `node` is a NextAuth authority
|
||
/// surface. Recognises two shapes:
|
||
///
|
||
/// 1. An object literal with a `callbacks: { ... }` property whose
|
||
/// nested entries name at least one canonical NextAuth callback
|
||
/// (`signIn`, `session`, `jwt`, `redirect`, `authorize`,
|
||
/// `authorized`). Matches the cal.com idiom
|
||
/// `export const getOptions = (...) => ({ callbacks: { ... } })`.
|
||
///
|
||
/// 2. An object literal whose entries name at least one distinctive
|
||
/// NextAuth Adapter method (`getUserByAccount`, `linkAccount`,
|
||
/// `unlinkAccount`, `createVerificationToken`,
|
||
/// `useVerificationToken`, `getSessionAndUser`) AND at least one
|
||
/// other canonical Adapter method. Matches the cal.com idiom
|
||
/// `function CalComAdapter(prisma): Adapter { return { ... } }`
|
||
/// where the returned Adapter object holds the implementation.
|
||
///
|
||
/// In both shapes the inner method bodies are NOT enumerated as
|
||
/// separate units (object method shorthands stay anonymous), so every
|
||
/// identity-resolution operation from the inner methods accumulates
|
||
/// onto the outer factory's unit. Without this flag the outer unit's
|
||
/// name is `getOptions` / `CalComAdapter`, so `is_nextauth_callback_unit`
|
||
/// cannot match by name and the missing-ownership rule fires on every
|
||
/// identity lookup inside the surface.
|
||
///
|
||
/// JS/TS-only by construction (matches `object` / `pair` /
|
||
/// `method_definition` / `shorthand_property_identifier` node kinds).
|
||
/// Returns false on other languages.
|
||
fn body_returns_nextauth_options(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
fn scan(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
if matches!(node.kind(), "object" | "object_expression")
|
||
&& (object_has_nextauth_callbacks_property(node, bytes)
|
||
|| object_is_nextauth_adapter(node, bytes))
|
||
{
|
||
return true;
|
||
}
|
||
for child in named_children(node) {
|
||
if scan(child, bytes) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
scan(node, bytes)
|
||
}
|
||
|
||
fn object_has_nextauth_callbacks_property(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
for entry in named_children(node) {
|
||
let Some((key_text, value_node)) = object_entry_key_value(entry, bytes) else {
|
||
continue;
|
||
};
|
||
if key_text != "callbacks" {
|
||
continue;
|
||
}
|
||
if matches!(value_node.kind(), "object" | "object_expression")
|
||
&& object_contains_nextauth_callback_method(value_node, bytes)
|
||
{
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
|
||
fn object_contains_nextauth_callback_method(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
for entry in named_children(node) {
|
||
if entry.kind() == "method_definition" {
|
||
if let Some(name_node) = entry.child_by_field_name("name") {
|
||
let name = text(name_node, bytes);
|
||
if is_nextauth_callback_name(&name) {
|
||
return true;
|
||
}
|
||
}
|
||
continue;
|
||
}
|
||
if let Some((key_text, _value_node)) = object_entry_key_value(entry, bytes)
|
||
&& is_nextauth_callback_name(&key_text)
|
||
{
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
|
||
fn object_entry_key_value<'a>(entry: Node<'a>, bytes: &[u8]) -> Option<(String, Node<'a>)> {
|
||
match entry.kind() {
|
||
"pair" => {
|
||
let key = entry.child_by_field_name("key")?;
|
||
let value = entry.child_by_field_name("value")?;
|
||
Some((object_key_text(key, bytes), value))
|
||
}
|
||
"method_definition" => {
|
||
let name = entry.child_by_field_name("name")?;
|
||
Some((text(name, bytes), entry))
|
||
}
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
fn object_key_text(node: Node<'_>, bytes: &[u8]) -> String {
|
||
match node.kind() {
|
||
"property_identifier" | "identifier" | "shorthand_property_identifier" => text(node, bytes),
|
||
"string" | "string_literal" => {
|
||
let raw = text(node, bytes);
|
||
raw.trim_matches(|c| c == '"' || c == '\'' || c == '`')
|
||
.to_string()
|
||
}
|
||
"computed_property_name" => {
|
||
if let Some(inner) = node.named_child(0) {
|
||
object_key_text(inner, bytes)
|
||
} else {
|
||
String::new()
|
||
}
|
||
}
|
||
_ => text(node, bytes),
|
||
}
|
||
}
|
||
|
||
/// True when `name` is one of the canonical NextAuth callback names.
/// The comparison is exact and case-sensitive.
fn is_nextauth_callback_name(name: &str) -> bool {
    const CALLBACK_NAMES: [&str; 6] = [
        "signIn",
        "session",
        "jwt",
        "redirect",
        "authorize",
        "authorized",
    ];
    CALLBACK_NAMES.contains(&name)
}
|
||
|
||
/// True when the object literal at `node` looks like a NextAuth
|
||
/// Adapter implementation: at least one distinctive Adapter method
|
||
/// name AND at least two canonical Adapter method names overall.
|
||
/// The distinctive subset (`getUserByAccount`, `linkAccount`,
|
||
/// `unlinkAccount`, `createVerificationToken`, `useVerificationToken`,
|
||
/// `getSessionAndUser`) names operations that are unique to the
|
||
/// NextAuth Adapter contract; the broader canonical set (createUser /
|
||
/// getUser / getUserByEmail / updateUser / deleteUser / createSession /
|
||
/// updateSession / deleteSession) overlaps with generic CRUD repos, so
|
||
/// the distinctive-name witness gates the recognition.
|
||
fn object_is_nextauth_adapter(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
let mut distinctive_seen = false;
|
||
let mut total = 0_usize;
|
||
for entry in named_children(node) {
|
||
let Some(key_text) = adapter_object_entry_key(entry, bytes) else {
|
||
continue;
|
||
};
|
||
if !is_nextauth_adapter_method_name(&key_text) {
|
||
continue;
|
||
}
|
||
total += 1;
|
||
if is_nextauth_adapter_distinctive_method_name(&key_text) {
|
||
distinctive_seen = true;
|
||
}
|
||
}
|
||
distinctive_seen && total >= 2
|
||
}
|
||
|
||
fn adapter_object_entry_key(entry: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
match entry.kind() {
|
||
"method_definition" => entry
|
||
.child_by_field_name("name")
|
||
.map(|n| object_key_text(n, bytes)),
|
||
"pair" => entry
|
||
.child_by_field_name("key")
|
||
.map(|n| object_key_text(n, bytes)),
|
||
"shorthand_property_identifier" => Some(text(entry, bytes)),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
/// True when `name` is one of the canonical NextAuth Adapter method
/// names (the distinctive ones plus the generic user/session CRUD
/// ones). The comparison is exact and case-sensitive.
fn is_nextauth_adapter_method_name(name: &str) -> bool {
    const ADAPTER_METHODS: [&str; 14] = [
        "createUser",
        "getUser",
        "getUserByEmail",
        "getUserByAccount",
        "updateUser",
        "deleteUser",
        "linkAccount",
        "unlinkAccount",
        "createSession",
        "getSessionAndUser",
        "updateSession",
        "deleteSession",
        "createVerificationToken",
        "useVerificationToken",
    ];
    ADAPTER_METHODS.contains(&name)
}
|
||
|
||
/// True when `name` is one of the Adapter method names used as the
/// distinctive witness in `object_is_nextauth_adapter`. The comparison
/// is exact and case-sensitive.
fn is_nextauth_adapter_distinctive_method_name(name: &str) -> bool {
    const DISTINCTIVE_METHODS: [&str; 6] = [
        "getUserByAccount",
        "linkAccount",
        "unlinkAccount",
        "createVerificationToken",
        "useVerificationToken",
        "getSessionAndUser",
    ];
    DISTINCTIVE_METHODS.contains(&name)
}
|
||
|
||
#[derive(Default)]
|
||
struct UnitState {
|
||
call_sites: Vec<CallSite>,
|
||
auth_checks: Vec<AuthCheck>,
|
||
operations: Vec<SensitiveOperation>,
|
||
value_refs: Vec<ValueRef>,
|
||
condition_texts: Vec<String>,
|
||
/// Local variable names bound to a known non-sink collection
|
||
/// (e.g. `HashMap::new()`, `Vec::with_capacity(_)`, `vec![]`,
|
||
/// or via an explicit type annotation). Consulted by
|
||
/// `collect_call` so method calls on these bindings
|
||
/// (`map.insert(…)`, `set.remove(…)`) aren't classified as
|
||
/// auth-relevant Read/Mutation operations.
|
||
non_sink_vars: HashSet<String>,
|
||
/// Map from local variable name to the row binding it was read
|
||
/// from (`let owner_id = existing.get("user_id")` → `owner_id →
|
||
/// existing`). Powers A2's row-level ownership-equality check so
|
||
/// downstream uses of fields from the same row are implicitly
|
||
/// covered by a check on the row's owner column.
|
||
row_field_vars: HashMap<String, String>,
|
||
/// Full chain text for `let X = BASE.FIELD` shapes (or
|
||
/// transitively through method calls / try / await wrappers when
|
||
/// the value resolves to a member access). Stored alongside
|
||
/// `row_field_vars` so the row-population reverse-walk can match
|
||
/// plain-identifier sink subjects against population args by
|
||
/// their original chain text. See
|
||
/// [`crate::auth_analysis::model::AnalysisUnit::var_alias_chain`].
|
||
var_alias_chain: HashMap<String, String>,
|
||
/// Per row-binding metadata from the `let ROW = CALL(...)` site:
|
||
/// the declaration line and the set of `ValueRef`s appearing in
|
||
/// the call's arguments. When an A2 AuthCheck fires against
|
||
/// `ROW`, we back-date the check to this line and merge these
|
||
/// argument value-refs into its subjects so the original fetch
|
||
/// (e.g. `db.query_one(..., &[doc_id])`) is also covered.
|
||
row_population_data: HashMap<String, (usize, Vec<ValueRef>)>,
|
||
/// A3: local variables bound to the authenticated actor.
|
||
/// Populated from `let V = require_auth(..).await?` (or any call
|
||
/// matching `rules.is_login_guard` / `rules.is_authorization_check`)
|
||
/// and from typed route-handler parameters whose type names the
|
||
/// authenticated user (`CurrentUser`, `AuthUser`, …). Copied onto
|
||
/// `AnalysisUnit.self_actor_vars` so `checks.rs` can recognize
|
||
/// `V.id` as actor context rather than a foreign scoped id.
|
||
self_actor_vars: HashSet<String>,
|
||
/// Transitive copies of the authenticated actor's id field
|
||
/// (`let X = V.id` / `let X = (V.id as ..).into()` /
|
||
/// `let X = V.user_id` / `V.uid`). Populated by
|
||
/// `collect_self_actor_id_binding`. Copied onto
|
||
/// `AnalysisUnit.self_actor_id_vars` so subjects whose name appears
|
||
/// here count as actor context, closes the FP where a route
|
||
/// handler does `let uid = user.id; query_all(.., &[uid])` and the
|
||
/// engine sees `uid` only as a plain scoped id.
|
||
self_actor_id_vars: HashSet<String>,
|
||
/// B3: local variables bound (directly or transitively) to a
|
||
/// SQL query whose literal text is auth-gated. Populated by
|
||
/// `collect_sql_authorized_binding` and the `for ROW in X` /
|
||
/// `let Y = ROW.method(..)` propagation paths inside
|
||
/// `collect_row_field_binding` and `collect_for_row_binding`.
|
||
authorized_sql_vars: HashSet<String>,
|
||
/// Local variables whose declaration binds them to a string,
|
||
/// numeric, or boolean literal, `id := "id"` / `let id = "1"` /
|
||
/// `String id = "id";`. These cannot be user-controlled and so
|
||
/// must not be treated as scoped-identifier subjects by
|
||
/// `is_relevant_target_subject`. Closes the gin/context_test.go
|
||
/// FP where `id := "id"; c.AddParam(id, value)` triggered
|
||
/// `go.auth.missing_ownership_check` because the local `id`
|
||
/// matched `is_id_like` but had no actor-context exemption.
|
||
const_bound_vars: HashSet<String>,
|
||
/// Dynamic per-unit session-base set lifted into the
|
||
/// `AnalysisUnit` of the same name. Populated by
|
||
/// [`collect_trpc_ctx_param`] when a TS parameter's type
|
||
/// references a TRPC-shaped Options alias. See the field doc on
|
||
/// [`crate::auth_analysis::model::AnalysisUnit::self_scoped_session_bases`].
|
||
self_scoped_session_bases: HashSet<String>,
|
||
/// File-level set of TS type-alias names whose body references a
|
||
/// TRPC-marker type (`TrpcSessionUser` etc.). Populated once per
|
||
/// unit at the top of [`build_function_unit`] by walking up to
|
||
/// the source-file root and scanning every
|
||
/// `type_alias_declaration` / `interface_declaration`. Read by
|
||
/// [`collect_trpc_ctx_param`] to decide whether a parameter's
|
||
/// type annotation (often just an alias name like `GetOptions`)
|
||
/// resolves to a TRPC handler signature. Empty for non-TS
|
||
/// languages, the scanner only matches TS-grammar node kinds.
|
||
trpc_alias_names: HashSet<String>,
|
||
}
|
||
|
||
fn collect_unit_state(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
state: &mut UnitState,
|
||
) {
|
||
match node.kind() {
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
collect_call(node, bytes, rules, state)
|
||
}
|
||
"while_statement" | "do_statement" | "while_modifier" | "until_modifier"
|
||
| "while_expression" | "unless" | "unless_modifier" => {
|
||
if let Some(condition) = node.child_by_field_name("condition") {
|
||
collect_condition(condition, bytes, rules, state);
|
||
}
|
||
}
|
||
"if_statement" | "elif_clause" | "if_expression" | "if" | "if_modifier" => {
|
||
if let Some(condition) = node.child_by_field_name("condition") {
|
||
collect_condition(condition, bytes, rules, state);
|
||
}
|
||
detect_ownership_equality_check(node, bytes, state);
|
||
}
|
||
"conditional_expression" => collect_condition(node, bytes, rules, state),
|
||
"let_declaration" => {
|
||
collect_non_sink_binding(node, bytes, rules, state);
|
||
collect_row_field_binding(node, bytes, state);
|
||
collect_member_alias_binding(node, bytes, state);
|
||
collect_row_population(node, bytes, state);
|
||
collect_self_actor_binding(node, bytes, rules, state);
|
||
collect_self_actor_id_binding(node, bytes, state);
|
||
collect_sql_authorized_binding(node, bytes, rules, state);
|
||
propagate_sql_authorized_through_field_read(node, bytes, state);
|
||
collect_const_string_binding(node, bytes, state);
|
||
}
|
||
// JS/TS `variable_declarator` inside `lexical_declaration`
|
||
// (`const X = ...`, `let X = ...`), exposes `name` + `value`
|
||
// fields. Run the same self-actor / self-actor-id binding
|
||
// recognition as the Rust `let_declaration` arm above so the
|
||
// session-self-actor copy chain (`const session = await
|
||
// getServerSession(...)`; `const userId = session.user.id`)
|
||
// populates `self_actor_vars` / `self_actor_id_vars`.
|
||
"variable_declarator" => {
|
||
collect_self_actor_binding(node, bytes, rules, state);
|
||
collect_self_actor_id_binding(node, bytes, state);
|
||
collect_const_string_binding(node, bytes, state);
|
||
// JS/TS row-fetch declarators (`const webhook = await
|
||
// repo.findById(id)`) need row-population recognition so
|
||
// the post-fetch ownership-equality detector can attribute
|
||
// back to the row's let line. `collect_row_population`
|
||
// accepts the `name` field used by `variable_declarator`.
|
||
collect_row_population(node, bytes, state);
|
||
}
|
||
// Go `id := "id"` / Python `id = "id"` / Java `String id = "id";` /
|
||
// Ruby `id = "id"`, language-specific binding nodes that the
|
||
// let_declaration arm above doesn't catch. Const-only, never
|
||
// marks self_actor / row_field / sql vars (those need richer
|
||
// right-hand-side analysis already provided by the
|
||
// let_declaration arm).
|
||
"short_var_declaration"
|
||
| "const_declaration"
|
||
| "var_declaration"
|
||
| "var_spec"
|
||
| "lexical_declaration"
|
||
| "local_variable_declaration"
|
||
| "assignment"
|
||
| "assignment_expression"
|
||
| "augmented_assignment"
|
||
| "expression_statement" => {
|
||
collect_const_string_binding(node, bytes, state);
|
||
// Ruby `@issue = Issue.find(params[:id])` is the canonical
|
||
// controller idiom: instance-variable assignment whose RHS
|
||
// is a row-fetch call. The let_declaration arm above
|
||
// doesn't fire for this kind, so register the row
|
||
// population separately. `collect_row_population` reads
|
||
// either `pattern`/`value` or `left`/`right`, so it works
|
||
// unchanged for Ruby `assignment` once the LHS recognises
|
||
// `instance_variable`.
|
||
if matches!(node.kind(), "assignment" | "assignment_expression") {
|
||
collect_row_population(node, bytes, state);
|
||
// Python `verified_ids = set()` /
|
||
// `cache: dict[str,int] = {}` and JS analogues bind a
|
||
// local non-sink container. `collect_non_sink_binding`
|
||
// accepts both `pattern`/`value` and `left`/`right`
|
||
// field names so the same recognition path covers
|
||
// these assignment-node shapes.
|
||
collect_non_sink_binding(node, bytes, rules, state);
|
||
}
|
||
}
|
||
"for_expression" => {
|
||
collect_for_row_binding(node, bytes, state);
|
||
}
|
||
"parameter" => {
|
||
collect_typed_extractor_self_actor(node, bytes, state);
|
||
}
|
||
// TS `required_parameter` / `optional_parameter`, the analogous
|
||
// arm to Rust's `parameter`. Recognise TRPC-shaped Options
|
||
// params (`{ ctx, input }: GetOptions`) and add the destructured
|
||
// ctx-base to `self_scoped_session_bases` so downstream
|
||
// `ctx.user.id` accesses count as actor context.
|
||
"required_parameter" | "optional_parameter" => {
|
||
collect_trpc_ctx_param(node, bytes, state);
|
||
}
|
||
_ => {}
|
||
}
|
||
|
||
// O(1) per-node shallow value-ref emission, then descend.
|
||
//
|
||
// Pre-fix this site called `extract_value_refs(node, bytes)` which walks
|
||
// node's entire subtree. Combined with the recursion below — which
|
||
// visits every descendant and re-runs the same call at each level — the
|
||
// total work was O(N * subtree_size) ≈ O(N²) per function body. On
|
||
// mm/channels/app the inner-walk dominated `build_function_unit_with_meta`
|
||
// and its descendants (~17%+15%+11% of total wall-clock split across
|
||
// `build_function_unit_with_meta`, `collect_unit_state`, and
|
||
// `extract_value_refs` in the post-shared-model profile, 2026-05-04).
|
||
//
|
||
// The recursion below already visits every descendant once. Emitting a
|
||
// shallow value-ref per node — only the ref the node itself represents —
|
||
// produces the same SET of value-refs after `dedup_value_refs` runs in
|
||
// `build_function_unit_with_meta`, because every ref-emitting kind
|
||
// (member chain, subscript, accessor call, identifier) is reachable as a
|
||
// single node visit. Public callers of `extract_value_refs` (e.g.
|
||
// `collect_call`, `collect_condition`, assignment-side extraction) keep
|
||
// the deep walk: they intentionally want refs from the full subtree
|
||
// rooted at the argument they pass.
|
||
append_shallow_value_ref(node, bytes, &mut state.value_refs);
|
||
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
collect_unit_state(child, bytes, rules, state);
|
||
}
|
||
}
|
||
|
||
/// Per-node value-ref emission used inside `collect_unit_state`'s tree walk.
|
||
///
|
||
/// Returns the value-ref the node itself represents (a member chain, a
|
||
/// subscript, an accessor call's chain, or an identifier-like leaf), without
|
||
/// descending into descendants. The caller's existing AST recursion handles
|
||
/// children; relying on that recursion turns the previously O(N²) per-body
|
||
/// walk into O(N).
|
||
fn append_shallow_value_ref(node: Node<'_>, bytes: &[u8], refs: &mut Vec<ValueRef>) {
|
||
match node.kind() {
|
||
"member_expression"
|
||
| "attribute"
|
||
| "selector_expression"
|
||
| "field_expression"
|
||
| "field_access" => {
|
||
if let Some(value) = member_value_ref(node, bytes) {
|
||
refs.push(value);
|
||
}
|
||
}
|
||
"subscript_expression" | "subscript" | "element_reference" | "index_expression" => {
|
||
if let Some(value) = subscript_value_ref(node, bytes) {
|
||
refs.push(value);
|
||
}
|
||
}
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
// Accessor-call chains (`cache.get(key)`, `req.params.id`) absorb
|
||
// into a single chain ValueRef; non-accessor calls return None
|
||
// here and rely on recursion to visit `function` + arg children
|
||
// so each leaf identifier emits its own ref.
|
||
if let Some(value) = call_value_ref(node, bytes) {
|
||
refs.push(value);
|
||
}
|
||
}
|
||
// Bare identifier and Ruby `@foo` / `@@foo` / `$foo` leaves: emit a
|
||
// single Identifier-kind ValueRef. Mirrors `extract_value_refs`'s
|
||
// identifier arm so `dedup_value_refs` collapses any cross-path
|
||
// duplicates against existing emissions from sibling deep walks
|
||
// (e.g. `collect_condition`'s `extract_value_refs(condition)`).
|
||
"identifier" | "instance_variable" | "class_variable" | "global_variable" => {
|
||
refs.push(ValueRef {
|
||
source_kind: ValueSourceKind::Identifier,
|
||
name: text(node, bytes),
|
||
base: None,
|
||
field: None,
|
||
index: None,
|
||
span: span(node),
|
||
});
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
fn collect_call(node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules, state: &mut UnitState) {
|
||
let callee = call_name(node, bytes);
|
||
if callee.is_empty() {
|
||
return;
|
||
}
|
||
|
||
let args = node
|
||
.child_by_field_name("arguments")
|
||
.map(named_children)
|
||
.unwrap_or_default();
|
||
let mut subjects: Vec<ValueRef> = call_receiver_subjects(node, bytes);
|
||
subjects.extend(
|
||
args.iter()
|
||
.flat_map(|arg| extract_value_refs(*arg, bytes))
|
||
.collect::<Vec<_>>(),
|
||
);
|
||
let line = node.start_position().row + 1;
|
||
let string_args: Vec<String> = args.iter().map(|arg| text(*arg, bytes)).collect();
|
||
let args_value_refs: Vec<Vec<ValueRef>> = args
|
||
.iter()
|
||
.map(|arg| extract_value_refs(*arg, bytes))
|
||
.collect();
|
||
let node_text = text(node, bytes);
|
||
state.call_sites.push(CallSite {
|
||
name: callee.clone(),
|
||
args: string_args.clone(),
|
||
span: span(node),
|
||
args_value_refs,
|
||
});
|
||
|
||
if rules.is_authorization_check(&callee) {
|
||
state.auth_checks.push(AuthCheck {
|
||
kind: classify_auth_check(&callee, rules),
|
||
callee: callee.clone(),
|
||
subjects: subjects.clone(),
|
||
span: span(node),
|
||
line,
|
||
args: string_args,
|
||
condition_text: None,
|
||
is_route_level: false,
|
||
});
|
||
}
|
||
|
||
// Split classification into OperationKind (what verb?) and
|
||
// SinkClass (what resource?). The sink class drives the
|
||
// ownership gate; OperationKind is kept for partial-batch / stale-
|
||
// session checks that care about read-vs-mutation semantics.
|
||
let (op_kind, sink_class) = if rules.is_token_lookup_call(&callee, &node_text) {
|
||
(Some(OperationKind::TokenLookup), None)
|
||
} else if let Some(class) = rules.classify_sink_class(&callee, &state.non_sink_vars) {
|
||
let op = match class {
|
||
SinkClass::DbCrossTenantRead => OperationKind::Read,
|
||
// InMemoryLocal: keep the verb for telemetry but the
|
||
// ownership gate ignores this class.
|
||
SinkClass::InMemoryLocal => {
|
||
if rules.is_mutation(&callee) {
|
||
OperationKind::Mutation
|
||
} else {
|
||
OperationKind::Read
|
||
}
|
||
}
|
||
// Publish / outbound / cache / DB mutation, treat as
|
||
// write-shaped by default unless the callee name is a
|
||
// read verb (e.g. `cache.get(tenant_id)`).
|
||
_ => {
|
||
if rules.is_read(&callee) && !rules.is_mutation(&callee) {
|
||
OperationKind::Read
|
||
} else {
|
||
OperationKind::Mutation
|
||
}
|
||
}
|
||
};
|
||
(Some(op), Some(class))
|
||
} else {
|
||
(None, None)
|
||
};
|
||
|
||
if let Some(kind) = op_kind {
|
||
state.operations.push(SensitiveOperation {
|
||
kind,
|
||
sink_class,
|
||
callee,
|
||
subjects,
|
||
span: span(node),
|
||
line,
|
||
text: node_text,
|
||
});
|
||
}
|
||
}
|
||
|
||
fn collect_condition(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
state: &mut UnitState,
|
||
) {
|
||
let condition_text = text(node, bytes);
|
||
if condition_text.is_empty() {
|
||
return;
|
||
}
|
||
state.condition_texts.push(condition_text.clone());
|
||
|
||
let subjects = extract_value_refs(node, bytes);
|
||
let line = node.start_position().row + 1;
|
||
|
||
if rules.has_expiry_field(&condition_text) {
|
||
state.auth_checks.push(AuthCheck {
|
||
kind: AuthCheckKind::TokenExpiry,
|
||
callee: "(condition)".into(),
|
||
subjects: subjects.clone(),
|
||
span: span(node),
|
||
line,
|
||
args: Vec::new(),
|
||
condition_text: Some(condition_text.clone()),
|
||
is_route_level: false,
|
||
});
|
||
}
|
||
|
||
if rules.has_recipient_field(&condition_text) {
|
||
state.auth_checks.push(AuthCheck {
|
||
kind: AuthCheckKind::TokenRecipient,
|
||
callee: "(condition)".into(),
|
||
subjects,
|
||
span: span(node),
|
||
line,
|
||
args: Vec::new(),
|
||
condition_text: Some(condition_text),
|
||
is_route_level: false,
|
||
});
|
||
}
|
||
}
|
||
|
||
/// Detect bindings that produce a known non-sink collection
|
||
/// (e.g. `HashMap::new()`, `Vec::with_capacity(_)`, `vec![]`, an
|
||
/// explicit type annotation like `: HashMap<_, _>`, or Python's
|
||
/// bare `set()` / `dict()` / `collections.defaultdict(list)`).
|
||
/// Registered variable names are consulted by `collect_call` so
|
||
/// later method calls on those bindings (`map.insert(..)`,
|
||
/// `set.remove(..)`, `verified_ids.update(..)`) aren't treated as
|
||
/// auth-relevant Read/Mutation operations.
|
||
///
|
||
/// Field names accepted: Rust `let_declaration` uses `pattern` /
|
||
/// `value`; Python `assignment` and JS `assignment_expression` use
|
||
/// `left` / `right`. Both shapes share the same recognition path.
|
||
fn collect_non_sink_binding(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
state: &mut UnitState,
|
||
) {
|
||
let Some(pattern) = node
|
||
.child_by_field_name("pattern")
|
||
.or_else(|| node.child_by_field_name("left"))
|
||
else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
|
||
if let Some(ty_node) = node.child_by_field_name("type") {
|
||
let ty_text = text(ty_node, bytes);
|
||
if rules.is_non_sink_receiver_type(&ty_text) {
|
||
state.non_sink_vars.insert(var_name);
|
||
return;
|
||
}
|
||
}
|
||
|
||
if let Some(value) = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("right"))
|
||
&& value_is_non_sink_constructor(value, bytes, rules)
|
||
{
|
||
state.non_sink_vars.insert(var_name);
|
||
}
|
||
}
|
||
|
||
fn first_identifier_name(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
if matches!(
|
||
node.kind(),
|
||
"identifier"
|
||
| "shorthand_property_identifier_pattern"
|
||
// Ruby `@foo` instance vars and `@@foo` class vars:
|
||
// Rails controllers populate the row via `@issue =
|
||
// Issue.find(...)`, so the row var is the *full* `@issue`
|
||
// text, chain_root in checks.rs strips on `.` only, so an
|
||
// auth check on `@issue.visible?` resolves to root `@issue`,
|
||
// matching the row var.
|
||
| "instance_variable"
|
||
| "class_variable"
|
||
// Ruby globals `$foo` are unusual but match the same
|
||
// handler-state idiom, kept symmetric with @-vars.
|
||
| "global_variable"
|
||
) {
|
||
let value = text(node, bytes);
|
||
if !value.is_empty() {
|
||
return Some(value);
|
||
}
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if let Some(found) = first_identifier_name(child, bytes) {
|
||
return Some(found);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
fn value_is_non_sink_constructor(node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules) -> bool {
|
||
match node.kind() {
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
let callee = call_name(node, bytes);
|
||
rules.is_non_sink_constructor_callee(&callee)
|
||
}
|
||
"macro_invocation" => {
|
||
let name = node
|
||
.child_by_field_name("macro")
|
||
.map(|m| text(m, bytes))
|
||
.unwrap_or_default();
|
||
let last = name.rsplit("::").next().unwrap_or(&name);
|
||
matches!(last, "vec" | "smallvec")
|
||
}
|
||
"try_expression" | "await_expression" | "reference_expression" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if value_is_non_sink_constructor(child, bytes, rules) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// Track `let V = ROW.method(..)` or `let V = ROW.field` so later
|
||
/// row-level ownership-equality checks on `V` (or on another var read
|
||
/// from the same `ROW`) can be attributed back to `ROW`. See
|
||
/// `detect_ownership_equality_check` for the consumer.
|
||
fn collect_row_field_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(value) = node.child_by_field_name("value") else {
|
||
return;
|
||
};
|
||
let Some(row_name) = extract_row_receiver_name(value, bytes) else {
|
||
return;
|
||
};
|
||
state.row_field_vars.insert(var_name, row_name);
|
||
}
|
||
|
||
/// Track `let X = BASE.FIELD` (or `BASE.FIELD?` / `(BASE.FIELD).await`)
|
||
/// so a downstream sink whose subject is the bare identifier `X` can be
|
||
/// matched against row-population args that recorded the original
|
||
/// chain text. Distinct from `collect_row_field_binding`, which only
|
||
/// records the receiver name (loses the field).
|
||
///
|
||
/// Only fires when the value resolves to a member-access node and the
|
||
/// resulting chain has at least two segments (`req.community_id`,
|
||
/// `data.user.id`, …), single-ident receivers are uninteresting and a
|
||
/// chain of length one would just duplicate the binding's own name.
|
||
///
|
||
/// Defensive: never overwrites an existing entry, first writer wins.
|
||
/// Re-binding the same local name (rare in idiomatic Rust) is treated
|
||
/// as a separate variable scope; the rest of the analysis already
|
||
/// works on the first binding seen during a top-down walk.
|
||
fn collect_member_alias_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(value) = node.child_by_field_name("value") else {
|
||
return;
|
||
};
|
||
let target = unwrap_try_like(value);
|
||
if !matches!(
|
||
target.kind(),
|
||
"member_expression"
|
||
| "attribute"
|
||
| "selector_expression"
|
||
| "field_expression"
|
||
| "field_access"
|
||
) {
|
||
return;
|
||
}
|
||
let chain = member_chain(target, bytes);
|
||
if chain.len() < 2 {
|
||
return;
|
||
}
|
||
let chain_text = chain.join(".");
|
||
state.var_alias_chain.entry(var_name).or_insert(chain_text);
|
||
}
|
||
|
||
/// Record the line and argument value-refs of a `let ROW = CALL(..)`.
|
||
/// When A2 synthesises an `AuthCheck` on `ROW` later, we back-date the
|
||
/// check to this line and merge the args into its subjects so the
|
||
/// original fetch (e.g. `db.query_one(.., &[doc_id])`) is also covered.
|
||
///
|
||
/// The recorded line is the **call**'s start line, not the
|
||
/// `let_declaration`'s. These differ for multi-line bindings such as
|
||
///
|
||
/// ```ignore
|
||
/// let orig = // let_declaration starts here
|
||
/// CommentView::read(&mut pool, comment_id, ..).await?; // call starts here
|
||
/// ```
|
||
///
|
||
/// `has_row_fetch_exemption` looks for a row var "declared at this
|
||
/// op's line", where `op.line` is the call site. Recording the
|
||
/// let-line caused the multi-line shape to fall through the exemption
|
||
///, surfaced on lemmy's `comment/lock.rs:31`, where every fetch-then-
|
||
/// check route handler that wraps the read across two lines was
|
||
/// flagged despite a textual auth check on the resulting row.
|
||
fn collect_row_population(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
// Most languages expose `pattern`/`value` on let / const / var
|
||
// declarations. Ruby `assignment` uses `left`/`right` instead.
|
||
// JS/TS `variable_declarator` uses `name`/`value`. Accept any of
|
||
// them; when none is present the node isn't an RHS-bound binding
|
||
// and we skip.
|
||
let Some(pattern) = node
|
||
.child_by_field_name("pattern")
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
.or_else(|| node.child_by_field_name("left"))
|
||
else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(value) = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("right"))
|
||
else {
|
||
return;
|
||
};
|
||
let call_node = unwrap_try_like(value);
|
||
if !matches!(
|
||
call_node.kind(),
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression"
|
||
) {
|
||
return;
|
||
}
|
||
let args = call_node
|
||
.child_by_field_name("arguments")
|
||
.map(named_children)
|
||
.unwrap_or_default();
|
||
let mut arg_refs: Vec<ValueRef> = Vec::new();
|
||
for arg in args {
|
||
arg_refs.extend(extract_value_refs(arg, bytes));
|
||
}
|
||
let call_line = call_node.start_position().row + 1;
|
||
state
|
||
.row_population_data
|
||
.insert(var_name, (call_line, arg_refs));
|
||
}
|
||
|
||
/// A3: record `let V = CALL(..)` (or `.await?` / `?` / reference
|
||
/// chains wrapping such a call) where `CALL` matches a configured
|
||
/// login-guard or authorization-check name. `V` is then treated as the
|
||
/// authenticated actor, `V.id`-shaped subjects are actor context and
|
||
/// shouldn't be flagged as foreign scoped IDs.
|
||
fn collect_self_actor_binding(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
state: &mut UnitState,
|
||
) {
|
||
// Rust `let_declaration` exposes `pattern`; JS/TS
|
||
// `variable_declarator` exposes `name`. Try both so the same
|
||
// recognition fires across languages.
|
||
let Some(pattern) = node
|
||
.child_by_field_name("pattern")
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
else {
|
||
return;
|
||
};
|
||
let Some(value) = node.child_by_field_name("value") else {
|
||
return;
|
||
};
|
||
|
||
// Destructuring: `const { user } = ctx.session;` /
|
||
// `const { user } = await getServerSession();` /
|
||
// `const { id } = req.user;`. These bind LOCAL variables that are
|
||
// semantically the actor (or the actor's id), and the existing
|
||
// single-ident path can't see them because `first_identifier_name`
|
||
// either picks the wrong key when several are destructured or
|
||
// misses the session-container RHS shape entirely.
|
||
if pattern.kind() == "object_pattern" {
|
||
collect_destructured_self_actor_binding(pattern, value, bytes, rules, state);
|
||
return;
|
||
}
|
||
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
if value_is_self_actor_call(value, bytes, rules) {
|
||
state.self_actor_vars.insert(var_name);
|
||
}
|
||
}
|
||
|
||
/// Pattern is `object_pattern` (JS/TS destructure). Walk the keys and
|
||
/// classify the RHS to decide what each destructured local should
|
||
/// register as:
|
||
///
|
||
/// * `const { user } = ctx.session` / `const { user } = await
|
||
/// getServerSession()`, RHS is a session container, so a
|
||
/// destructured `user` (or `currentUser`) becomes the unit's
|
||
/// self-actor binding.
|
||
/// * `const { id } = req.user` / `const { userId } = session.user` ,
|
||
/// RHS is the canonical authed-user base from
|
||
/// `is_self_scoped_session_base_text`, so a destructured `id` /
|
||
/// `userId` / `user_id` / `uid` becomes a self-actor-id binding.
|
||
/// * `const { user } = await loginGuardCall()`, also accepted
|
||
/// because `value_is_self_actor_call` already covers the
|
||
/// `let user = require_auth(..)` shape; we lift that recognition
|
||
/// into the destructure case so callers can extract the actor in a
|
||
/// single statement.
|
||
///
|
||
/// Each `pair_pattern` entry distinguishes the destructured KEY (the
|
||
/// shape of the RHS source) from the bound LOCAL (what we add to the
|
||
/// state set). Shorthand patterns reuse the key as the local.
|
||
fn collect_destructured_self_actor_binding(
|
||
pattern: Node<'_>,
|
||
value: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
state: &mut UnitState,
|
||
) {
|
||
// Two recognition paths run in sequence:
|
||
// 1. Static classify_destructure_rhs: hard-coded session-container
|
||
// / self-actor-base / self-actor-call shapes.
|
||
// 2. Dynamic self_scoped_session_bases lookup: if the RHS is a
|
||
// chain (or bare identifier) `<X>` and `<X>.user` was added to
|
||
// `self_scoped_session_bases` by an earlier TRPC param scan,
|
||
// the destructured `user` key is the actor. Closes the
|
||
// cal.com `({ ctx, input }: Options) => { const { user } = ctx; }`
|
||
// shape where ctx is the TRPC-typed param.
|
||
let kind = classify_destructure_rhs(value, bytes, rules);
|
||
let trpc_ctx_path = lookup_trpc_ctx_destructure_match(value, bytes, state);
|
||
|
||
if kind == DestructureRhsKind::None && trpc_ctx_path.is_none() {
|
||
return;
|
||
}
|
||
|
||
for idx in 0..pattern.named_child_count() {
|
||
let Some(child) = pattern.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
let (key, local) = match child.kind() {
|
||
// `{ user }`, key and local are the same identifier.
|
||
"shorthand_property_identifier_pattern" => {
|
||
let name = text(child, bytes);
|
||
(name.clone(), name)
|
||
}
|
||
// `{ user = default }`, left is the shorthand key/local.
|
||
"object_assignment_pattern" => {
|
||
let Some(left) = child.child_by_field_name("left") else {
|
||
continue;
|
||
};
|
||
let name = if matches!(
|
||
left.kind(),
|
||
"identifier" | "shorthand_property_identifier_pattern"
|
||
) {
|
||
text(left, bytes)
|
||
} else {
|
||
first_identifier_name(left, bytes).unwrap_or_default()
|
||
};
|
||
(name.clone(), name)
|
||
}
|
||
// `{ user: localName }`, `key` and `value` fields are
|
||
// distinct (key from RHS source, local in our scope).
|
||
"pair_pattern" => {
|
||
let key_node = child.child_by_field_name("key");
|
||
let local_node = child.child_by_field_name("value");
|
||
let (Some(k), Some(v)) = (key_node, local_node) else {
|
||
continue;
|
||
};
|
||
let key = text(k, bytes);
|
||
let local = first_identifier_name(v, bytes).unwrap_or_default();
|
||
(key, local)
|
||
}
|
||
_ => continue,
|
||
};
|
||
if kind != DestructureRhsKind::None {
|
||
process_destructure_entry(&key, &local, kind, state);
|
||
}
|
||
// Dynamic-set lift: when the RHS resolves to an `<X>` whose
|
||
// `<X>.user` was added to `self_scoped_session_bases`, the
|
||
// destructured `user` key is the actor. This closes the
|
||
// chained TRPC shape `({ ctx }: Options) => { const { user }
|
||
// = ctx; }` where the param-level pre-pass marked `ctx.user`
|
||
// earlier in the unit.
|
||
if let Some(rhs_path) = trpc_ctx_path.as_deref()
|
||
&& key.eq_ignore_ascii_case("user")
|
||
&& !local.is_empty()
|
||
{
|
||
let _ = rhs_path; // path itself is not stored; presence is the signal
|
||
state.self_actor_vars.insert(local);
|
||
}
|
||
}
|
||
}
|
||
|
||
/// How the right-hand side of a destructuring binding relates to the
/// authenticated session / actor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DestructureRhsKind {
    /// The RHS is a session container, so its destructured `user` field
    /// is the authenticated actor. Examples: `ctx.session`,
    /// `req.session`, `session`, `await getServerSession()`,
    /// `getSession()`.
    SessionContainer,
    /// The RHS is the authed-user base itself (`req.user`,
    /// `session.user`, `ctx.session.user`), so a destructured `id` field
    /// is the actor's own id.
    SelfActorBase,
    /// The RHS is neither a session nor an actor source; the destructure
    /// plays no role in self-actor recognition.
    None,
}
|
||
|
||
/// When the destructure RHS is `<chain>` (an identifier or member
|
||
/// chain), return `Some(chain_text)` if `<chain_text>.user` was added
|
||
/// to `state.self_scoped_session_bases` by an earlier
|
||
/// `collect_trpc_ctx_param` call. Used to mark the destructured
|
||
/// `user` shorthand as a self-actor binding when extracting it from a
|
||
/// TRPC ctx param's local, `({ ctx }: Options) => { const { user }
|
||
/// = ctx; }`.
|
||
fn lookup_trpc_ctx_destructure_match(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
state: &UnitState,
|
||
) -> Option<String> {
|
||
if state.self_scoped_session_bases.is_empty() {
|
||
return None;
|
||
}
|
||
let chain_text = chain_text_from_value(node, bytes)?;
|
||
if chain_text.is_empty() {
|
||
return None;
|
||
}
|
||
let candidate = format!("{chain_text}.user");
|
||
if state.self_scoped_session_bases.contains(&candidate) {
|
||
Some(chain_text)
|
||
} else {
|
||
None
|
||
}
|
||
}
|
||
|
||
/// Reduce an RHS expression to its dotted chain text, walking through
|
||
/// `await`/parens/non-null wrappers. Returns `None` for shapes that
|
||
/// aren't a pure identifier/member-chain (e.g. a call result, a
|
||
/// template literal, an object-literal expression).
|
||
fn chain_text_from_value(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
match node.kind() {
|
||
"identifier" => {
|
||
let t = text(node, bytes);
|
||
if t.is_empty() { None } else { Some(t) }
|
||
}
|
||
"field_expression" | "member_expression" | "field_access" | "scoped_identifier" => {
|
||
let chain = member_chain(node, bytes);
|
||
if chain.is_empty() {
|
||
None
|
||
} else {
|
||
Some(chain.join("."))
|
||
}
|
||
}
|
||
"type_cast_expression"
|
||
| "as_expression"
|
||
| "cast_expression"
|
||
| "parenthesized_expression"
|
||
| "non_null_expression"
|
||
| "await_expression"
|
||
| "try_expression" => {
|
||
let inner = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("expression"));
|
||
if let Some(v) = inner
|
||
&& let Some(t) = chain_text_from_value(v, bytes)
|
||
{
|
||
return Some(t);
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if let Some(t) = chain_text_from_value(child, bytes) {
|
||
return Some(t);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
fn classify_destructure_rhs(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
) -> DestructureRhsKind {
|
||
if value_is_self_actor_call(node, bytes, rules) {
|
||
return DestructureRhsKind::SessionContainer;
|
||
}
|
||
if value_is_session_provider_chain(node, bytes) {
|
||
return DestructureRhsKind::SessionContainer;
|
||
}
|
||
if value_is_self_actor_base_chain(node, bytes) {
|
||
return DestructureRhsKind::SelfActorBase;
|
||
}
|
||
DestructureRhsKind::None
|
||
}
|
||
|
||
fn process_destructure_entry(
|
||
key: &str,
|
||
local: &str,
|
||
kind: DestructureRhsKind,
|
||
state: &mut UnitState,
|
||
) {
|
||
if key.is_empty() || local.is_empty() {
|
||
return;
|
||
}
|
||
let key_lower = key.to_ascii_lowercase();
|
||
match kind {
|
||
DestructureRhsKind::SessionContainer => {
|
||
if matches!(key_lower.as_str(), "user" | "currentuser" | "current_user") {
|
||
state.self_actor_vars.insert(local.to_string());
|
||
}
|
||
}
|
||
DestructureRhsKind::SelfActorBase => {
|
||
if matches!(key_lower.as_str(), "id" | "userid" | "user_id" | "uid") {
|
||
state.self_actor_id_vars.insert(local.to_string());
|
||
}
|
||
}
|
||
DestructureRhsKind::None => {}
|
||
}
|
||
}
|
||
|
||
/// True when `node` (after walking through `await`/parens/non-null
|
||
/// wrappers) is a session-container expression, a chain ending in
|
||
/// `.session` / `.state.session` / a bare `session` identifier, or a
|
||
/// call to a known session-getter (`getServerSession()`,
|
||
/// `getSession()`). Distinct from `value_is_self_actor_call` which
|
||
/// matches login-guard / authorization-check callees configured per
|
||
/// language.
|
||
fn value_is_session_provider_chain(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
match node.kind() {
|
||
"field_expression" | "member_expression" | "field_access" | "scoped_identifier" => {
|
||
let chain = member_chain(node, bytes);
|
||
if chain.is_empty() {
|
||
return false;
|
||
}
|
||
let joined = chain.join(".");
|
||
// Bare session containers, `ctx.session`, `req.session`,
|
||
// `request.session`, plus the Koa `ctx.state` shape.
|
||
matches!(
|
||
joined.as_str(),
|
||
"ctx.session" | "ctx.state" | "req.session" | "request.session" | "session"
|
||
)
|
||
}
|
||
"identifier" => {
|
||
let name = text(node, bytes);
|
||
matches!(name.as_str(), "session")
|
||
}
|
||
// Known session-getter calls. Conservative list, only
|
||
// recogniser shapes that are unambiguously session-providing
|
||
// in the JS/TS ecosystem (NextAuth's `getServerSession` is the
|
||
// dominant one). `auth()` and `useSession()` are deliberately
|
||
// omitted because their meaning is ambiguous outside of a
|
||
// server-component context and adding them risks
|
||
// over-suppression in non-NextAuth code.
|
||
"call_expression" | "call" => {
|
||
let callee = call_name(node, bytes);
|
||
let last = bare_method_name(&callee);
|
||
matches!(
|
||
last,
|
||
"getServerSession"
|
||
| "getSession"
|
||
| "getServerSideSession"
|
||
| "unstable_getServerSession"
|
||
)
|
||
}
|
||
"type_cast_expression"
|
||
| "as_expression"
|
||
| "cast_expression"
|
||
| "parenthesized_expression"
|
||
| "non_null_expression"
|
||
| "await_expression"
|
||
| "try_expression" => {
|
||
let inner = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("expression"));
|
||
if let Some(v) = inner
|
||
&& value_is_session_provider_chain(v, bytes)
|
||
{
|
||
return true;
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if value_is_session_provider_chain(child, bytes) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// True when `node` is the canonical authed-user base from
|
||
/// `is_self_scoped_session_base_text` (e.g. `req.user`, `session.user`,
|
||
/// `ctx.session.user`). Used to recognise `const { id } = req.user`
|
||
/// so the destructured `id` becomes a self-actor-id.
|
||
fn value_is_self_actor_base_chain(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
match node.kind() {
|
||
"field_expression" | "member_expression" | "field_access" | "scoped_identifier" => {
|
||
let chain = member_chain(node, bytes);
|
||
if chain.is_empty() {
|
||
return false;
|
||
}
|
||
let joined = chain.join(".");
|
||
is_self_scoped_session_base_text(&joined)
|
||
}
|
||
"type_cast_expression"
|
||
| "as_expression"
|
||
| "cast_expression"
|
||
| "parenthesized_expression"
|
||
| "non_null_expression"
|
||
| "await_expression"
|
||
| "try_expression" => {
|
||
let inner = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("expression"));
|
||
if let Some(v) = inner
|
||
&& value_is_self_actor_base_chain(v, bytes)
|
||
{
|
||
return true;
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if value_is_self_actor_base_chain(child, bytes) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// Recognise variable bindings whose right-hand side is a literal
|
||
/// constant, string, integer, float, or boolean. A subject backed
|
||
/// by a constant binding cannot be user-controlled and so must not
|
||
/// trigger `<lang>.auth.missing_ownership_check` even when the
|
||
/// variable name happens to match `is_id_like` (e.g.
|
||
/// `id := "id"` in a Go test fixture).
|
||
///
|
||
/// Walks the binding's RHS through common wrappers
|
||
/// (`parenthesized_expression`, `type_cast_expression`,
|
||
/// reference/borrow expressions) before checking for a leaf literal
|
||
/// kind. Conservative: any non-literal subexpression on the RHS
|
||
/// (a call, identifier, field-access) skips the binding, that var
|
||
/// might still hold attacker-controlled data.
|
||
///
|
||
/// Handles the per-language declaration kinds wired in
|
||
/// `collect_unit_state`: Go `short_var_declaration` (`x := "foo"`),
|
||
/// JS `lexical_declaration` (`const x = "foo"`), Java
|
||
/// `local_variable_declaration`, Rust `let_declaration`, and bare
|
||
/// `assignment_expression`.
|
||
fn collect_const_string_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
// `assignment` / `assignment_expression`: `x = "foo"`, populate
|
||
// the LHS (`name` / `left`) when the RHS is a literal.
|
||
if matches!(
|
||
node.kind(),
|
||
"assignment" | "assignment_expression" | "augmented_assignment"
|
||
) {
|
||
let lhs = node
|
||
.child_by_field_name("left")
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
.or_else(|| node.child_by_field_name("target"));
|
||
let rhs = node
|
||
.child_by_field_name("right")
|
||
.or_else(|| node.child_by_field_name("value"));
|
||
if let (Some(lhs), Some(rhs)) = (lhs, rhs)
|
||
&& rhs_is_pure_literal(rhs)
|
||
{
|
||
for var in collect_lhs_idents(lhs, bytes) {
|
||
state.const_bound_vars.insert(var);
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
// Go `short_var_declaration` / `var_declaration` /
|
||
// `const_declaration`: `id := "id"` or `var id string = "id"`.
|
||
// Tree-sitter-go uses `left:expression_list` and
|
||
// `right:expression_list`.
|
||
if matches!(
|
||
node.kind(),
|
||
"short_var_declaration" | "var_spec" | "const_spec"
|
||
) {
|
||
let left = node.child_by_field_name("left").or_else(|| {
|
||
// Some tree-sitter grammars expose name(s) instead of left
|
||
node.child_by_field_name("name")
|
||
});
|
||
let right = node.child_by_field_name("right").or_else(|| {
|
||
node.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("default"))
|
||
});
|
||
if let (Some(left), Some(right)) = (left, right) {
|
||
// expression_list parallel, pair LHS idents with RHS exprs.
|
||
let lhs_idents = collect_lhs_idents(left, bytes);
|
||
let rhs_exprs: Vec<Node<'_>> = if right.kind() == "expression_list" {
|
||
let mut cursor = right.walk();
|
||
right
|
||
.children(&mut cursor)
|
||
.filter(|c| !matches!(c.kind(), "," | "(" | ")"))
|
||
.collect()
|
||
} else {
|
||
vec![right]
|
||
};
|
||
for (idx, var) in lhs_idents.into_iter().enumerate() {
|
||
if let Some(expr) = rhs_exprs.get(idx)
|
||
&& rhs_is_pure_literal(*expr)
|
||
{
|
||
state.const_bound_vars.insert(var);
|
||
}
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
// `var_declaration` / `const_declaration` (Go top-level wrappers
|
||
// around var_spec/const_spec): recurse into children handled above.
|
||
if matches!(node.kind(), "var_declaration" | "const_declaration") {
|
||
for idx in 0..node.named_child_count() {
|
||
if let Some(child) = node.named_child(idx as u32) {
|
||
collect_const_string_binding(child, bytes, state);
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
// Rust `let_declaration` / Python `expression_statement` wrapping a
|
||
// top-level assignment / JS `lexical_declaration` / Java
|
||
// `local_variable_declaration`, all expose the binding via
|
||
// `pattern`/`name` + `value`.
|
||
let pattern = node
|
||
.child_by_field_name("pattern")
|
||
.or_else(|| node.child_by_field_name("name"));
|
||
let value = node.child_by_field_name("value");
|
||
if let (Some(pattern), Some(value)) = (pattern, value)
|
||
&& rhs_is_pure_literal(value)
|
||
{
|
||
for var in collect_lhs_idents(pattern, bytes) {
|
||
state.const_bound_vars.insert(var);
|
||
}
|
||
return;
|
||
}
|
||
|
||
// JS `lexical_declaration` / Java `local_variable_declaration` /
|
||
// Python `expression_statement`, the binding child is a wrapper
|
||
// (`variable_declarator`). Recurse into wrappers; the
|
||
// `variable_declarator` arm in `collect_unit_state` handles them.
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if matches!(
|
||
child.kind(),
|
||
"variable_declarator"
|
||
| "init_declarator"
|
||
| "var_spec"
|
||
| "const_spec"
|
||
| "assignment"
|
||
| "assignment_expression"
|
||
) {
|
||
collect_const_string_binding(child, bytes, state);
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Returns true if `node` (after unwrapping common wrappers) is a
|
||
/// pure literal, string, integer, float, boolean, or null. Returns
|
||
/// false for any expression that could carry attacker-controlled data
|
||
/// (calls, identifiers, field access, template strings with
|
||
/// interpolations).
|
||
fn rhs_is_pure_literal(node: Node<'_>) -> bool {
|
||
// Unwrap wrappers that don't change taint provenance.
|
||
let inner = match node.kind() {
|
||
"parenthesized_expression"
|
||
| "type_cast_expression"
|
||
| "as_expression"
|
||
| "cast_expression"
|
||
| "reference_expression" => {
|
||
let value = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("expression"));
|
||
value.unwrap_or(node)
|
||
}
|
||
_ => node,
|
||
};
|
||
matches!(
|
||
inner.kind(),
|
||
"string_literal"
|
||
| "raw_string_literal"
|
||
| "string"
|
||
| "interpreted_string_literal"
|
||
| "rune_literal"
|
||
| "integer_literal"
|
||
| "int_literal"
|
||
| "float_literal"
|
||
| "true"
|
||
| "false"
|
||
| "boolean_literal"
|
||
| "nil"
|
||
| "null"
|
||
| "null_literal"
|
||
| "none"
|
||
| "character_literal"
|
||
) || (inner.kind() == "template_string" && !template_has_interpolation(inner))
|
||
|| (inner.kind() == "template_literal" && !template_has_interpolation(inner))
|
||
}
|
||
|
||
/// Returns true if a template literal/string contains any
|
||
/// interpolation segment (which carries dynamic data). Pure
|
||
/// hard-coded template strings without `${...}` are still constants.
|
||
fn template_has_interpolation(node: Node<'_>) -> bool {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if matches!(
|
||
child.kind(),
|
||
"template_substitution" | "interpolation" | "string_interpolation"
|
||
) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
|
||
/// Collect identifier names from an LHS pattern: a bare `identifier`,
|
||
/// a `tuple_pattern`, a Go `expression_list`, or a Rust `tuple_pattern`
|
||
/// / `let_pattern`. Returns the bound variable names. Ignores
|
||
/// destructured field accesses (we only track plain locals).
|
||
fn collect_lhs_idents(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||
let mut out = Vec::new();
|
||
if node.kind() == "identifier" {
|
||
out.push(text(node, bytes));
|
||
return out;
|
||
}
|
||
// Walk children, picking up identifiers; recurse into list/tuple
|
||
// wrappers commonly seen on LHS of multi-binding declarations.
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
match child.kind() {
|
||
"identifier" => out.push(text(child, bytes)),
|
||
"tuple_pattern"
|
||
| "expression_list"
|
||
| "pattern_list"
|
||
| "list_pattern"
|
||
| "field_identifier"
|
||
| "shorthand_field_identifier" => {
|
||
out.extend(collect_lhs_idents(child, bytes));
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
out
|
||
}
|
||
|
||
/// Detect `let X = V.id` (or `(V.id as ..).into()`, `V.id.into()`,
|
||
/// `V.user_id`, `V.uid`, `V.userId`) where `V` is in `self_actor_vars`.
|
||
/// `X` is then a transitive copy of the authenticated actor's id and
|
||
/// is recorded in `self_actor_id_vars` so subjects of that name count
|
||
/// as actor context, not as foreign scoped IDs.
|
||
///
|
||
/// Closes a real-repo FP cluster: route handlers idiomatically reduce
|
||
/// the authed user to a scalar id and reuse it across many SQL params
|
||
/// (`let uid = user.id; query_all(.., &[uid]); query_all(.., &[uid])`).
|
||
/// The original `V.id`-shape recognition only covered direct subject
|
||
/// expressions; this captures the common copy-and-pass shape.
|
||
fn collect_self_actor_id_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
// Rust `let_declaration` exposes `pattern`; JS/TS
|
||
// `variable_declarator` exposes `name`.
|
||
let Some(pattern) = node
|
||
.child_by_field_name("pattern")
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(value) = node.child_by_field_name("value") else {
|
||
return;
|
||
};
|
||
if value_is_self_actor_id_field(value, bytes, &state.self_actor_vars)
|
||
|| value_is_self_scoped_session_id_chain(value, bytes)
|
||
{
|
||
state.self_actor_id_vars.insert(var_name);
|
||
}
|
||
}
|
||
|
||
/// Does `node` resolve to a `V.id` / `V.user_id` / `V.uid` / `V.userId`
|
||
/// field access where `V` is in `actor_vars`? Walks through common
|
||
/// wrappers: `try_expression`, `await_expression`, `parenthesized_expression`,
|
||
/// `reference_expression`, `type_cast_expression` (`v.id as i64`),
|
||
/// and `call_expression` for chained `.into()` / `.to_string()` etc.
|
||
fn value_is_self_actor_id_field(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
actor_vars: &HashSet<String>,
|
||
) -> bool {
|
||
match node.kind() {
|
||
"field_expression" | "member_expression" | "field_access" | "scoped_identifier" => {
|
||
let receiver = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("object"));
|
||
let field = node
|
||
.child_by_field_name("field")
|
||
.or_else(|| node.child_by_field_name("property"))
|
||
.or_else(|| node.child_by_field_name("name"));
|
||
let (Some(receiver), Some(field)) = (receiver, field) else {
|
||
return false;
|
||
};
|
||
let receiver_name = text(receiver, bytes);
|
||
let field_name = text(field, bytes);
|
||
actor_vars.contains(&receiver_name) && is_self_actor_id_field_name(&field_name)
|
||
}
|
||
"type_cast_expression"
|
||
| "as_expression"
|
||
| "cast_expression"
|
||
| "parenthesized_expression"
|
||
| "try_expression"
|
||
| "await_expression"
|
||
| "reference_expression" => {
|
||
let value = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("expression"));
|
||
if let Some(v) = value
|
||
&& value_is_self_actor_id_field(v, bytes, actor_vars)
|
||
{
|
||
return true;
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if value_is_self_actor_id_field(child, bytes, actor_vars) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
// `(v.id as i64).into()` / `v.id.to_string()` / `v.id.clone()` ,
|
||
// call on a self-actor id field still propagates self-actor-id.
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
let receiver = node
|
||
.child_by_field_name("function")
|
||
.or_else(|| node.child_by_field_name("object"));
|
||
if let Some(r) = receiver {
|
||
// Function field of a method call is `receiver.method` ,
|
||
// walk the receiver subtree for the self-actor id field.
|
||
if value_is_self_actor_id_field(r, bytes, actor_vars) {
|
||
return true;
|
||
}
|
||
// Also check the receiver of a method-style chain:
|
||
// `(v.id as i64).into()`, `function` is the
|
||
// `field_expression` `(...).into`, whose `value` child
|
||
// is the cast expression.
|
||
if let Some(inner) = r
|
||
.child_by_field_name("value")
|
||
.or_else(|| r.child_by_field_name("object"))
|
||
&& value_is_self_actor_id_field(inner, bytes, actor_vars)
|
||
{
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// Report whether `field` names an identity-bearing field of the
/// authenticated actor. The comparison is ASCII case-insensitive
/// against `id`, `user_id`, `userid`, `uid`, `email`, `username` and
/// `handle`.
fn is_self_actor_id_field_name(field: &str) -> bool {
    const ID_FIELD_NAMES: &[&str] = &[
        "id", "user_id", "userid", "uid", "email", "username", "handle",
    ];
    ID_FIELD_NAMES
        .iter()
        .any(|candidate| field.eq_ignore_ascii_case(candidate))
}
|
||
|
||
/// Recognise `let X = session.user.id` (or
|
||
/// `req.session.user.id` / `ctx.session.user.id` / `req.user.id` /
|
||
/// `request.user.id`, etc.), a copy of the authenticated actor's
|
||
/// own id field through one of the canonical session-context chains
|
||
/// (the same set `is_self_scoped_session_subject` accepts at use
|
||
/// time). Walks through wrappers (`await`, `?.`, parens, casts,
|
||
/// trivial method chains like `.toString()`).
|
||
///
|
||
/// Closes a real-repo FP cluster (cal.com Next.js handlers): the
|
||
/// idiomatic shape is `if (session?.user?.id) { const userId =
|
||
/// session.user.id; await repo.get(userId); }`. The use site sees
|
||
/// a plain `userId` subject, so without binding-time recognition the
|
||
/// classifier can't tell it's actor context.
|
||
fn value_is_self_scoped_session_id_chain(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
match node.kind() {
|
||
"field_expression" | "member_expression" | "field_access" | "scoped_identifier" => {
|
||
// Build the dotted chain and reuse the same predicate the
|
||
// subject classifier uses (`matches_session_context` +
|
||
// self-scoped-base check). Doing it via the chain avoids
|
||
// re-implementing the session-context grammar here.
|
||
let chain = member_chain(node, bytes);
|
||
if chain.len() < 2 {
|
||
return false;
|
||
}
|
||
let field = chain.last().expect("len >= 2");
|
||
if !is_self_actor_id_field_name(field) {
|
||
return false;
|
||
}
|
||
let base_chain = &chain[..chain.len() - 1];
|
||
let base = base_chain.join(".");
|
||
classify_member_chain(base_chain) == ValueSourceKind::Session
|
||
&& is_self_scoped_session_base_text(&base)
|
||
}
|
||
"type_cast_expression"
|
||
| "as_expression"
|
||
| "cast_expression"
|
||
| "parenthesized_expression"
|
||
| "try_expression"
|
||
| "await_expression"
|
||
| "reference_expression"
|
||
| "non_null_expression" => {
|
||
let value = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("expression"));
|
||
if let Some(v) = value
|
||
&& value_is_self_scoped_session_id_chain(v, bytes)
|
||
{
|
||
return true;
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if value_is_self_scoped_session_id_chain(child, bytes) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
// `(req.user.id as number).toString()` / `session.user.id.toString()`
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
let receiver = node
|
||
.child_by_field_name("function")
|
||
.or_else(|| node.child_by_field_name("object"));
|
||
if let Some(r) = receiver {
|
||
if value_is_self_scoped_session_id_chain(r, bytes) {
|
||
return true;
|
||
}
|
||
if let Some(inner) = r
|
||
.child_by_field_name("value")
|
||
.or_else(|| r.child_by_field_name("object"))
|
||
&& value_is_self_scoped_session_id_chain(inner, bytes)
|
||
{
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// Report whether `base` is, verbatim, one of the canonical dotted
/// paths that denote the authenticated user (`req.user`,
/// `session.user`, `ctx.session.user`, `ctx.state.currentUser`, ...).
/// The match is exact and case-sensitive.
///
/// String-level counterpart of `is_self_scoped_session_base` in
/// `checks.rs`; it lives in the extract layer to avoid a layer
/// dependency, so the two lists must be kept in sync by hand.
fn is_self_scoped_session_base_text(base: &str) -> bool {
    const SELF_SCOPED_BASES: &[&str] = &[
        "req.session.user",
        "request.session.user",
        "session.user",
        "req.session.currentUser",
        "request.session.currentUser",
        "session.currentUser",
        "req.user",
        "request.user",
        "req.currentUser",
        "request.currentUser",
        "ctx.session.user",
        "ctx.session.currentUser",
        "ctx.state.user",
        "ctx.state.currentUser",
    ];
    SELF_SCOPED_BASES.iter().any(|known| *known == base)
}
|
||
|
||
/// Does `node` (possibly wrapped in `?`/`.await`/`&`/`match`) resolve
|
||
/// to a call whose callee matches `is_login_guard` or
|
||
/// `is_authorization_check`? Used to detect `let user =
|
||
/// auth::require_auth(..).await?`-style bindings, including the
|
||
/// `let user = match require_auth() { Ok(u) => u, Err(_) => return ... }`
|
||
/// shape used by Worker / Cloudflare-style handlers that propagate
|
||
/// the auth failure response instead of using `?`.
|
||
fn value_is_self_actor_call(node: Node<'_>, bytes: &[u8], rules: &AuthAnalysisRules) -> bool {
|
||
match node.kind() {
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
let callee = call_name(node, bytes);
|
||
!callee.is_empty()
|
||
&& (rules.is_login_guard(&callee) || rules.is_authorization_check(&callee))
|
||
}
|
||
"try_expression"
|
||
| "await_expression"
|
||
| "reference_expression"
|
||
| "parenthesized_expression"
|
||
| "match_expression" => {
|
||
// For `match SCRUTINEE { ... }`, the scrutinee is the
|
||
// call we care about, if `require_auth().await` is being
|
||
// matched, the `Ok(u) => u` arm gives us a self-actor
|
||
// binding even when `?` isn't usable. Walk all named
|
||
// children, tree-sitter exposes both the scrutinee and
|
||
// the arms.
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if value_is_self_actor_call(child, bytes, rules) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// A3: typed route-handler parameters whose declared type names the
|
||
/// authenticated user (e.g. `user: CurrentUser`, `admin: AdminUser`)
|
||
/// count as self-actor bindings. Recognized type last-segments:
|
||
/// `CurrentUser`, `SessionUser`, `AuthUser`, `AdminUser`,
|
||
/// `AuthenticatedUser`, `RequireAuth`, `RequireLogin`, `Authenticated`.
|
||
fn collect_typed_extractor_self_actor(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(ty_node) = node.child_by_field_name("type") else {
|
||
return;
|
||
};
|
||
let ty_text = text(ty_node, bytes);
|
||
if is_self_actor_type_text(&ty_text) {
|
||
state.self_actor_vars.insert(var_name);
|
||
}
|
||
}
|
||
|
||
/// B3: detect `let X = …prepare(LIT)…` / `let X = …query(LIT)…`
|
||
/// where the SQL literal classifies as authorization-gated. When
|
||
/// matched: insert `X` into `state.authorized_sql_vars` and synthesise
|
||
/// a `Membership` `AuthCheck` at the `let`'s line whose subjects
|
||
/// include `X` and the value-refs from the SQL call's bind args
|
||
/// (e.g. `user.id` in `.bind(user.id)`). Downstream uses of `X`'s
|
||
/// columns are then transitively covered through `row_field_vars`.
|
||
fn collect_sql_authorized_binding(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
state: &mut UnitState,
|
||
) {
|
||
if rules.acl_tables.is_empty() && !sql_direct_user_id_enabled() {
|
||
return;
|
||
}
|
||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(value) = node.child_by_field_name("value") else {
|
||
return;
|
||
};
|
||
let Some((sql_call, bind_arg_refs)) = find_authorized_sql_call_in_chain(value, bytes, rules)
|
||
else {
|
||
return;
|
||
};
|
||
|
||
state.authorized_sql_vars.insert(var_name.clone());
|
||
|
||
let mut subjects = bind_arg_refs;
|
||
subjects.push(ValueRef {
|
||
source_kind: ValueSourceKind::Identifier,
|
||
name: var_name,
|
||
base: None,
|
||
field: None,
|
||
index: None,
|
||
span: span(node),
|
||
});
|
||
let line = node.start_position().row + 1;
|
||
state.auth_checks.push(AuthCheck {
|
||
kind: AuthCheckKind::Membership,
|
||
callee: "(sql ACL)".into(),
|
||
subjects,
|
||
span: span(sql_call),
|
||
line,
|
||
args: Vec::new(),
|
||
condition_text: None,
|
||
is_route_level: false,
|
||
});
|
||
}
|
||
|
||
/// Switch for the direct-user-id-predicate path; currently always on.
///
/// That path in `sql_semantics::classify_sql_query` does not depend on
/// the ACL table list, so `let X = …query(LIT)…` chains are still worth
/// walking when no ACL tables are configured. It is kept as a function
/// so the path can be disabled later in a single place.
fn sql_direct_user_id_enabled() -> bool {
    true
}
|
||
|
||
/// Walk down a chain of method calls (`a.b().c().d()`) looking for a
|
||
/// call whose method matches a SQL prepare/query verb and whose first
|
||
/// argument is a string literal classifying as auth-gated. Returns
|
||
/// the matching call node along with the value-refs collected from
|
||
/// the *outer* chain's argument list (the call that bound the user
|
||
/// id, e.g. `.bind(user.id)`).
|
||
fn find_authorized_sql_call_in_chain<'tree>(
|
||
value: Node<'tree>,
|
||
bytes: &[u8],
|
||
rules: &AuthAnalysisRules,
|
||
) -> Option<(Node<'tree>, Vec<ValueRef>)> {
|
||
let mut bind_arg_refs: Vec<ValueRef> = Vec::new();
|
||
let mut cur = unwrap_try_like(value);
|
||
let mut steps = 0;
|
||
while steps < 16 {
|
||
steps += 1;
|
||
if !matches!(
|
||
cur.kind(),
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression"
|
||
) {
|
||
return None;
|
||
}
|
||
// Collect any non-literal arg value-refs from this call ,
|
||
// these typically include the bound user id (e.g.
|
||
// `.bind(user.id)` → adds `user.id` as a subject).
|
||
if let Some(args_node) = cur.child_by_field_name("arguments") {
|
||
for arg in named_children(args_node) {
|
||
if matches!(
|
||
arg.kind(),
|
||
"string_literal" | "raw_string_literal" | "string"
|
||
) {
|
||
continue;
|
||
}
|
||
bind_arg_refs.extend(extract_value_refs(arg, bytes));
|
||
}
|
||
}
|
||
|
||
let callee = call_name(cur, bytes);
|
||
let last_segment = bare_method_name(&callee);
|
||
if is_sql_prepare_method(last_segment) {
|
||
// Check first arg is a string literal that classifies
|
||
// as authorized.
|
||
let args = cur
|
||
.child_by_field_name("arguments")
|
||
.map(named_children)
|
||
.unwrap_or_default();
|
||
if let Some(first_arg) = args.first().copied()
|
||
&& let Some(literal) = collect_string_literal_text(first_arg, bytes)
|
||
&& crate::auth_analysis::sql_semantics::classify_sql_query(
|
||
&literal,
|
||
&rules.acl_tables,
|
||
)
|
||
.is_some()
|
||
{
|
||
return Some((cur, bind_arg_refs));
|
||
}
|
||
// Method matched but arg isn't a literal we recognise
|
||
// as authorized, bail.
|
||
return None;
|
||
}
|
||
|
||
// Descend through the receiver/object of this call to look
|
||
// for an inner SQL prepare.
|
||
let next = cur
|
||
.child_by_field_name("receiver")
|
||
.or_else(|| {
|
||
cur.child_by_field_name("function").and_then(|fun| {
|
||
fun.child_by_field_name("object")
|
||
.or_else(|| fun.child_by_field_name("operand"))
|
||
.or_else(|| fun.child_by_field_name("argument"))
|
||
.or_else(|| fun.child_by_field_name("value"))
|
||
})
|
||
})
|
||
.or_else(|| cur.child_by_field_name("object"));
|
||
let next = next?;
|
||
cur = unwrap_try_like(next);
|
||
}
|
||
None
|
||
}
|
||
|
||
/// Report whether `method` is one of the recognised SQL prepare/query
/// verbs. The comparison is exact and case-sensitive, and is meant to
/// be applied to the last segment of a callee.
///
/// Only the verb is checked here; the receiver is not constrained to
/// any particular type. Known DB connection receivers are classified
/// by the sink-class type gate, and this list is the orthogonal verb
/// axis.
fn is_sql_prepare_method(method: &str) -> bool {
    const SQL_VERBS: &[&str] = &[
        "prepare",
        "query",
        "query_one",
        "query_all",
        "query_as",
        "query_map",
        "query_row",
        "query_scalar",
        "fetch",
        "fetch_one",
        "fetch_all",
        "fetch_optional",
        "fetch_scalar",
        "execute",
        "exec",
    ];
    SQL_VERBS.iter().any(|verb| *verb == method)
}
|
||
|
||
/// Extract the string content from a Rust string literal node, joining
|
||
/// adjacent fragments (e.g. `"a" "b"` becomes `"ab"`). Returns `None`
|
||
/// when the node isn't a string literal at all.
|
||
fn collect_string_literal_text(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
match node.kind() {
|
||
"string_literal" | "raw_string_literal" => {
|
||
let mut buf = String::new();
|
||
let mut found = false;
|
||
for child in named_children(node) {
|
||
if child.kind() == "string_content" {
|
||
buf.push_str(&text(child, bytes));
|
||
found = true;
|
||
}
|
||
}
|
||
if found {
|
||
Some(buf)
|
||
} else {
|
||
Some(strip_quotes(&text(node, bytes)))
|
||
}
|
||
}
|
||
"string" | "template_string" | "interpreted_string_literal" => {
|
||
Some(strip_quotes(&text(node, bytes)))
|
||
}
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
/// B3: `for ROW in X { … }`, when `X` (the iterator value) names a
|
||
/// SQL-authorized variable, mark `ROW` authorized too AND record
|
||
/// `row_field_vars[ROW] = X` so transitive subject coverage works
|
||
/// for column reads inside the loop body.
|
||
fn collect_for_row_binding(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(value) = node.child_by_field_name("value") else {
|
||
return;
|
||
};
|
||
// The iterated expression is often `&X`, `X.iter()`, `X.into_iter()`,
|
||
// etc. Walk through reference / common iterator-method wrappers
|
||
// to recover the underlying var name.
|
||
let Some(source_var) = single_iter_source_name(value, bytes) else {
|
||
return;
|
||
};
|
||
state
|
||
.row_field_vars
|
||
.insert(var_name.clone(), source_var.clone());
|
||
if state.authorized_sql_vars.contains(&source_var) {
|
||
state.authorized_sql_vars.insert(var_name);
|
||
}
|
||
}
|
||
|
||
/// Recover the source identifier under common iteration-shape
|
||
/// wrappers: `X`, `&X`, `&mut X`, `X.iter()`, `X.iter_mut()`,
|
||
/// `X.into_iter()`, `X.values()`, `X.keys()`. Returns `None` for
|
||
/// arbitrary expressions (`fetch_rows()`, `make_iter() + 1`, …).
|
||
fn single_iter_source_name(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
match node.kind() {
|
||
"identifier" => {
|
||
let value = text(node, bytes);
|
||
if value.is_empty() { None } else { Some(value) }
|
||
}
|
||
"reference_expression" | "parenthesized_expression" => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if let Some(name) = single_iter_source_name(child, bytes) {
|
||
return Some(name);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
let callee = call_name(node, bytes);
|
||
let last = bare_method_name(&callee);
|
||
if !matches!(
|
||
last,
|
||
"iter" | "iter_mut" | "into_iter" | "values" | "keys" | "drain"
|
||
) {
|
||
return None;
|
||
}
|
||
let receiver = node
|
||
.child_by_field_name("receiver")
|
||
.or_else(|| {
|
||
node.child_by_field_name("function").and_then(|fun| {
|
||
fun.child_by_field_name("object")
|
||
.or_else(|| fun.child_by_field_name("operand"))
|
||
.or_else(|| fun.child_by_field_name("argument"))
|
||
.or_else(|| fun.child_by_field_name("value"))
|
||
})
|
||
})
|
||
.or_else(|| node.child_by_field_name("object"))?;
|
||
single_iter_source_name(receiver, bytes)
|
||
}
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
/// B3: `let Y = ROW.method(..)` / `let Y = ROW.field` where `ROW` is
|
||
/// SQL-authorized, propagate authorized status to `Y` so any
|
||
/// downstream use (e.g. as a sink subject) is treated as covered.
|
||
/// `row_field_vars[Y] = ROW` is already populated by
|
||
/// `collect_row_field_binding`; this helper just propagates the
|
||
/// authorized-vars set along that edge.
|
||
fn propagate_sql_authorized_through_field_read(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
state: &mut UnitState,
|
||
) {
|
||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||
return;
|
||
};
|
||
let Some(var_name) = first_identifier_name(pattern, bytes) else {
|
||
return;
|
||
};
|
||
if var_name.is_empty() {
|
||
return;
|
||
}
|
||
let Some(value) = node.child_by_field_name("value") else {
|
||
return;
|
||
};
|
||
let Some(source) = extract_row_receiver_name(value, bytes) else {
|
||
return;
|
||
};
|
||
if state.authorized_sql_vars.contains(&source) {
|
||
state.authorized_sql_vars.insert(var_name);
|
||
}
|
||
}
|
||
|
||
/// Recognise type names that semantically mean "the authenticated
|
||
/// actor" as the type of a function parameter. Used by
|
||
/// `collect_typed_extractor_self_actor` to seed `self_actor_vars` so
|
||
/// that downstream `V.id`-shaped subjects on a parameter of one of
|
||
/// these types count as actor context, not foreign scoped IDs.
|
||
///
|
||
/// The recogniser is intentionally type-only, no name heuristic on
|
||
/// the variable. A handler signature
|
||
/// `pub async fn handler(.., local_user_view: LocalUserView)` is
|
||
/// recognised because the type name matches, not because the
|
||
/// parameter is conventionally named `local_user_view`.
|
||
///
|
||
/// **Two acceptance forms:**
|
||
///
|
||
/// 1. *Tight exact set*, names whose entire identity is "auth
|
||
/// subject": `Authenticated`, `Identity`, `Principal`. Adding new
|
||
/// bare names to this set should be done sparingly; framework
|
||
/// types that include `User` should go through the structural
|
||
/// form instead.
|
||
///
|
||
/// 2. *Structural form*, a CamelCase identifier of the shape
|
||
/// `<PREFIX>User<SUFFIX>?` where `PREFIX` is one of `Local`,
|
||
/// `Current`, `Session`, `Auth`, `Authenticated`, `LoggedIn`,
|
||
/// `Admin`, and `SUFFIX` (optional) is one of `View`, `Info`,
|
||
/// `Context`, `Session`, `Token`. Catches `LocalUserView`
|
||
/// (lemmy), `LocalUser`, `CurrentUser`, `LoggedInUser`,
|
||
/// `AuthenticatedUserContext`, etc.
|
||
///
|
||
/// **Deliberately *not* matched:**
|
||
/// * Bare `User`, too loose; `User` parameters are very often
|
||
/// deserialised payloads, not actor extractors.
|
||
/// * `UserView`, `UserPreferences`, same reason; the prefix is what
|
||
/// carries the auth signal, not the bare `User` segment.
|
||
fn is_self_actor_type_text(ty: &str) -> bool {
|
||
let trimmed = ty
|
||
.trim()
|
||
.trim_start_matches('&')
|
||
.trim_start_matches("mut ")
|
||
.trim();
|
||
let after_colons = trimmed.rsplit("::").next().unwrap_or(trimmed);
|
||
let base = after_colons
|
||
.split('<')
|
||
.next()
|
||
.unwrap_or(after_colons)
|
||
.trim();
|
||
if matches!(base, "Authenticated" | "Identity" | "Principal") {
|
||
return true;
|
||
}
|
||
matches_self_actor_user_form(base)
|
||
}
|
||
|
||
/// Tests `base` against the structural pattern `<PREFIX>User<SUFFIX>?`.
///
/// * `PREFIX` must be one of `Local`, `Current`, `Session`,
///   `Authenticated`, `Auth`, `LoggedIn`, `Admin`.
/// * `SUFFIX`, when present, must be exactly one of `View`, `Info`,
///   `Context`, `Session`, `Token`.
///
/// Matching is case-sensitive and anchored at both ends: the whole of
/// `base` has to be consumed by prefix + `User` + optional suffix.
/// Every prefix is tried, so `Auth` failing on `AuthenticatedUser`
/// does not prevent `Authenticated` from matching it.
fn matches_self_actor_user_form(base: &str) -> bool {
    const AUTHORITY_PREFIXES: &[&str] = &[
        "Local",
        "Current",
        "Session",
        "Authenticated",
        "Auth",
        "LoggedIn",
        "Admin",
    ];
    const CONTEXT_SUFFIXES: &[&str] = &["View", "Info", "Context", "Session", "Token"];

    AUTHORITY_PREFIXES.iter().any(|prefix| {
        base.strip_prefix(*prefix)
            .and_then(|rest| rest.strip_prefix("User"))
            .is_some_and(|tail| tail.is_empty() || CONTEXT_SUFFIXES.contains(&tail))
    })
}
|
||
|
||
/// Extract a single-segment receiver name for a value node of the shape
|
||
/// `ROW.method(..)` or `ROW.field`. Returns `None` when the receiver
|
||
/// isn't a simple identifier (e.g. deeper chains like `ctx.db.get(..)`).
|
||
fn extract_row_receiver_name(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
let node = unwrap_try_like(node);
|
||
match node.kind() {
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
let function = node
|
||
.child_by_field_name("function")
|
||
.or_else(|| node.child_by_field_name("method"));
|
||
let function = function?;
|
||
single_ident_receiver(function, bytes)
|
||
.or_else(|| single_ident_from_call_receiver(node, bytes))
|
||
}
|
||
"field_expression"
|
||
| "member_expression"
|
||
| "attribute"
|
||
| "selector_expression"
|
||
| "field_access" => single_ident_receiver(node, bytes),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
fn single_ident_receiver(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
let object = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("object"))
|
||
.or_else(|| node.child_by_field_name("operand"))
|
||
.or_else(|| node.child_by_field_name("receiver"))?;
|
||
single_ident_text(object, bytes)
|
||
}
|
||
|
||
fn single_ident_from_call_receiver(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
let receiver = node
|
||
.child_by_field_name("receiver")
|
||
.or_else(|| node.child_by_field_name("object"))?;
|
||
single_ident_text(receiver, bytes)
|
||
}
|
||
|
||
fn single_ident_text(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
if matches!(
|
||
node.kind(),
|
||
"identifier" | "shorthand_property_identifier" | "field_identifier"
|
||
) {
|
||
let value = text(node, bytes);
|
||
if value.is_empty() { None } else { Some(value) }
|
||
} else {
|
||
None
|
||
}
|
||
}
|
||
|
||
/// Strip `?` / `.await` / `&` / `&mut` wrappers from a value node,
|
||
/// returning the underlying call/field expression when present.
|
||
fn unwrap_try_like(node: Node<'_>) -> Node<'_> {
|
||
let mut cur = node;
|
||
loop {
|
||
match cur.kind() {
|
||
"try_expression"
|
||
| "await_expression"
|
||
| "reference_expression"
|
||
| "parenthesized_expression" => {
|
||
let Some(inner) = cur
|
||
.child_by_field_name("expression")
|
||
.or_else(|| cur.named_child(0))
|
||
else {
|
||
return cur;
|
||
};
|
||
cur = inner;
|
||
}
|
||
_ => return cur,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Detect the `if OWNER != SELF { return ... }` (or `==` with `else`
|
||
/// early-exit) row-level ownership-equality pattern and emit a
|
||
/// synthetic `AuthCheck { kind: Ownership }`. The AuthCheck is
|
||
/// back-dated to the row's `let` line, and populated with the row's
|
||
/// original fetch arguments as subjects, so the row-fetching call
|
||
/// (e.g. `db.query_one(.., &[doc_id])`) is also covered.
|
||
fn detect_ownership_equality_check(if_node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
let Some(condition_raw) = if_node.child_by_field_name("condition") else {
|
||
return;
|
||
};
|
||
let Some(consequence) = if_node.child_by_field_name("consequence") else {
|
||
return;
|
||
};
|
||
let alternative = if_node.child_by_field_name("alternative");
|
||
let condition = unwrap_parens_local(condition_raw);
|
||
if condition.kind() != "binary_expression" {
|
||
return;
|
||
}
|
||
let Some(operator) = binary_operator_text(condition, bytes) else {
|
||
return;
|
||
};
|
||
let is_ne = matches!(operator.as_str(), "!=" | "!==" | "ne");
|
||
let is_eq = matches!(operator.as_str(), "==" | "===" | "eq");
|
||
if !is_ne && !is_eq {
|
||
return;
|
||
}
|
||
let Some((left, right)) = binary_operands(condition) else {
|
||
return;
|
||
};
|
||
|
||
let fail_branch = if is_ne {
|
||
consequence
|
||
} else if let Some(alt) = alternative {
|
||
resolve_else_block(alt)
|
||
} else {
|
||
return;
|
||
};
|
||
|
||
if !branch_has_early_exit(fail_branch, bytes) {
|
||
return;
|
||
}
|
||
|
||
let left_refs = extract_value_refs(left, bytes);
|
||
let right_refs = extract_value_refs(right, bytes);
|
||
|
||
let (owner_ref, _self_ref) = match (
|
||
pick_owner_field_ref(&left_refs),
|
||
pick_self_actor_ref(&right_refs),
|
||
) {
|
||
(Some(o), Some(s)) => (o, s),
|
||
_ => match (
|
||
pick_owner_field_ref(&right_refs),
|
||
pick_self_actor_ref(&left_refs),
|
||
) {
|
||
(Some(o), Some(s)) => (o, s),
|
||
_ => return,
|
||
},
|
||
};
|
||
|
||
let row_binding = state.row_field_vars.get(&owner_ref.name).cloned();
|
||
let if_line = if_node.start_position().row + 1;
|
||
let if_span = span(if_node);
|
||
let condition_text = text(condition, bytes);
|
||
|
||
let (check_line, mut subjects) = match row_binding
|
||
.as_ref()
|
||
.and_then(|row| state.row_population_data.get(row).map(|v| (row, v)))
|
||
{
|
||
Some((row, (row_line, arg_refs))) => {
|
||
let mut subjects = arg_refs.clone();
|
||
subjects.push(ValueRef {
|
||
source_kind: ValueSourceKind::Identifier,
|
||
name: row.clone(),
|
||
base: None,
|
||
field: None,
|
||
index: None,
|
||
span: if_span,
|
||
});
|
||
(*row_line, subjects)
|
||
}
|
||
None => match row_binding.as_ref() {
|
||
Some(row) => (
|
||
if_line,
|
||
vec![ValueRef {
|
||
source_kind: ValueSourceKind::Identifier,
|
||
name: row.clone(),
|
||
base: None,
|
||
field: None,
|
||
index: None,
|
||
span: if_span,
|
||
}],
|
||
),
|
||
None => (if_line, Vec::new()),
|
||
},
|
||
};
|
||
subjects.push(owner_ref);
|
||
|
||
state.auth_checks.push(AuthCheck {
|
||
kind: AuthCheckKind::Ownership,
|
||
callee: "(row ownership equality)".into(),
|
||
subjects,
|
||
span: if_span,
|
||
line: check_line,
|
||
args: Vec::new(),
|
||
condition_text: Some(condition_text),
|
||
is_route_level: false,
|
||
});
|
||
}
|
||
|
||
/// Removes any number of nested `parenthesized_expression` layers and
/// returns the first node that is not one. A parenthesised node with no
/// named child is returned as-is.
fn unwrap_parens_local(node: Node<'_>) -> Node<'_> {
    let mut current = node;
    while current.kind() == "parenthesized_expression" {
        match current.named_child(0) {
            Some(inner) => current = inner,
            None => break,
        }
    }
    current
}
|
||
|
||
fn binary_operator_text(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
if let Some(op) = node.child_by_field_name("operator") {
|
||
let value = text(op, bytes);
|
||
if !value.is_empty() {
|
||
return Some(value);
|
||
}
|
||
}
|
||
let mut cursor = node.walk();
|
||
for child in node.children(&mut cursor) {
|
||
if !child.is_named() {
|
||
let value = text(child, bytes);
|
||
if !value.is_empty() {
|
||
return Some(value);
|
||
}
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
fn binary_operands<'tree>(node: Node<'tree>) -> Option<(Node<'tree>, Node<'tree>)> {
|
||
if let (Some(left), Some(right)) = (
|
||
node.child_by_field_name("left"),
|
||
node.child_by_field_name("right"),
|
||
) {
|
||
return Some((left, right));
|
||
}
|
||
let children = named_children(node);
|
||
match children.as_slice() {
|
||
[left, right] => Some((*left, *right)),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
fn resolve_else_block(alt: Node<'_>) -> Node<'_> {
|
||
// Rust wraps the else branch in an `else_clause` with the block
|
||
// as a named child. Other grammars differ, so we walk defensively.
|
||
if alt.kind() == "else_clause"
|
||
&& let Some(block) = named_children(alt).into_iter().next()
|
||
{
|
||
return block;
|
||
}
|
||
alt
|
||
}
|
||
|
||
fn branch_has_early_exit(branch: Node<'_>, bytes: &[u8]) -> bool {
|
||
named_children(branch)
|
||
.into_iter()
|
||
.any(|n| node_is_early_exit(n, bytes))
|
||
}
|
||
|
||
fn node_is_early_exit(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
match node.kind() {
|
||
"return_expression" | "return_statement" => true,
|
||
// Throwing aborts execution flow. Common in JS/TS / Java
|
||
// (`throw new ForbiddenException()`), Python (`raise ...`),
|
||
// Ruby (`raise ...`).
|
||
"throw_statement" | "throw_expression" | "raise_statement" => true,
|
||
// A call whose callee name is in the framework denial set
|
||
// (`notFound()` / `redirect()` / `abort()` / `forbidden()` /
|
||
// `unauthorized()` / etc.) terminates the request. These
|
||
// helpers either throw under the hood (Next.js, Flask) or
|
||
// exit the process (`process.exit`, `sys.exit`).
|
||
"call_expression" | "call" | "method_invocation" => is_denial_call(node, bytes),
|
||
"expression_statement" => named_children(node)
|
||
.into_iter()
|
||
.any(|n| node_is_early_exit(n, bytes)),
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// Recognise calls that act as request-terminating denial helpers.
|
||
///
|
||
/// The callee name is matched against a curated set of framework
|
||
/// idioms. This is read in `node_is_early_exit` from inside the
|
||
/// row-ownership-equality detector, where the ambient context already
|
||
/// requires an `owner.field` vs. `self.id` binary comparison; the
|
||
/// denial-call match is only the early-exit witness, not the auth
|
||
/// signal itself.
|
||
fn is_denial_call(call_node: Node<'_>, bytes: &[u8]) -> bool {
|
||
let Some(callee_node) = call_node
|
||
.child_by_field_name("function")
|
||
.or_else(|| call_node.child_by_field_name("name"))
|
||
else {
|
||
return false;
|
||
};
|
||
let callee_text = text(callee_node, bytes);
|
||
let trimmed = callee_text.trim();
|
||
let leaf = trimmed.rsplit('.').next().unwrap_or(trimmed);
|
||
let leaf = leaf.rsplit("::").next().unwrap_or(leaf);
|
||
matches!(
|
||
leaf,
|
||
"notFound"
|
||
| "redirect"
|
||
| "permanentRedirect"
|
||
| "unauthorized"
|
||
| "forbidden"
|
||
| "abort"
|
||
| "halt"
|
||
)
|
||
}
|
||
|
||
pub(super) fn is_owner_field_subject(subject: &ValueRef) -> bool {
|
||
let raw = match subject.source_kind {
|
||
ValueSourceKind::ArrayIndex => subject.base.as_deref().unwrap_or(&subject.name),
|
||
_ => subject
|
||
.field
|
||
.as_deref()
|
||
.or(subject.base.as_deref())
|
||
.unwrap_or(&subject.name),
|
||
};
|
||
let key = canonical_name(raw);
|
||
matches!(
|
||
key.as_str(),
|
||
"userid"
|
||
| "ownerid"
|
||
| "authorid"
|
||
| "createdby"
|
||
| "uploaderid"
|
||
| "updatedby"
|
||
| "submittedby"
|
||
| "assignedto"
|
||
| "creatorid"
|
||
| "postedby"
|
||
)
|
||
}
|
||
|
||
pub(super) fn is_self_actor_subject(subject: &ValueRef) -> bool {
|
||
// `req.user.id`, `session.user.id`, `ctx.session.user.id`, etc.
|
||
if subject.source_kind == ValueSourceKind::Session
|
||
&& subject
|
||
.base
|
||
.as_deref()
|
||
.is_some_and(is_self_session_base_local)
|
||
{
|
||
return true;
|
||
}
|
||
// Plain member chains that name the caller directly: `user.id`,
|
||
// `current_user.id`, `actor.id`. A3 widens this set via
|
||
// `self_actor_vars`.
|
||
let Some(field) = subject.field.as_deref() else {
|
||
return false;
|
||
};
|
||
if !field.eq_ignore_ascii_case("id") {
|
||
return false;
|
||
}
|
||
let Some(base) = subject.base.as_deref() else {
|
||
return false;
|
||
};
|
||
let last = base.rsplit('.').next().unwrap_or(base);
|
||
matches!(
|
||
last,
|
||
"user" | "current_user" | "currentUser" | "actor" | "current_actor"
|
||
)
|
||
}
|
||
|
||
/// True when `base` is, verbatim, one of the dotted paths this module
/// treats as "the current user on the request/session/context".
/// The comparison is exact and case-sensitive.
fn is_self_session_base_local(base: &str) -> bool {
    const SELF_SESSION_BASES: &[&str] = &[
        "req.session.user",
        "request.session.user",
        "session.user",
        "req.session.currentUser",
        "request.session.currentUser",
        "session.currentUser",
        "req.user",
        "request.user",
        "req.currentUser",
        "request.currentUser",
        "ctx.session.user",
        "ctx.session.currentUser",
        "ctx.state.user",
        "ctx.state.currentUser",
    ];
    SELF_SESSION_BASES.contains(&base)
}
|
||
|
||
fn pick_owner_field_ref(refs: &[ValueRef]) -> Option<ValueRef> {
|
||
refs.iter().find(|v| is_owner_field_subject(v)).cloned()
|
||
}
|
||
|
||
fn pick_self_actor_ref(refs: &[ValueRef]) -> Option<ValueRef> {
|
||
refs.iter().find(|v| is_self_actor_subject(v)).cloned()
|
||
}
|
||
|
||
fn classify_auth_check(callee: &str, rules: &AuthAnalysisRules) -> AuthCheckKind {
|
||
if rules.is_admin_guard(callee, &[]) || matches_name(callee, "isAdmin") {
|
||
AuthCheckKind::AdminGuard
|
||
} else if rules.is_login_guard(callee) {
|
||
AuthCheckKind::LoginGuard
|
||
} else if matches_name(callee, "checkMembership")
|
||
|| matches_name(callee, "hasWorkspaceMembership")
|
||
|| matches_name(callee, "isMember")
|
||
|| matches_name(callee, "requireMembership")
|
||
|| matches_name(callee, "check_membership")
|
||
|| matches_name(callee, "has_membership")
|
||
|| matches_name(callee, "has_membership?")
|
||
|| matches_name(callee, "require_membership")
|
||
|| matches_name(callee, "ensure_membership")
|
||
|| matches_name(callee, "member_of?")
|
||
|| matches_name(callee, "member?")
|
||
{
|
||
AuthCheckKind::Membership
|
||
} else if matches_name(callee, "checkOwnership")
|
||
|| matches_name(callee, "isOwner")
|
||
|| matches_name(callee, "requireOwnership")
|
||
|| matches_name(callee, "check_ownership")
|
||
|| matches_name(callee, "has_ownership")
|
||
|| matches_name(callee, "require_ownership")
|
||
|| matches_name(callee, "ensure_ownership")
|
||
|| matches_name(callee, "is_owner")
|
||
|| matches_name(callee, "owner?")
|
||
|| matches_name(callee, "owns?")
|
||
{
|
||
AuthCheckKind::Ownership
|
||
} else {
|
||
AuthCheckKind::Other
|
||
}
|
||
}
|
||
|
||
pub fn function_name(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
function_definition_node(node)
|
||
.child_by_field_name("name")
|
||
.map(|name| text(name, bytes))
|
||
.filter(|name| !name.is_empty())
|
||
}
|
||
|
||
/// True when a Python `decorated_definition` node carries a
|
||
/// background-task / event-handler decorator. Recognised markers
|
||
/// (matched against the bare callee name, last segment of any
|
||
/// dotted/qualified form):
|
||
///
|
||
/// * Celery: `task`, `shared_task`, `periodic_task`,
|
||
/// `app.task`, `celery.task`, `beat.shared_task`.
|
||
/// * Airflow: `instrumented_task`.
|
||
/// * Django: `receiver` (signal receiver, invoked by the framework,
|
||
/// not by an HTTP request).
|
||
///
|
||
/// Used by `collect_top_level_from_node` to skip pushing a
|
||
/// `Function` unit for functions that cannot, by construction, be
|
||
/// the entry point of a user-input flow. Real route handlers are
|
||
/// added by the framework-specific route extractors (Flask /
|
||
/// Django / Spring / FastAPI / …) which re-build the unit with
|
||
/// `RouteHandler` kind and route-decorator-derived auth checks.
|
||
fn python_decorated_definition_is_background_task(node: Node<'_>, bytes: &[u8]) -> bool {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if child.kind() != "decorator" {
|
||
continue;
|
||
}
|
||
let Some(inner) = child.named_child(0) else {
|
||
continue;
|
||
};
|
||
let callee_text = match inner.kind() {
|
||
"call" => {
|
||
let Some(function) = inner.child_by_field_name("function") else {
|
||
continue;
|
||
};
|
||
text(function, bytes)
|
||
}
|
||
"identifier" | "attribute" | "scoped_identifier" => text(inner, bytes),
|
||
_ => continue,
|
||
};
|
||
let last = callee_text.rsplit('.').next().unwrap_or(&callee_text);
|
||
if matches!(
|
||
last,
|
||
"task" | "shared_task" | "periodic_task" | "instrumented_task" | "receiver"
|
||
) {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
|
||
fn function_params(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||
let Some(params_node) = node.child_by_field_name("parameters") else {
|
||
return Vec::new();
|
||
};
|
||
let mut params = Vec::new();
|
||
collect_param_names(params_node, bytes, false, &mut params);
|
||
params
|
||
}
|
||
|
||
/// Variant of `function_params` that always includes id-like typed
|
||
/// Python params (`dag_id: str`, `dag_run_id: str`). Used by
|
||
/// `attach_route_handler` to populate `unit.params` for RouteHandler
|
||
/// units so middleware-injected auth checks (FastAPI
|
||
/// `dependencies=[Depends(...)]`, Flask `@requires_role(...)`, etc.)
|
||
/// can synthesise subjects that cover every handler input, including
|
||
/// the id-shaped ones that are *the* primary user-controlled data on
|
||
/// REST routes.
|
||
///
|
||
/// The id-like filter in `collect_param_names` exists to keep
|
||
/// internal helper signatures (`def f(release_id: int, project:
|
||
/// Project)`) from passing `unit_has_user_input_evidence`'s param
|
||
/// heuristic, which would over-fire `missing_ownership_check`. Route
|
||
/// handlers don't need that filter, they pass the precondition gate
|
||
/// via `kind == RouteHandler`, and missing the id-like params from
|
||
/// `unit.params` actively breaks the middleware-injection coverage
|
||
/// path.
|
||
pub fn function_params_route_handler(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||
let Some(params_node) = node.child_by_field_name("parameters") else {
|
||
return Vec::new();
|
||
};
|
||
let mut params = Vec::new();
|
||
collect_param_names(params_node, bytes, true, &mut params);
|
||
params
|
||
}
|
||
|
||
/// Walk a Python function-definition node's parameter list and
|
||
/// collect every parameter whose static type annotation resolves to
|
||
/// an integer or boolean scalar (or a generic-wrapped int such as
|
||
/// `Optional[int]`, `list[int]`, `Iterable[int]`). These names are
|
||
/// used to seed `AnalysisUnit::typed_bounded_vars` so the ownership
|
||
/// rule's `is_typed_bounded_subject` filter recognises the bounded
|
||
/// type without requiring an SSA-derived `VarTypes` map.
|
||
///
|
||
/// No-op for non-Python `function_definition` nodes, only
|
||
/// tree-sitter-python exposes the `typed_parameter` /
|
||
/// `typed_default_parameter` shapes inspected here. Conservative:
|
||
/// only int/bool/float scalars and known integer-list wrappers
|
||
/// qualify; bare `str`, `bytes`, `Path`, custom DTO types, and
|
||
/// `Annotated[int, Body()]` wrappers are NOT lifted because the
|
||
/// presence of an HTTP-binding marker indicates the value is
|
||
/// caller-controlled (the SSA pipeline handles those).
|
||
fn python_int_bounded_typed_params(node: Node<'_>, bytes: &[u8]) -> HashSet<String> {
|
||
let mut out: HashSet<String> = HashSet::new();
|
||
let Some(params_node) = node.child_by_field_name("parameters") else {
|
||
return out;
|
||
};
|
||
for idx in 0..params_node.named_child_count() {
|
||
let Some(child) = params_node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if !matches!(child.kind(), "typed_parameter" | "typed_default_parameter") {
|
||
continue;
|
||
}
|
||
let mut name: Option<String> = None;
|
||
let mut type_text: Option<String> = None;
|
||
for inner_idx in 0..child.named_child_count() {
|
||
let Some(inner) = child.named_child(inner_idx as u32) else {
|
||
continue;
|
||
};
|
||
if inner.kind() == "identifier" && name.is_none() {
|
||
let n = text(inner, bytes);
|
||
if !n.is_empty() {
|
||
name = Some(n);
|
||
}
|
||
} else if inner.kind() == "type" {
|
||
type_text = Some(text(inner, bytes));
|
||
}
|
||
}
|
||
if let (Some(n), Some(t)) = (name, type_text)
|
||
&& python_type_text_is_integer_bounded(&t)
|
||
{
|
||
out.insert(n);
|
||
}
|
||
}
|
||
out
|
||
}
|
||
|
||
/// Conservative recogniser for Python type annotations that bound a
/// value to an `int`, `bool` or `float` scalar (or a container of
/// them).
///
/// Accepted:
/// * bare `int`, `bool`, `float`;
/// * PEP 604 unions (`int | None`, `int | bool`) where **every**
///   alternative is `None` or itself bounded, and at least one is
///   bounded;
/// * `Optional[T]` with bounded `T`;
/// * `Union[A, B, …]` under the same rule as PEP 604 unions — every
///   member must be `None` or bounded (`Union[int, str]` is rejected);
/// * single-element-type containers (`list`, `List`, `set`, `Set`,
///   `frozenset`, `FrozenSet`, `Sequence`, `Iterable`, `Iterator`,
///   `Collection`, plus their `typing.`-qualified forms) whose element
///   type is bounded;
/// * `tuple[...]` / `Tuple[...]` where every element is bounded or is
///   the `...` ellipsis (`tuple[int, ...]`, `tuple[int, int]`);
/// * `dict[K, V]` / `Dict` / `Mapping` where the **key** type `K` is
///   bounded (the value type is not inspected).
///
/// Rejected on purpose: `Annotated[...]` (the binding marker indicates
/// a caller-controlled value), `str`, `bytes`, custom types, and
/// anything that does not parse as `Head[args]`.
///
/// Unions and argument lists are split only at bracket depth zero, so
/// nested generics such as `list[int | None]` or
/// `dict[tuple[int, int], str]` are parsed correctly.
fn python_type_text_is_integer_bounded(text: &str) -> bool {
    /// Splits `s` on `sep`, ignoring separators nested inside `[...]`
    /// or `(...)`.
    fn split_top_level(s: &str, sep: char) -> Vec<&str> {
        let mut parts = Vec::new();
        let mut depth = 0usize;
        let mut start = 0usize;
        for (idx, ch) in s.char_indices() {
            match ch {
                '[' | '(' => depth += 1,
                ']' | ')' => depth = depth.saturating_sub(1),
                c if c == sep && depth == 0 => {
                    parts.push(&s[start..idx]);
                    start = idx + c.len_utf8();
                }
                _ => {}
            }
        }
        parts.push(&s[start..]);
        parts
    }

    /// Union rule shared by `A | B` and `Union[A, B]`: every member is
    /// `None` or bounded, and at least one member is bounded.
    fn all_members_bounded<'a>(members: impl Iterator<Item = &'a str>) -> bool {
        let mut saw_bounded = false;
        for member in members {
            if member == "None" {
                continue;
            }
            if !python_type_text_is_integer_bounded(member) {
                return false;
            }
            saw_bounded = true;
        }
        saw_bounded
    }

    let trimmed = text.trim();

    // PEP 604 union at the top level.
    let alternatives = split_top_level(trimmed, '|');
    if alternatives.len() > 1 {
        return all_members_bounded(alternatives.iter().map(|alt| alt.trim()));
    }

    if matches!(trimmed, "int" | "bool" | "float") {
        return true;
    }

    // Everything else must look like `Head[args]`.
    let Some((head, rest)) = trimmed.split_once('[') else {
        return false;
    };
    let Some(inner) = rest.strip_suffix(']') else {
        return false;
    };
    let head = head.trim();
    // A trailing comma (`list[int,]`) produces an empty argument; drop it.
    let args: Vec<&str> = split_top_level(inner, ',')
        .into_iter()
        .map(str::trim)
        .filter(|arg| !arg.is_empty())
        .collect();
    let first_is_bounded = || {
        args.first()
            .is_some_and(|first| python_type_text_is_integer_bounded(first))
    };

    match head {
        // `Annotated[int, Body()]` etc. is a binding marker, refuse.
        "Annotated" | "typing.Annotated" => false,
        "Union" | "typing.Union" => all_members_bounded(args.iter().copied()),
        "tuple" | "Tuple" | "typing.Tuple" => {
            let mut saw_bounded = false;
            for element in &args {
                if *element == "..." {
                    continue;
                }
                if !python_type_text_is_integer_bounded(element) {
                    return false;
                }
                saw_bounded = true;
            }
            saw_bounded
        }
        "Optional"
        | "typing.Optional"
        | "list"
        | "List"
        | "typing.List"
        | "set"
        | "Set"
        | "typing.Set"
        | "frozenset"
        | "Frozenset"
        | "FrozenSet"
        | "typing.FrozenSet"
        | "Sequence"
        | "typing.Sequence"
        | "Iterable"
        | "typing.Iterable"
        | "Iterator"
        | "typing.Iterator"
        | "Collection"
        | "typing.Collection"
        // Mappings: key type only.
        | "dict"
        | "Dict"
        | "typing.Dict"
        | "Mapping"
        | "typing.Mapping" => first_is_bounded(),
        _ => false,
    }
}
|
||
|
||
/// Walk the tree starting at `node` and gather TS type-alias /
|
||
/// interface names whose body references a TRPC-marker type
|
||
/// (`TrpcSessionUser`, `TRPCContext`, …). Recurses only through
|
||
/// container kinds that legitimately host top-level type aliases
|
||
/// (`program` / `module` / `export_statement` / namespace bodies);
|
||
/// stops at function or class bodies to avoid an O(units × tree)
|
||
/// blowup on files with many small functions.
|
||
///
|
||
/// No-op for non-TS files, the matched node kinds only exist in
|
||
/// the TS grammar. Used by [`FileMeta::scan`] (called once per file
|
||
/// in `collect_top_level_units` / `attach_route_handler`) to amortise
|
||
/// the alias scan across all units in the same source file.
|
||
fn scan_trpc_aliases_visit(node: Node<'_>, bytes: &[u8], out: &mut HashSet<String>) {
|
||
match node.kind() {
|
||
"type_alias_declaration" | "interface_declaration" => {
|
||
let body = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("body"));
|
||
if let Some(body) = body {
|
||
let body_text = text(body, bytes);
|
||
if body_text_references_trpc_marker(&body_text)
|
||
&& let Some(name_node) = node.child_by_field_name("name")
|
||
{
|
||
let name = text(name_node, bytes);
|
||
if !name.is_empty() {
|
||
out.insert(name);
|
||
}
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
// Recurse only through container kinds that legitimately host
|
||
// top-level type aliases. Skipping into function bodies /
|
||
// class bodies / call arguments avoids an O(unit × tree)
|
||
// blowup when `build_function_unit` triggers this scan once
|
||
// per unit on files with thousands of small functions
|
||
// (`tests/hostile_input_tests::many_small_functions_do_not_explode`).
|
||
"program"
|
||
| "source_file"
|
||
| "module"
|
||
| "export_statement"
|
||
| "namespace_declaration"
|
||
| "module_declaration"
|
||
| "internal_module"
|
||
| "ambient_declaration"
|
||
| "lexical_declaration"
|
||
| "variable_declaration"
|
||
| "statement_block" => {}
|
||
_ => return,
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
scan_trpc_aliases_visit(child, bytes, out);
|
||
}
|
||
}
|
||
|
||
/// True when `body_text` contains, as a case-sensitive substring, any
/// of the TRPC marker type names: `TrpcSessionUser`, `TRPCContext`,
/// `ProtectedTRPCContext`, `TrpcContext`.
fn body_text_references_trpc_marker(body_text: &str) -> bool {
    const TRPC_MARKERS: [&str; 4] = [
        "TrpcSessionUser",
        "TRPCContext",
        "ProtectedTRPCContext",
        "TrpcContext",
    ];
    TRPC_MARKERS
        .iter()
        .any(|marker| body_text.contains(*marker))
}
|
||
|
||
/// Recognise a TS `required_parameter` / `optional_parameter` whose
|
||
/// type annotation refers to a TRPC-shaped Options alias (or
|
||
/// inlines `TrpcSessionUser` directly), and add the destructured /
|
||
/// declared `ctx`-base to `self_scoped_session_bases` so subjects
|
||
/// rooted at `ctx.user.<id-like>` count as actor context downstream.
|
||
///
|
||
/// Three pattern shapes are handled:
|
||
/// 1. Destructured shorthand: `({ ctx, input }: GetOptions)` →
|
||
/// add `"ctx.user"`.
|
||
/// 2. Destructured rename: `({ ctx: c, input }: GetOptions)` →
|
||
/// add `"c.user"`.
|
||
/// 3. Plain identifier: `(opts: GetOptions)` → add `"opts.ctx.user"`.
|
||
///
|
||
/// The rule is principled: we only fire when the param's type either
|
||
/// IS one of the file-level TRPC aliases (`state.trpc_alias_names`,
|
||
/// populated by [`scan_trpc_aliases_from_node_root`]) or its annotation
|
||
/// text inlines `TrpcSessionUser` directly. Bare `ctx.user` is never
|
||
/// added to the static session-base list, that would over-suppress
|
||
/// in non-TRPC code. Instead, the dynamic per-unit set
|
||
/// `self_scoped_session_bases` carries the lift.
|
||
fn collect_trpc_ctx_param(node: Node<'_>, bytes: &[u8], state: &mut UnitState) {
|
||
let Some(pattern) = node.child_by_field_name("pattern") else {
|
||
return;
|
||
};
|
||
let Some(ty_node) = node.child_by_field_name("type") else {
|
||
return;
|
||
};
|
||
let ty_text = text(ty_node, bytes);
|
||
if !type_text_is_trpc_options(&ty_text, &state.trpc_alias_names) {
|
||
return;
|
||
}
|
||
|
||
if pattern.kind() == "object_pattern" {
|
||
for idx in 0..pattern.named_child_count() {
|
||
let Some(child) = pattern.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
match child.kind() {
|
||
"shorthand_property_identifier_pattern" => {
|
||
let name = text(child, bytes);
|
||
if name.eq_ignore_ascii_case("ctx") {
|
||
state
|
||
.self_scoped_session_bases
|
||
.insert(format!("{name}.user"));
|
||
}
|
||
}
|
||
"object_assignment_pattern" => {
|
||
if let Some(left) = child.child_by_field_name("left") {
|
||
let name = if matches!(
|
||
left.kind(),
|
||
"identifier" | "shorthand_property_identifier_pattern"
|
||
) {
|
||
text(left, bytes)
|
||
} else {
|
||
first_identifier_name(left, bytes).unwrap_or_default()
|
||
};
|
||
if name.eq_ignore_ascii_case("ctx") {
|
||
state
|
||
.self_scoped_session_bases
|
||
.insert(format!("{name}.user"));
|
||
}
|
||
}
|
||
}
|
||
"pair_pattern" => {
|
||
let key_node = child.child_by_field_name("key");
|
||
let local_node = child.child_by_field_name("value");
|
||
if let (Some(k), Some(v)) = (key_node, local_node) {
|
||
let key = text(k, bytes);
|
||
let local = first_identifier_name(v, bytes).unwrap_or_default();
|
||
if !local.is_empty() && key.eq_ignore_ascii_case("ctx") {
|
||
state
|
||
.self_scoped_session_bases
|
||
.insert(format!("{local}.user"));
|
||
}
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
if let Some(name) = first_identifier_name(pattern, bytes)
|
||
&& !name.is_empty()
|
||
{
|
||
state
|
||
.self_scoped_session_bases
|
||
.insert(format!("{name}.ctx.user"));
|
||
}
|
||
}
|
||
|
||
/// True when the type-annotation text identifies a TRPC-shaped Options
|
||
/// type: it contains `TrpcSessionUser` directly (inline object type
|
||
/// literal), or it references one of the file-level TRPC alias names
|
||
/// from the pre-scan.
|
||
fn type_text_is_trpc_options(ty_text: &str, trpc_alias_names: &HashSet<String>) -> bool {
|
||
if body_text_references_trpc_marker(ty_text) {
|
||
return true;
|
||
}
|
||
let trimmed = ty_text.trim_start_matches(':').trim();
|
||
if trimmed.is_empty() {
|
||
return false;
|
||
}
|
||
// Match the leading identifier of the type (dropping any generic
|
||
// suffix `<...>`). This covers `GetOptions` and
|
||
// `NonNullable<GetOptions>` shapes alike.
|
||
let head = trimmed.split('<').next().unwrap_or(trimmed).trim();
|
||
if trpc_alias_names.contains(head) {
|
||
return true;
|
||
}
|
||
// Also accept the bare alias name appearing anywhere in the
|
||
// annotation text, handles `Promise<GetOptions>` and other
|
||
// wrappers without enumerating every shape. Word-boundary check
|
||
// avoids matching aliases that are substrings of longer
|
||
// identifiers.
|
||
for alias in trpc_alias_names {
|
||
if alias.is_empty() {
|
||
continue;
|
||
}
|
||
if let Some(idx) = ty_text.find(alias.as_str()) {
|
||
let before_ok = idx == 0
|
||
|| !ty_text.as_bytes()[idx - 1].is_ascii_alphanumeric()
|
||
&& ty_text.as_bytes()[idx - 1] != b'_';
|
||
let end = idx + alias.len();
|
||
let after_ok = end >= ty_text.len()
|
||
|| !ty_text.as_bytes()[end].is_ascii_alphanumeric()
|
||
&& ty_text.as_bytes()[end] != b'_';
|
||
if before_ok && after_ok {
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
false
|
||
}
|
||
|
||
/// Extract the receiver-variable name from a Go `method_declaration`
|
||
/// (`func (c *Cache) ...` → `Some("c")`). Returns `None` for any node
|
||
/// that doesn't expose a `receiver` field (Rust `function_item`,
|
||
/// Java `method_declaration`, JS arrow-functions, …).
|
||
///
|
||
/// Tree-sitter-go shape: `method_declaration` has a `receiver` field
|
||
/// whose value is a `parameter_list` containing a single
|
||
/// `parameter_declaration` with a `name` field (identifier) and a
|
||
/// `type` field (often `pointer_type`). We only need the name.
|
||
pub fn method_receiver_name(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
let receiver = node.child_by_field_name("receiver")?;
|
||
extract_receiver_param_name(receiver, bytes)
|
||
}
|
||
|
||
fn extract_receiver_param_name(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
if let Some(name_node) = node.child_by_field_name("name") {
|
||
let name = text(name_node, bytes);
|
||
if !name.is_empty() {
|
||
return Some(name);
|
||
}
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if let Some(found) = extract_receiver_param_name(child, bytes) {
|
||
return Some(found);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
fn collect_param_names(
|
||
node: Node<'_>,
|
||
bytes: &[u8],
|
||
include_id_like_typed: bool,
|
||
out: &mut Vec<String>,
|
||
) {
|
||
match node.kind() {
|
||
"identifier" | "property_identifier" | "shorthand_property_identifier_pattern" => {
|
||
let name = text(node, bytes);
|
||
if !name.is_empty() && !out.contains(&name) {
|
||
out.push(name);
|
||
}
|
||
}
|
||
// Go `parameter_declaration` / `variadic_parameter_declaration`:
|
||
// tree-sitter-go shape exposes `name` (one or more identifiers)
|
||
// and `type` (the param's static type) as named fields. C/C++
|
||
// also use `parameter_declaration` but with a `declarator`
|
||
// field instead of `name`, so the `name`-field gate
|
||
// distinguishes Go from C/C++ shapes without language plumbing.
|
||
//
|
||
// Two engine improvements at this site, both Go-specific:
|
||
//
|
||
// 1. Drop the entire param when its type is a known
|
||
// non-user-input stdlib type. The dominant case is
|
||
// `ctx context.Context`, the canonical first param of
|
||
// nearly every Go function (cancellation / deadline /
|
||
// value-bag, NOT an HTTP request). Without this gate the
|
||
// bare param name `ctx` matches the framework-request-name
|
||
// allow-list in `is_external_input_param_name`, opening
|
||
// `unit_has_user_input_evidence` on every internal helper.
|
||
// 2. Descend only into the `name` field so type-segment
|
||
// identifiers don't pollute the param-name set. Without
|
||
// this scope, `info *PackageInfo` contributes both `info`
|
||
// and `PackageInfo` to `unit.params`; `path *Path` would
|
||
// contribute `path` and `Path`, etc. Mirrors the Rust
|
||
// `parameter` arm below.
|
||
//
|
||
// Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~1900
|
||
// `go.auth.missing_ownership_check` findings on backend
|
||
// helpers whose only "user-input evidence" was the ubiquitous
|
||
// `ctx context.Context` first param.
|
||
"parameter_declaration" | "variadic_parameter_declaration"
|
||
if node.child_by_field_name("name").is_some() =>
|
||
{
|
||
let type_node = node.child_by_field_name("type");
|
||
if let Some(t) = type_node
|
||
&& is_go_non_user_input_type(t, bytes)
|
||
{
|
||
return;
|
||
}
|
||
// Mirror of the Python `typed_parameter` filter (see
|
||
// `is_python_id_like_typed_param` arm above): for non-route
|
||
// units, an id-like Go param whose declared type is a
|
||
// bounded primitive scalar (`int64`, `uint32`, `string`,
|
||
// `bool`, `byte`, `rune`, `float64`, …) is a caller-passed
|
||
// scope identifier, not user-controlled HTTP input. Real
|
||
// Go HTTP handlers always carry a framework-request-typed
|
||
// param (`*http.Request`, `*gin.Context`, `echo.Context`,
|
||
// `*fiber.Ctx`, `*context.APIContext`, …) and are
|
||
// recognised by the per-framework route extractors which
|
||
// call `function_params_route_handler`
|
||
// (`include_id_like_typed = true`) — those bypass this
|
||
// filter so id-shaped path params survive on real routes.
|
||
//
|
||
// Real-repo trigger: `/Users/elipeter/oss/gitea` ─ ~957
|
||
// `go.auth.missing_ownership_check` findings on backend
|
||
// helpers like
|
||
// `func GetRunByRepoAndID(ctx context.Context,
|
||
// repoID, runID int64)`,
|
||
// `func DeleteRunner(ctx context.Context, id int64)`,
|
||
// and the entire `models/...` DAO layer where the
|
||
// ownership check sits in the calling route handler.
|
||
// Same shape over-fires on minio's `cmd/iam-*-store`
|
||
// helpers and would on every Go ORM/DAO codebase.
|
||
let type_is_bounded_scalar = type_node
|
||
.map(|t| is_go_bounded_scalar_type(t, bytes))
|
||
.unwrap_or(false);
|
||
let mut cursor = node.walk();
|
||
for child in node.children_by_field_name("name", &mut cursor) {
|
||
if child.kind() == "identifier" {
|
||
let name = text(child, bytes);
|
||
if name.is_empty() || out.contains(&name) {
|
||
continue;
|
||
}
|
||
if !include_id_like_typed
|
||
&& type_is_bounded_scalar
|
||
&& is_go_id_like_typed_param(&name)
|
||
{
|
||
continue;
|
||
}
|
||
out.push(name);
|
||
}
|
||
}
|
||
}
|
||
// Rust `parameter` node: descend ONLY into the `pattern` field so
|
||
// type-segment identifiers don't pollute the param-name set.
|
||
// Without this scope, `dst: &std::path::Path` contributes `std`,
|
||
// `path`, and `Path` to `unit.params`, and `path` then matches
|
||
// the framework-request-name allow-list in
|
||
// `is_external_input_param_name`, gating
|
||
// `unit_has_user_input_evidence` open on internal helpers whose
|
||
// real params (`dst`, `tasks`, `index_base_map_size`) carry no
|
||
// user-facing shape. Cluster surfaced from
|
||
// meilisearch/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
|
||
// where `dst: &std::path::Path` made every `db.delete(task.uid)`
|
||
// call inside the snapshot cleanup loop fire
|
||
// `missing_ownership_check`. Same shape would over-fire for
|
||
// `req: &Request<...>` / `ctx: &Context<T>` / similar typed
|
||
// helpers.
|
||
"parameter" => {
|
||
if let Some(pattern) = node.child_by_field_name("pattern") {
|
||
collect_param_names(pattern, bytes, include_id_like_typed, out);
|
||
return;
|
||
}
|
||
// Fallback (no `pattern` field): descend into named children
|
||
// generically, mirroring the default arm.
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
collect_param_names(child, bytes, include_id_like_typed, out);
|
||
}
|
||
}
|
||
"default_parameter" | "typed_parameter" | "typed_default_parameter" => {
|
||
// tree-sitter-python's `typed_parameter` rule does not
|
||
// expose a `name` field (the identifier is the wrapper's
|
||
// first child, with the type expression as a sibling). We
|
||
// fall back to the first `identifier` child when
|
||
// `child_by_field_name("name")` returns None so typed
|
||
// Python params (`connection_id: str`,
|
||
// `organization_id: int`, …) actually flow into
|
||
// `unit.params` instead of being silently dropped. Without
|
||
// this, route-aware extractors (Flask + FastAPI) couldn't
|
||
// see a typed handler's path params and the FastAPI
|
||
// dependency-injection recogniser had no subject to
|
||
// synthesise its auth check against. Languages whose
|
||
// grammar carries a `name` field (TypeScript
|
||
// `required_parameter`, …) still take the explicit field
|
||
// path.
|
||
//
|
||
// Note: Restricting this fallback to non-id-like names
|
||
// (so internal helpers with `release_id: int`,
|
||
// `organization_id: int`, etc. don't pass
|
||
// `unit_has_user_input_evidence`) would avoid the helper
|
||
// FP regression observed on sentry. The principled
|
||
// long-term fix is cross-file type-flow so subjects like
|
||
// `project.id` (where `project: Project`) are recognised
|
||
// as typed-bounded everywhere they're used. Until that
|
||
// lands, we accept the cluster, handlers go through the
|
||
// route extractors, and route-decorator-derived auth
|
||
// checks suppress them.
|
||
if let Some(name) = node.child_by_field_name("name") {
|
||
collect_param_names(name, bytes, include_id_like_typed, out);
|
||
return;
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if child.kind() == "identifier" {
|
||
let name_text = text(child, bytes);
|
||
// Conservative for non-route-handler units: only
|
||
// push the name when it is NOT id-like. This is a
|
||
// stopgap until cross-file type-flow lets us
|
||
// suppress `obj.id` subjects on typed-object args;
|
||
// without it, exposing typed helpers like
|
||
// `def f(release_id: int, project: Project) -> ...`
|
||
// over-fires `missing_ownership_check` because the
|
||
// engine sees `project.id` as a foreign scoped id.
|
||
// Route handlers (`include_id_like_typed = true`)
|
||
// bypass this filter, id-like params on a REST
|
||
// route are *the* primary user input, and the
|
||
// RouteHandler kind already passes
|
||
// `unit_has_user_input_evidence` unconditionally,
|
||
// so including them in `unit.params` doesn't
|
||
// affect that gate but does let
|
||
// `inject_middleware_auth` synthesise auth-check
|
||
// subjects that match the operation subjects (the
|
||
// FastAPI `dependencies=[Depends(...)]` coverage
|
||
// path that was previously empty for handlers like
|
||
// `def get_dag_run(dag_id: str, dag_run_id: str,
|
||
// session)`).
|
||
let is_id_like = is_python_id_like_typed_param(&name_text);
|
||
if !name_text.is_empty()
|
||
&& !out.contains(&name_text)
|
||
&& (include_id_like_typed || !is_id_like)
|
||
{
|
||
out.push(name_text);
|
||
}
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
_ => {
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
collect_param_names(child, bytes, include_id_like_typed, out);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Recognise Go parameter types that are categorically not user-input
|
||
/// bearing. Used by the Go arm of [`collect_param_names`] to drop the
|
||
/// param entirely (rather than push its name into `unit.params` and
|
||
/// trip the framework-request-name allow-list in
|
||
/// `is_external_input_param_name`).
|
||
///
|
||
/// Conservative: only matches the stdlib `context.Context` /
|
||
/// `context.CancelFunc` interface idioms. These are the dominant
|
||
/// cluster ─ ~1900 findings on `/Users/elipeter/oss/gitea` ─ and there
|
||
/// is no shape under which they carry user input.
|
||
///
|
||
/// Implementation note: tree-sitter-go's `qualified_type` exposes
|
||
/// `package` (identifier) and `name` (type_identifier) as named fields.
|
||
/// Pointer-wrapping is rare for these (they're already interfaces) but
|
||
/// is handled defensively by descending through `pointer_type`.
|
||
fn is_go_non_user_input_type(type_node: Node<'_>, bytes: &[u8]) -> bool {
|
||
let mut node = type_node;
|
||
// Strip a single layer of pointer indirection if present.
|
||
if node.kind() == "pointer_type" {
|
||
if let Some(inner) = node.child_by_field_name("type") {
|
||
node = inner;
|
||
} else if let Some(inner) = node.named_child(0) {
|
||
node = inner;
|
||
}
|
||
}
|
||
if node.kind() != "qualified_type" {
|
||
return false;
|
||
}
|
||
let pkg = node
|
||
.child_by_field_name("package")
|
||
.map(|n| text(n, bytes))
|
||
.unwrap_or_default();
|
||
let name = node
|
||
.child_by_field_name("name")
|
||
.map(|n| text(n, bytes))
|
||
.unwrap_or_default();
|
||
matches!(
|
||
(pkg.as_str(), name.as_str()),
|
||
("context", "Context") | ("context", "CancelFunc")
|
||
)
|
||
}
|
||
|
||
/// Id-shape predicate for the Python typed-param fallback in
/// `collect_param_names`, evaluated on the ASCII-lowercased name.
///
/// A name is id-like when it ends in `id` or `ids`; this subsumes the
/// exact name `id` and the `_id` suffix. It mirrors
/// `auth_analysis::checks::is_id_like_name`, which cannot be shared
/// directly without a cross-module dependency. The two must move in
/// lockstep so the precondition gate and the param-extraction filter agree
/// on what counts as id-like.
fn is_python_id_like_typed_param(name: &str) -> bool {
    let lowered = name.to_ascii_lowercase();
    ["id", "ids"]
        .iter()
        .any(|suffix| lowered.ends_with(*suffix))
}
|
||
|
||
/// Id-shape predicate for the Go typed-param filter in
/// `collect_param_names`, evaluated on the ASCII-lowercased name: the name
/// must end in `id` or `ids` (which covers `id` itself and `_id`).
///
/// Kept separate from the Python helper so the two recognisers can diverge
/// if language-specific spellings emerge. The current vocabulary is the
/// same canonical id-suffix set as `auth_analysis::checks::is_id_like_name`.
fn is_go_id_like_typed_param(name: &str) -> bool {
    let lowered = name.to_ascii_lowercase();
    let singular = lowered.ends_with("id");
    let plural = lowered.ends_with("ids");
    singular || plural
}
|
||
|
||
/// True iff `type_node` names a Go bounded primitive scalar:
|
||
/// integer (`int*` / `uint*` / `byte` / `rune` / `uintptr`), floating
|
||
/// point (`float32` / `float64`), `bool`, or `string`. Used by the
|
||
/// Go arm of `collect_param_names` to recognise the
|
||
/// "id-like name + scalar type" DAO-helper shape and refuse to lift
|
||
/// such params into `unit.params` for non-route units.
|
||
///
|
||
/// Conservative scope: only bare `type_identifier` matches. Pointer
|
||
/// types (`*Foo`), generic types (`Map[K, V]`), qualified types
|
||
/// (`pkg.Type`), and slice/array types (`[]T`) are framework or
|
||
/// payload shapes, NOT bounded primitives, so they're left alone and
|
||
/// the param keeps its name. This keeps real handler shapes that
|
||
/// happen to spell an id-like name on a complex type (`req
|
||
/// *RequestWithID`) from being silently dropped.
|
||
fn is_go_bounded_scalar_type(type_node: Node<'_>, bytes: &[u8]) -> bool {
|
||
if type_node.kind() != "type_identifier" {
|
||
return false;
|
||
}
|
||
matches!(
|
||
text(type_node, bytes).as_str(),
|
||
"int"
|
||
| "int8"
|
||
| "int16"
|
||
| "int32"
|
||
| "int64"
|
||
| "uint"
|
||
| "uint8"
|
||
| "uint16"
|
||
| "uint32"
|
||
| "uint64"
|
||
| "uintptr"
|
||
| "byte"
|
||
| "rune"
|
||
| "float32"
|
||
| "float64"
|
||
| "bool"
|
||
| "string"
|
||
)
|
||
}
|
||
|
||
pub fn is_function_like(node: Node<'_>) -> bool {
|
||
matches!(
|
||
node.kind(),
|
||
"function_declaration"
|
||
| "function_expression"
|
||
| "arrow_function"
|
||
| "function_definition"
|
||
| "method_declaration"
|
||
| "function_item"
|
||
| "closure_expression"
|
||
| "func_literal"
|
||
| "decorated_definition"
|
||
| "method"
|
||
| "singleton_method"
|
||
| "block"
|
||
| "do_block"
|
||
)
|
||
}
|
||
|
||
pub fn is_handler_reference(node: Node<'_>) -> bool {
|
||
is_function_like(node)
|
||
|| matches!(
|
||
node.kind(),
|
||
"identifier"
|
||
| "member_expression"
|
||
| "attribute"
|
||
| "selector_expression"
|
||
| "field_expression"
|
||
| "scoped_identifier"
|
||
| "field_access"
|
||
| "constant"
|
||
| "scope_resolution"
|
||
)
|
||
}
|
||
|
||
pub fn call_site_from_node(node: Node<'_>, bytes: &[u8]) -> CallSite {
|
||
if matches!(
|
||
node.kind(),
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression"
|
||
) {
|
||
let name = call_name(node, bytes);
|
||
let arg_nodes = node
|
||
.child_by_field_name("arguments")
|
||
.map(named_children)
|
||
.unwrap_or_default();
|
||
let args = arg_nodes.iter().map(|arg| text(*arg, bytes)).collect();
|
||
let args_value_refs = arg_nodes
|
||
.iter()
|
||
.map(|arg| extract_value_refs(*arg, bytes))
|
||
.collect();
|
||
CallSite {
|
||
name,
|
||
args,
|
||
span: span(node),
|
||
args_value_refs,
|
||
}
|
||
} else {
|
||
CallSite {
|
||
name: text(node, bytes),
|
||
args: Vec::new(),
|
||
span: span(node),
|
||
args_value_refs: Vec::new(),
|
||
}
|
||
}
|
||
}
|
||
|
||
pub fn call_sites_from_value(node: Node<'_>, bytes: &[u8]) -> Vec<CallSite> {
|
||
if matches!(node.kind(), "array" | "list" | "tuple") {
|
||
named_children(node)
|
||
.into_iter()
|
||
.map(|child| call_site_from_node(child, bytes))
|
||
.filter(|call| !call.name.is_empty())
|
||
.collect()
|
||
} else {
|
||
let call = call_site_from_node(node, bytes);
|
||
if call.name.is_empty() {
|
||
Vec::new()
|
||
} else {
|
||
vec![call]
|
||
}
|
||
}
|
||
}
|
||
|
||
pub fn auth_check_from_call_site(
|
||
call: &CallSite,
|
||
line: usize,
|
||
rules: &AuthAnalysisRules,
|
||
) -> Option<AuthCheck> {
|
||
let kind = if rules.is_admin_guard(&call.name, &call.args) {
|
||
AuthCheckKind::AdminGuard
|
||
} else if rules.is_login_guard(&call.name) {
|
||
AuthCheckKind::LoginGuard
|
||
} else if rules.is_authorization_check(&call.name) {
|
||
classify_auth_check(&call.name, rules)
|
||
} else {
|
||
return None;
|
||
};
|
||
|
||
Some(AuthCheck {
|
||
kind,
|
||
callee: call.name.clone(),
|
||
subjects: Vec::new(),
|
||
span: call.span,
|
||
line,
|
||
args: call.args.clone(),
|
||
condition_text: None,
|
||
is_route_level: false,
|
||
})
|
||
}
|
||
|
||
pub fn extract_value_refs(node: Node<'_>, bytes: &[u8]) -> Vec<ValueRef> {
|
||
match node.kind() {
|
||
"member_expression"
|
||
| "attribute"
|
||
| "selector_expression"
|
||
| "field_expression"
|
||
| "field_access" => member_value_ref(node, bytes).into_iter().collect(),
|
||
"subscript_expression" | "subscript" | "element_reference" | "index_expression" => {
|
||
subscript_value_ref(node, bytes).into_iter().collect()
|
||
}
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression" => {
|
||
call_value_ref(node, bytes)
|
||
.map(|value| vec![value])
|
||
.unwrap_or_else(|| {
|
||
let mut refs = Vec::new();
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
refs.extend(extract_value_refs(child, bytes));
|
||
}
|
||
refs
|
||
})
|
||
}
|
||
"identifier"
|
||
// Ruby `@foo` instance variables and `@@foo` class variables are
|
||
// leaves with no named children, so the catch-all recurse arm
|
||
// would yield an empty subject set. Surface them as Identifier
|
||
// value-refs so receiver-side ownership checks (`@issue.visible?`)
|
||
// produce a subject that the row-fetch exemption can match.
|
||
| "instance_variable"
|
||
| "class_variable"
|
||
| "global_variable" => vec![ValueRef {
|
||
source_kind: ValueSourceKind::Identifier,
|
||
name: text(node, bytes),
|
||
base: None,
|
||
field: None,
|
||
index: None,
|
||
span: span(node),
|
||
}],
|
||
// Keyword / named arguments: `Model.objects.filter(organization_id=org.id)`.
|
||
// Tree-sitter exposes a `name` child (the schema column / parameter
|
||
// name) and a `value` child (the actual expression). The default
|
||
// recurse-all-children arm would surface `organization_id` as a
|
||
// bare-identifier subject, which `is_id_like_name` then flags as
|
||
// a scoped-identifier user-input. But the kwarg key is the
|
||
// ORM/RPC schema field name, fixed at call time, never
|
||
// attacker-controlled. Only the value carries a subject.
|
||
//
|
||
// Covers Python `keyword_argument`, JavaScript / TypeScript
|
||
// `pair` (object property syntax used as kwargs in client libs
|
||
// like prisma's `where: { id: foo }` is handled separately),
|
||
// Ruby `pair` (hash kwargs in `Model.where(field: value)`), Go
|
||
// composite-literal element keys, PHP / C# named arguments.
|
||
"keyword_argument"
|
||
| "keyword_arg"
|
||
| "named_argument"
|
||
| "named_arg" => {
|
||
if let Some(value) = node
|
||
.child_by_field_name("value")
|
||
.or_else(|| node.child_by_field_name("argument"))
|
||
{
|
||
extract_value_refs(value, bytes)
|
||
} else {
|
||
Vec::new()
|
||
}
|
||
}
|
||
_ => {
|
||
let mut refs = Vec::new();
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
refs.extend(extract_value_refs(child, bytes));
|
||
}
|
||
refs
|
||
}
|
||
}
|
||
}
|
||
|
||
fn call_value_ref(node: Node<'_>, bytes: &[u8]) -> Option<ValueRef> {
|
||
let callee = call_name(node, bytes);
|
||
let args = node
|
||
.child_by_field_name("arguments")
|
||
.map(named_children)
|
||
.unwrap_or_default();
|
||
let chain = member_chain(node, bytes);
|
||
|
||
if let Some(value) = accessor_call_value_ref(node, &callee, &chain, &args, bytes) {
|
||
return Some(value);
|
||
}
|
||
|
||
if !args.is_empty() {
|
||
return None;
|
||
}
|
||
if chain.is_empty() {
|
||
return None;
|
||
}
|
||
let name = chain.join(".");
|
||
let field = chain.last().cloned();
|
||
let base = if chain.len() > 1 {
|
||
Some(chain[..chain.len() - 1].join("."))
|
||
} else {
|
||
None
|
||
};
|
||
|
||
Some(ValueRef {
|
||
source_kind: classify_member_chain(&chain),
|
||
name,
|
||
base,
|
||
field,
|
||
index: None,
|
||
span: span(node),
|
||
})
|
||
}
|
||
|
||
fn member_value_ref(node: Node<'_>, bytes: &[u8]) -> Option<ValueRef> {
|
||
let chain = member_chain(node, bytes);
|
||
if chain.is_empty() {
|
||
return None;
|
||
}
|
||
let name = chain.join(".");
|
||
let field = chain.last().cloned();
|
||
let base = if chain.len() > 1 {
|
||
Some(chain[..chain.len() - 1].join("."))
|
||
} else {
|
||
None
|
||
};
|
||
let source_kind = classify_member_chain(&chain);
|
||
|
||
Some(ValueRef {
|
||
source_kind,
|
||
name,
|
||
base,
|
||
field,
|
||
index: None,
|
||
span: span(node),
|
||
})
|
||
}
|
||
|
||
fn classify_member_chain(chain: &[String]) -> ValueSourceKind {
|
||
if matches_request_param(chain) {
|
||
ValueSourceKind::RequestParam
|
||
} else if matches_request_body(chain) {
|
||
ValueSourceKind::RequestBody
|
||
} else if matches_request_query(chain) {
|
||
ValueSourceKind::RequestQuery
|
||
} else if matches_session_context(chain) {
|
||
ValueSourceKind::Session
|
||
} else if chain.first().is_some_and(|segment| {
|
||
matches!(
|
||
segment.to_ascii_lowercase().as_str(),
|
||
"invitation" | "token" | "invite"
|
||
)
|
||
}) {
|
||
ValueSourceKind::TokenField
|
||
} else {
|
||
ValueSourceKind::MemberField
|
||
}
|
||
}
|
||
|
||
fn matches_request_param(chain: &[String]) -> bool {
|
||
let lower = lower_segments(chain);
|
||
(lower.first().is_some_and(|segment| segment == "params"))
|
||
|| (lower.len() >= 2 && lower[0] == "self" && lower[1] == "params")
|
||
|| (lower.len() >= 3
|
||
&& matches!(lower[0].as_str(), "req" | "request")
|
||
&& lower[1] == "params")
|
||
|| (lower.len() >= 3 && lower[0] == "ctx" && lower[1] == "params")
|
||
}
|
||
|
||
fn matches_request_body(chain: &[String]) -> bool {
|
||
let lower = lower_segments(chain);
|
||
(lower.len() >= 3 && matches!(lower[0].as_str(), "req" | "request") && lower[1] == "body")
|
||
|| (lower.len() >= 3
|
||
&& matches!(lower[0].as_str(), "req" | "request")
|
||
&& matches!(
|
||
lower[1].as_str(),
|
||
"form" | "json" | "values" | "post" | "data"
|
||
))
|
||
|| (lower.len() >= 4 && lower[0] == "ctx" && lower[1] == "request" && lower[2] == "body")
|
||
|| (lower.len() >= 3 && lower[0] == "ctx" && lower[1] == "body")
|
||
}
|
||
|
||
fn matches_request_query(chain: &[String]) -> bool {
|
||
let lower = lower_segments(chain);
|
||
(lower.len() >= 3 && matches!(lower[0].as_str(), "req" | "request") && lower[1] == "query")
|
||
|| (lower.len() >= 3
|
||
&& matches!(lower[0].as_str(), "req" | "request")
|
||
&& matches!(lower[1].as_str(), "args" | "get"))
|
||
|| (lower.len() >= 3 && lower[0] == "ctx" && lower[1] == "query")
|
||
|| (lower.len() >= 4 && lower[0] == "ctx" && lower[1] == "request" && lower[2] == "query")
|
||
}
|
||
|
||
fn matches_session_context(chain: &[String]) -> bool {
|
||
let lower = lower_segments(chain);
|
||
// Bare `session` is overloaded: in JS/TS it routinely means
|
||
// NextAuth/express-session and `session.user.id` is auth context;
|
||
// in Python `session.commit()`, `session.add(..)`, `session.scalar(..)`
|
||
// are SQLAlchemy ORM calls which have nothing to do with
|
||
// authentication. When the chain starts with bare `session`,
|
||
// refuse to classify it as auth context if the next segment is a
|
||
// canonical SQLAlchemy / SQLAlchemy-style ORM method name ,
|
||
// those are read/write verbs and never identity accessors. Any
|
||
// other field-style accessor (`session.user`, `session.user_id`,
|
||
// `session.workspace_id`, `session.role`) stays a Session-context
|
||
// chain so the stale-authorization / ownership rules still see
|
||
// session-backed foreign ids. Bare `session` with no following
|
||
// segment is ambiguous and refused.
|
||
// Chain length 1 (`session` alone, as the receiver of a subscript
|
||
// like `session[:user_id]`) stays auth context, the session
|
||
// ambiguity only kicks in when there's a follow-up segment that
|
||
// can be inspected. Length 2 with a known ORM verb (`session.commit`,
|
||
// `session.add`) is denylisted; any other follow-up segment
|
||
// (`session.user`, `session.workspace_id`, `session.role`) keeps
|
||
// its Session classification. Length 3+ chains with `session` at
|
||
// the root always stay auth (they describe a session-stored
|
||
// member or sub-member).
|
||
let bare_session_chain_is_auth = lower.first().is_some_and(|segment| segment == "session")
|
||
&& (lower.len() == 1 || lower.len() >= 3 || !is_orm_session_verb(&lower[1]));
|
||
let unambiguous_chain_root = lower.first().is_some_and(|segment| {
|
||
matches!(
|
||
segment.as_str(),
|
||
"current_user"
|
||
| "current_account"
|
||
| "current_member"
|
||
| "securitycontext"
|
||
| "principal"
|
||
| "authentication"
|
||
)
|
||
});
|
||
bare_session_chain_is_auth
|
||
|| unambiguous_chain_root
|
||
|| (lower.len() >= 2
|
||
&& matches!(lower[0].as_str(), "req" | "request")
|
||
&& matches!(lower[1].as_str(), "session" | "user" | "currentuser"))
|
||
|| (lower.len() >= 3
|
||
&& lower[0] == "self"
|
||
&& matches!(lower[1].as_str(), "request" | "session" | "current_user")
|
||
&& matches!(lower[2].as_str(), "session" | "user" | "currentuser"))
|
||
|| (lower.len() >= 3
|
||
&& lower[0] == "ctx"
|
||
&& matches!(lower[1].as_str(), "session" | "state"))
|
||
}
|
||
|
||
/// Denylist of SQLAlchemy / generic ORM session verbs; the comparison is
/// exact and case-sensitive.
///
/// The Python pytest-fixture idiom
/// (`session: Session = sqlalchemy_session()`) drives every test method
/// through `session.commit()` / `session.add(...)` /
/// `session.scalar(...)`. Classifying any of those calls as auth Session
/// context would falsely qualify thousands of test methods as receiving
/// user input. Only verbs naming a SQL / transaction operation are listed.
/// Identity-looking field accessors (`user`, `user_id`, `role`,
/// `workspace_id`, `project_id`, ...) are not, so they remain auth
/// Session.
fn is_orm_session_verb(segment: &str) -> bool {
    const ORM_SESSION_VERBS: &[&str] = &[
        "commit",
        "rollback",
        "flush",
        "refresh",
        "merge",
        "expunge",
        "expunge_all",
        "close",
        "begin",
        "begin_nested",
        "query",
        "scalar",
        "scalars",
        "execute",
        "exec",
        "exec_driver_sql",
        "add",
        "add_all",
        "delete",
        "bulk_save_objects",
        "bulk_insert_mappings",
        "bulk_update_mappings",
        "configure",
        "info",
    ];
    ORM_SESSION_VERBS.contains(&segment)
}
|
||
|
||
fn subscript_value_ref(node: Node<'_>, bytes: &[u8]) -> Option<ValueRef> {
|
||
let object = node
|
||
.child_by_field_name("object")
|
||
.or_else(|| node.child_by_field_name("value"))
|
||
.or_else(|| node.child_by_field_name("operand"));
|
||
let index = node
|
||
.child_by_field_name("index")
|
||
.or_else(|| node.child_by_field_name("subscript"));
|
||
let (object, index) = if let (Some(object), Some(index)) = (object, index) {
|
||
(object, index)
|
||
} else {
|
||
let children = named_children(node);
|
||
match children.as_slice() {
|
||
[object, index, ..] => (*object, *index),
|
||
_ => return None,
|
||
}
|
||
};
|
||
let base_chain = member_chain(object, bytes);
|
||
let base = if base_chain.is_empty() {
|
||
text(object, bytes)
|
||
} else {
|
||
base_chain.join(".")
|
||
};
|
||
let index_text = text(index, bytes);
|
||
let field = Some(strip_quotes(&index_text));
|
||
let source_kind = if base_chain.is_empty() {
|
||
ValueSourceKind::ArrayIndex
|
||
} else {
|
||
match classify_member_chain(&base_chain) {
|
||
ValueSourceKind::MemberField => ValueSourceKind::ArrayIndex,
|
||
other => other,
|
||
}
|
||
};
|
||
|
||
Some(ValueRef {
|
||
source_kind,
|
||
name: if source_kind == ValueSourceKind::ArrayIndex {
|
||
format!("{base}[{index_text}]")
|
||
} else {
|
||
format!("{base}.{}", strip_quotes(&index_text))
|
||
},
|
||
base: Some(base),
|
||
field,
|
||
index: Some(index_text),
|
||
span: span(node),
|
||
})
|
||
}
|
||
|
||
pub fn member_chain(node: Node<'_>, bytes: &[u8]) -> Vec<String> {
|
||
if node.kind() == "call" {
|
||
// Ruby-style call: explicit receiver field + method/name field.
|
||
if let Some(receiver) = node.child_by_field_name("receiver") {
|
||
let mut chain = member_chain(receiver, bytes);
|
||
let method = node
|
||
.child_by_field_name("method")
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
.map(|method| text(method, bytes))
|
||
.unwrap_or_default();
|
||
if !method.is_empty() {
|
||
chain.push(method);
|
||
}
|
||
return chain;
|
||
}
|
||
// Python-style call: callable expression in the `function` field.
|
||
// Recursing into it lets chained shapes like
|
||
// `select(X).filter_by(...)` produce `["select()", "filter_by"]`
|
||
// — the parent attribute branch appends `()` when its `object`
|
||
// is a call, marking the intermediate-call shape so that
|
||
// `receiver_is_chained_call` detects it. Closes airflow-style
|
||
// SQLAlchemy queryset-builder chains that previously reduced to
|
||
// bare `["filter_by"]`.
|
||
if let Some(function) = node.child_by_field_name("function") {
|
||
return member_chain(function, bytes);
|
||
}
|
||
// Bare-method fallback for parser shapes that expose method/name
|
||
// without a receiver (Ruby implicit-self calls, etc.).
|
||
let method = node
|
||
.child_by_field_name("method")
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
.map(|method| text(method, bytes))
|
||
.unwrap_or_default();
|
||
if !method.is_empty() {
|
||
return vec![method];
|
||
}
|
||
return Vec::new();
|
||
}
|
||
|
||
if node.kind() == "method_invocation" || node.kind() == "method_call_expression" {
|
||
let mut chain = node
|
||
.child_by_field_name("object")
|
||
.or_else(|| node.child_by_field_name("receiver"))
|
||
.map(|object| member_chain(object, bytes))
|
||
.unwrap_or_default();
|
||
let method = node
|
||
.child_by_field_name("name")
|
||
.or_else(|| node.child_by_field_name("method"))
|
||
.map(|method| text(method, bytes))
|
||
.unwrap_or_default();
|
||
if !method.is_empty() {
|
||
chain.push(method);
|
||
}
|
||
return chain;
|
||
}
|
||
|
||
if node.kind() == "scope_resolution" {
|
||
let mut chain = Vec::new();
|
||
if let Some(scope) = node.child_by_field_name("scope") {
|
||
chain.extend(member_chain(scope, bytes));
|
||
}
|
||
if let Some(name) = node.child_by_field_name("name") {
|
||
let value = text(name, bytes);
|
||
if !value.is_empty() {
|
||
chain.push(value);
|
||
}
|
||
}
|
||
return chain;
|
||
}
|
||
|
||
if node.kind() == "scoped_identifier" {
|
||
let mut chain = Vec::new();
|
||
if let Some(path) = node.child_by_field_name("path") {
|
||
chain.extend(member_chain(path, bytes));
|
||
}
|
||
if let Some(name) = node.child_by_field_name("name") {
|
||
let value = text(name, bytes);
|
||
if !value.is_empty() {
|
||
chain.push(value);
|
||
}
|
||
}
|
||
return chain;
|
||
}
|
||
|
||
if !matches!(
|
||
node.kind(),
|
||
"member_expression"
|
||
| "attribute"
|
||
| "selector_expression"
|
||
| "field_expression"
|
||
| "field_access"
|
||
) {
|
||
let value = text(node, bytes);
|
||
return if value.is_empty() {
|
||
Vec::new()
|
||
} else {
|
||
vec![value]
|
||
};
|
||
}
|
||
|
||
let mut chain = Vec::new();
|
||
if let Some(object) = node
|
||
.child_by_field_name("object")
|
||
.or_else(|| node.child_by_field_name("value"))
|
||
.or_else(|| node.child_by_field_name("operand"))
|
||
.or_else(|| node.child_by_field_name("argument"))
|
||
{
|
||
let object_is_call = matches!(
|
||
object.kind(),
|
||
"call" | "call_expression" | "method_invocation" | "method_call_expression"
|
||
);
|
||
let mut sub = member_chain(object, bytes);
|
||
// Mark intermediate-call segments with `()` so a downstream
|
||
// chain like `select(X).filter_by(...)` becomes
|
||
// `["select()", "filter_by"]` rather than `["select", "filter_by"]`.
|
||
// `receiver_is_chained_call` consults the `(` to detect the
|
||
// opaque-builder receiver.
|
||
if object_is_call
|
||
&& sub.last().map(|s| !s.ends_with(')')).unwrap_or(false)
|
||
&& let Some(last) = sub.last_mut()
|
||
{
|
||
last.push_str("()");
|
||
}
|
||
chain.extend(sub);
|
||
}
|
||
if let Some(property) = node
|
||
.child_by_field_name("property")
|
||
.or_else(|| node.child_by_field_name("attribute"))
|
||
.or_else(|| node.child_by_field_name("field"))
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
{
|
||
let property_text = text(property, bytes);
|
||
if !property_text.is_empty() {
|
||
chain.push(property_text);
|
||
}
|
||
}
|
||
chain
|
||
}
|
||
|
||
pub fn callee_name(node: Node<'_>, bytes: &[u8]) -> String {
|
||
match node.kind() {
|
||
"identifier" | "property_identifier" | "constant" | "field_identifier" => text(node, bytes),
|
||
"member_expression"
|
||
| "attribute"
|
||
| "selector_expression"
|
||
| "field_expression"
|
||
| "scoped_identifier"
|
||
| "field_access"
|
||
| "scope_resolution"
|
||
| "call"
|
||
| "method_invocation"
|
||
| "method_call_expression" => member_chain(node, bytes).join("."),
|
||
_ => text(node, bytes),
|
||
}
|
||
}
|
||
|
||
pub fn call_name(node: Node<'_>, bytes: &[u8]) -> String {
|
||
if !matches!(
|
||
node.kind(),
|
||
"call_expression" | "call" | "method_invocation" | "method_call_expression"
|
||
) {
|
||
return callee_name(node, bytes);
|
||
}
|
||
|
||
if let Some(function) = node.child_by_field_name("function") {
|
||
return callee_name(function, bytes);
|
||
}
|
||
|
||
let method = node
|
||
.child_by_field_name("method")
|
||
.or_else(|| node.child_by_field_name("name"))
|
||
.map(|child| text(child, bytes))
|
||
.unwrap_or_default();
|
||
let receiver = node
|
||
.child_by_field_name("receiver")
|
||
.or_else(|| node.child_by_field_name("object"))
|
||
.or_else(|| node.child_by_field_name("scope"))
|
||
.or_else(|| node.child_by_field_name("argument"))
|
||
.map(|child| member_chain(child, bytes).join("."))
|
||
.filter(|value| !value.is_empty());
|
||
|
||
match (receiver, method.is_empty()) {
|
||
(Some(receiver), false) => format!("{receiver}.{method}"),
|
||
(_, false) => method,
|
||
_ => text(node, bytes),
|
||
}
|
||
}
|
||
|
||
pub fn member_target(node: Node<'_>, bytes: &[u8]) -> Option<(String, String)> {
|
||
let object = node
|
||
.child_by_field_name("object")
|
||
.or_else(|| node.child_by_field_name("operand"))
|
||
.or_else(|| node.child_by_field_name("value"))
|
||
.or_else(|| node.child_by_field_name("receiver"))
|
||
.or_else(|| node.child_by_field_name("argument"))?;
|
||
let property = node
|
||
.child_by_field_name("property")
|
||
.or_else(|| node.child_by_field_name("field"))
|
||
.or_else(|| node.child_by_field_name("attribute"))
|
||
.or_else(|| node.child_by_field_name("name"))?;
|
||
Some((text(object, bytes), text(property, bytes)))
|
||
}
|
||
|
||
pub fn http_method_from_name(name: &str) -> Option<HttpMethod> {
|
||
match name.to_ascii_lowercase().as_str() {
|
||
"get" => Some(HttpMethod::Get),
|
||
"post" => Some(HttpMethod::Post),
|
||
"put" => Some(HttpMethod::Put),
|
||
"delete" => Some(HttpMethod::Delete),
|
||
"patch" => Some(HttpMethod::Patch),
|
||
"all" | "any" => Some(HttpMethod::All),
|
||
"use" => Some(HttpMethod::Use),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
/// Joins a route `prefix` and a `route` with exactly one `/` between them.
///
/// Trailing slashes on `prefix` and leading slashes on `route` are dropped
/// before joining. An empty result collapses to `/`, and an empty prefix
/// yields a path that starts with `/`.
pub fn join_route_paths(prefix: &str, route: &str) -> String {
    let head = prefix.trim_end_matches('/');
    let tail = route.trim_start_matches('/');

    if !tail.is_empty() {
        // With an empty head this yields `/tail`.
        format!("{head}/{tail}")
    } else if head.is_empty() {
        "/".to_string()
    } else {
        head.to_string()
    }
}
|
||
|
||
fn call_receiver_subjects(node: Node<'_>, bytes: &[u8]) -> Vec<ValueRef> {
|
||
let mut subjects = Vec::new();
|
||
if let Some(receiver) = node
|
||
.child_by_field_name("receiver")
|
||
.or_else(|| node.child_by_field_name("object"))
|
||
.or_else(|| node.child_by_field_name("argument"))
|
||
.or_else(|| {
|
||
node.child_by_field_name("function").and_then(|function| {
|
||
function
|
||
.child_by_field_name("object")
|
||
.or_else(|| function.child_by_field_name("operand"))
|
||
.or_else(|| function.child_by_field_name("argument"))
|
||
})
|
||
})
|
||
{
|
||
subjects.extend(extract_value_refs(receiver, bytes));
|
||
}
|
||
subjects
|
||
}
|
||
|
||
pub fn string_literal_value(node: Node<'_>, bytes: &[u8]) -> Option<String> {
|
||
match node.kind() {
|
||
"string"
|
||
| "template_string"
|
||
| "string_literal"
|
||
| "interpreted_string_literal"
|
||
| "raw_string_literal" => Some(strip_quotes(&text(node, bytes))),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
pub fn object_property_value<'tree>(
|
||
node: Node<'tree>,
|
||
bytes: &[u8],
|
||
names: &[&str],
|
||
) -> Option<Node<'tree>> {
|
||
if node.kind() != "object" {
|
||
return None;
|
||
}
|
||
|
||
for child in named_children(node) {
|
||
match child.kind() {
|
||
"pair" => {
|
||
let Some(key) = child.child_by_field_name("key") else {
|
||
continue;
|
||
};
|
||
let key_name = strip_quotes(&text(key, bytes));
|
||
if names.iter().any(|name| *name == key_name) {
|
||
return child.child_by_field_name("value");
|
||
}
|
||
}
|
||
"shorthand_property_identifier" | "identifier" => {
|
||
let key_name = text(child, bytes);
|
||
if names.iter().any(|name| *name == key_name) {
|
||
return Some(child);
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
None
|
||
}
|
||
|
||
/// Returns the child stored under the `definition` field of `node`, if any.
pub fn decorated_definition_child(node: Node<'_>) -> Option<Node<'_>> {
    const DEFINITION_FIELD: &str = "definition";
    node.child_by_field_name(DEFINITION_FIELD)
}
|
||
|
||
pub fn function_definition_node(node: Node<'_>) -> Node<'_> {
|
||
decorated_definition_child(node).unwrap_or(node)
|
||
}
|
||
|
||
/// Collects the named children of `node` in order, skipping any index for
/// which the lookup returns `None`.
pub fn named_children(node: Node<'_>) -> Vec<Node<'_>> {
    (0..node.named_child_count())
        .filter_map(|idx| node.named_child(idx as u32))
        .collect()
}
|
||
|
||
pub fn text(node: Node<'_>, bytes: &[u8]) -> String {
|
||
node.utf8_text(bytes).unwrap_or("").to_string()
|
||
}
|
||
|
||
pub fn span(node: Node<'_>) -> (usize, usize) {
|
||
(node.start_byte(), node.end_byte())
|
||
}
|
||
|
||
/// Removes duplicate value references in place, keeping the first
/// occurrence of each and preserving order.
///
/// Two references are duplicates when both their `name` and `span` match.
fn dedup_value_refs(values: &mut Vec<ValueRef>) {
    let mut unique: Vec<ValueRef> = Vec::new();
    for candidate in std::mem::take(values) {
        let already_kept = unique
            .iter()
            .any(|kept| kept.name == candidate.name && kept.span == candidate.span);
        if !already_kept {
            unique.push(candidate);
        }
    }
    *values = unique;
}
|
||
|
||
/// Returns a copy of `chain` with every segment ASCII-lowercased.
/// Non-ASCII characters are left untouched.
fn lower_segments(chain: &[String]) -> Vec<String> {
    let mut lowered = Vec::with_capacity(chain.len());
    for segment in chain {
        lowered.push(segment.to_ascii_lowercase());
    }
    lowered
}
|
||
|
||
fn accessor_call_value_ref(
|
||
node: Node<'_>,
|
||
callee: &str,
|
||
chain: &[String],
|
||
args: &[Node<'_>],
|
||
bytes: &[u8],
|
||
) -> Option<ValueRef> {
|
||
let method = bare_method_name(callee);
|
||
let field = args
|
||
.first()
|
||
.and_then(|arg| string_literal_value(*arg, bytes));
|
||
let source_kind = match method {
|
||
"Param" | "PathParam" => Some(ValueSourceKind::RequestParam),
|
||
"Query" | "QueryParam" | "DefaultQuery" | "getParameter" | "getQueryString" => {
|
||
Some(ValueSourceKind::RequestQuery)
|
||
}
|
||
"PostForm" | "FormValue" | "DefaultPostForm" => Some(ValueSourceKind::RequestBody),
|
||
"Get" | "GetString" | "MustGet" | "getAttribute" => Some(ValueSourceKind::Session),
|
||
_ if chain.first().is_some_and(|segment| {
|
||
matches!(
|
||
segment.to_ascii_lowercase().as_str(),
|
||
"invitation" | "token" | "invite"
|
||
)
|
||
}) && method.starts_with("get")
|
||
&& method.len() > 3 =>
|
||
{
|
||
Some(ValueSourceKind::TokenField)
|
||
}
|
||
_ => None,
|
||
}?;
|
||
|
||
let normalized_field = field
|
||
.or_else(|| {
|
||
if source_kind == ValueSourceKind::TokenField && method.starts_with("get") {
|
||
Some(method[3..].to_string())
|
||
} else {
|
||
None
|
||
}
|
||
})
|
||
.map(|field| {
|
||
let mut chars = field.chars();
|
||
let Some(first) = chars.next() else {
|
||
return field;
|
||
};
|
||
format!("{}{}", first.to_ascii_lowercase(), chars.as_str())
|
||
})
|
||
.filter(|field| !field.is_empty());
|
||
|
||
let base = match source_kind {
|
||
ValueSourceKind::Session => Some("session".to_string()),
|
||
_ if chain.len() > 1 => Some(chain[..chain.len() - 1].join(".")),
|
||
_ => chain.first().cloned(),
|
||
};
|
||
|
||
let name = if let Some(field) = normalized_field.as_deref() {
|
||
match base.as_deref() {
|
||
Some(base) if !base.is_empty() => format!("{base}.{field}"),
|
||
_ => field.to_string(),
|
||
}
|
||
} else {
|
||
callee.to_string()
|
||
};
|
||
|
||
Some(ValueRef {
|
||
source_kind,
|
||
name,
|
||
base,
|
||
field: normalized_field,
|
||
index: None,
|
||
span: span(node),
|
||
})
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::{is_owner_field_subject, is_self_actor_subject, is_self_actor_type_text};
|
||
use crate::auth_analysis::model::{ValueRef, ValueSourceKind};
|
||
|
||
#[test]
|
||
fn is_self_actor_type_text_matches_known_wrappers() {
|
||
// Tight exact set: bare names whose entire identity is "auth subject".
|
||
assert!(is_self_actor_type_text("Authenticated"));
|
||
assert!(is_self_actor_type_text("Identity"));
|
||
assert!(is_self_actor_type_text("Principal"));
|
||
|
||
// Structural form: <PREFIX>User<SUFFIX?>.
|
||
assert!(is_self_actor_type_text("CurrentUser"));
|
||
assert!(is_self_actor_type_text("SessionUser"));
|
||
assert!(is_self_actor_type_text("AuthUser"));
|
||
assert!(is_self_actor_type_text("AdminUser"));
|
||
assert!(is_self_actor_type_text("AuthenticatedUser"));
|
||
// Lemmy: LocalUserView (the real-repo motivation for the
|
||
// structural recogniser).
|
||
assert!(is_self_actor_type_text("LocalUserView"));
|
||
assert!(is_self_actor_type_text("LocalUser"));
|
||
assert!(is_self_actor_type_text("LoggedInUser"));
|
||
assert!(is_self_actor_type_text("CurrentUserContext"));
|
||
assert!(is_self_actor_type_text("AuthenticatedUserSession"));
|
||
assert!(is_self_actor_type_text("SessionUserToken"));
|
||
assert!(is_self_actor_type_text("AdminUserInfo"));
|
||
// Qualified paths resolve to last segment.
|
||
assert!(is_self_actor_type_text("crate::auth::CurrentUser"));
|
||
assert!(is_self_actor_type_text("crate::user::LocalUserView"));
|
||
assert!(is_self_actor_type_text("&CurrentUser"));
|
||
assert!(is_self_actor_type_text("&mut AuthUser"));
|
||
// Generic wrappers: match on the base segment.
|
||
assert!(is_self_actor_type_text("CurrentUser<Admin>"));
|
||
assert!(is_self_actor_type_text("LocalUserView<Admin>"));
|
||
|
||
// Non-matches.
|
||
// Bare `User`, too loose; commonly a deserialised payload type.
|
||
assert!(!is_self_actor_type_text("User"));
|
||
assert!(!is_self_actor_type_text("UserPreferences"));
|
||
// `UserView` lacks an authority-prefix segment and stays a
|
||
// payload-shaped name.
|
||
assert!(!is_self_actor_type_text("UserView"));
|
||
// No prefix vocabulary match, still rejected.
|
||
assert!(!is_self_actor_type_text("PaymentUser"));
|
||
// Wrong suffix vocabulary.
|
||
assert!(!is_self_actor_type_text("CurrentUserPreferences"));
|
||
// Framework extractors / unrelated types.
|
||
assert!(!is_self_actor_type_text("Db"));
|
||
assert!(!is_self_actor_type_text("Path<(i64,)>"));
|
||
assert!(!is_self_actor_type_text("Json<Body>"));
|
||
// `RequireAuth` / `RequireLogin` were dropped from the exact
|
||
// set: they aren't `User`-bearing types and aren't
|
||
// semantically the auth subject, they're guard markers. The
|
||
// route-aware `axum::classify_guard_type` still treats them
|
||
// as a login guard via the looser substring match.
|
||
assert!(!is_self_actor_type_text("RequireAuth"));
|
||
assert!(!is_self_actor_type_text("RequireLogin"));
|
||
}
|
||
|
||
fn ident(name: &str) -> ValueRef {
|
||
ValueRef {
|
||
source_kind: ValueSourceKind::Identifier,
|
||
name: name.to_string(),
|
||
base: None,
|
||
field: None,
|
||
index: None,
|
||
span: (0, 0),
|
||
}
|
||
}
|
||
|
||
fn member(base: &str, field: &str) -> ValueRef {
|
||
ValueRef {
|
||
source_kind: ValueSourceKind::MemberField,
|
||
name: format!("{base}.{field}"),
|
||
base: Some(base.to_string()),
|
||
field: Some(field.to_string()),
|
||
index: None,
|
||
span: (0, 0),
|
||
}
|
||
}
|
||
|
||
fn session(base: &str, field: &str) -> ValueRef {
|
||
ValueRef {
|
||
source_kind: ValueSourceKind::Session,
|
||
name: format!("{base}.{field}"),
|
||
base: Some(base.to_string()),
|
||
field: Some(field.to_string()),
|
||
index: None,
|
||
span: (0, 0),
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn is_owner_field_subject_matches_known_column_names() {
|
||
assert!(is_owner_field_subject(&ident("owner_id")));
|
||
assert!(is_owner_field_subject(&ident("user_id")));
|
||
assert!(is_owner_field_subject(&ident("author_id")));
|
||
assert!(is_owner_field_subject(&ident("created_by")));
|
||
assert!(is_owner_field_subject(&member("row", "owner_id")));
|
||
assert!(!is_owner_field_subject(&ident("group_id")));
|
||
assert!(!is_owner_field_subject(&ident("doc_id")));
|
||
assert!(!is_owner_field_subject(&ident("user")));
|
||
}
|
||
|
||
#[test]
|
||
fn is_self_actor_subject_matches_known_self_shapes() {
|
||
assert!(is_self_actor_subject(&member("user", "id")));
|
||
assert!(is_self_actor_subject(&member("current_user", "id")));
|
||
assert!(is_self_actor_subject(&session("req.user", "id")));
|
||
assert!(is_self_actor_subject(&session("ctx.session.user", "id")));
|
||
// Wrong field.
|
||
assert!(!is_self_actor_subject(&member("user", "workspace_id")));
|
||
// Unknown base.
|
||
assert!(!is_self_actor_subject(&member("target", "id")));
|
||
// Plain identifier, no base.
|
||
assert!(!is_self_actor_subject(&ident("user_id")));
|
||
}
|
||
|
||
#[test]
|
||
fn type_text_is_trpc_options_matches_alias_and_inline_marker() {
|
||
use super::type_text_is_trpc_options;
|
||
use std::collections::HashSet;
|
||
let mut aliases = HashSet::new();
|
||
aliases.insert("GetOptions".to_string());
|
||
aliases.insert("UpdateOptions".to_string());
|
||
|
||
// Inline `TrpcSessionUser` marker, accepted regardless of alias set.
|
||
assert!(type_text_is_trpc_options(
|
||
": { ctx: { user: NonNullable<TrpcSessionUser> } }",
|
||
&aliases
|
||
));
|
||
assert!(type_text_is_trpc_options(
|
||
": { user: TrpcSessionUser }",
|
||
&HashSet::new()
|
||
));
|
||
|
||
// Plain alias name match.
|
||
assert!(type_text_is_trpc_options(": GetOptions", &aliases));
|
||
assert!(type_text_is_trpc_options("GetOptions", &aliases));
|
||
|
||
// Generic-wrapped alias.
|
||
assert!(type_text_is_trpc_options(": Promise<GetOptions>", &aliases));
|
||
assert!(type_text_is_trpc_options(
|
||
": NonNullable<UpdateOptions>",
|
||
&aliases
|
||
));
|
||
|
||
// Negatives: alias not in set, no inline marker.
|
||
assert!(!type_text_is_trpc_options(": OtherOptions", &aliases));
|
||
assert!(!type_text_is_trpc_options(": Promise<Foo>", &aliases));
|
||
assert!(!type_text_is_trpc_options(": SomeRandomType", &aliases));
|
||
// Substring of a longer identifier must NOT match.
|
||
assert!(!type_text_is_trpc_options(": MyGetOptionsX", &aliases));
|
||
}
|
||
|
||
#[test]
|
||
fn body_text_references_trpc_marker_recognises_known_markers() {
|
||
use super::body_text_references_trpc_marker as bm;
|
||
assert!(bm("type X = { user: NonNullable<TrpcSessionUser> }"));
|
||
assert!(bm("interface Ctx extends TRPCContext { ... }"));
|
||
assert!(bm("type Ctx = ProtectedTRPCContext"));
|
||
assert!(bm("export type Y = { ctx: TrpcContext }"));
|
||
// Negatives.
|
||
assert!(!bm("type X = { user: User }"));
|
||
assert!(!bm("type X = SessionContext"));
|
||
assert!(!bm("type X = { foo: SomeContext }"));
|
||
}
|
||
|
||
/// Pin the string-level analogue used by
|
||
/// `value_is_self_scoped_session_id_chain`: it must accept the
|
||
/// same set of session-scoped bases that `checks.rs::
|
||
/// is_self_scoped_session_base` accepts. When you add a new base
|
||
/// to one, add it to the other and update both tests.
|
||
#[test]
|
||
fn is_self_scoped_session_base_text_matches_known_session_bases() {
|
||
use super::is_self_scoped_session_base_text as bt;
|
||
// Express / passport idioms.
|
||
assert!(bt("req.user"));
|
||
assert!(bt("request.user"));
|
||
assert!(bt("req.session.user"));
|
||
assert!(bt("req.session.currentUser"));
|
||
// Bare session.user (Next.js / NextAuth idiom).
|
||
assert!(bt("session.user"));
|
||
assert!(bt("session.currentUser"));
|
||
// Koa ctx.state / ctx.session.
|
||
assert!(bt("ctx.session.user"));
|
||
assert!(bt("ctx.state.user"));
|
||
// Negatives, bases that are NOT canonical authed-user roots.
|
||
assert!(!bt("req.body"));
|
||
assert!(!bt("req.params"));
|
||
assert!(!bt("ctx.user"));
|
||
assert!(!bt("data.user"));
|
||
assert!(!bt("user"));
|
||
}
|
||
|
||
/// Pins the bare-`session` chain narrowing: ORM session verbs
|
||
/// (`commit` / `add` / `scalar` / `execute` / ...) are denylisted
|
||
/// so they do not contribute auth Session evidence even though the
|
||
/// chain root is the literal name `session`. Any other field-
|
||
/// shaped second segment (`user`, `user_id`, `workspace_id`,
|
||
/// `project_id`, `role`) keeps its Session classification so the
|
||
/// stale-authorization / missing-ownership rules still see
|
||
/// session-backed foreign ids. Closes the airflow pytest cluster
|
||
/// where `session.commit()` made `unit_has_user_input_evidence`
|
||
/// return true on test methods with no actual user input, while
|
||
/// preserving the gin/rails/rocket stale-session fixtures whose
|
||
/// session chains use foreign-id field accessors.
|
||
#[test]
|
||
fn matches_session_context_denylists_orm_session_verbs() {
|
||
use super::matches_session_context as msc;
|
||
let v = |chain: &[&str]| chain.iter().map(|s| s.to_string()).collect::<Vec<_>>();
|
||
// Bare `session.<identity-field>`, auth context.
|
||
assert!(msc(&v(&["session", "user"])));
|
||
assert!(msc(&v(&["session", "user_id"])));
|
||
assert!(msc(&v(&["session", "id"])));
|
||
assert!(msc(&v(&["session", "uid"])));
|
||
assert!(msc(&v(&["session", "email"])));
|
||
assert!(msc(&v(&["session", "currentUser"])));
|
||
// Foreign-id fields stored on the session, must remain auth
|
||
// Session for the stale-authorization rule (gin/rails/rocket
|
||
// fixtures).
|
||
assert!(msc(&v(&["session", "workspace_id"])));
|
||
assert!(msc(&v(&["session", "project_id"])));
|
||
assert!(msc(&v(&["session", "role"])));
|
||
assert!(msc(&v(&["session", "currentWorkspaceID"])));
|
||
// SQLAlchemy verbs, NOT auth context.
|
||
assert!(!msc(&v(&["session", "commit"])));
|
||
assert!(!msc(&v(&["session", "rollback"])));
|
||
assert!(!msc(&v(&["session", "scalar"])));
|
||
assert!(!msc(&v(&["session", "scalars"])));
|
||
assert!(!msc(&v(&["session", "add"])));
|
||
assert!(!msc(&v(&["session", "delete"])));
|
||
assert!(!msc(&v(&["session", "execute"])));
|
||
assert!(!msc(&v(&["session", "flush"])));
|
||
assert!(!msc(&v(&["session", "query"])));
|
||
assert!(!msc(&v(&["session", "merge"])));
|
||
assert!(!msc(&v(&["session", "refresh"])));
|
||
assert!(!msc(&v(&["session", "close"])));
|
||
// Bare `session` alone (length 1) stays auth, covers
|
||
// subscript shapes like `session[:workspace_id]` whose object
|
||
// is just the bare `session` identifier.
|
||
assert!(msc(&v(&["session"])));
|
||
// `req.session.user`, unchanged: explicit auth-session base.
|
||
assert!(msc(&v(&["req", "session", "user"])));
|
||
// `request.session`, unchanged: req/request-prefixed arm
|
||
// recognises `session` regardless of any subsequent segment.
|
||
assert!(msc(&v(&["request", "session"])));
|
||
// `current_user.<x>`, unambiguous chain root, fires regardless.
|
||
assert!(msc(&v(&["current_user", "id"])));
|
||
assert!(msc(&v(&["current_user", "preferences"])));
|
||
}
|
||
|
||
/// Rust `parameter` nodes carry both a `pattern` field (the
|
||
/// binding) and a `type` field (the annotation). Until the
|
||
/// `parameter` arm was added to `collect_param_names`, the recursive default
|
||
/// arm collected identifiers from the `type` subtree as well,
|
||
/// turning `dst: &std::path::Path` into the param name set
|
||
/// `["dst", "std", "path", "Path"]`. `path` then matched the
|
||
/// framework-request-name allow-list in `is_external_input_param_name`,
|
||
/// gating `unit_has_user_input_evidence` open on internal helpers
|
||
/// that take a filesystem-path argument and re-firing
|
||
/// `missing_ownership_check` at every id-shaped operation
|
||
/// downstream. The arm restricts descent to the `pattern` field
|
||
/// for Rust parameters so only true binding names reach
|
||
/// `unit.params`. Real-repo motivation:
|
||
/// meilisearch/index-scheduler/src/scheduler/process_snapshot_creation.rs::remove_tasks
|
||
/// (`dst: &std::path::Path` made every `db.delete(task.uid)` call
|
||
/// fire missing-ownership-check). Same shape would also fire for
|
||
/// Rust functions taking `req: &Request<...>`,
|
||
/// `ctx: &Context<T>`, etc., where the type tail matches the
|
||
/// framework name list but the binding is unrelated.
|
||
#[test]
|
||
fn collect_param_names_rust_skips_type_segment_idents() {
|
||
use super::function_params;
|
||
let mut parser = tree_sitter::Parser::new();
|
||
parser
|
||
.set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE))
|
||
.unwrap();
|
||
let src = b"unsafe fn remove_tasks(tasks: &[Task], dst: &std::path::Path, sz: usize) {}";
|
||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||
let func = tree
|
||
.root_node()
|
||
.child(0)
|
||
.expect("source_file should have a function");
|
||
let params = function_params(func, src);
|
||
assert_eq!(
|
||
params,
|
||
vec!["tasks".to_string(), "dst".to_string(), "sz".to_string()],
|
||
"type-segment idents (`std`, `path`, `Path`) must NOT pollute the param-name set"
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn collect_param_names_rust_handles_request_typed_params() {
|
||
// `req: &Request<Body>`, `Request` and `Body` lowercase to
|
||
// `request` and `body`, both in the framework name list. The
|
||
// binding `req` is the only legitimate param name.
|
||
use super::function_params;
|
||
let mut parser = tree_sitter::Parser::new();
|
||
parser
|
||
.set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE))
|
||
.unwrap();
|
||
let src = b"fn handle(req: &Request<Body>, state: AppState) -> Response { todo!() }";
|
||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||
let func = tree.root_node().child(0).expect("function");
|
||
let params = function_params(func, src);
|
||
assert_eq!(
|
||
params,
|
||
vec!["req".to_string(), "state".to_string()],
|
||
"type idents `Request`/`Body`/`Response`/`AppState` must not leak as params"
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn collect_param_names_rust_destructured_pattern_picks_up_bindings() {
|
||
// Tuple-pattern binding: `(a, b): (u32, u32)` should yield
|
||
// both bound names from the pattern subtree, but NOT the type
|
||
// segment `u32`.
|
||
use super::function_params;
|
||
let mut parser = tree_sitter::Parser::new();
|
||
parser
|
||
.set_language(&tree_sitter::Language::from(tree_sitter_rust::LANGUAGE))
|
||
.unwrap();
|
||
let src = b"fn split((a, b): (u32, u32)) {}";
|
||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||
let func = tree.root_node().child(0).expect("function");
|
||
let params = function_params(func, src);
|
||
assert!(params.contains(&"a".to_string()), "got {:?}", params);
|
||
assert!(params.contains(&"b".to_string()), "got {:?}", params);
|
||
assert!(!params.contains(&"u32".to_string()), "got {:?}", params);
|
||
}
|
||
|
||
/// Go's stdlib `context.Context` is the canonical first-param of
|
||
/// most functions but is NOT user input ─ it carries deadline /
|
||
/// cancellation / value-bag, never an HTTP request. The Go arm of
|
||
/// `collect_param_names` drops the param entirely when its type is
|
||
/// `context.Context` so the bare name `ctx` doesn't trip the
|
||
/// framework-request-name allow-list.
|
||
///
|
||
/// Real-repo motivation:
|
||
/// `/Users/elipeter/oss/gitea/services/packages/packages.go::AddFileToExistingPackage`
|
||
/// and ~1900 sibling helpers passed
|
||
/// `unit_has_user_input_evidence` solely on this param.
|
||
#[test]
|
||
fn collect_param_names_go_drops_context_context_param() {
|
||
use super::function_params;
|
||
let mut parser = tree_sitter::Parser::new();
|
||
parser
|
||
.set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE))
|
||
.unwrap();
|
||
let src = b"package x\nfunc GetPackage(ctx context.Context, info *PackageInfo) {}\n";
|
||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||
let func = (0..tree.root_node().named_child_count())
|
||
.filter_map(|i| tree.root_node().named_child(i as u32))
|
||
.find(|n| n.kind() == "function_declaration")
|
||
.expect("file should have a function_declaration");
|
||
let params = function_params(func, src);
|
||
assert!(
|
||
!params.contains(&"ctx".to_string()),
|
||
"ctx context.Context must be dropped: got {:?}",
|
||
params
|
||
);
|
||
assert!(
|
||
!params.contains(&"context".to_string()) && !params.contains(&"Context".to_string()),
|
||
"type-segment idents must not leak: got {:?}",
|
||
params
|
||
);
|
||
assert!(
|
||
params.contains(&"info".to_string()),
|
||
"non-context typed params keep their name: got {:?}",
|
||
params
|
||
);
|
||
assert!(
|
||
!params.contains(&"PackageInfo".to_string()),
|
||
"type-segment idents must not leak from non-context params either: got {:?}",
|
||
params
|
||
);
|
||
}
|
||
|
||
/// Per-framework `*context.APIContext` (gitea), `*gin.Context`,
|
||
/// `iris.Context`, `*fiber.Ctx` and similar ARE user input ─ the
|
||
/// type-aware filter must NOT drop these. The non-stdlib package
|
||
/// name distinguishes them from the stdlib `context.Context`.
|
||
#[test]
|
||
fn collect_param_names_go_keeps_framework_context_param() {
|
||
use super::function_params;
|
||
let mut parser = tree_sitter::Parser::new();
|
||
parser
|
||
.set_language(&tree_sitter::Language::from(tree_sitter_go::LANGUAGE))
|
||
.unwrap();
|
||
let src = b"package x\nfunc Handle(ctx *context.APIContext) {}\n";
|
||
let tree = parser.parse(src.as_slice(), None).unwrap();
|
||
let func = (0..tree.root_node().named_child_count())
|
||
.filter_map(|i| tree.root_node().named_child(i as u32))
|
||
.find(|n| n.kind() == "function_declaration")
|
||
.expect("file should have a function_declaration");
|
||
let params = function_params(func, src);
|
||
assert!(
|
||
params.contains(&"ctx".to_string()),
|
||
"framework-bearing ctx must survive: got {:?}",
|
||
params
|
||
);
|
||
}
|
||
|
||
/// A Go declaration that shares one type across several names
/// (`a, b int`) must yield each of those names.
#[test]
fn collect_param_names_go_multi_name_param_decl() {
    use super::function_params;

    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go).unwrap();

    let src = b"package x\nfunc Add(a, b int, ctx context.Context) {}\n";
    let tree = parser.parse(src.as_slice(), None).unwrap();

    // First top-level `function_declaration` in the parsed file.
    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .find_map(|idx| {
            root.named_child(idx as u32)
                .filter(|child| child.kind() == "function_declaration")
        })
        .expect("file should have a function_declaration");

    let params = function_params(func, src);
    let has = |wanted: &str| params.iter().any(|name| name == wanted);

    // Both names of the shared-type declaration are present ...
    assert!(has("a"), "got {:?}", params);
    assert!(has("b"), "got {:?}", params);
    // ... while the stdlib context param and the type identifier are not.
    assert!(!has("ctx"), "got {:?}", params);
    assert!(!has("int"), "got {:?}", params);
}
|
||
|
||
/// DAO-helper shape: `func GetRunByRepoAndID(ctx context.Context,
/// repoID, runID int64)`. Id-like names carrying a bounded primitive
/// scalar type are scope identifiers handed in by the caller, NOT
/// user input. On non-route units (`function_params`, i.e.
/// `include_id_like_typed = false`) they must stay out of
/// `unit.params`; lifting them would open
/// `unit_has_user_input_evidence` for every internal Go ORM helper
/// and make `go.auth.missing_ownership_check` over-fire.
///
/// Real-repo trigger: gitea's
/// `models/actions/run_job.go::GetRunByRepoAndID` plus ~957 sibling
/// helpers in its `models/...` DAO layer. minio's `cmd/iam-*-store`
/// over-fires on the same shape, which is the canonical Go ORM
/// helper signature.
#[test]
fn collect_param_names_go_drops_id_like_scalar_params_for_dao_helper() {
    use super::function_params;

    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go).unwrap();

    let src =
        b"package x\nfunc GetRunByRepoAndID(ctx context.Context, repoID, runID int64) {}\n";
    let tree = parser.parse(src.as_slice(), None).unwrap();

    // First top-level `function_declaration` in the parsed file.
    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .find_map(|idx| {
            root.named_child(idx as u32)
                .filter(|child| child.kind() == "function_declaration")
        })
        .expect("file should have a function_declaration");

    let params = function_params(func, src);
    let has = |wanted: &str| params.iter().any(|name| name == wanted);

    assert!(!has("ctx"), "context.Context dropped: got {:?}", params);
    for id_like in ["repoID", "runID"] {
        assert!(
            !has(id_like),
            "id-like scalar param dropped for DAO helper: got {:?}",
            params
        );
    }
    assert!(
        params.is_empty(),
        "no params survive on DAO-shape helper: got {:?}",
        params
    );
}
|
||
|
||
/// The id-like drop is deliberately narrow: it applies only to
/// **bounded primitive scalar** types. Pointer, struct and slice
/// types are payload shapes that may or may not be user-controlled,
/// so they are left untouched and non-DAO helpers keep their
/// evidence.
#[test]
fn collect_param_names_go_keeps_id_like_pointer_struct_param() {
    use super::function_params;

    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go).unwrap();

    // `runnerID *Runner` has an id-like name but a pointer type
    // (payload shape), so its name has to remain in the result.
    let src = b"package x\nfunc UpdateRunner(ctx context.Context, runnerID *Runner) {}\n";
    let tree = parser.parse(src.as_slice(), None).unwrap();

    // First top-level `function_declaration` in the parsed file.
    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .find_map(|idx| {
            root.named_child(idx as u32)
                .filter(|child| child.kind() == "function_declaration")
        })
        .expect("file should have a function_declaration");

    let params = function_params(func, src);
    let runner_kept = params.iter().any(|name| name == "runnerID");
    assert!(
        runner_kept,
        "id-like pointer param survives: got {:?}",
        params
    );
}
|
||
|
||
/// Route handlers are collected via `function_params_route_handler`
/// (`include_id_like_typed = true`), where the id-like-scalar filter
/// must NOT apply. A path param on a REST route is *the* primary
/// user input, and middleware-injected auth checks depend on those
/// names being present in `unit.params`.
#[test]
fn collect_param_names_go_route_handler_keeps_id_like_scalar_params() {
    use super::function_params_route_handler;

    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&go).unwrap();

    let src = b"package x\nfunc GetRepo(ctx context.Context, repoID int64) {}\n";
    let tree = parser.parse(src.as_slice(), None).unwrap();

    // First top-level `function_declaration` in the parsed file.
    let root = tree.root_node();
    let func = (0..root.named_child_count())
        .find_map(|idx| {
            root.named_child(idx as u32)
                .filter(|child| child.kind() == "function_declaration")
        })
        .expect("file should have a function_declaration");

    let params = function_params_route_handler(func, src);
    let repo_id_kept = params.iter().any(|name| name == "repoID");
    assert!(
        repo_id_kept,
        "id-like scalar param kept for route handler: got {:?}",
        params
    );
}
|
||
|
||
/// Pins what `member_chain` returns for the SQLAlchemy queryset chain
/// `select(C).filter_by(id=x)`. Python `call` nodes expose a
/// `function` field (not `receiver`/`method`), so before the fix the
/// recursive call arm produced an empty Vec and the chain collapsed
/// to bare `["filter_by"]`. The fix has two parts: (1) the `call` arm
/// walks the `function` field; (2) the parent attribute branch
/// appends `()` to the last segment when its `object` is a call. The
/// combined result is `["select()", "filter_by"]`, which lets
/// `receiver_is_chained_call` recognise the intermediate-call shape.
#[test]
fn member_chain_python_select_filter_by_chain_marks_intermediate_call() {
    use super::{callee_name, member_chain};
    use tree_sitter::{Node, Parser};

    /// Pre-order search for the first `call` node whose `function`
    /// field is an `attribute`.
    fn find_outer_call<'a>(node: Node<'a>) -> Option<Node<'a>> {
        let is_attribute_call = node.kind() == "call"
            && node
                .child_by_field_name("function")
                .is_some_and(|callee| callee.kind() == "attribute");
        if is_attribute_call {
            return Some(node);
        }
        (0..node.named_child_count())
            .filter_map(|idx| node.named_child(idx as u32))
            .find_map(find_outer_call)
    }

    let python = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let mut parser = Parser::new();
    parser.set_language(&python).unwrap();

    let src = b"x = select(C).filter_by(id=u)\n";
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let outer_call = find_outer_call(tree.root_node())
        .expect("expected outer call node `select(C).filter_by(id=u)`");

    let expected_chain = vec!["select()".to_string(), "filter_by".to_string()];
    assert_eq!(
        member_chain(outer_call, src),
        expected_chain,
        "Python chained call must produce `[select(), filter_by]` so receiver_is_chained_call detects the intermediate-call shape",
    );
    assert_eq!(
        callee_name(outer_call, src),
        "select().filter_by".to_string(),
        "callee_name joins the chain with `.`",
    );
}
|
||
|
||
/// Regression guard: a simple Python `obj.method(arg)` callee keeps
/// its previous `member_chain` output (`["obj", "method"]`). The
/// `function`-field traversal must not pollute non-chained shapes.
///
/// Both the raw chain and the `.`-joined `callee_name` are asserted,
/// so a stray `()` marker is caught at either level.
#[test]
fn member_chain_python_simple_attribute_call_unchanged() {
    use super::{callee_name, member_chain};
    use tree_sitter::{Node, Parser};

    /// Pre-order search for the first `call` node in the tree.
    fn find_call<'a>(node: Node<'a>) -> Option<Node<'a>> {
        if node.kind() == "call" {
            return Some(node);
        }
        (0..node.named_child_count())
            .filter_map(|idx| node.named_child(idx as u32))
            .find_map(find_call)
    }

    let python = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let mut parser = Parser::new();
    parser.set_language(&python).unwrap();

    let src = b"x = obj.method(a)\n";
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let call_node = find_call(tree.root_node()).expect("expected `obj.method(a)` call");

    // The chain itself is what the test name promises to pin.
    assert_eq!(
        member_chain(call_node, src),
        vec!["obj".to_string(), "method".to_string()],
        "simple attribute call must keep the plain `[obj, method]` chain",
    );
    assert_eq!(
        callee_name(call_node, src),
        "obj.method".to_string(),
        "simple attribute call must not pick up `()` markers",
    );
}
|
||
|
||
mod ruby_visibility_and_callbacks {
|
||
use super::super::{
|
||
RubyVisibility, ruby_callback_target_names, ruby_method_is_callback_or_private,
|
||
ruby_method_visibility,
|
||
};
|
||
use tree_sitter::{Node, Parser, Tree};
|
||
|
||
fn parse(src: &str) -> (Tree, Vec<u8>) {
|
||
let mut parser = Parser::new();
|
||
parser
|
||
.set_language(&tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE))
|
||
.unwrap();
|
||
let bytes = src.as_bytes().to_vec();
|
||
let tree = parser.parse(bytes.as_slice(), None).expect("parse");
|
||
(tree, bytes)
|
||
}
|
||
|
||
fn find_class_body<'a>(node: Node<'a>) -> Option<Node<'a>> {
|
||
if node.kind() == "class" {
|
||
return node.child_by_field_name("body");
|
||
}
|
||
for idx in 0..node.named_child_count() {
|
||
let Some(child) = node.named_child(idx as u32) else {
|
||
continue;
|
||
};
|
||
if let Some(body) = find_class_body(child) {
|
||
return Some(body);
|
||
}
|
||
}
|
||
None
|
||
}
|
||
|
||
#[test]
|
||
fn bare_private_directive_marks_subsequent_methods_private() {
|
||
let src = "class C\n def public_a; end\n private\n def helper_b; end\n def helper_c; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let vis = ruby_method_visibility(body, &bytes);
|
||
assert_eq!(vis.get("public_a").copied(), Some(RubyVisibility::Public));
|
||
assert_eq!(vis.get("helper_b").copied(), Some(RubyVisibility::Private));
|
||
assert_eq!(vis.get("helper_c").copied(), Some(RubyVisibility::Private));
|
||
}
|
||
|
||
#[test]
|
||
fn targeted_private_marks_only_named_methods() {
|
||
let src = "class C\n def a; end\n def b; end\n def c; end\n private :a, :c\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let vis = ruby_method_visibility(body, &bytes);
|
||
assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private));
|
||
assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public));
|
||
assert_eq!(vis.get("c").copied(), Some(RubyVisibility::Private));
|
||
}
|
||
|
||
#[test]
|
||
fn public_directive_re_opens_visibility() {
|
||
let src = "class C\n private\n def a; end\n public\n def b; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let vis = ruby_method_visibility(body, &bytes);
|
||
assert_eq!(vis.get("a").copied(), Some(RubyVisibility::Private));
|
||
assert_eq!(vis.get("b").copied(), Some(RubyVisibility::Public));
|
||
}
|
||
|
||
#[test]
|
||
fn protected_directive_recognised() {
|
||
let src = "class C\n protected\n def helper; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let vis = ruby_method_visibility(body, &bytes);
|
||
assert_eq!(vis.get("helper").copied(), Some(RubyVisibility::Protected));
|
||
}
|
||
|
||
#[test]
|
||
fn before_action_collects_callback_target_names() {
|
||
let src = "class C\n before_action :set_account\n before_action :set_user, only: [:show, :update]\n def show; end\n def set_account; end\n def set_user; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||
assert!(callbacks.contains("set_account"));
|
||
assert!(callbacks.contains("set_user"));
|
||
// `only:` / `except:` keys must not pollute the target set.
|
||
assert!(!callbacks.contains("show"));
|
||
assert!(!callbacks.contains("update"));
|
||
assert!(!callbacks.contains("only"));
|
||
}
|
||
|
||
#[test]
|
||
fn before_action_block_form_yields_no_targets() {
|
||
// Block form `before_action do ... end` carries no symbol arg.
|
||
let src =
|
||
"class C\n before_action do\n require_login\n end\n def show; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||
assert!(callbacks.is_empty(), "got {:?}", callbacks);
|
||
}
|
||
|
||
#[test]
|
||
fn skip_before_action_target_collected() {
|
||
let src = "class C\n skip_before_action :authenticate_user!, only: [:index]\n def index; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||
assert!(callbacks.contains("authenticate_user!"));
|
||
}
|
||
|
||
#[test]
|
||
fn legacy_before_filter_alias_collected() {
|
||
let src = "class C\n before_filter :legacy_helper\n def legacy_helper; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||
assert!(callbacks.contains("legacy_helper"));
|
||
}
|
||
|
||
#[test]
|
||
fn callback_target_or_private_predicate_combines_layers() {
|
||
// Private method → suppressed.
|
||
// Public callback target → suppressed.
|
||
// Public non-callback method → kept.
|
||
let src = "class C\n before_action :set_account\n def show; end\n def set_account; end\n private\n def helper; end\nend\n";
|
||
let (tree, bytes) = parse(src);
|
||
let body = find_class_body(tree.root_node()).expect("body");
|
||
let visibility = ruby_method_visibility(body, &bytes);
|
||
let callbacks = ruby_callback_target_names(body, &bytes);
|
||
assert!(!ruby_method_is_callback_or_private(
|
||
"show",
|
||
&visibility,
|
||
&callbacks
|
||
));
|
||
assert!(ruby_method_is_callback_or_private(
|
||
"set_account",
|
||
&visibility,
|
||
&callbacks
|
||
));
|
||
assert!(ruby_method_is_callback_or_private(
|
||
"helper",
|
||
&visibility,
|
||
&callbacks
|
||
));
|
||
}
|
||
}
|
||
|
||
#[test]
fn trpc_options_destructure_param_seeds_self_scoped_session_base() {
    use crate::auth_analysis::config::AuthAnalysisRules;
    use crate::auth_analysis::model::AuthorizationModel;

    // Cal.com-shaped TRPC handler. Its parameter is a destructured
    // options alias, and the `ctx` field's nested type literal
    // mentions `TrpcSessionUser`. `FileMeta::scan` records
    // `GetOptions` in `trpc_alias_names` (body-text marker hit);
    // `collect_trpc_ctx_param` then fires on the
    // `required_parameter` and seeds `ctx.user` into the unit's
    // `self_scoped_session_bases`.
    let typescript = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&typescript).unwrap();

    let src = br#"
type TrpcSessionUser = { id: number };
type GetOptions = {
ctx: { user: NonNullable<TrpcSessionUser> };
input: { id: number };
};
export const handleGet = async ({ ctx, input }: GetOptions) => {
return prisma.booking.findFirst({ where: { id: input.id, userId: ctx.user.id } });
};
"#;
    let tree = parser.parse(src.as_slice(), None).unwrap();
    let root = tree.root_node();

    // Step 1: the alias scan has to pick up `GetOptions`.
    let meta = super::FileMeta::scan(root, src);
    assert!(
        meta.trpc_alias_names.contains("GetOptions"),
        "trpc_alias_names missing GetOptions: {:?}",
        meta.trpc_alias_names
    );

    // Step 2: unit collection has to seed `ctx.user` for the handler.
    let rules = AuthAnalysisRules::disabled();
    let mut model = AuthorizationModel::default();
    super::collect_top_level_units(root, src, &rules, &mut model);

    let unit = model
        .units
        .iter()
        .find(|candidate| matches!(candidate.name.as_deref(), Some("handleGet")))
        .expect("handleGet unit");
    assert!(
        unit.self_scoped_session_bases.contains("ctx.user"),
        "self_scoped_session_bases missing ctx.user: {:?}",
        unit.self_scoped_session_bases
    );
}
|
||
|
||
/// Pins the JS/TS post-fetch ownership-equality recogniser added in
/// session 0011. Four things have to hold together:
///
/// * the `if_statement` arm of `collect_unit_state` dispatches to
///   `detect_ownership_equality_check` (previously only
///   `if_expression` did);
/// * the strict `!==` operator is recognised as inequality;
/// * the framework denial helper `notFound()` counts as an
///   early-exit witness;
/// * the JS/TS `variable_declarator` arm populates
///   `row_population_data`, so the synthetic `Ownership` AuthCheck
///   attributes back to the row's declaration line.
#[test]
fn detect_post_fetch_ownership_jsts_with_strict_neq_and_denial_call() {
    use crate::auth_analysis::config::AuthAnalysisRules;
    use crate::auth_analysis::model::AuthorizationModel;

    let typescript = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&typescript).unwrap();

    let src = br#"
declare class Repo { findById(id: string): Promise<{ userId: number }>; }
declare function getServerSession(): Promise<{ user?: { id: number } } | null>;
declare function notFound(): never;
export async function handleGet({ id }: { id: string }) {
const session = await getServerSession();
if (!session?.user?.id) return null;
const repo: Repo = new Repo();
const webhook = await repo.findById(id);
if (webhook.userId !== session.user.id) {
notFound();
}
return webhook;
}
"#;
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let rules = AuthAnalysisRules::disabled();
    let mut model = AuthorizationModel::default();
    super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);

    let unit = model
        .units
        .iter()
        .find(|candidate| matches!(candidate.name.as_deref(), Some("handleGet")))
        .expect("handleGet unit");

    let webhook_pop = unit
        .row_population_data
        .get("webhook")
        .expect("collect_row_population must populate `webhook` from variable_declarator");
    // The `const webhook = await repo.findById(id)` entry is meant to
    // anchor at the call site rather than the declaration. Here both
    // sit on the same source line, so that back-dating cannot be
    // observed; the check only confirms the entry exists.
    assert!(webhook_pop.0 > 0);

    let owner_check = unit
        .auth_checks
        .iter()
        .find(|check| matches!(check.kind, super::AuthCheckKind::Ownership))
        .expect("ownership-equality detector must emit an Ownership AuthCheck");
    let owner_subject = owner_check
        .subjects
        .iter()
        .find(|subject| matches!(subject.field.as_deref(), Some("userId")))
        .expect("Ownership AuthCheck must carry the owner field subject");

    assert_eq!(
        owner_subject.base.as_deref(),
        Some("webhook"),
        "owner subject base must be the row var: {:?}",
        owner_subject
    );
}
|
||
|
||
/// Pins the NextAuth Adapter factory recogniser added in session
/// 0030. For the cal.com shape `function CalComAdapter(client):
/// Adapter { return { createUser, getUser, getUserByAccount, ... } }`,
/// `body_returns_nextauth_options` must switch on so that
/// `is_nextauth_callback_unit` suppresses the missing-ownership rule
/// for the inner Adapter methods, whose operations accumulate onto
/// the outer factory's unit.
#[test]
fn nextauth_adapter_factory_flags_outer_unit() {
    use crate::auth_analysis::config::AuthAnalysisRules;
    use crate::auth_analysis::model::AuthorizationModel;

    let typescript = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&typescript).unwrap();

    let src = br#"
declare const prismaClient: any;
export default function CalComAdapter(client: any) {
return {
createUser: async (data: { email: string }) => {
const user = await prismaClient.user.create({ data });
return user;
},
getUser: async (id: string) => {
const user = await prismaClient.user.findUnique({ where: { id } });
return user;
},
async getUserByAccount(providerAccountId: { provider: string; providerAccountId: string }) {
const account = await prismaClient.account.findUnique({
where: { provider_providerAccountId: providerAccountId },
select: { user: true },
});
return account?.user ?? null;
},
createVerificationToken: async (data: any) => prismaClient.verificationToken.create({ data }),
useVerificationToken: async (identifier: any) => prismaClient.verificationToken.delete({ where: identifier }),
linkAccount: async (account: any) => prismaClient.account.create({ data: account }),
unlinkAccount: async (providerAccountId: any) => prismaClient.account.delete({ where: providerAccountId }),
};
}
"#;
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let rules = AuthAnalysisRules::disabled();
    let mut model = AuthorizationModel::default();
    super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);

    let unit = model
        .units
        .iter()
        .find(|candidate| matches!(candidate.name.as_deref(), Some("CalComAdapter")))
        .expect("CalComAdapter unit");

    assert!(
        unit.is_nextauth_options_factory,
        "Adapter factory must set is_nextauth_options_factory: {:?}",
        unit.name
    );
}
|
||
|
||
/// Negative case: a generic CRUD repo exposing `createUser` /
/// `getUser` / `updateUser` / `deleteUser`, with none of the
/// Adapter-distinctive method names, must NOT be treated as a
/// NextAuth Adapter. Without the distinctive-name gate, any plain
/// user repo would suppress missing-ownership findings.
#[test]
fn nextauth_adapter_recogniser_rejects_generic_crud_repo() {
    use crate::auth_analysis::config::AuthAnalysisRules;
    use crate::auth_analysis::model::AuthorizationModel;

    let typescript = tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&typescript).unwrap();

    let src = br#"
declare const db: any;
export function makeUserRepo() {
return {
createUser: async (data: any) => db.user.create({ data }),
getUser: async (id: string) => db.user.findUnique({ where: { id } }),
updateUser: async (id: string, data: any) => db.user.update({ where: { id }, data }),
deleteUser: async (id: string) => db.user.delete({ where: { id } }),
};
}
"#;
    let tree = parser.parse(src.as_slice(), None).unwrap();

    let rules = AuthAnalysisRules::disabled();
    let mut model = AuthorizationModel::default();
    super::collect_top_level_units(tree.root_node(), src, &rules, &mut model);

    let unit = model
        .units
        .iter()
        .find(|candidate| matches!(candidate.name.as_deref(), Some("makeUserRepo")))
        .expect("makeUserRepo unit");

    let flagged = unit.is_nextauth_options_factory;
    assert!(
        !flagged,
        "generic CRUD repo must NOT be flagged as Adapter: {:?}",
        unit.name
    );
}
|
||
}
|