mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
516 lines
21 KiB
Rust
516 lines
21 KiB
Rust
//! Cross-file FastAPI router-dependency tracking.
|
|
//!
|
|
//! FastAPI propagates `dependencies=[Security(...), Depends(...)]` declared
|
|
//! at the router level onto every route attached to that router, including
|
|
//! routes attached via cross-file `<parent>.include_router(<child>.router)`
|
|
//! lifts. The per-file router-dep collector in
|
|
//! `crate::auth_analysis::extract::flask::collect_router_level_dependencies`
|
|
//! sees only the file under analysis, so a bare child router whose auth is
|
|
//! declared on a parent router in `__init__.py` (canonical airflow shape) has
|
|
//! no visible deps. This module captures the cross-file edges + parent
|
|
//! declarations during pass 1 and resolves them into a per-child effective
|
|
//! dep map for pass 2's auth analysis.
|
|
//!
|
|
//! Storage shape: per-Python-file [`PerFileRouterFacts`] with
|
|
//! `local_router_deps` (the `<router> = X(deps=[…])` declarations
|
|
//! visible in the file) and `include_router_edges` (the
|
|
//! `<parent>.include_router(<child_module>.<child_var>, …)` calls).
|
|
//! Persisted into `crate::summary::GlobalSummaries::router_facts_by_module`
|
|
//! during pass 1 and resolved into the active file's
|
|
//! [`crate::auth_analysis::model::AuthorizationModel::cross_file_router_deps`]
|
|
//! at pass 2 entry.
|
|
//!
|
|
//! Module identity: file basename without `.py`. This is approximate (two
|
|
//! files named `task_instances.py` in different packages would collide) but
|
|
//! covers airflow-style codebases where include_router targets reference the
|
|
//! child's module name directly (`task_instances.router`). Transitive lifts
|
|
//! (`grandparent.include_router(parent); parent.include_router(child)`) are
|
|
//! resolved by walking the index iteratively at lookup time.
|
|
|
|
use crate::auth_analysis::extract::common::{
|
|
call_site_from_node, named_children, string_literal_value, text,
|
|
};
|
|
use crate::auth_analysis::model::CallSite;
|
|
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
use tree_sitter::{Node, Tree};
|
|
|
|
/// Per-file extracted router declarations + include_router edges.
|
|
/// Persisted into `GlobalSummaries.router_facts_by_module` keyed by the
|
|
/// file's [`module_id_for_path`]. Single-purpose: drives the cross-file
|
|
/// router-dep resolution at pass 2 entry.
|
|
#[derive(Debug, Clone, Default)]
|
|
pub struct PerFileRouterFacts {
|
|
/// Local router var → declared inline `dependencies=[...]` deps.
|
|
/// Mirrors `flask::collect_router_level_dependencies` output.
|
|
pub local_router_deps: HashMap<String, Vec<(CallSite, bool)>>,
|
|
/// `<parent>.include_router(<child_module>.<child_var>, ...)` edges
|
|
/// observed in this file. Each edge specifies a parent router var
|
|
/// (local to this file) and a child router identified by its
|
|
/// module_id + var name. Cross-file lookups walk these.
|
|
pub include_router_edges: Vec<RouterIncludeEdge>,
|
|
}
|
|
|
|
/// A single `<parent>.include_router(<child_module>.<child_var>, ...)`
|
|
/// edge. `parent_var` is the local variable that owns the deps to lift;
|
|
/// `child_module_id` + `child_var` together name the child router whose
|
|
/// routes inherit the parent's deps.
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct RouterIncludeEdge {
|
|
pub parent_var: String,
|
|
pub child_module_id: String,
|
|
pub child_var: String,
|
|
}
|
|
|
|
/// Translate a file path into a stable cross-file module identifier.
|
|
///
|
|
/// Currently the file's basename without the `.py` extension — sufficient
|
|
/// for the airflow shape (`from . import task_instances; …
|
|
/// authenticated_router.include_router(task_instances.router)`) where the
|
|
/// include_router target's module reference is the child file's own
|
|
/// basename. Returns `None` for files whose stem is `__init__`
|
|
/// (parent files don't need to be looked up; they emit edges only) or
|
|
/// for paths with no usable stem.
|
|
pub fn module_id_for_path(path: &Path) -> Option<String> {
|
|
let stem = path.file_stem()?.to_str()?;
|
|
if stem.is_empty() || stem == "__init__" {
|
|
return None;
|
|
}
|
|
Some(stem.to_string())
|
|
}
|
|
|
|
/// Stable storage key for the per-project router-facts index.
|
|
///
|
|
/// Uses the file's **full filesystem path** (lossy-converted to UTF-8)
|
|
/// because the only goal of the storage key is uniqueness across files
|
|
/// in a single scan. Collisions on shorter forms (file basename or
|
|
/// `<parent_dir>::__init__`) are common in real codebases — airflow
|
|
/// alone has 17 `routes/__init__.py` files spread across providers and
|
|
/// test trees, and any keying scheme that drops the path prefix would
|
|
/// have one such file silently overwrite another's `include_router`
|
|
/// edges, breaking the cross-file lift on whichever parent lost the
|
|
/// race.
|
|
///
|
|
/// The lookup side ([`crate::summary::GlobalSummaries::resolve_cross_file_router_deps`])
|
|
/// iterates every stored entry and matches child references by the
|
|
/// **last segment** ([`module_id_for_path`]) — so duplicate-basename
|
|
/// children still get every parent's deps accumulated, which is the
|
|
/// FastAPI-runtime-correct behavior. Path-based storage keys plus
|
|
/// basename-based lookup keys is the right pairing.
|
|
pub fn module_id_for_storage(path: &Path) -> Option<String> {
|
|
let s = path.to_string_lossy();
|
|
if s.is_empty() {
|
|
return None;
|
|
}
|
|
Some(s.into_owned())
|
|
}
|
|
|
|
/// Extract router-level deps + include_router edges from a Python AST.
|
|
/// Returns `None` for non-Python files; pass 1 callers must gate on the
|
|
/// file's language slug before invoking. Empty facts (no routers and no
|
|
/// edges) still return `Some(Default::default())` so callers can record
|
|
/// an empty index entry without re-extracting.
|
|
pub fn extract_router_facts_for_python(tree: &Tree, bytes: &[u8]) -> PerFileRouterFacts {
|
|
let mut facts = PerFileRouterFacts::default();
|
|
let root = tree.root_node();
|
|
collect_local_router_deps(root, bytes, &mut facts.local_router_deps);
|
|
collect_include_router_edges(root, bytes, &mut facts.include_router_edges);
|
|
facts
|
|
}
|
|
|
|
/// Walk the module root for top-level `<id> = <RouterCtor>(..., dependencies=[…])`
|
|
/// assignments, mirroring
|
|
/// [`crate::auth_analysis::extract::flask::collect_router_level_dependencies`].
|
|
/// Reimplemented here to avoid an inter-module Visibility tangle and
|
|
/// to keep this module self-contained — the router extractor is the
|
|
/// single source of truth at FlaskExtractor::extract time, this module
|
|
/// is a parallel collection path that runs in pass 1.
|
|
fn collect_local_router_deps(
|
|
root: Node<'_>,
|
|
bytes: &[u8],
|
|
out: &mut HashMap<String, Vec<(CallSite, bool)>>,
|
|
) {
|
|
for child in named_children(root) {
|
|
let assign = match child.kind() {
|
|
"expression_statement" => named_children(child).into_iter().next(),
|
|
"assignment" => Some(child),
|
|
_ => None,
|
|
};
|
|
let Some(assign) = assign else { continue };
|
|
if assign.kind() != "assignment" {
|
|
continue;
|
|
}
|
|
let Some(left) = assign.child_by_field_name("left") else {
|
|
continue;
|
|
};
|
|
if left.kind() != "identifier" {
|
|
continue;
|
|
}
|
|
let Some(right) = assign.child_by_field_name("right") else {
|
|
continue;
|
|
};
|
|
if right.kind() != "call" {
|
|
continue;
|
|
}
|
|
let Some(function) = right.child_by_field_name("function") else {
|
|
continue;
|
|
};
|
|
let function_text = text(function, bytes);
|
|
if !is_router_like_constructor(&function_text) {
|
|
continue;
|
|
}
|
|
let Some(arguments) = right.child_by_field_name("arguments") else {
|
|
continue;
|
|
};
|
|
let Some(deps_value) = keyword_argument_value(arguments, bytes, "dependencies") else {
|
|
continue;
|
|
};
|
|
let mut deps = Vec::new();
|
|
for element in named_children(deps_value) {
|
|
if let Some(unwrapped) = unwrap_depends_call(element, bytes) {
|
|
deps.push(unwrapped);
|
|
}
|
|
}
|
|
if deps.is_empty() {
|
|
continue;
|
|
}
|
|
let var_name = text(left, bytes).trim().to_string();
|
|
if var_name.is_empty() {
|
|
continue;
|
|
}
|
|
out.entry(var_name).or_insert(deps);
|
|
}
|
|
}
|
|
|
|
/// Walk every call expression in the file looking for
|
|
/// `<parent>.include_router(<child_module>.<child_var>, ...)` shapes.
|
|
/// Records `(parent_var, child_module_id, child_var)` for each. Skips
|
|
/// edges where the child reference is a bare identifier (no module
|
|
/// segment) — those would require Python import resolution to attach
|
|
/// to a specific file, beyond this single-hop basename matching.
|
|
fn collect_include_router_edges(root: Node<'_>, bytes: &[u8], out: &mut Vec<RouterIncludeEdge>) {
|
|
walk_for_include_router(root, bytes, out);
|
|
}
|
|
|
|
fn walk_for_include_router(node: Node<'_>, bytes: &[u8], out: &mut Vec<RouterIncludeEdge>) {
|
|
if node.kind() == "call"
|
|
&& let Some(edge) = parse_include_router_call(node, bytes)
|
|
{
|
|
out.push(edge);
|
|
}
|
|
for child in named_children(node) {
|
|
walk_for_include_router(child, bytes, out);
|
|
}
|
|
}
|
|
|
|
fn parse_include_router_call(node: Node<'_>, bytes: &[u8]) -> Option<RouterIncludeEdge> {
|
|
let function = node.child_by_field_name("function")?;
|
|
if function.kind() != "attribute" {
|
|
return None;
|
|
}
|
|
let attr = function.child_by_field_name("attribute")?;
|
|
if text(attr, bytes) != "include_router" {
|
|
return None;
|
|
}
|
|
let object = function.child_by_field_name("object")?;
|
|
let parent_var = match object.kind() {
|
|
"identifier" => text(object, bytes).trim().to_string(),
|
|
_ => return None,
|
|
};
|
|
if parent_var.is_empty() {
|
|
return None;
|
|
}
|
|
let arguments = node.child_by_field_name("arguments")?;
|
|
// First positional arg (skip keyword_argument children).
|
|
let first = named_children(arguments)
|
|
.into_iter()
|
|
.find(|child| child.kind() != "keyword_argument")?;
|
|
if first.kind() != "attribute" {
|
|
return None;
|
|
}
|
|
let child_attr = first.child_by_field_name("attribute")?;
|
|
let child_var = text(child_attr, bytes).trim().to_string();
|
|
if child_var.is_empty() {
|
|
return None;
|
|
}
|
|
let child_object = first.child_by_field_name("object")?;
|
|
// Use the **last segment** of a possibly-dotted module reference as
|
|
// the cross-file module id. `task_instances.router` →
|
|
// module_id="task_instances"; `pkg.task_instances.router` →
|
|
// module_id="task_instances" (last attribute segment).
|
|
let child_module_id = match child_object.kind() {
|
|
"identifier" => text(child_object, bytes).trim().to_string(),
|
|
"attribute" => {
|
|
let inner_attr = child_object.child_by_field_name("attribute")?;
|
|
text(inner_attr, bytes).trim().to_string()
|
|
}
|
|
_ => return None,
|
|
};
|
|
if child_module_id.is_empty() {
|
|
return None;
|
|
}
|
|
Some(RouterIncludeEdge {
|
|
parent_var,
|
|
child_module_id,
|
|
child_var,
|
|
})
|
|
}
|
|
|
|
fn keyword_argument_value<'tree>(
|
|
arguments: Node<'tree>,
|
|
bytes: &[u8],
|
|
name: &str,
|
|
) -> Option<Node<'tree>> {
|
|
for arg in named_children(arguments) {
|
|
if arg.kind() != "keyword_argument" {
|
|
continue;
|
|
}
|
|
let key = arg.child_by_field_name("name")?;
|
|
if text(key, bytes) == name {
|
|
return arg.child_by_field_name("value");
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Local copy of the router-constructor recogniser (parallel to
|
|
/// [`crate::auth_analysis::extract::flask::is_router_like_constructor`]
|
|
/// to avoid the visibility tangle).
|
|
fn is_router_like_constructor(callee: &str) -> bool {
|
|
let trimmed = callee.trim();
|
|
let tail = trimmed.rsplit('.').next().unwrap_or(trimmed);
|
|
if tail == "APIRouter" || tail == "FastAPI" || tail == "VersionedAPIRouter" {
|
|
return true;
|
|
}
|
|
if tail.len() > "Router".len()
|
|
&& tail.ends_with("Router")
|
|
&& tail.chars().next().is_some_and(|c| c.is_ascii_uppercase())
|
|
{
|
|
return true;
|
|
}
|
|
false
|
|
}
|
|
|
|
/// Cross-file dep-marker unwrapper. Differs from the in-file
|
|
/// [`crate::auth_analysis::extract::flask::unwrap_depends_call`] in
|
|
/// the *scoped-security* gating policy:
|
|
///
|
|
/// * **In-file** (per-route or per-router declarations visible to
|
|
/// the active file's FlaskExtractor): only `Security(callable,
|
|
/// scopes=[non-empty])` flips `scoped_security = true`. A bare
|
|
/// `Security(callable)` stays as a LoginGuard — conservative because
|
|
/// per-route bare Security is often used for login-only deps.
|
|
///
|
|
/// * **Cross-file via `include_router`** (this function, persisted
|
|
/// into the project-wide router-facts index for the cross-file lift):
|
|
/// ANY `Security(...)` marker at the parent-router level flips
|
|
/// `scoped_security = true`, regardless of explicit `scopes=[...]`.
|
|
/// Rationale: the FastAPI architectural pattern
|
|
/// `parent_router = APIRouter(dependencies=[Security(callable)])`
|
|
/// followed by `parent_router.include_router(child_router, ...)` is
|
|
/// structurally a declaration that **every route under the child
|
|
/// router is auth-protected**. Treating it as authorization (Other
|
|
/// AuthCheckKind, via the existing `inject_middleware_auth` scoped
|
|
/// promotion) is semantically correct — the developer's `Security`
|
|
/// marker placement IS the authorization signal. Bare `Depends(...)`
|
|
/// at the parent-router level is NOT promoted (it's a generic dep,
|
|
/// often a login fetcher).
|
|
fn unwrap_depends_call(node: Node<'_>, bytes: &[u8]) -> Option<(CallSite, bool)> {
|
|
if node.kind() != "call" {
|
|
return None;
|
|
}
|
|
let function = node.child_by_field_name("function")?;
|
|
let function_text = text(function, bytes);
|
|
if !is_dep_marker_callee(&function_text) {
|
|
return None;
|
|
}
|
|
let is_security = is_security_marker(&function_text);
|
|
let arguments = node.child_by_field_name("arguments")?;
|
|
let children = named_children(arguments);
|
|
let first = children
|
|
.iter()
|
|
.copied()
|
|
.find(|child| child.kind() != "keyword_argument")?;
|
|
// Cross-file scoped policy: any Security marker at parent-router
|
|
// level → scoped=true. See doc comment above for rationale.
|
|
let scoped_security = is_security;
|
|
let _ = string_literal_value;
|
|
let _ = keyword_argument_value;
|
|
match first.kind() {
|
|
"call" => Some((call_site_from_node(first, bytes), scoped_security)),
|
|
"identifier" | "attribute" | "scoped_identifier" => {
|
|
Some((call_site_from_node(first, bytes), scoped_security))
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn is_dep_marker_callee(callee: &str) -> bool {
|
|
let trimmed = callee.trim();
|
|
matches!(
|
|
trimmed,
|
|
"Depends"
|
|
| "fastapi.Depends"
|
|
| "fastapi.params.Depends"
|
|
| "Security"
|
|
| "fastapi.Security"
|
|
| "fastapi.params.Security"
|
|
)
|
|
}
|
|
|
|
fn is_security_marker(callee: &str) -> bool {
|
|
let trimmed = callee.trim();
|
|
matches!(
|
|
trimmed,
|
|
"Security" | "fastapi.Security" | "fastapi.params.Security"
|
|
)
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use tree_sitter::Parser;
|
|
|
|
fn parse_python(source: &str) -> Tree {
|
|
let mut parser = Parser::new();
|
|
parser
|
|
.set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE))
|
|
.expect("python language");
|
|
parser.parse(source, None).expect("parse")
|
|
}
|
|
|
|
#[test]
|
|
fn module_id_for_path_strips_py_extension() {
|
|
assert_eq!(
|
|
module_id_for_path(Path::new("/x/y/task_instances.py")),
|
|
Some("task_instances".into())
|
|
);
|
|
// `__init__` returns None — parent files are storage-only, not
|
|
// lookup keys.
|
|
assert_eq!(module_id_for_path(Path::new("/x/y/__init__.py")), None);
|
|
}
|
|
|
|
#[test]
|
|
fn module_id_for_storage_uses_full_path_to_avoid_basename_collisions() {
|
|
// Different `routes/__init__.py` files in different packages
|
|
// must produce DIFFERENT keys — basename / parent-dir keying
|
|
// would collide on real codebases (airflow alone has 17
|
|
// `routes/__init__.py` files across its provider tree).
|
|
let a = module_id_for_storage(Path::new(
|
|
"/x/airflow-core/src/airflow/api_fastapi/execution_api/routes/__init__.py",
|
|
))
|
|
.unwrap();
|
|
let b = module_id_for_storage(Path::new(
|
|
"/x/airflow-core/src/airflow/api_fastapi/core_api/routes/__init__.py",
|
|
))
|
|
.unwrap();
|
|
assert_ne!(a, b);
|
|
}
|
|
|
|
/// Canonical airflow shape — `routes/__init__.py` declares
|
|
/// `authenticated_router = VersionedAPIRouter(dependencies=[Security(require_auth)])`
|
|
/// and lifts every per-file child router via `include_router(...)`.
|
|
/// Pass 1 must capture both the parent's local deps and the edges
|
|
/// targeting `task_instances.router`. Cross-file Security wrappers
|
|
/// (regardless of explicit `scopes=[...]`) are flagged scoped — the
|
|
/// architectural intent of
|
|
/// `parent_router = X(dependencies=[Security(callable)])` followed by
|
|
/// `parent_router.include_router(child_router)` is auth scoping over
|
|
/// every child route. See the `unwrap_depends_call` doc comment for
|
|
/// the policy rationale.
|
|
#[test]
|
|
fn extract_router_facts_captures_parent_and_edges() {
|
|
let src = "from cadwyn import VersionedAPIRouter\n\
|
|
from fastapi import APIRouter, Security\n\
|
|
from . import task_instances, dag_runs\n\
|
|
from .security import require_auth\n\
|
|
\n\
|
|
execution_api_router = APIRouter()\n\
|
|
authenticated_router = VersionedAPIRouter(dependencies=[Security(require_auth)])\n\
|
|
\n\
|
|
authenticated_router.include_router(task_instances.router, prefix=\"/task-instances\")\n\
|
|
authenticated_router.include_router(dag_runs.router, prefix=\"/dag-runs\")\n\
|
|
execution_api_router.include_router(authenticated_router)\n";
|
|
let tree = parse_python(src);
|
|
let bytes = src.as_bytes();
|
|
let facts = extract_router_facts_for_python(&tree, bytes);
|
|
|
|
let parent_deps = facts
|
|
.local_router_deps
|
|
.get("authenticated_router")
|
|
.expect("authenticated_router deps captured");
|
|
assert_eq!(parent_deps.len(), 1);
|
|
let (site, scoped) = &parent_deps[0];
|
|
assert_eq!(site.name, "require_auth");
|
|
assert!(
|
|
*scoped,
|
|
"cross-file: any Security marker is scoped-equivalent"
|
|
);
|
|
|
|
// execution_api_router has no deps → no entry.
|
|
assert!(!facts.local_router_deps.contains_key("execution_api_router"));
|
|
|
|
// Two child include_router edges + one nested
|
|
// execution_api_router.include_router(authenticated_router) edge.
|
|
assert!(facts.include_router_edges.iter().any(|e| {
|
|
e.parent_var == "authenticated_router"
|
|
&& e.child_module_id == "task_instances"
|
|
&& e.child_var == "router"
|
|
}));
|
|
assert!(facts.include_router_edges.iter().any(|e| {
|
|
e.parent_var == "authenticated_router"
|
|
&& e.child_module_id == "dag_runs"
|
|
&& e.child_var == "router"
|
|
}));
|
|
}
|
|
|
|
/// `<parent>.include_router(<bare_var>)` — child reference is a bare
|
|
/// identifier, no module segment. Cannot resolve to a specific
|
|
/// file, so no edge is emitted. This includes the canonical
|
|
/// `execution_api_router.include_router(authenticated_router)` chain
|
|
/// where the child is a sibling router declared in the same file —
|
|
/// transitive in-file lifts are handled by the local-deps map, not
|
|
/// the cross-file edge list.
|
|
#[test]
|
|
fn extract_router_facts_skips_bare_identifier_child_refs() {
|
|
let src = "outer = APIRouter()\nouter.include_router(authenticated_router)\n";
|
|
let tree = parse_python(src);
|
|
let bytes = src.as_bytes();
|
|
let facts = extract_router_facts_for_python(&tree, bytes);
|
|
assert!(facts.include_router_edges.is_empty());
|
|
}
|
|
|
|
/// Scoped Security at the parent level (real-world airflow
|
|
/// `ti_id_router` flavor). The `scoped` flag must round-trip.
|
|
#[test]
|
|
fn extract_router_facts_picks_up_scoped_security() {
|
|
let src = "ti_id_router = VersionedAPIRouter(\n route_class=ExecutionAPIRoute,\n dependencies=[\n Security(require_auth, scopes=[\"ti:self\"]),\n ],\n)\n";
|
|
let tree = parse_python(src);
|
|
let bytes = src.as_bytes();
|
|
let facts = extract_router_facts_for_python(&tree, bytes);
|
|
let deps = facts
|
|
.local_router_deps
|
|
.get("ti_id_router")
|
|
.expect("ti_id_router deps captured");
|
|
let (_site, scoped) = &deps[0];
|
|
assert!(*scoped, "scopes=[\"ti:self\"] must mark scoped");
|
|
}
|
|
|
|
/// Cross-file `Depends(callable)` at parent-router level is NOT
|
|
/// scoped — the policy promotes only Security markers (which
|
|
/// signal authorization intent), not generic Depends (which are
|
|
/// often login fetchers). Bare `Depends(get_current_user)` lifted
|
|
/// onto a child router via `include_router` stays as a LoginGuard
|
|
/// on the child's per-route auth checks.
|
|
#[test]
|
|
fn extract_router_facts_does_not_promote_depends() {
|
|
let src = "from fastapi import APIRouter, Depends\n\
|
|
v1 = APIRouter(dependencies=[Depends(get_current_user)])\n";
|
|
let tree = parse_python(src);
|
|
let bytes = src.as_bytes();
|
|
let facts = extract_router_facts_for_python(&tree, bytes);
|
|
let deps = facts.local_router_deps.get("v1").expect("v1 deps captured");
|
|
let (_site, scoped) = &deps[0];
|
|
assert!(!*scoped, "Depends never scoped-security at cross-file lift");
|
|
}
|
|
}
|