Python fp and docs updtes (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-06-15 20:05:13 +02:00 · 2026-04-29 19:53:34 -04:00 · 2026-04-29 19:53:34 -04:00 · a438886217
commit a438886217
parent 4db0805de6
291 changed files with 9485 additions and 3851 deletions
--- a/src/auth_analysis/extract/axum.rs
+++ b/src/auth_analysis/extract/axum.rs
@ -384,8 +384,8 @@ fn classify_rocket_param(
 ///
 /// **Looser than [`super::common::is_self_actor_type_text`] by
 /// design.**  This recogniser runs only on the type of a route-bound
-/// parameter — appearing in a route handler signature is itself a
-/// strong signal — and a false positive here just over-credits the
+/// parameter, appearing in a route handler signature is itself a
+/// strong signal, and a false positive here just over-credits the
 /// route with a login guard, which is conservative w.r.t. flagging.
 /// `is_self_actor_type_text` runs on every parameter, including in
 /// non-route functions, and a false positive there suppresses
@ -625,6 +625,11 @@ pub(crate) fn inject_guard_checks(
            line,
            args: call.args.clone(),
            condition_text: None,
+            // Route-level guard injected from a tower / axum layer
+            // (`RequireAuthorizationLayer`, `axum_login::login_required!`,
+            // …).  Tells `auth_check_covers_subject` to short-circuit
+            // for any non-login-guard match.
+            is_route_level: true,
        });
    }
 }
--- a/src/auth_analysis/extract/common.rs
+++ b/src/auth_analysis/extract/common.rs
--- a/src/auth_analysis/extract/django.rs
+++ b/src/auth_analysis/extract/django.rs
@ -209,7 +209,12 @@ fn collect_class_based_routes(
        }
        let line = method_node.start_position().row + 1;
        for call in &middleware_calls {
-            if let Some(check) = auth_check_from_call_site(call, line, rules) {
+            if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
+                // Django class-based-view decorators (`@method_decorator(login_required)`,
+                // `@permission_required(...)`) and DRF `permission_classes`
+                // are declared at the route boundary; mark route-level
+                // so coverage applies to the action body's operations.
+                check.is_route_level = true;
                unit.auth_checks.push(check);
            }
        }
@ -443,7 +448,14 @@ fn inject_middleware_auth(
        return;
    };
    for call in middleware_calls {
-        if let Some(check) = auth_check_from_call_site(call, line, rules) {
+        if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
+            // Django decorators (`@login_required`, `@permission_required`,
+            // `@user_passes_test`, etc.) and DRF `permission_classes` are
+            // declared at the route boundary; mark route-level so
+            // `auth_check_covers_subject` short-circuits `true` for any
+            // non-login-guard match.  See flask.rs / model.rs for the
+            // full rationale.
+            check.is_route_level = true;
            unit.auth_checks.push(check);
        }
    }
--- a/src/auth_analysis/extract/flask.rs
+++ b/src/auth_analysis/extract/flask.rs
@ -67,6 +67,15 @@ fn maybe_collect_flask_route(
    for decorator in decorator_expressions(node) {
        if let Some(mut specs) = parse_flask_route_decorator(decorator, bytes) {
            route_specs.append(&mut specs);
+            // FastAPI puts route-level dependencies (auth checks +
+            // logging hooks) inside the route decorator's
+            // `dependencies=[Depends(...)]` keyword argument, instead
+            // of as separate `@decorator` lines like Flask.  Walk the
+            // route decorator's keyword args for that shape and lift
+            // each `Depends(call(...))` element into the
+            // middleware_calls list, so the same `inject_middleware_auth`
+            // path that Flask uses also picks up FastAPI auth deps.
+            middleware_calls.extend(extract_fastapi_dependencies(decorator, bytes));
        } else {
            middleware_calls.extend(expand_decorator_calls(decorator, bytes));
        }
@ -220,6 +229,75 @@ fn expand_decorator_calls(node: Node<'_>, bytes: &[u8]) -> Vec<CallSite> {
    vec![call_site_from_node(node, bytes)]
 }

+/// Walk the route-decorator call's keyword args looking for the FastAPI
+/// `dependencies=[Depends(call(...)), Depends(call), ...]` shape.  For
+/// each `Depends(...)` list element, extract the inner callable as a
+/// `CallSite` so it can flow through `inject_middleware_auth` and be
+/// matched against the per-language authorization-check / login-guard
+/// name lists.  Refuses non-call elements and `Depends(...)` without a
+/// recognised inner call shape.
+///
+/// The function is decoupled from Flask semantics (Flask routes never
+/// use `dependencies=`); the lookup is purely structural and matches
+/// FastAPI's documented dependency-injection convention.  Lives in the
+/// flask module because Flask's route-decorator parser already targets
+/// the `@<router>.<method>(<path>, ...)` shape that FastAPI shares.
+fn extract_fastapi_dependencies(decorator_expr: Node<'_>, bytes: &[u8]) -> Vec<CallSite> {
+    if decorator_expr.kind() != "call" {
+        return Vec::new();
+    }
+    let Some(arguments) = decorator_expr.child_by_field_name("arguments") else {
+        return Vec::new();
+    };
+    let Some(value) = keyword_argument_value(arguments, bytes, "dependencies") else {
+        return Vec::new();
+    };
+    let mut out = Vec::new();
+    for element in named_children(value) {
+        if let Some(call) = unwrap_depends_call(element, bytes) {
+            out.push(call);
+        }
+    }
+    out
+}
+
+/// Unwrap one `Depends(...)` list element from a FastAPI `dependencies`
+/// list and return the inner callable as a `CallSite`.  Three shapes
+/// are accepted:
+///   * `Depends(callee(arg1, arg2))`, most common, the inner call is
+///     the callable factory invocation; record `callee` as the auth
+///     check.
+///   * `Depends(callee)`, bare reference; record `callee` itself.
+///   * `Depends()` / non-`Depends` items, skipped.
+fn unwrap_depends_call(node: Node<'_>, bytes: &[u8]) -> Option<CallSite> {
+    if node.kind() != "call" {
+        return None;
+    }
+    let function = node.child_by_field_name("function")?;
+    let function_text = text(function, bytes);
+    if !is_depends_callee(&function_text) {
+        return None;
+    }
+    let arguments = node.child_by_field_name("arguments")?;
+    let first = named_children(arguments).into_iter().next()?;
+    match first.kind() {
+        "call" => Some(call_site_from_node(first, bytes)),
+        "identifier" | "attribute" | "scoped_identifier" => Some(call_site_from_node(first, bytes)),
+        _ => None,
+    }
+}
+
+/// True for the FastAPI `Depends` marker, including the
+/// fully-qualified `fastapi.Depends` form.  Conservative: only literal
+/// matches, no canonicalisation.
+fn is_depends_callee(callee: &str) -> bool {
+    let trimmed = callee.trim();
+    matches!(
+        trimmed,
+        "Depends" | "fastapi.Depends" | "fastapi.params.Depends"
+    )
+}
+
 fn inject_middleware_auth(
    model: &mut AuthorizationModel,
    unit_idx: usize,
@ -231,8 +309,48 @@ fn inject_middleware_auth(
        return;
    };
    for call in middleware_calls {
-        if let Some(check) = auth_check_from_call_site(call, line, rules) {
+        if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
+            // Mark as route-level: the check is declared at the route
+            // boundary (Flask `@requires_role(...)` decorator, FastAPI
+            // `dependencies=[Depends(...)]`, or any custom-router
+            // equivalent) and semantically authorizes every value the
+            // handler receives, path param, body, query, downstream
+            // row fetches, the lot.  `auth_check_covers_subject` reads
+            // `is_route_level` and short-circuits `true` for any
+            // non-login-guard match, which is the correct shape for a
+            // decorator-level guard whose inner call carries no
+            // per-arg subject ref pointing back into the handler body.
+            // LoginGuard / TokenExpiry / TokenRecipient kinds are
+            // already excluded by `has_prior_subject_auth`'s filter
+            // before they reach `auth_check_covers_subject`, so the
+            // flag is safe to set unconditionally here, it has no
+            // effect on those kinds.
+            check.is_route_level = true;
            unit.auth_checks.push(check);
        }
    }
 }
+
+#[cfg(test)]
+mod fastapi_dependencies_tests {
+    use super::is_depends_callee;
+
+    /// `is_depends_callee` only matches the FastAPI `Depends` marker.
+    /// Any other wrapper call inside `dependencies=[...]` is ignored ,
+    /// extracting an inner callee from the wrong wrapper would
+    /// misclassify logging hooks or filter callables as auth checks.
+    #[test]
+    fn is_depends_callee_recognises_canonical_forms() {
+        assert!(is_depends_callee("Depends"));
+        assert!(is_depends_callee("fastapi.Depends"));
+        assert!(is_depends_callee("fastapi.params.Depends"));
+        // Whitespace tolerance.
+        assert!(is_depends_callee(" Depends "));
+        // Negatives.
+        assert!(!is_depends_callee("Annotated"));
+        assert!(!is_depends_callee("Body"));
+        assert!(!is_depends_callee("Depends.something"));
+        assert!(!is_depends_callee("RequiresAuth"));
+        assert!(!is_depends_callee(""));
+    }
+}
--- a/src/auth_analysis/extract/mod.rs
+++ b/src/auth_analysis/extract/mod.rs
@ -61,5 +61,104 @@ pub fn extract_authorization_model(
        }
    }

+    // **Dedup units by span across extractors.**  Multiple extractors
+    // (e.g. Flask + Django on a Python file) each call
+    // `collect_top_level_units`, producing one unit per top-level
+    // function.  When one extractor also recognises a route on that
+    // function and promotes its copy to `RouteHandler` (with injected
+    // middleware auth checks), the *other* extractor's untouched
+    // `Function` copy still runs through `check_ownership_gaps` and
+    // emits the FP from a unit that never saw the middleware-derived
+    // auth check.
+    //
+    // This step keeps a single canonical unit per source span,
+    // preferring `RouteHandler` over `Function`, merging auth_checks
+    // and folding operation lists conservatively.  Route registrations
+    // are remapped to the surviving unit index.
+    deduplicate_units_by_span(&mut model);
+
    model
 }
+
+fn deduplicate_units_by_span(model: &mut AuthorizationModel) {
+    use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind};
+    use std::collections::HashMap;
+
+    // First pass: choose a winner for each span, prefer the
+    // first-seen `RouteHandler` over any `Function` copy.
+    let mut winner_by_span: HashMap<(usize, usize), usize> = HashMap::new();
+    for (idx, unit) in model.units.iter().enumerate() {
+        let key = unit.span;
+        match winner_by_span.get(&key) {
+            None => {
+                winner_by_span.insert(key, idx);
+            }
+            Some(&existing) => {
+                let prev_kind = model.units[existing].kind;
+                if prev_kind != AnalysisUnitKind::RouteHandler
+                    && unit.kind == AnalysisUnitKind::RouteHandler
+                {
+                    winner_by_span.insert(key, idx);
+                }
+            }
+        }
+    }
+
+    // Second pass: drain auth_checks from losers so we can append them
+    // to the winners after the layout collapses.
+    let mut moved_checks: Vec<Vec<crate::auth_analysis::model::AuthCheck>> =
+        Vec::with_capacity(model.units.len());
+    for old_idx in 0..model.units.len() {
+        let span = model.units[old_idx].span;
+        let winner = *winner_by_span.get(&span).unwrap_or(&old_idx);
+        if winner == old_idx {
+            moved_checks.push(Vec::new());
+        } else {
+            moved_checks.push(std::mem::take(&mut model.units[old_idx].auth_checks));
+        }
+    }
+
+    // Third pass: emit surviving units (clone the winners) and build
+    // the old-idx → new-idx remap.
+    let mut new_idx_for_old: HashMap<usize, usize> = HashMap::new();
+    let mut surviving: Vec<AnalysisUnit> = Vec::with_capacity(winner_by_span.len());
+    for old_idx in 0..model.units.len() {
+        let span = model.units[old_idx].span;
+        let winner = *winner_by_span.get(&span).unwrap_or(&old_idx);
+        if winner == old_idx {
+            new_idx_for_old.insert(old_idx, surviving.len());
+            surviving.push(model.units[old_idx].clone());
+        }
+    }
+
+    // Fourth pass: drain loser auth_checks into their winners, deduping
+    // by (span, callee).  Operations are not merged: both extractor
+    // passes recompute the same operation list from the AST, so the
+    // winner already carries the canonical set.
+    for (old_idx, checks) in moved_checks.iter_mut().enumerate() {
+        let span = model.units[old_idx].span;
+        let winner = *winner_by_span.get(&span).unwrap_or(&old_idx);
+        if winner == old_idx {
+            continue;
+        }
+        let Some(&new_winner_idx) = new_idx_for_old.get(&winner) else {
+            continue;
+        };
+        for check in checks.drain(..) {
+            let already_present = surviving[new_winner_idx]
+                .auth_checks
+                .iter()
+                .any(|existing| existing.span == check.span && existing.callee == check.callee);
+            if !already_present {
+                surviving[new_winner_idx].auth_checks.push(check);
+            }
+        }
+    }
+
+    model.units = surviving;
+    for route in &mut model.routes {
+        if let Some(&new_idx) = new_idx_for_old.get(&route.unit_idx) {
+            route.unit_idx = new_idx;
+        }
+    }
+}
--- a/src/auth_analysis/extract/rails.rs
+++ b/src/auth_analysis/extract/rails.rs
@ -137,7 +137,14 @@ fn maybe_collect_controller(
        let line = child.start_position().row + 1;
        let middleware_calls = applicable_filters(&filter_directives, &action_name);
        for call in &middleware_calls {
-            if let Some(check) = auth_check_from_call_site(call, line, rules) {
+            if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
+                // Rails `before_action :authorize_user`-style filter
+                // callbacks run before the action and authorize the
+                // entire request, same shape as FastAPI / Flask
+                // `dependencies=[Depends(...)]`.  Mark route-level so
+                // `auth_check_covers_subject` covers the row-fetches
+                // and downstream sinks the action body performs.
+                check.is_route_level = true;
                unit.auth_checks.push(check);
            }
        }
--- a/src/auth_analysis/extract/sinatra.rs
+++ b/src/auth_analysis/extract/sinatra.rs
@ -114,7 +114,13 @@ fn maybe_collect_route(
    );
    let line = block.start_position().row + 1;
    for call in before_filters {
-        if let Some(check) = auth_check_from_call_site(call, line, rules) {
+        if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
+            // Sinatra `before` filters run before the route handler
+            // body and authorize the request as a whole, same shape
+            // as Rails `before_action`.  Route-level so coverage
+            // applies to the handler's row fetches and downstream
+            // sinks.
+            check.is_route_level = true;
            unit.auth_checks.push(check);
        }
    }
--- a/src/auth_analysis/extract/spring.rs
+++ b/src/auth_analysis/extract/spring.rs
@ -111,7 +111,15 @@ fn maybe_collect_controller(
                rules,
            );
            for call in &middleware_calls {
-                if let Some(check) = auth_check_from_call_site(call, line, rules) {
+                if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
+                    // Spring `@PreAuthorize` / `@Secured` /
+                    // `@RolesAllowed` annotations are declared at the
+                    // method or class boundary and authorize the entire
+                    // request, same shape as FastAPI / Flask
+                    // `dependencies=[Depends(...)]`.  Mark route-level
+                    // so `auth_check_covers_subject` covers row fetches
+                    // and downstream sinks in the handler body.
+                    check.is_route_level = true;
                    unit.auth_checks.push(check);
                }
            }