Authorization analysis logic improvements (#61)

2026-06-15 20:05:13 +02:00 · 2026-05-02 16:44:49 -04:00 · 2026-05-02 16:44:49 -04:00 · 40995e45e7
commit 40995e45e7
parent 3c89bddbf2
55 changed files with 4193 additions and 134 deletions
--- a/src/auth_analysis/checks.rs
+++ b/src/auth_analysis/checks.rs
@ -15,11 +15,14 @@ pub struct AuthFinding {

 pub fn run_checks(model: &AuthorizationModel, rules: &AuthAnalysisRules) -> Vec<AuthFinding> {
    let mut findings = Vec::new();
+    let web_signal = model.lang_web_framework_signal;
    findings.extend(check_admin_routes(model, rules));
-    findings.extend(check_ownership_gaps(model, rules));
-    findings.extend(check_partial_batch_authorization(model, rules));
-    findings.extend(check_stale_authorization(model, rules));
-    findings.extend(check_token_override_without_validation(model, rules));
+    findings.extend(check_ownership_gaps(model, rules, web_signal));
+    findings.extend(check_partial_batch_authorization(model, rules, web_signal));
+    findings.extend(check_stale_authorization(model, rules, web_signal));
+    findings.extend(check_token_override_without_validation(
+        model, rules, web_signal,
+    ));
    findings.sort_by(|a, b| a.span.cmp(&b.span).then_with(|| a.rule_id.cmp(&b.rule_id)));
    findings.dedup_by(|a, b| a.span == b.span && a.rule_id == b.rule_id);
    findings
@ -63,11 +66,15 @@ fn check_admin_routes(model: &AuthorizationModel, rules: &AuthAnalysisRules) ->
    findings
 }

-fn check_ownership_gaps(model: &AuthorizationModel, rules: &AuthAnalysisRules) -> Vec<AuthFinding> {
+fn check_ownership_gaps(
+    model: &AuthorizationModel,
+    rules: &AuthAnalysisRules,
+    web_signal: Option<bool>,
+) -> Vec<AuthFinding> {
    let mut findings = Vec::new();

    for unit in &model.units {
-        if !unit_has_user_input_evidence(unit) {
+        if !unit_has_user_input_evidence(unit, web_signal) {
            continue;
        }
        for op in &unit.operations {
@ -115,11 +122,12 @@ fn check_ownership_gaps(model: &AuthorizationModel, rules: &AuthAnalysisRules) -
 fn check_partial_batch_authorization(
    model: &AuthorizationModel,
    rules: &AuthAnalysisRules,
+    web_signal: Option<bool>,
 ) -> Vec<AuthFinding> {
    let mut findings = Vec::new();

    for unit in &model.units {
-        if !unit_has_user_input_evidence(unit) {
+        if !unit_has_user_input_evidence(unit, web_signal) {
            continue;
        }
        for op in &unit.operations {
@ -169,11 +177,12 @@ fn check_partial_batch_authorization(
 fn check_stale_authorization(
    model: &AuthorizationModel,
    rules: &AuthAnalysisRules,
+    web_signal: Option<bool>,
 ) -> Vec<AuthFinding> {
    let mut findings = Vec::new();

    for unit in &model.units {
-        if !unit_has_user_input_evidence(unit) {
+        if !unit_has_user_input_evidence(unit, web_signal) {
            continue;
        }
        for op in unit.operations.iter().filter(|operation| {
@ -216,6 +225,7 @@ fn check_stale_authorization(
 fn check_token_override_without_validation(
    model: &AuthorizationModel,
    rules: &AuthAnalysisRules,
+    web_signal: Option<bool>,
 ) -> Vec<AuthFinding> {
    let mut findings = Vec::new();

@ -229,7 +239,7 @@ fn check_token_override_without_validation(
        // call shape happens to look token-y (`account.token = …;
        // account.save()`).  Gate on positive user-input evidence so
        // these pure backend units are never claimed as a token flow.
-        if !unit_has_user_input_evidence(unit) {
+        if !unit_has_user_input_evidence(unit, web_signal) {
            continue;
        }
        let Some(token_lookup) = unit
@ -600,6 +610,82 @@ fn is_relevant_target_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
        && !is_actor_context_subject(subject, unit)
        && !is_const_bound_subject(subject, unit)
        && !is_typed_bounded_subject(subject, unit)
+        && !is_caller_scope_entity_subject(subject, unit)
+}
+
+/// True iff `subject` is a member-access of form `<entity>.id` /
+/// `<entity>.pk` whose root identifier is a unit parameter named after
+/// a scope-bearing domain entity (`organization`, `project`, `team`,
+/// `workspace`, `tenant`, `account`, `community`, `repository`, …).
+///
+/// Such subjects are the *scope* of the operation — the ownership
+/// constraint the caller passed in — not a user-controlled target.
+/// Helpers like
+/// `def get_environments(request, organization: Organization): …
+///  Environment.objects.filter(organization_id=organization.id, …)`
+/// inherit the caller's authorization on the entity object; the call
+/// itself enforces tenant scoping.  Without this exemption, every
+/// internal helper in a multi-tenant Django/Rails/Laravel codebase
+/// flags `missing_ownership_check` because the engine cannot tell
+/// "scoping arg" from "user-targeted arg".
+///
+/// Conservative scope:
+/// * Field must be `id` or `pk` (the canonical primary-key fields).
+///   `entity.name` / `entity.slug` are deliberately excluded — those
+///   could be user-supplied display strings even on a typed entity.
+/// * Root must be exactly a unit parameter (not a derived local).
+/// * Root name must be in the scope-entity vocabulary.  Names like
+///   `user`, `member`, `actor` are deliberately omitted: those carry
+///   actor semantics and are handled separately by
+///   `is_actor_context_subject`.
+fn is_caller_scope_entity_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
+    let Some(field) = subject.field.as_deref() else {
+        return false;
+    };
+    let field_lower = field.to_ascii_lowercase();
+    if !matches!(field_lower.as_str(), "id" | "pk") {
+        return false;
+    }
+    let Some(base) = subject.base.as_deref() else {
+        return false;
+    };
+    let root = base.split('.').next().unwrap_or(base);
+    if !is_caller_scope_entity_name(root) {
+        return false;
+    }
+    unit.params.iter().any(|p| p == root)
+}
+
+/// Recognises parameter names that conventionally carry a *scope*
+/// entity — the multi-tenant ownership boundary inherited from the
+/// caller — rather than a user-controlled target identifier.  Used
+/// only by `is_caller_scope_entity_subject` to suppress
+/// `missing_ownership_check` on `<entity>.id` arguments to ORM /
+/// query / mutation calls.
+///
+/// Vocabulary matches the canonical multi-tenant primitives across
+/// Django (Sentry, Saleor), Rails (Discourse, Mastodon), and
+/// Laravel / Symfony idioms.  Both full and short forms are matched
+/// (`organization` / `org`, `repository` / `repo`).  Excluded:
+/// `user`, `member`, `actor` (actor semantics, covered by
+/// `is_actor_context_subject` and per-actor self-id detectors).
+fn is_caller_scope_entity_name(name: &str) -> bool {
+    let lower = name.to_ascii_lowercase();
+    matches!(
+        lower.as_str(),
+        "organization"
+            | "org"
+            | "project"
+            | "team"
+            | "workspace"
+            | "tenant"
+            | "account"
+            | "community"
+            | "group"
+            | "repository"
+            | "repo"
+            | "company"
+    )
 }

 /// True iff `subject` is a plain identifier whose declaration binds
@ -852,10 +938,25 @@ fn is_id_like_name(name: &str) -> bool {
 /// pure utility helpers fail all three conditions and are skipped ,
 /// they cannot, by construction, be the entry point of an
 /// authentication-bearing flow.
-fn unit_has_user_input_evidence(unit: &AnalysisUnit) -> bool {
+fn unit_has_user_input_evidence(unit: &AnalysisUnit, web_signal: Option<bool>) -> bool {
    if unit.kind == AnalysisUnitKind::RouteHandler {
        return true;
    }
+    // Project-level web-framework gate.  When the project's manifest
+    // was inspected and named no web framework matching the file's
+    // language, AND no per-file import override applied, the file
+    // lives in a project with no HTTP boundary.  Step 2 (context
+    // inputs) and step 3 (param-name heuristic) are both name-shape
+    // heuristics that overshoot in non-web Rust crates — e.g. zed's
+    // GUI test code where `session.update(cx, ...)` (a debug-session
+    // handle, not an auth session) trips `matches_session_context`
+    // and lands in `context_inputs`, opening every test method's
+    // sinks.  Refuse here, after the RouteHandler step (which is
+    // determined by framework extractors and is robust evidence on
+    // its own).
+    if web_signal == Some(false) {
+        return false;
+    }
    if !unit.context_inputs.is_empty() {
        return true;
    }
@ -934,8 +1035,9 @@ fn is_batch_collection(subject: &ValueRef) -> bool {
 #[cfg(test)]
 mod tests {
    use super::{
-        auth_check_covers_subject, is_actor_context_subject, is_external_input_param_name,
-        is_relevant_target_subject, unit_has_user_input_evidence,
+        auth_check_covers_subject, is_actor_context_subject, is_caller_scope_entity_name,
+        is_caller_scope_entity_subject, is_external_input_param_name, is_relevant_target_subject,
+        unit_has_user_input_evidence,
    };
    use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind, ValueRef, ValueSourceKind};
    use std::collections::{HashMap, HashSet};
@ -1083,6 +1185,146 @@ mod tests {
        assert!(is_relevant_target_subject(&member("req", "id"), &unit));
    }

+    /// Real-repo regression: caller-passed scope entity used as
+    /// ownership constraint (sentry api/helpers/environments.py
+    /// `get_environments(request, organization)` and
+    /// api/endpoints/organization_releases.py
+    /// `_filter_releases_by_query(queryset, organization, query, ...)`).
+    /// The helper inherits the caller's auth on the entity object;
+    /// the `<entity>.id` arg IS the ownership scope, not a target.
+    #[test]
+    fn caller_scope_entity_subject_recognises_unit_param_id() {
+        let mut unit = empty_unit();
+        unit.params.push("organization".into());
+
+        // `organization.id` where `organization` is a unit param and
+        // matches the scope-entity vocabulary -> recognised as scope.
+        assert!(is_caller_scope_entity_subject(
+            &member("organization", "id"),
+            &unit
+        ));
+        assert!(is_caller_scope_entity_subject(
+            &member("organization", "pk"),
+            &unit
+        ));
+        // Suppression flows through to `is_relevant_target_subject`.
+        assert!(!is_relevant_target_subject(
+            &member("organization", "id"),
+            &unit
+        ));
+
+        // Other scope-entity names: project, team, workspace, ...
+        let mut unit_p = empty_unit();
+        unit_p.params.push("project".into());
+        assert!(is_caller_scope_entity_subject(
+            &member("project", "id"),
+            &unit_p
+        ));
+
+        let mut unit_t = empty_unit();
+        unit_t.params.push("team".into());
+        assert!(is_caller_scope_entity_subject(
+            &member("team", "id"),
+            &unit_t
+        ));
+
+        let mut unit_w = empty_unit();
+        unit_w.params.push("workspace".into());
+        assert!(is_caller_scope_entity_subject(
+            &member("workspace", "id"),
+            &unit_w
+        ));
+
+        let mut unit_r = empty_unit();
+        unit_r.params.push("repo".into());
+        assert!(is_caller_scope_entity_subject(
+            &member("repo", "id"),
+            &unit_r
+        ));
+    }
+
+    /// Pitfall guards for `is_caller_scope_entity_subject`.
+    #[test]
+    fn caller_scope_entity_subject_does_not_overreach() {
+        // `organization` not declared as a unit param -> not exempt.
+        let unit = empty_unit();
+        assert!(!is_caller_scope_entity_subject(
+            &member("organization", "id"),
+            &unit
+        ));
+
+        // Field other than id/pk -> not exempt (could be display name).
+        let mut unit = empty_unit();
+        unit.params.push("organization".into());
+        assert!(!is_caller_scope_entity_subject(
+            &member("organization", "name"),
+            &unit
+        ));
+        assert!(!is_caller_scope_entity_subject(
+            &member("organization", "slug"),
+            &unit
+        ));
+
+        // `user.id` / `member.id` / `actor.id` are deliberately NOT
+        // recognised as scope entities (actor semantics, handled by
+        // is_actor_context_subject).  They must not be widened here.
+        let mut unit_u = empty_unit();
+        unit_u.params.push("user".into());
+        assert!(!is_caller_scope_entity_subject(
+            &member("user", "id"),
+            &unit_u
+        ));
+
+        let mut unit_m = empty_unit();
+        unit_m.params.push("member".into());
+        assert!(!is_caller_scope_entity_subject(
+            &member("member", "id"),
+            &unit_m
+        ));
+
+        // Bare identifier -> not exempt (no field).
+        let mut unit_b = empty_unit();
+        unit_b.params.push("organization".into());
+        assert!(!is_caller_scope_entity_subject(
+            &plain("organization"),
+            &unit_b
+        ));
+    }
+
+    /// Vocabulary check for `is_caller_scope_entity_name`.  Pinned so
+    /// future widening is intentional.
+    #[test]
+    fn caller_scope_entity_name_vocabulary() {
+        // Recognised scope entities.
+        for name in [
+            "organization",
+            "Organization",
+            "ORG",
+            "project",
+            "team",
+            "workspace",
+            "tenant",
+            "account",
+            "community",
+            "group",
+            "repository",
+            "repo",
+            "company",
+        ] {
+            assert!(
+                is_caller_scope_entity_name(name),
+                "expected {name} to be recognised as scope entity"
+            );
+        }
+        // Excluded (actor semantics or generic).
+        for name in ["user", "member", "actor", "request", "self", "ctx"] {
+            assert!(
+                !is_caller_scope_entity_name(name),
+                "expected {name} NOT to be recognised as scope entity"
+            );
+        }
+    }
+
    /// Hierarchy: a parameter whose
    /// static type was recovered as `Int`/`Bool` (Spring `Long userId`,
    /// Axum `Path<i64>`, FastAPI `user_id: int`) has its name added to
@ -1119,23 +1361,23 @@ mod tests {
        // Function with no params and no context_inputs (Celery task
        // shape), must NOT count as user-input-bearing.
        let mut unit = empty_unit();
-        assert!(!unit_has_user_input_evidence(&unit));
+        assert!(!unit_has_user_input_evidence(&unit, None));

        // Adding internal-typed params (apps, schema_editor, Django
        // migration RunPython callback shape) keeps the gate closed.
        unit.params.push("apps".into());
        unit.params.push("schema_editor".into());
-        assert!(!unit_has_user_input_evidence(&unit));
+        assert!(!unit_has_user_input_evidence(&unit, None));

        // pytest hook shape: (config, items), gate stays closed.
        let mut unit = empty_unit();
        unit.params.push("config".into());
        unit.params.push("items".into());
-        assert!(!unit_has_user_input_evidence(&unit));
+        assert!(!unit_has_user_input_evidence(&unit, None));

        // Adding an id-like param flips the gate open.
        unit.params.push("doc_id".into());
-        assert!(unit_has_user_input_evidence(&unit));
+        assert!(unit_has_user_input_evidence(&unit, None));

        // Token-named param flips the gate open (Express helper
        // `acceptInvitation(token, currentUser, roleOverride)`).
@ -1143,23 +1385,72 @@ mod tests {
        unit.params.push("token".into());
        unit.params.push("currentUser".into());
        unit.params.push("roleOverride".into());
-        assert!(unit_has_user_input_evidence(&unit));
+        assert!(unit_has_user_input_evidence(&unit, None));

        // Framework request-name param flips the gate open
        // (Django/Flask `def view(request, project_id):`).
        let mut unit = empty_unit();
        unit.params.push("request".into());
-        assert!(unit_has_user_input_evidence(&unit));
+        assert!(unit_has_user_input_evidence(&unit, None));

        // Axum/Actix typed-extractor convention name flips it open.
        let mut unit = empty_unit();
        unit.params.push("path".into());
-        assert!(unit_has_user_input_evidence(&unit));
+        assert!(unit_has_user_input_evidence(&unit, None));

        // RouteHandler kind always wins, regardless of params.
        let mut unit = empty_unit();
        unit.kind = AnalysisUnitKind::RouteHandler;
-        assert!(unit_has_user_input_evidence(&unit));
+        assert!(unit_has_user_input_evidence(&unit, None));
+    }
+
+    /// Web-framework signal `Some(false)` (project's manifest was
+    /// inspected and named no web framework matching the file's
+    /// language, AND no per-file import override) suppresses both
+    /// the `context_inputs` arm and the param-name arm — both are
+    /// name-shape heuristics that overshoot in non-web Rust crates
+    /// (e.g. a debug-session handle named `session` trips
+    /// `matches_session_context` and lands in `context_inputs`).
+    /// Only RouteHandler classification (step 1) survives the gate
+    /// because that flag is set by framework extractors with concrete
+    /// route-registration evidence.
+    #[test]
+    fn web_framework_signal_gates_user_input_heuristics() {
+        // Param-name arm: helper with a `<thing>_id` param in a project the
+        // auth detector confirmed has no Rust web framework.  Without
+        // the gate this would flip step 3 open and flood the rule on
+        // every desktop helper.
+        let mut unit = empty_unit();
+        unit.params.push("session_id".into());
+        assert!(unit_has_user_input_evidence(&unit, None));
+        assert!(unit_has_user_input_evidence(&unit, Some(true)));
+        assert!(!unit_has_user_input_evidence(&unit, Some(false)));
+
+        // Step 1 (RouteHandler) still wins regardless of the gate.
+        // RouteHandler kind is set by framework extractors (axum /
+        // actix_web / rocket) on concrete route-registration shapes —
+        // robust enough to bypass the project-level gate even when
+        // the manifest doesn't name the framework.
+        unit.kind = AnalysisUnitKind::RouteHandler;
+        assert!(unit_has_user_input_evidence(&unit, Some(false)));
+
+        // context_inputs arm: bare `session.foo` on a debug-session
+        // handle (not an auth session) lands in `context_inputs` via
+        // `matches_session_context`.  The gate suppresses this so
+        // non-web Rust crates don't fire on `session.update(cx, ...)`
+        // shapes from desktop test code.
+        let mut unit = empty_unit();
+        unit.context_inputs.push(ValueRef {
+            source_kind: ValueSourceKind::Session,
+            name: "session.update".into(),
+            base: Some("session".into()),
+            field: Some("update".into()),
+            index: None,
+            span: (0, 0),
+        });
+        assert!(unit_has_user_input_evidence(&unit, None));
+        assert!(unit_has_user_input_evidence(&unit, Some(true)));
+        assert!(!unit_has_user_input_evidence(&unit, Some(false)));
    }

    /// `is_external_input_param_name` covers id-, token-, and
--- a/src/auth_analysis/config.rs
+++ b/src/auth_analysis/config.rs
@ -9,6 +9,17 @@ pub struct AuthAnalysisRules {
    pub admin_path_patterns: Vec<String>,
    pub admin_guard_names: Vec<String>,
    pub login_guard_names: Vec<String>,
+    /// Typed-extractor wrapper names that carry route-level
+    /// authorization (capability/policy enforcement) rather than mere
+    /// authentication.  Match by `matches_name` (last-segment +
+    /// case-insensitive `starts_with`), so a single pattern like
+    /// `"Guarded"` covers `Guarded`, `GuardedData`, `GuardedRoute`.
+    /// Consulted only by `inject_guard_checks` for typed-extractor
+    /// route-level injection — distinct from `login_guard_names` /
+    /// `admin_guard_names` so the pattern doesn't pollute regular call
+    /// recognition (where a function like `guarded_load(..)` would
+    /// otherwise be wrongly classified as a login guard).
+    pub policy_guard_names: Vec<String>,
    pub authorization_check_names: Vec<String>,
    pub mutation_indicator_names: Vec<String>,
    pub read_indicator_names: Vec<String>,
@ -54,6 +65,7 @@ impl AuthAnalysisRules {
            admin_path_patterns: Vec::new(),
            admin_guard_names: Vec::new(),
            login_guard_names: Vec::new(),
+            policy_guard_names: Vec::new(),
            authorization_check_names: Vec::new(),
            mutation_indicator_names: Vec::new(),
            read_indicator_names: Vec::new(),
@ -353,6 +365,19 @@ impl AuthAnalysisRules {
            .any(|pattern| matches_name(name, pattern))
    }

+    /// Typed-extractor wrapper that proves the request passed a
+    /// route-level capability/policy check (e.g. meilisearch's
+    /// `GuardedData<ActionPolicy<X>, _>`).  Distinct from
+    /// `is_login_guard` because policy enforcement is more than mere
+    /// authentication: it includes the per-action permission decision
+    /// the Policy term encodes.  Used only by `inject_guard_checks`
+    /// for typed-extractor route-level injection.
+    pub fn is_policy_guard(&self, name: &str) -> bool {
+        self.policy_guard_names
+            .iter()
+            .any(|pattern| matches_name(name, pattern))
+    }
+
    pub fn is_authorization_check(&self, name: &str) -> bool {
        if self
            .authorization_check_names
@ -482,6 +507,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
                "ensure_authenticated".into(),
                "require_auth".into(),
            ],
+            policy_guard_names: Vec::new(),
            authorization_check_names: vec![
                "check_membership".into(),
                "has_membership".into(),
@ -595,6 +621,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
                "login_required".into(),
                "login_required!".into(),
            ],
+            policy_guard_names: Vec::new(),
            authorization_check_names: vec![
                "authorize".into(),
                "authorize!".into(),
@ -762,6 +789,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
                "requireAuth".into(),
                "ensureAuthenticated".into(),
            ],
+            policy_guard_names: Vec::new(),
            authorization_check_names: vec![
                "CheckMembership".into(),
                "HasMembership".into(),
@ -853,6 +881,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
                "Authenticated".into(),
                "isAuthenticated".into(),
            ],
+            policy_guard_names: Vec::new(),
            authorization_check_names: vec![
                "checkMembership".into(),
                "hasMembership".into(),
@ -951,6 +980,14 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
                "RequireLogin".into(),
                "RequireAuth".into(),
            ],
+            // `Guarded` (case-insensitive starts_with) recognises
+            // typed-extractor wrappers like meilisearch's
+            // `GuardedData<ActionPolicy<{ actions::KEYS_GET }>, _>` as
+            // route-level policy guards (capability enforcement).  The
+            // wrapper proves the request passed a permission check, so
+            // any sink in the handler is route-gated even when the
+            // engine cannot model the inner Policy term.
+            policy_guard_names: vec!["Guarded".into()],
            authorization_check_names: vec![
                "check_membership".into(),
                "has_membership".into(),
@ -1120,6 +1157,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
                "ensureAuth".into(),
                "require_login".into(),
            ],
+            policy_guard_names: Vec::new(),
            authorization_check_names: vec![
                "checkMembership".into(),
                "hasWorkspaceMembership".into(),
@ -1272,6 +1310,10 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
            &mut rules.login_guard_names,
            &lang_cfg.auth.login_guard_names,
        );
+        extend_unique(
+            &mut rules.policy_guard_names,
+            &lang_cfg.auth.policy_guard_names,
+        );
        extend_unique(
            &mut rules.authorization_check_names,
            &lang_cfg.auth.authorization_check_names,
--- a/src/auth_analysis/extract/actix_web.rs
+++ b/src/auth_analysis/extract/actix_web.rs
@ -1,7 +1,7 @@
 use super::AuthExtractor;
 use super::axum::{
-    GuardFramework, apply_aliases, dedup_call_sites, expanded_guard_call_sites,
-    guard_calls_for_handler, inject_guard_checks, rust_param_aliases,
+    GuardFramework, apply_aliases, apply_typed_extractor_guards_to_units, dedup_call_sites,
+    expanded_guard_call_sites, guard_calls_for_handler, inject_guard_checks, rust_param_aliases,
 };
 use super::common::{
    attach_route_handler, call_name, collect_top_level_units, named_children, resolve_handler_node,
@ -36,6 +36,13 @@ impl AuthExtractor for ActixWebExtractor {

        collect_top_level_units(root, bytes, rules, &mut model);
        collect_routes(root, root, bytes, path, rules, &mut model);
+        apply_typed_extractor_guards_to_units(
+            root,
+            bytes,
+            rules,
+            &mut model,
+            GuardFramework::ActixWeb,
+        );

        model
    }
--- a/src/auth_analysis/extract/axum.rs
+++ b/src/auth_analysis/extract/axum.rs
@ -35,6 +35,7 @@ impl AuthExtractor for AxumExtractor {

        collect_top_level_units(root, bytes, rules, &mut model);
        collect_routes(root, root, bytes, path, rules, &mut model);
+        apply_typed_extractor_guards_to_units(root, bytes, rules, &mut model, GuardFramework::Axum);

        model
    }
@ -391,7 +392,61 @@ fn classify_rocket_param(
 /// non-route functions, and a false positive there suppresses
 /// downstream `V.id` flagging entirely; that path uses a structural
 /// recogniser keyed on the `<PREFIX>User<SUFFIX>?` shape.
+///
+/// Recognition is **outer-wrapper based**: classify by the outermost
+/// type name only, not by substring-anywhere on the whole text.  This
+/// avoids both directions of leakage:
+/// * A bare data-only extractor like `web::Path<u64>` early-returns
+///   `None` regardless of inner type tokens (preserves existing
+///   behaviour).
+/// * A policy-bearing wrapper like
+///   `GuardedData<ActionPolicy<X>, Data<AuthController>>` is
+///   classified by the outer `GuardedData`, not by whether the inner
+///   `Data<AuthController>` happens to lowercase-contain "auth".  The
+///   wrapper proves capability enforcement → `AuthCheckKind::Other`
+///   (the route-level short-circuit in `auth_check_covers_subject`
+///   suppresses missing-ownership-check for non-LoginGuard kinds).
 fn classify_guard_type(type_text: &str) -> Option<AuthCheckKind> {
+    let outer = outermost_type_name(type_text);
+    let outer_lower = outer.to_ascii_lowercase();
+
+    // Bare data-only extractors are *not* auth-bearing regardless of
+    // their inner generic args.  Outer-name match (case-insensitive
+    // exact) — `Path<u64>` / `web::Path<...>` / `Query<X>` /
+    // `Json<X>` / `Form<X>` / `State<X>` / `Extension<X>` /
+    // `Data<X>` / `ReqData<X>`.
+    if is_data_only_extractor_outer(&outer_lower) {
+        return None;
+    }
+
+    // Policy/guard-bearing outer wrapper.  Names containing
+    // `guard` (so also `guarded`: `GuardedData`, `GuardedRoute`)
+    // signal the wrapper enforced a capability/permission check at
+    // request construction.  Distinct from `LoginGuard` because
+    // Policy enforcement is authorization, not just authentication.
+    if outer_lower.contains("guarded") || outer_lower.contains("guard") {
+        if outer_lower.contains("admin") {
+            return Some(AuthCheckKind::AdminGuard);
+        }
+        return Some(AuthCheckKind::Other);
+    }
+
+    if outer_lower.contains("admin") {
+        return Some(AuthCheckKind::AdminGuard);
+    }
+    if outer_lower.contains("user")
+        || outer_lower.contains("auth")
+        || outer_lower.contains("session")
+        || outer_lower.contains("identity")
+        || outer_lower.contains("principal")
+    {
+        return Some(AuthCheckKind::LoginGuard);
+    }
+
+    // Backwards-compat fallback: legacy whole-text substring check
+    // for unusual shapes whose outer wrapper is generic but whose
+    // qualified path still mentions an auth token.  Preserves
+    // pre-2026-05-02 behaviour for non-Guarded wrappers.
    let lower = type_text.to_ascii_lowercase();
    if is_extractor_wrapper(&lower) {
        return None;
@ -409,6 +464,49 @@ fn classify_guard_type(type_text: &str) -> Option<AuthCheckKind> {
    }
 }

+/// Outermost type name: text before the first `<`, with reference
+/// markers (`&`, `&mut`, `&'a`, etc.) and module-path prefix
+/// (`std::collections::`) stripped.  Returns the empty string for
+/// inputs that don't parse as a type.
+fn outermost_type_name(type_text: &str) -> &str {
+    let trimmed = type_text.trim();
+    let mut after_refs = trimmed;
+    loop {
+        let next = after_refs
+            .trim_start_matches('&')
+            .trim_start_matches("mut ")
+            .trim_start();
+        // Strip any single lifetime token like `'a ` after the `&`.
+        let next = if let Some(rest) = next.strip_prefix('\'') {
+            rest.split_once(' ')
+                .map(|(_, after)| after.trim_start())
+                .unwrap_or(rest)
+        } else {
+            next
+        };
+        if next == after_refs {
+            break;
+        }
+        after_refs = next;
+    }
+    let prefix = after_refs.split('<').next().unwrap_or(after_refs).trim();
+    prefix.rsplit("::").next().unwrap_or(prefix).trim()
+}
+
+/// Outer wrapper name (lowercase, exact-match) that the engine treats
+/// as a bare data-only extractor: yielding the inner type to the
+/// handler without any auth side-effect.  Matched on the outer name
+/// only so policy-bearing wrappers carrying a data extractor as one
+/// of their generic args (e.g.
+/// `GuardedData<Policy, web::Path<u64>>`) are not mis-suppressed by
+/// the inner `Path<...>`.
+fn is_data_only_extractor_outer(outer_lower: &str) -> bool {
+    matches!(
+        outer_lower,
+        "path" | "query" | "json" | "form" | "extension" | "state" | "data" | "reqdata"
+    )
+}
+
 fn classify_rocket_guard_type(
    type_text: &str,
    binding: &str,
@ -612,6 +710,14 @@ pub(crate) fn inject_guard_checks(
    for call in guard_calls {
        let kind = if rules.is_admin_guard(&call.name, &call.args) {
            AuthCheckKind::AdminGuard
+        } else if rules.is_policy_guard(&call.name) {
+            // Policy/capability-bearing typed extractor (e.g.
+            // meilisearch's `GuardedData<ActionPolicy<X>, _>`).
+            // Recorded as `Other` so the route-level short-circuit in
+            // `auth_check_covers_subject` covers any sink in the
+            // handler — the wrapper proves authorization, not just
+            // authentication.
+            AuthCheckKind::Other
        } else if rules.is_login_guard(&call.name) {
            AuthCheckKind::LoginGuard
        } else {
@ -633,3 +739,153 @@ pub(crate) fn inject_guard_checks(
        });
    }
 }
+
+/// Inject route-level guard checks into every non-`RouteHandler` unit
+/// of `model` whose function has a parameter recognised as a typed
+/// auth/policy extractor (e.g. meilisearch's `GuardedData<P, D>`).
+///
+/// Complements the route-walk path in `collect_routes`: handlers
+/// registered by attribute macros (`#[routes::path(...)]`,
+/// `#[get("/path")]`) or by external service-config builders are never
+/// matched as route registrations here, so their typed-extractor
+/// guards would otherwise never be injected and
+/// `missing_ownership_check` would fire on every id-shaped sink they
+/// contain.
+///
+/// `RouteHandler`-kind units already had their guards injected during
+/// the route walk and are skipped to avoid duplicate `AuthCheck`
+/// entries.
+///
+/// A unit is paired with its `function_item` node by exact byte span
+/// (`unit.span` against `start_byte()` / `end_byte()`).  The nodes are
+/// indexed by span once up front, so the pairing costs one hash lookup
+/// per unit instead of a scan over every function in the file.  Units
+/// with no matching node, or whose handler yields no guard calls, are
+/// left untouched.
+///
+/// NOTE(review): an earlier revision of this comment also listed
+/// `axum::extract::State<AuthCtx>` as a recognised extractor, but
+/// `is_data_only_extractor_outer` lists `"state"` as data-only —
+/// confirm which is intended before relying on that example.
+pub(crate) fn apply_typed_extractor_guards_to_units(
+    root: Node<'_>,
+    bytes: &[u8],
+    rules: &AuthAnalysisRules,
+    model: &mut crate::auth_analysis::model::AuthorizationModel,
+    framework: GuardFramework,
+) {
+    use crate::auth_analysis::model::AnalysisUnitKind;
+    use std::collections::HashMap;
+
+    let function_nodes = collect_function_definition_nodes(root);
+    let mut node_by_span = HashMap::with_capacity(function_nodes.len());
+    for node in function_nodes {
+        // Keep the first node seen for a span, which is what a
+        // front-to-back search over the collected nodes would return.
+        node_by_span
+            .entry((node.start_byte(), node.end_byte()))
+            .or_insert(node);
+    }
+
+    for unit in model.units.iter_mut() {
+        if unit.kind == AnalysisUnitKind::RouteHandler {
+            continue;
+        }
+        let Some(&handler_node) = node_by_span.get(&(unit.span.0, unit.span.1)) else {
+            continue;
+        };
+        let guard_calls = guard_calls_for_handler(handler_node, "", bytes, framework);
+        if !guard_calls.is_empty() {
+            inject_guard_checks(unit, &guard_calls, rules);
+        }
+    }
+}
+
+/// Gather every `function_item` node reachable from `root`, in the
+/// order `walk_function_definitions` appends them.
+fn collect_function_definition_nodes<'tree>(root: Node<'tree>) -> Vec<Node<'tree>> {
+    let mut found: Vec<Node<'tree>> = Vec::new();
+    walk_function_definitions(root, &mut found);
+    found
+}
+
+/// Append every `function_item` node in the subtree rooted at `node`
+/// (including `node` itself) to `out`, in pre-order.
+///
+/// `function_item` is the tree-sitter-rust kind for free / impl /
+/// trait fn definitions.  The walk continues into the children of a
+/// matched node, so functions nested inside other functions are
+/// collected as well.
+///
+/// The traversal keeps an explicit work stack instead of recursing, so
+/// its depth is not limited by the call stack on deeply nested syntax
+/// trees.
+fn walk_function_definitions<'tree>(node: Node<'tree>, out: &mut Vec<Node<'tree>>) {
+    let mut pending = vec![node];
+    while let Some(current) = pending.pop() {
+        if current.kind() == "function_item" {
+            out.push(current);
+        }
+        // Push children in reverse so the leftmost child is popped
+        // first; this reproduces the pre-order of a recursive walk.
+        let mut children: Vec<Node<'tree>> = named_children(current).into_iter().collect();
+        children.reverse();
+        pending.append(&mut children);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // `outermost_type_name` drops leading `&`, a lifetime and `mut`,
+    // any generic argument list, and the `::` module path; the empty
+    // string and a bare name come back unchanged.
+    #[test]
+    fn outermost_type_name_strips_refs_and_module_prefix() {
+        assert_eq!(outermost_type_name("GuardedData<P, D>"), "GuardedData");
+        assert_eq!(outermost_type_name("&GuardedData<P, D>"), "GuardedData");
+        assert_eq!(
+            outermost_type_name("&'a mut GuardedData<P, D>"),
+            "GuardedData"
+        );
+        assert_eq!(outermost_type_name("web::Path<u64>"), "Path");
+        assert_eq!(outermost_type_name("std::sync::Arc<Mutex<T>>"), "Arc");
+        assert_eq!(outermost_type_name(""), "");
+        assert_eq!(outermost_type_name("Bare"), "Bare");
+    }
+
+    #[test]
+    fn classify_guard_type_recognises_guarded_data_outer_wrapper() {
+        // Real meilisearch shape with both an admin-token-bearing inner
+        // type and a Data inner extractor — must classify as `Other`
+        // (route-level policy), not LoginGuard (filtered out by
+        // `has_prior_subject_auth`) and not None (over-suppression
+        // would happen if the inner `Data<>` early-return fired).
+        let kind = classify_guard_type(
+            "GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>",
+        );
+        assert_eq!(kind, Some(AuthCheckKind::Other));
+    }
+
+    #[test]
+    fn classify_guard_type_data_only_extractor_outer_returns_none() {
+        // Outer `Data<>` is a bare actix data extractor — not auth.
+        // Even though the inner type lower-cases to contain "auth",
+        // the outer-wrapper recognition correctly returns None.
+        assert_eq!(
+            classify_guard_type("Data<AuthController>"),
+            None,
+            "outer Data<> is a bare data extractor, not auth-bearing"
+        );
+        assert_eq!(classify_guard_type("web::Path<UserId>"), None);
+        assert_eq!(classify_guard_type("Json<CreateUser>"), None);
+        assert_eq!(classify_guard_type("Form<LoginForm>"), None);
+    }
+
+    // Non-generic guard type names: `LocalUserView`, `Authenticated`
+    // and `CurrentUser` classify as `LoginGuard`; `AdminUser`
+    // classifies as `AdminGuard`.
+    #[test]
+    fn classify_guard_type_preserves_existing_login_guard_recognition() {
+        assert_eq!(
+            classify_guard_type("LocalUserView"),
+            Some(AuthCheckKind::LoginGuard)
+        );
+        assert_eq!(
+            classify_guard_type("Authenticated"),
+            Some(AuthCheckKind::LoginGuard)
+        );
+        assert_eq!(
+            classify_guard_type("AdminUser"),
+            Some(AuthCheckKind::AdminGuard)
+        );
+        assert_eq!(
+            classify_guard_type("CurrentUser"),
+            Some(AuthCheckKind::LoginGuard)
+        );
+    }
+
+    #[test]
+    fn classify_guard_type_admin_guarded_takes_admin_priority() {
+        // `AdminGuard` outer wrapper has both "admin" and "guard" tokens
+        // — admin-priority rule wins inside the Guarded branch.
+        assert_eq!(
+            classify_guard_type("AdminGuard<P, D>"),
+            Some(AuthCheckKind::AdminGuard)
+        );
+        assert_eq!(
+            classify_guard_type("GuardedAdmin<X>"),
+            Some(AuthCheckKind::AdminGuard)
+        );
+    }
+
+    // An outer wrapper that is neither a guard nor a known data
+    // extractor, and the empty string, both yield no classification.
+    #[test]
+    fn classify_guard_type_unknown_outer_returns_none() {
+        assert_eq!(classify_guard_type("MyCustomWrapper<T>"), None);
+        assert_eq!(classify_guard_type(""), None);
+    }
+}
--- a/src/auth_analysis/extract/common.rs
+++ b/src/auth_analysis/extract/common.rs
@ -3455,6 +3455,33 @@ pub fn extract_value_refs(node: Node<'_>, bytes: &[u8]) -> Vec<ValueRef> {
            index: None,
            span: span(node),
        }],
+        // Keyword / named arguments: `Model.objects.filter(organization_id=org.id)`.
+        // Tree-sitter exposes a `name` child (the schema column / parameter
+        // name) and a `value` child (the actual expression).  The default
+        // recurse-all-children arm would surface `organization_id` as a
+        // bare-identifier subject, which `is_id_like_name` then flags as
+        // a scoped-identifier user-input.  But the kwarg key is the
+        // ORM/RPC schema field name, fixed at call time, never
+        // attacker-controlled.  Only the value carries a subject.
+        //
+        // This arm matches only the node kinds `keyword_argument`
+        // (e.g. Python), `keyword_arg`, `named_argument` and
+        // `named_arg` (named arguments as in PHP / C#); the value is
+        // read from the `value` field, falling back to `argument`.
+        // `pair`-shaped kwargs (JavaScript / TypeScript object
+        // properties such as prisma's `where: { id: foo }`, Ruby hash
+        // kwargs in `Model.where(field: value)`) and Go
+        // composite-literal element keys are NOT matched here.
+        "keyword_argument"
+        | "keyword_arg"
+        | "named_argument"
+        | "named_arg" => {
+            if let Some(value) = node
+                .child_by_field_name("value")
+                .or_else(|| node.child_by_field_name("argument"))
+            {
+                extract_value_refs(value, bytes)
+            } else {
+                Vec::new()
+            }
+        }
        _ => {
            let mut refs = Vec::new();
            for idx in 0..node.named_child_count() {
--- a/src/auth_analysis/extract/flask.rs
+++ b/src/auth_analysis/extract/flask.rs
@ -127,6 +127,9 @@ fn parse_flask_route_decorator(
    };

    let callee = text(function, bytes);
+    if callee_is_test_decorator(&callee) {
+        return None;
+    }
    let method_name = bare_method_name(&callee);
    let arguments = decorator_expr.child_by_field_name("arguments")?;
    let args = named_children(arguments);
@ -173,6 +176,45 @@ fn parse_methods_keyword(arguments: Node<'_>, bytes: &[u8]) -> Option<Vec<HttpMe
    }
 }

+/// Reports whether `callee` is exactly one of the known Python
+/// test-framework decorator callees in the table below.
+///
+/// These decorators can collide with the Flask `<app>.<verb>` shape.
+/// `unittest.mock.patch` is the dominant case: it takes a string
+/// literal as its first positional arg (the import path of the thing
+/// being patched), and `bare_method_name("mock.patch")` is `patch`,
+/// which `parse_flask_route_decorator` previously matched as HTTP
+/// PATCH.  Every test method decorated with `@mock.patch("...")` was
+/// therefore attached as a Flask route handler, which flipped its
+/// `unit.kind` to `RouteHandler`, made it pass
+/// `unit_has_user_input_evidence` unconditionally, and flooded pytest
+/// suites with `missing_ownership_check` findings.
+///
+/// The table mirrors common mock / monkeypatch / parametrize forms and
+/// is intentionally conservative: only these canonical receiver chains
+/// match, byte for byte.
+///
+/// Known limitation: an aliased import (`from unittest.mock import
+/// patch`, then bare `@patch("x")`) is not recognised here and can
+/// still be read as PATCH.  That is accepted because the decorator
+/// must also carry a string-literal first arg and the attached unit
+/// must still produce a finding in the auth analysis; string-arg
+/// decorators are rare outside Flask itself, and the wider
+/// precondition path covers most of the remainder.
+fn callee_is_test_decorator(callee: &str) -> bool {
+    const TEST_DECORATOR_CALLEES: [&str; 13] = [
+        "mock.patch",
+        "mock.patch.object",
+        "mock.patch.dict",
+        "mock.patch.multiple",
+        "unittest.mock.patch",
+        "unittest.mock.patch.object",
+        "unittest.mock.patch.dict",
+        "unittest.mock.patch.multiple",
+        "monkeypatch.setattr",
+        "monkeypatch.setenv",
+        "monkeypatch.delattr",
+        "monkeypatch.delenv",
+        "pytest.mark.parametrize",
+    ];
+    TEST_DECORATOR_CALLEES.contains(&callee)
+}
+
 fn keyword_argument_string(arguments: Node<'_>, bytes: &[u8], name: &str) -> Option<String> {
    let value = keyword_argument_value(arguments, bytes, name)?;
    string_literal_value(value, bytes)
@ -331,6 +373,41 @@ fn inject_middleware_auth(
    }
 }

+#[cfg(test)]
+mod test_decorator_tests {
+    use super::callee_is_test_decorator;
+
+    /// Test-framework decorators that share their bare method name with
+    /// a Flask HTTP verb (`patch`, `delete`, ...) must be excluded
+    /// from `parse_flask_route_decorator`.  Without the denylist,
+    /// every `@mock.patch("module")` in pytest test files attaches
+    /// the test method as a Flask PATCH route handler — flooding the
+    /// auth-analysis with FPs.
+    ///
+    /// Every denylist entry is checked, so dropping one from
+    /// `callee_is_test_decorator` fails this test.  The negatives pin
+    /// that real Flask-style decorators still pass through, and that
+    /// the match is exact: a bare aliased `patch` is (by documented
+    /// design) not treated as a test decorator.
+    #[test]
+    fn callee_is_test_decorator_recognises_canonical_forms() {
+        let test_decorators = [
+            // unittest.mock variants.
+            "mock.patch",
+            "mock.patch.object",
+            "mock.patch.dict",
+            "mock.patch.multiple",
+            "unittest.mock.patch",
+            "unittest.mock.patch.object",
+            "unittest.mock.patch.dict",
+            "unittest.mock.patch.multiple",
+            // pytest fixtures / markers.
+            "monkeypatch.setattr",
+            "monkeypatch.setenv",
+            "monkeypatch.delattr",
+            "monkeypatch.delenv",
+            "pytest.mark.parametrize",
+        ];
+        for callee in test_decorators {
+            assert!(
+                callee_is_test_decorator(callee),
+                "`{callee}` should be recognised as a test decorator"
+            );
+        }
+
+        let not_test_decorators = [
+            // Real Flask decorators must still match as routes.
+            "app.route",
+            "app.get",
+            "app.post",
+            "app.patch",
+            "bp.delete",
+            "blueprint.put",
+            "router.get",
+            // Exact-match only: aliased bare import and empty text.
+            "patch",
+            "",
+        ];
+        for callee in not_test_decorators {
+            assert!(
+                !callee_is_test_decorator(callee),
+                "`{callee}` must not be treated as a test decorator"
+            );
+        }
+    }
+}
+
 #[cfg(test)]
 mod fastapi_dependencies_tests {
    use super::is_depends_callee;
--- a/src/auth_analysis/extract/mod.rs
+++ b/src/auth_analysis/extract/mod.rs
@ -1,6 +1,6 @@
 use super::config::AuthAnalysisRules;
 use super::model::AuthorizationModel;
-use crate::utils::project::FrameworkContext;
+use crate::utils::project::{FrameworkContext, rust_file_imports_web_framework};
 use std::path::Path;
 use tree_sitter::Tree;

@ -61,6 +61,18 @@ pub fn extract_authorization_model(
        }
    }

+    // Per-language web-framework signal used to gate the param-name arm
+    // of `unit_has_user_input_evidence`.  Combines the project-root
+    // manifest detection (`framework_ctx`) with a per-file `use`/`import`
+    // check, so a single file in a workspace whose root manifest does
+    // not name a web framework can still opt back in by directly
+    // importing one (e.g. `crates/collab/src/rpc.rs` in zed: workspace
+    // root has no axum, but the file uses `axum::Router`).
+    //
+    // Three-valued: `Some(true)` keeps the param-name arm firing,
+    // `Some(false)` suppresses it, and `None` means no detection ran,
+    // so behavior is unchanged.
+    model.lang_web_framework_signal = compute_web_framework_signal(lang, framework_ctx, bytes);
+
    // **Dedup units by span across extractors.**  Multiple extractors
    // (e.g. Flask + Django on a Python file) each call
    // `collect_top_level_units`, producing one unit per top-level
@ -80,6 +92,53 @@ pub fn extract_authorization_model(
    model
 }

+/// Derive the per-file web-framework signal that gates the
+/// param-name arm of `unit_has_user_input_evidence`.
+///
+/// Only Rust files (`lang` of `"rust"` or `"rs"`) ever get a
+/// non-`None` value.  The Rust auth analysis is the single biggest
+/// source of internal-helper FPs in non-web crates (zed's GUI / editor
+/// crates); the other languages have their own handler-classification
+/// policies that already filter effectively, so they keep their
+/// existing behavior (`None`, falling through to the param-name
+/// heuristic) until each is validated.
+///
+/// Result, in order of precedence:
+/// * `Some(true)` - the project root manifest names a Rust web
+///   framework (axum / actix_web / rocket), or the file directly
+///   imports one.  Param-name evidence stays on.  The per-file check
+///   exists because a workspace root manifest may not list the crate
+///   (real example: zed's `crates/collab/src/rpc.rs` imports axum but
+///   the workspace root Cargo.toml does not).
+/// * `Some(false)` - the project root manifest was inspected
+///   (Cargo.toml exists) and named no Rust web framework, and the file
+///   does not directly import one.  Param-name evidence is suppressed:
+///   the project has no HTTP boundary in Rust.
+/// * `None` - no detection ran (no `framework_ctx`, no Cargo.toml
+///   inspected) and the file imports no framework.  Single-file scans
+///   without project context land here and keep prior behavior.
+fn compute_web_framework_signal(
+    lang: &str,
+    framework_ctx: Option<&FrameworkContext>,
+    bytes: &[u8],
+) -> Option<bool> {
+    if lang != "rust" && lang != "rs" {
+        return None;
+    }
+
+    // What the project-level manifest detection concluded, if it ran.
+    let manifest_verdict = match framework_ctx {
+        Some(ctx) => ctx.lang_has_web_framework("rust"),
+        None => None,
+    };
+
+    match manifest_verdict {
+        Some(true) => Some(true),
+        // Manifest said "no" or was never inspected: the file's own
+        // imports can still opt it back in.
+        _ if rust_file_imports_web_framework(bytes) => Some(true),
+        // No file-level evidence either; pass the manifest verdict
+        // through so only an inspected manifest yields `Some(false)`.
+        undecided => undecided,
+    }
+}
+
 fn deduplicate_units_by_span(model: &mut AuthorizationModel) {
    use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind};
    use std::collections::HashMap;
--- a/src/auth_analysis/model.rs
+++ b/src/auth_analysis/model.rs
@ -348,6 +348,20 @@ pub struct RouteRegistration {
 pub struct AuthorizationModel {
+    /// Route registrations.  Each entry carries a `unit_idx` that is
+    /// rebased when another model is merged in, so it presumably
+    /// indexes into `units` (confirm against `RouteRegistration`).
    pub routes: Vec<RouteRegistration>,
+    /// Analysis units; each has a `kind` (see `AnalysisUnitKind`, e.g.
+    /// `RouteHandler`) and a byte `span`.
    pub units: Vec<AnalysisUnit>,
+    /// Per-language web-framework presence signal used to gate the
+    /// `is_external_input_param_name` arm of `unit_has_user_input_evidence`.
+    ///
+    /// `None` means detection did not run (single-file unit-test paths,
+    /// languages without a framework gate yet).  `Some(true)` means the
+    /// project manifest or the file's imports name a web framework that
+    /// matches this language ─ helper functions are plausibly reachable
+    /// from a route handler, so the param-name heuristic stays on.
+    /// `Some(false)` means detection ran and named no matching framework
+    /// ─ the file lives in a project with no HTTP boundary, so internal
+    /// helper params named `*_id` / `req` / `payload` are not user input.
+    ///
+    /// Currently set only for Rust by `extract_authorization_model`.
+    pub lang_web_framework_signal: Option<bool>,
 }

 impl AuthorizationModel {
@ -359,5 +373,22 @@ impl AuthorizationModel {
                route.unit_idx += unit_offset;
                route
            }));
+        // Take the strongest signal across extractor outputs: `Some(true)`
+        // wins over `Some(false)` wins over `None`.  In practice every
+        // extractor for a given file sees the same `framework_ctx + bytes`
+        // so they all derive identical signals; this is just a defensive
+        // merge.
+        self.lang_web_framework_signal = max_signal(
+            self.lang_web_framework_signal,
+            other.lang_web_framework_signal,
+        );
+    }
+}
+
+/// Merge two web-framework signals, keeping the stronger one:
+/// `Some(true)` beats `Some(false)`, which beats `None`.
+///
+/// That precedence is exactly the standard ordering on `Option<bool>`
+/// (`None < Some(false) < Some(true)`), so the result is simply the
+/// larger of the two values.
+fn max_signal(a: Option<bool>, b: Option<bool>) -> Option<bool> {
+    if a >= b {
+        a
+    } else {
+        b
    }
 }