Python fp and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics

* feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks

* feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks

* refactor: Simplify code formatting for better readability in multiple files

* refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration

* feat: Update Java and Python patterns to include new security rules

* refactor: Improve comment clarity and consistency across multiple Rust files

* refactor: Simplify code formatting for improved readability in integration tests and module files

* refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
Eli Peter 2026-04-29 19:53:34 -04:00 committed by GitHub
parent 4db0805de6
commit a438886217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
291 changed files with 9485 additions and 3851 deletions

View file

@ -220,7 +220,7 @@ fn check_token_override_without_validation(
let mut findings = Vec::new();
for unit in &model.units {
// The rule reasons about "Token acceptance flow" by
// The rule reasons about "Token acceptance flow", by
// construction, that is a user-facing handler that receives a
// token from the client and writes through token-bound state.
// Internal helpers, Celery / cron tasks, Django migrations,
@ -335,15 +335,12 @@ fn has_prior_subject_auth(
})
}
/// Phase A4 row-fetch exemption.
/// Row-fetch exemption.
///
/// Recognises the canonical "fetch-then-authorize" idiom in row-level
/// authz code: a route handler fetches a row by id (`let community =
/// Community::read(pool, data.community_id)?`), then calls a named
/// authorization function on the fetched row (`check_community_user_action(
/// &user, &community, ...)`). The authorization check appears
/// textually after the fetch, so the existing `check.line <= op.line`
/// rule cannot cover the fetch.
/// Recognises the "fetch-then-authorize" idiom: a handler fetches a
/// row by id then calls a named authorization function on it. The
/// check appears textually after the fetch, so the
/// `check.line <= op.line` rule cannot cover the fetch.
///
/// The exemption fires only when:
/// 1. `op` is the row-fetch operation itself (line == row let-line).
@ -353,7 +350,7 @@ fn has_prior_subject_auth(
/// Coverage is intentionally narrow: only the row-fetch operation is
/// exempted. Any sink that runs *between* the fetch and the check
/// (e.g. `delete(community)` before `check_*`) still flags, because
/// its subject is `community` itself — not a fetch arg — and we
/// its subject is `community` itself, not a fetch arg, and we
/// require the operation to be a row-fetch site to apply the
/// exemption.
fn has_row_fetch_exemption(unit: &AnalysisUnit, op: &SensitiveOperation) -> bool {
@ -374,8 +371,8 @@ fn has_row_fetch_exemption(unit: &AnalysisUnit, op: &SensitiveOperation) -> bool
// Look for any non-login auth check whose subjects mention the row.
// Match against the *root* of the subject's chain (`a.b.c` → `a`)
// so an auth check on a row's nested field e.g.
// `is_mod_or_admin(pool, &user, comment_view.community.id)`
// so an auth check on a row's nested field, e.g.
// `is_mod_or_admin(pool, &user, comment_view.community.id)`,
// still names the row var.
unit.auth_checks.iter().any(|check| {
if matches!(
@ -425,6 +422,32 @@ fn has_prior_collection_auth(
}
fn auth_check_covers_subject(check: &AuthCheck, subject: &ValueRef, unit: &AnalysisUnit) -> bool {
// **Route-level guard short-circuit.**
//
// A check declared at the route boundary (Flask `@requires_role`,
// FastAPI `dependencies=[Depends(requires_access_dag(method=
// "POST", access_entity=DagAccessEntity.RUN))]`, Django
// `@permission_required`, Spring `@PreAuthorize`, Rails
// `before_action :authorize`, axum `RequireAuthorizationLayer`)
// gates the entire handler. The decorator / dependency call is
// opaque to the engine, the inner `requires_access_dag` carries
// no per-arg `ValueRef` pointing back into the handler body, so
// the per-name subject coverage walk below cannot match it. The
// structural shape, however, is unambiguous: every value the
// handler receives, every row it fetches, and every sink it
// calls runs after the route-level check has decided
// authorization.
//
// `has_prior_subject_auth` already filters out
// `LoginGuard` / `TokenExpiry` / `TokenRecipient` kinds before
// calling this helper (login alone proves identity, not
// authorization), so by the time we land here the kind is
// `Other` / `Membership` / `Ownership` / `AdminGuard`, i.e. an
// authorization-bearing decorator-level check. Returning `true`
// unconditionally for those is the correct semantics.
if check.is_route_level {
return true;
}
let subject_key = canonical_subject_name(subject);
let subject_related_base = related_subject_base(subject);
// A2 + B3: walk the row-binding chain from this subject so a
@ -447,7 +470,7 @@ fn auth_check_covers_subject(check: &AuthCheck, subject: &ValueRef, unit: &Analy
// check authorizes the resulting row (e.g. `check_community_user_action(
// &user, &community, ..)` after `let community = Community::read(
// pool, data.community_id)`), the check materially covers
// `data.community_id` too it gated access to the row that was
// `data.community_id` too, it gated access to the row that was
// fetched using that id, so any subsequent operation re-using the
// same id (read of a related view, mutation on the row itself) is
// within the scope of that authorization.
@ -527,7 +550,7 @@ fn auth_check_covers_subject(check: &AuthCheck, subject: &ValueRef, unit: &Analy
/// to recover every ancestor row binding name. Cycle-safe via a
/// visited set; depth-bounded at 16 hops to keep the worst case
/// trivial. Returns a vec containing `start` followed by each
/// ancestor empty when `start` is empty.
/// ancestor, empty when `start` is empty.
fn row_binding_chain(unit: &AnalysisUnit, start: &str) -> Vec<String> {
let mut chain: Vec<String> = Vec::new();
if start.is_empty() {
@ -583,7 +606,7 @@ fn is_relevant_target_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
/// it to a literal constant (`id := "id"`, `let userId = 1`, etc.).
/// Such bindings cannot be user-controlled and so must not be
/// classified as scoped-identifier subjects. Only matches plain
/// `Identifier`-kind subjects (no base/field) member chains like
/// `Identifier`-kind subjects (no base/field), member chains like
/// `req.params.id` still pass through to the regular checks.
fn is_const_bound_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
if subject.base.is_some() || subject.field.is_some() {
@ -594,22 +617,22 @@ fn is_const_bound_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
/// True iff `subject` is a plain identifier that resolves to a
/// function parameter whose static type is a payload-incompatible
/// scalar (numeric or boolean see [`super::apply_typed_bounded_params`]).
/// scalar (numeric or boolean, see [`super::apply_typed_bounded_params`]).
/// Spring `@PathVariable Long userId`, Axum `Path<i64>`, NestJS
/// `@Param('id') id: number`, and FastAPI `user_id: int` all qualify.
///
/// Phase 6: also matches member-access subjects like `dto.userId`
/// Also matches member-access subjects like `dto.userId`
/// when `dto` is a typed-extractor parameter recognised by a Phase
/// 1-2 matcher AND the field's declared TypeKind is Int/Bool.
fn is_typed_bounded_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
if subject.base.is_none() && subject.field.is_none() {
return unit.typed_bounded_vars.contains(&subject.name);
}
// Phase 6: member-access shape `base.field` whose `base` is a
// member-access shape `base.field` whose `base` is a
// typed-extractor parameter and whose field is declared as an
// Int/Bool in the same-file DTO definition. Per Hard Rule 3,
// only fires when the base param itself was recognised by a
// Phase 1-2 matcher — bare `dto.age` without a framework gate
// typed-extractor matcher, bare `dto.age` without a framework gate
// never lifts.
let Some(base) = subject.base.as_deref() else {
return false;
@ -645,7 +668,7 @@ fn is_actor_context_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
// A3: `V.id`-shape subjects where `V` is bound from a login-guard /
// auth-check call (or from a typed self-actor extractor parameter)
// are the caller's own id. `V.group_id` / `V.workspace_id` stay
// relevant only self-identifier fields trip this branch, so
// relevant, only self-identifier fields trip this branch, so
// foreign scoped ids on the same actor binding still flag.
if let Some(base) = subject.base.as_deref() {
let root = base.split('.').next().unwrap_or(base);
@ -657,7 +680,7 @@ fn is_actor_context_subject(subject: &ValueRef, unit: &AnalysisUnit) -> bool {
}
// Transitive copy of `V.id`: `let uid = user.id; query(.., &[uid])`
// the subject `uid` is a plain identifier with no base/field, but
// Here the subject `uid` is a plain identifier with no base/field, but
// was recorded as a self-actor id copy at extract time. Treat it
// as actor context.
if unit.self_actor_id_vars.contains(&subject.name) {
@ -810,15 +833,15 @@ fn is_id_like_name(name: &str) -> bool {
}
/// True when the analysis unit shows positive evidence of receiving
/// user-controlled input the precondition for any auth rule that
/// user-controlled input, the precondition for any auth rule that
/// reasons about "scoped identifier" or "token-acceptance flow"
/// shapes.
///
/// A unit qualifies if any of the following hold:
/// * It is a recognised framework route handler (`RouteHandler`
/// * It is a recognised framework route handler (`RouteHandler`,
/// the strongest signal: registered with a router).
/// * It accesses a request-shaped value (`request.body`, `req.params`,
/// `c.Query(..)`, etc.) populated as `context_inputs`.
/// `c.Query(..)`, etc.), populated as `context_inputs`.
/// * It declares at least one parameter whose name signals an
/// externally-supplied value (id-like, token-like, request-like).
/// Internal helpers that take only typed objects
@ -826,7 +849,7 @@ fn is_id_like_name(name: &str) -> bool {
/// `items`) are excluded.
///
/// Migrations, Celery tasks, pytest fixtures, conftest hooks, and
/// pure utility helpers fail all three conditions and are skipped
/// pure utility helpers fail all three conditions and are skipped;
/// they cannot, by construction, be the entry point of an
/// authentication-bearing flow.
fn unit_has_user_input_evidence(unit: &AnalysisUnit) -> bool {
@ -843,7 +866,7 @@ fn unit_has_user_input_evidence(unit: &AnalysisUnit) -> bool {
/// as part of its calling contract? Captures three classes of name:
/// * id-like (`*_id`, `*Id`, `id`, `*Ids`),
/// * token-like (`token`, `*_token`, `accessToken`),
/// * framework-request objects (`request`, `req`, `ctx` the
/// * framework-request objects (`request`, `req`, `ctx`, the
/// standard names used by Express/Django/Flask/Gin/Axum/NestJS
/// handlers as the parameter that carries the HTTP request).
///
@ -851,12 +874,26 @@ fn unit_has_user_input_evidence(unit: &AnalysisUnit) -> bool {
/// functions that, while not registered as route handlers, are
/// clearly invoked with caller-supplied identifiers or request data.
fn is_external_input_param_name(name: &str) -> bool {
// Pytest / unittest.mock convention: parameters injected by
// `@mock.patch(...)` decorators are universally named
// `mock_<thing>` (`mock_project_id`, `mock_session`,
// `mock_user_id`). Their values are MagicMock instances created
// by the test framework, not user-supplied input, even when the
// suffix carries an id-shaped tail. Refusing the entire `mock_`
// prefix is structural (mirrors pytest's documented convention)
// and closes the airflow `tests/unit/google/cloud/hooks/`
// cluster where every test method takes
// `(self, get_conn, mock_project_id)` and the suffix tripped the
// id-like heuristic.
if name.starts_with("mock_") || name.starts_with("mocked_") {
return false;
}
if is_id_like_name(name) {
return true;
}
let lower = name.to_ascii_lowercase();
// Token-shaped: bare `token` or any `*_token` / `*Token` /
// `accessToken` / `refreshToken`-style suffix. Conservative —
// `accessToken` / `refreshToken`-style suffix. Conservative:
// only fires on explicit token-naming, not on incidental
// substrings.
if lower == "token" || lower.ends_with("_token") || lower.ends_with("token") {
@ -951,7 +988,7 @@ mod tests {
assert!(is_actor_context_subject(&member("user", "uid"), &unit));
// Pitfall guard: `user.group_id` / `user.workspace_id` stay
// relevant only self-identifier fields trip the widening.
// relevant, only self-identifier fields trip the widening.
assert!(!is_actor_context_subject(
&member("user", "group_id"),
&unit
@ -962,7 +999,7 @@ mod tests {
));
// Variables not in self_actor_vars fall back to the existing
// identity-key match `target.id` still flags.
// identity-key match, `target.id` still flags.
assert!(!is_actor_context_subject(&member("target", "id"), &unit));
}
@ -1036,7 +1073,7 @@ mod tests {
assert!(!is_relevant_target_subject(&plain("id"), &unit));
// Plain `id` NOT in the const-bound set still flags as
// relevant regression guard for the user-controlled case.
// relevant, regression guard for the user-controlled case.
let unit2 = empty_unit();
assert!(is_relevant_target_subject(&plain("id"), &unit2));
@ -1046,12 +1083,12 @@ mod tests {
assert!(is_relevant_target_subject(&member("req", "id"), &unit));
}
/// Phase 5 typed-bounded subject exclusion: a parameter whose
/// Typed-bounded subject exclusion: a parameter whose
/// static type was recovered as `Int`/`Bool` (Spring `Long userId`,
/// Axum `Path<i64>`, FastAPI `user_id: int`) has its name added to
/// `unit.typed_bounded_vars` by `apply_typed_bounded_params`. The
/// subject `userId` then must not be classified as a scoped
/// identifier the framework guarantees the value is numeric and
/// identifier, the framework guarantees the value is numeric and
/// cannot drive ownership-bypass.
#[test]
fn typed_bounded_plain_subjects_are_not_relevant() {
@ -1066,7 +1103,7 @@ mod tests {
assert!(is_relevant_target_subject(&plain("user_id"), &unit2));
// Member access `req.user_id` is unaffected (only plain
// identifiers are exempted fields/base remain regular
// identifiers are exempted, fields/base remain regular
// subjects so DTO-shape leaks still flag).
unit.typed_bounded_vars.insert("req".into());
assert!(is_relevant_target_subject(&member("req", "user_id"), &unit));
@ -1080,17 +1117,17 @@ mod tests {
#[test]
fn unit_user_input_evidence_recognises_external_inputs() {
// Function with no params and no context_inputs (Celery task
// shape) must NOT count as user-input-bearing.
// shape), must NOT count as user-input-bearing.
let mut unit = empty_unit();
assert!(!unit_has_user_input_evidence(&unit));
// Adding internal-typed params (apps, schema_editor Django
// Adding internal-typed params (apps, schema_editor: the Django
// migration RunPython callback shape) keeps the gate closed.
unit.params.push("apps".into());
unit.params.push("schema_editor".into());
assert!(!unit_has_user_input_evidence(&unit));
// pytest hook shape: (config, items) gate stays closed.
// pytest hook shape: (config, items), gate stays closed.
let mut unit = empty_unit();
unit.params.push("config".into());
unit.params.push("items".into());
@ -1161,14 +1198,22 @@ mod tests {
assert!(!is_external_input_param_name("manager"));
// `c` alone is too common as a local variable to count.
assert!(!is_external_input_param_name("c"));
// Pytest / unittest.mock fixture-injected mocks: `mock_<x>` /
// `mocked_<x>` names are MagicMock instances, not user input,
// even when the suffix (`mock_project_id`) is id-shaped.
assert!(!is_external_input_param_name("mock_project_id"));
assert!(!is_external_input_param_name("mock_session"));
assert!(!is_external_input_param_name("mock_user_id"));
assert!(!is_external_input_param_name("mocked_request"));
assert!(!is_external_input_param_name("mocked_token"));
}
/// Phase A4 row-fetch exemption.
/// Row-fetch exemption.
///
/// Row var declared at line 10; auth check naming the row appears
/// at line 20. An operation at line 10 (the fetch) is exempted
/// because the auth check authorises the resulting row. Coverage
/// is intentionally narrow — operations between fetch (10) and
/// is intentionally narrow, operations between fetch (10) and
/// check (20) that are NOT row-fetch sites must still flag.
#[test]
fn row_fetch_exemption_covers_fetch_when_check_names_row() {
@ -1192,6 +1237,7 @@ mod tests {
line: 20,
args: Vec::new(),
condition_text: None,
is_route_level: false,
});
let fetch_op = SensitiveOperation {
@ -1206,7 +1252,7 @@ mod tests {
assert!(has_row_fetch_exemption(&unit, &fetch_op));
// Operation at a different line (between fetch and check) is
// NOT a row-fetch site exemption does not apply.
// NOT a row-fetch site, exemption does not apply.
let mid_op = SensitiveOperation {
kind: OperationKind::Mutation,
sink_class: None,
@ -1229,7 +1275,7 @@ mod tests {
"community".to_string(),
(10, vec![member("data", "community_id")]),
);
// No auth check pushed exemption must NOT apply.
// No auth check pushed, exemption must NOT apply.
let fetch_op = SensitiveOperation {
kind: OperationKind::Read,
@ -1256,7 +1302,7 @@ mod tests {
(10, vec![member("data", "community_id")]),
);
// Login-only check on the row should NOT exempt the row-fetch
// login proves identity, not authorization.
// because login proves identity, not authorization.
unit.auth_checks.push(AuthCheck {
kind: AuthCheckKind::LoginGuard,
callee: "require_login".into(),
@ -1265,6 +1311,7 @@ mod tests {
line: 20,
args: Vec::new(),
condition_text: None,
is_route_level: false,
});
let fetch_op = SensitiveOperation {
@ -1305,10 +1352,11 @@ mod tests {
line: 20,
args: Vec::new(),
condition_text: None,
is_route_level: false,
};
// Direct member subject `data.community_id` (the original
// request field) covered via reverse-walk.
// request field), covered via reverse-walk.
assert!(auth_check_covers_subject(
&check,
&member("data", "community_id"),
@ -1334,7 +1382,7 @@ mod tests {
/// Subject as plain identifier copied from the request
/// (`let community_id = data.community_id; let community =
/// Community::read(pool, community_id);`) must also benefit from
/// the reverse-walk `row_population_data["community"]` then
/// the reverse-walk, `row_population_data["community"]` then
/// records `[community_id]` (a plain identifier, not the
/// member-access shape).
#[test]
@ -1352,6 +1400,7 @@ mod tests {
line: 20,
args: Vec::new(),
condition_text: None,
is_route_level: false,
};
assert!(auth_check_covers_subject(
@ -1392,9 +1441,10 @@ mod tests {
line: 20,
args: Vec::new(),
condition_text: None,
is_route_level: false,
};
// Sink subject is the bare alias covered via the chain.
// Sink subject is the bare alias, covered via the chain.
assert!(auth_check_covers_subject(
&check,
&plain("community_id"),
@ -1412,4 +1462,73 @@ mod tests {
// Plain identifier with no alias entry must NOT be covered.
assert!(!auth_check_covers_subject(&check, &plain("post_id"), &unit));
}
/// Route-level guards cover every subject in the handler.
///
/// Authorization declared at the route boundary (Flask
/// `@requires_role`, FastAPI
/// `dependencies=[Depends(requires_access_dag(...))]`, Rails
/// `before_action :authorize`, and the Django / Spring / axum
/// equivalents) is evaluated before the handler body runs. Such a
/// check records no `ValueRef` subjects from the body, so the
/// per-name coverage walk has nothing to match against. Instead,
/// `auth_check_covers_subject` answers `true` as soon as it sees
/// `is_route_level`. Login-only kinds never reach that helper:
/// `has_prior_subject_auth` filters them out beforehand.
#[test]
fn auth_check_covers_subject_route_level_short_circuits() {
    use crate::auth_analysis::model::{AuthCheck, AuthCheckKind};

    // Both cases use the same subject-less `requires_access_dag`
    // check; only the route-level flag differs between them.
    let subjectless_check = |is_route_level: bool| AuthCheck {
        kind: AuthCheckKind::Other,
        callee: "requires_access_dag".into(),
        subjects: Vec::new(),
        span: (0, 0),
        line: 0,
        args: Vec::new(),
        condition_text: None,
        is_route_level,
    };
    let unit = empty_unit();

    // Route-level: a path param, a request-body field and a row-fetch
    // receiver are all covered, although the per-name walk would
    // have rejected each of them.
    let route_check = subjectless_check(true);
    for subject in [plain("dag_id"), member("req", "dag_run_id"), plain("dag")] {
        assert!(auth_check_covers_subject(&route_check, &subject, &unit));
    }

    // Sanity check: the identical check without the route-level flag
    // covers nothing, because its empty `subjects` vec gives the
    // `check.subjects.iter().any(...)` walk no candidate to match.
    let in_body_check = subjectless_check(false);
    assert!(!auth_check_covers_subject(
        &in_body_check,
        &plain("dag_id"),
        &unit
    ));
}
}

View file

@ -173,7 +173,7 @@ impl AuthAnalysisRules {
/// Does the LAST segment of the callee match a configured non-sink
/// method name (case-sensitive exact)? Used to recognise DOM-API
/// methods like `addEventListener` / `appendChild` regardless of
/// receiver `someElement.addEventListener` is just as
/// receiver, `someElement.addEventListener` is just as
/// categorically client-side as `document.addEventListener`.
pub fn callee_has_non_sink_method(&self, callee: &str) -> bool {
let last = bare_method_name(callee);
@ -200,19 +200,19 @@ impl AuthAnalysisRules {
/// Classify a call into a [`SinkClass`].
///
/// Dispatch order (first match wins):
/// 1. `InMemoryLocal` receiver is a known non-sink collection
/// 1. `InMemoryLocal`, receiver is a known non-sink collection
/// (tracked in `non_sink_vars` or matches a configured
/// non-sink prefix).
/// 2. `RealtimePublish` receiver first-segment matches a
/// 2. `RealtimePublish`, receiver first-segment matches a
/// configured realtime prefix (e.g. `realtime`, `pubsub`).
/// 3. `OutboundNetwork` receiver first-segment matches a
/// 3. `OutboundNetwork`, receiver first-segment matches a
/// configured outbound-network prefix (e.g. `http`, `reqwest`).
/// 4. `CacheCrossTenant` receiver first-segment matches a
/// 4. `CacheCrossTenant`, receiver first-segment matches a
/// configured cache prefix (e.g. `cache`, `redis`).
/// 5. `DbMutation` callee name matches `mutation_indicator_names`.
/// 6. `DbCrossTenantRead` callee name matches `read_indicator_names`.
/// 5. `DbMutation`, callee name matches `mutation_indicator_names`.
/// 6. `DbCrossTenantRead`, callee name matches `read_indicator_names`.
///
/// Returns `None` when the callee matches none of the above the
/// Returns `None` when the callee matches none of the above, the
/// call site is ignored by ownership-gap checks.
pub fn classify_sink_class(
&self,
@ -227,8 +227,8 @@ impl AuthAnalysisRules {
// (`el.addEventListener`, `parent.appendChild`) are categorically
// not data-layer auth-relevant operations. These shapes would
// otherwise prefix-match read/mutation indicators (`get`, `add`,
// `remove`) `getElementById` canonicalises to `getelementbyid`
// which `starts_with("get")` and falsely classify as
// `remove`), `getElementById` canonicalises to `getelementbyid`
// which `starts_with("get")`, and falsely classify as
// `DbCrossTenantRead` / `DbMutation`.
if self.callee_has_non_sink_global_receiver(callee)
|| self.callee_has_non_sink_method(callee)
@ -251,7 +251,7 @@ impl AuthAnalysisRules {
// receiver. When the receiver chain itself contains a call
// expression (`w.Header().Get(..)`, `r.URL.Query().Get(..)`,
// `db.Tx(..).Query(..)`), the receiver is the *return value of
// another call* its type is opaque to the auth analyser and
// another call*, its type is opaque to the auth analyser and
// the bare verb match is too speculative to assume a data-layer
// sink. The realtime/outbound/cache prefix dispatches above
// already match by the chain root; if none of them claimed the
@ -501,6 +501,13 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
"user_passes_test".into(),
"verify_access".into(),
"authorize".into(),
// FastAPI dependency-injection auth idiom: airflow uses
// `Depends(requires_access_dag(method="GET"))`,
// `requires_access_connection(...)`, etc. The unwrapped
// inner call name is `requires_access_<resource>`; the
// `requires_access` prefix matches all variants via
// `matches_name`.
"requires_access".into(),
],
mutation_indicator_names: vec![
"update".into(),
@ -615,7 +622,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
"verify_access!".into(),
"can_access?".into(),
"can?".into(),
// Rails per-record permission predicates the canonical
// Rails per-record permission predicates, the canonical
// "load by id, then check on the loaded record" idiom
// (see redmine `app/controllers/issues_controller.rb`,
// mastodon controllers, diaspora ApplicationController).
@ -961,7 +968,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
"can_access".into(),
"can_manage".into(),
// Common project-specific helpers seen in real Axum/Rocket
// codebases kept as defaults so user code that names
// codebases, kept as defaults so user code that names
// its membership helper after the resource still gets
// recognised. Users can extend via `nyx.toml`.
"require_group_member".into(),
@ -1045,7 +1052,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
"FxHashSet".into(),
"DashMap".into(),
"DashSet".into(),
// `serde_json::Map` (last-segment `Map`) common JSON
// `serde_json::Map` (last-segment `Map`), common JSON
// body builder where `m.insert("k", v)` is a string-key
// assignment on an in-memory object, not a DB write.
"Map".into(),
@ -1161,7 +1168,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
],
non_sink_receiver_types: Vec::new(),
non_sink_receiver_name_prefixes: Vec::new(),
// Browser/DOM globals calls on these receivers are
// Browser/DOM globals, calls on these receivers are
// categorically client-side (no server-side authorization
// semantics). Without this list, `document.getElementById`
// would prefix-match the read-indicator `get`,
@ -1196,7 +1203,7 @@ pub fn build_auth_rules(config: &Config, lang_slug: &str) -> AuthAnalysisRules {
"WeakMap".into(),
"WeakSet".into(),
],
// DOM-API methods when the LAST segment of the callee
// DOM-API methods, when the LAST segment of the callee
// matches, the call is non-data-layer regardless of receiver
// (`el.addEventListener`, `parent.appendChild`). These
// methods would otherwise prefix-match `add`, `remove`,
@ -1345,7 +1352,7 @@ pub fn first_receiver_segment(callee: &str) -> &str {
callee.split('.').next().unwrap_or(callee)
}
/// True when the callee's receiver chain contains a call expression
/// True when the callee's receiver chain contains a call expression,
/// i.e. the LAST segment is being invoked on the *return value* of an
/// earlier call (`w.Header().Get`, `r.URL.Query().Get`,
/// `db.Tx(opts).Query`). Detected as: the substring before the last
@ -1366,7 +1373,7 @@ pub fn receiver_is_chained_call(callee: &str) -> bool {
/// (`member`, `owner`, `admin`, `access`, `permission`, `manager`,
/// `editor`, `viewer`, `user`, `mod`). The resource segment is
/// project-specific (`trip`, `doc`, `project`, `community`, …) and
/// cannot be enumerated in the static defaults but the
/// cannot be enumerated in the static defaults, but the
/// prefix+role pattern is unambiguous enough that recognising it as
/// an authorization check is safe. Also accepts `is_<role>` /
/// `is_<role>_(or|and)_<role>...` predicate forms (`is_admin`,
@ -1398,7 +1405,7 @@ fn is_require_resource_role_call(name: &str) -> bool {
}
// Pattern 2: `is_<role>` and `is_<role>_(or|and)_<role>...`.
// Conservative role list excludes `user` / `staff` to avoid
// Conservative role list, excludes `user` / `staff` to avoid
// matching ambiguous predicates like `is_user`.
if let Some(rest) = lower.strip_prefix("is_")
&& !rest.is_empty()
@ -1682,7 +1689,7 @@ mod tests {
assert!(receiver_is_chained_call("r.URL.Query().Get"));
assert!(receiver_is_chained_call("db.Tx(opts).Query"));
assert!(receiver_is_chained_call("client.WithToken(t).Get"));
// Pure field/identifier chain no `(` anywhere.
// Pure field/identifier chain, no `(` anywhere.
assert!(!receiver_is_chained_call("repo.Find"));
assert!(!receiver_is_chained_call("c.Fs.Create"));
assert!(!receiver_is_chained_call("globalBatchJobsMetrics.save"));
@ -1701,7 +1708,7 @@ mod tests {
let empty: HashSet<String> = HashSet::new();
// Chained-call receiver: verb-name fallback is suppressed.
// The minio `w.Header().Get(constName)` cluster `Get` would
// The minio `w.Header().Get(constName)` cluster, `Get` would
// match the `Get` read indicator on a bare receiver but the
// chained-call shape masks the receiver type.
assert_eq!(rules.classify_sink_class("w.Header().Get", &empty), None);
@ -1742,7 +1749,7 @@ mod tests {
let rules = build_auth_rules(&cfg, "javascript");
let empty: HashSet<String> = HashSet::new();
// Globals receiver-first-segment match.
// Globals, receiver-first-segment match.
assert_eq!(
rules.classify_sink_class("document.getElementById", &empty),
Some(SinkClass::InMemoryLocal)
@ -1760,7 +1767,7 @@ mod tests {
Some(SinkClass::InMemoryLocal)
);
// Method allowlist last-segment match regardless of receiver.
// Method allowlist, last-segment match regardless of receiver.
assert_eq!(
rules.classify_sink_class("input.addEventListener", &empty),
Some(SinkClass::InMemoryLocal)
@ -1801,22 +1808,22 @@ mod tests {
assert!(rules.is_authorization_check("authz::require_trip_member"));
assert!(rules.is_authorization_check("self.require_album_editor"));
// Negatives random `require_*` calls without a known role
// Negatives, random `require_*` calls without a known role
// suffix do NOT count as authorization.
assert!(!rules.is_authorization_check("require_db"));
assert!(!rules.is_authorization_check("require_user"));
assert!(!rules.is_authorization_check("require_login"));
// Bare `require_member` / `require_owner` (no resource segment)
// aren't enough the resource segment is what makes the helper
// aren't enough, the resource segment is what makes the helper
// unambiguous.
assert!(!rules.is_authorization_check("require_member"));
assert!(!rules.is_authorization_check("require_owner"));
}
/// Phase A4 — broader verb / role / context-suffix shapes seen in
/// real-world Rust apps. `check_<resource>_<role>_action` is the
/// canonical lemmy idiom; verifying the `is_<role>` predicate
/// recogniser closes `is_mod_or_admin` style checks.
/// Broader verb / role / context-suffix shapes seen in real-world
/// Rust apps. `check_<resource>_<role>_action` is the canonical
/// lemmy idiom; the `is_<role>` predicate recogniser closes
/// `is_mod_or_admin` style checks.
#[test]
fn is_authorization_check_recognises_check_action_and_predicate_shapes() {
let cfg = Config::default();
@ -1847,7 +1854,7 @@ mod tests {
assert!(rules.is_authorization_check("is_admin_or_moderator"));
assert!(rules.is_authorization_check("is_member_and_owner"));
// Negatives predicates whose tokens are NOT known auth roles.
// Negatives, predicates whose tokens are NOT known auth roles.
assert!(!rules.is_authorization_check("is_user"));
assert!(!rules.is_authorization_check("is_logged_in"));
assert!(!rules.is_authorization_check("is_active"));

View file

@ -384,8 +384,8 @@ fn classify_rocket_param(
///
/// **Looser than [`super::common::is_self_actor_type_text`] by
/// design.** This recogniser runs only on the type of a route-bound
/// parameter appearing in a route handler signature is itself a
/// strong signal and a false positive here just over-credits the
/// parameter, appearing in a route handler signature is itself a
/// strong signal, and a false positive here just over-credits the
/// route with a login guard, which is conservative w.r.t. flagging.
/// `is_self_actor_type_text` runs on every parameter, including in
/// non-route functions, and a false positive there suppresses
@ -625,6 +625,11 @@ pub(crate) fn inject_guard_checks(
line,
args: call.args.clone(),
condition_text: None,
// Route-level guard injected from a tower / axum layer
// (`RequireAuthorizationLayer`, `axum_login::login_required!`,
// …). Tells `auth_check_covers_subject` to short-circuit
// for any non-login-guard match.
is_route_level: true,
});
}
}

File diff suppressed because it is too large Load diff

View file

@ -209,7 +209,12 @@ fn collect_class_based_routes(
}
let line = method_node.start_position().row + 1;
for call in &middleware_calls {
if let Some(check) = auth_check_from_call_site(call, line, rules) {
if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
// Django class-based-view decorators (`@method_decorator(login_required)`,
// `@permission_required(...)`) and DRF `permission_classes`
// are declared at the route boundary; mark route-level
// so coverage applies to the action body's operations.
check.is_route_level = true;
unit.auth_checks.push(check);
}
}
@ -443,7 +448,14 @@ fn inject_middleware_auth(
return;
};
for call in middleware_calls {
if let Some(check) = auth_check_from_call_site(call, line, rules) {
if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
// Django decorators (`@login_required`, `@permission_required`,
// `@user_passes_test`, etc.) and DRF `permission_classes` are
// declared at the route boundary; mark route-level so
// `auth_check_covers_subject` short-circuits `true` for any
// non-login-guard match. See flask.rs / model.rs for the
// full rationale.
check.is_route_level = true;
unit.auth_checks.push(check);
}
}

View file

@ -67,6 +67,15 @@ fn maybe_collect_flask_route(
for decorator in decorator_expressions(node) {
if let Some(mut specs) = parse_flask_route_decorator(decorator, bytes) {
route_specs.append(&mut specs);
// FastAPI puts route-level dependencies (auth checks +
// logging hooks) inside the route decorator's
// `dependencies=[Depends(...)]` keyword argument, instead
// of as separate `@decorator` lines like Flask. Walk the
// route decorator's keyword args for that shape and lift
// each `Depends(call(...))` element into the
// middleware_calls list, so the same `inject_middleware_auth`
// path that Flask uses also picks up FastAPI auth deps.
middleware_calls.extend(extract_fastapi_dependencies(decorator, bytes));
} else {
middleware_calls.extend(expand_decorator_calls(decorator, bytes));
}
@ -220,6 +229,75 @@ fn expand_decorator_calls(node: Node<'_>, bytes: &[u8]) -> Vec<CallSite> {
vec![call_site_from_node(node, bytes)]
}
/// Collect the FastAPI route-level dependencies declared on a route
/// decorator.
///
/// FastAPI lets a route carry its guards inline, as
/// `dependencies=[Depends(call(...)), Depends(call), ...]`, instead of
/// stacking separate `@decorator` lines. This looks up the
/// `dependencies` keyword argument of the decorator call and feeds
/// every named child of its value through `unwrap_depends_call`,
/// keeping whatever that helper accepts.
///
/// Returns an empty list when `decorator_expr` is not a `call` node,
/// has no `arguments` field, or carries no `dependencies` keyword.
/// Elements that `unwrap_depends_call` rejects are dropped silently.
///
/// The lookup is purely structural: nothing here checks that the
/// decorator actually belongs to FastAPI.
fn extract_fastapi_dependencies(decorator_expr: Node<'_>, bytes: &[u8]) -> Vec<CallSite> {
    // Only a call expression (`@router.get(...)`) can carry keyword args.
    if decorator_expr.kind() != "call" {
        return Vec::new();
    }
    decorator_expr
        .child_by_field_name("arguments")
        .and_then(|arguments| keyword_argument_value(arguments, bytes, "dependencies"))
        .map(|dependency_list| {
            named_children(dependency_list)
                .into_iter()
                .filter_map(|entry| unwrap_depends_call(entry, bytes))
                .collect::<Vec<CallSite>>()
        })
        .unwrap_or_default()
}
/// Turn one element of a FastAPI `dependencies=[...]` list into the
/// `CallSite` of the callable it wraps.
///
/// The element must be a call whose callee text passes
/// `is_depends_callee`. Only the first named child of its argument
/// list is inspected, and it is accepted in two shapes:
///   * `Depends(callee(arg1, arg2))`: the inner node is itself a call
///     (a factory invocation);
///   * `Depends(callee)`: the inner node is a bare reference
///     (`identifier`, `attribute` or `scoped_identifier`).
///
/// Returns `None` for everything else: non-call elements, wrappers
/// other than `Depends`, an empty `Depends()`, or a first argument of
/// any other node kind.
fn unwrap_depends_call(node: Node<'_>, bytes: &[u8]) -> Option<CallSite> {
    if node.kind() != "call" {
        return None;
    }
    let wrapper = node.child_by_field_name("function")?;
    if !is_depends_callee(&text(wrapper, bytes)) {
        return None;
    }
    let wrapper_args = node.child_by_field_name("arguments")?;
    let inner = named_children(wrapper_args).into_iter().next()?;
    // Both accepted shapes are recorded the same way, so one kind
    // check covers them.
    let is_supported_shape = matches!(
        inner.kind(),
        "call" | "identifier" | "attribute" | "scoped_identifier"
    );
    is_supported_shape.then(|| call_site_from_node(inner, bytes))
}
/// Report whether `callee` spells the FastAPI `Depends` marker.
///
/// Surrounding whitespace is ignored. After trimming, the text must be
/// exactly one of `Depends`, `fastapi.Depends` or
/// `fastapi.params.Depends`. The comparison is case-sensitive, and no
/// alias or import resolution is attempted.
fn is_depends_callee(callee: &str) -> bool {
    const DEPENDS_SPELLINGS: [&str; 3] =
        ["Depends", "fastapi.Depends", "fastapi.params.Depends"];
    let candidate = callee.trim();
    DEPENDS_SPELLINGS.iter().any(|known| *known == candidate)
}
fn inject_middleware_auth(
model: &mut AuthorizationModel,
unit_idx: usize,
@ -231,8 +309,48 @@ fn inject_middleware_auth(
return;
};
for call in middleware_calls {
if let Some(check) = auth_check_from_call_site(call, line, rules) {
if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
// Mark as route-level: the check is declared at the route
// boundary (Flask `@requires_role(...)` decorator, FastAPI
// `dependencies=[Depends(...)]`, or any custom-router
// equivalent) and semantically authorizes every value the
// handler receives, path param, body, query, downstream
// row fetches, the lot. `auth_check_covers_subject` reads
// `is_route_level` and short-circuits `true` for any
// non-login-guard match, which is the correct shape for a
// decorator-level guard whose inner call carries no
// per-arg subject ref pointing back into the handler body.
// LoginGuard / TokenExpiry / TokenRecipient kinds are
// already excluded by `has_prior_subject_auth`'s filter
// before they reach `auth_check_covers_subject`, so the
// flag is safe to set unconditionally here, it has no
// effect on those kinds.
check.is_route_level = true;
unit.auth_checks.push(check);
}
}
}
#[cfg(test)]
mod fastapi_dependencies_tests {
    use super::is_depends_callee;

    /// `is_depends_callee` accepts only the FastAPI `Depends` marker.
    /// Any other wrapper call inside `dependencies=[...]` must be
    /// rejected: extracting an inner callee from the wrong wrapper
    /// would misclassify logging hooks or filter callables as auth
    /// checks.
    #[test]
    fn is_depends_callee_recognises_canonical_forms() {
        // Canonical spellings, plus one padded form for whitespace
        // tolerance.
        let accepted = [
            "Depends",
            "fastapi.Depends",
            "fastapi.params.Depends",
            " Depends ",
        ];
        for callee in accepted {
            assert!(is_depends_callee(callee));
        }

        // Other markers, attribute access on `Depends`, unrelated
        // names, and the empty string.
        let rejected = [
            "Annotated",
            "Body",
            "Depends.something",
            "RequiresAuth",
            "",
        ];
        for callee in rejected {
            assert!(!is_depends_callee(callee));
        }
    }
}

View file

@ -61,5 +61,104 @@ pub fn extract_authorization_model(
}
}
// **Dedup units by span across extractors.** Multiple extractors
// (e.g. Flask + Django on a Python file) each call
// `collect_top_level_units`, producing one unit per top-level
// function. When one extractor also recognises a route on that
// function and promotes its copy to `RouteHandler` (with injected
// middleware auth checks), the *other* extractor's untouched
// `Function` copy still runs through `check_ownership_gaps` and
// emits the FP from a unit that never saw the middleware-derived
// auth check.
//
// This step keeps a single canonical unit per source span,
// preferring `RouteHandler` over `Function`, merging auth_checks
// and folding operation lists conservatively. Route registrations
// are remapped to the surviving unit index.
deduplicate_units_by_span(&mut model);
model
}
/// Collapse analysis units that share a source span into one unit.
///
/// Several extractors can each emit a unit for the same function. For
/// every span exactly one unit survives: the first-seen `RouteHandler`
/// if there is one, otherwise the first-seen unit of any kind.
/// Surviving units keep their relative order.
///
/// The `auth_checks` of discarded duplicates are appended to the
/// survivor, skipping any check whose `(span, callee)` pair the
/// survivor already holds. Operations are deliberately not merged;
/// only the survivor's own list is kept.
///
/// Every route's `unit_idx` is rewritten to the survivor's new index,
/// including routes that were registered against a discarded
/// duplicate. A `unit_idx` that was already out of range is left
/// untouched.
fn deduplicate_units_by_span(model: &mut AuthorizationModel) {
    use crate::auth_analysis::model::{AnalysisUnit, AnalysisUnitKind};
    use std::collections::HashMap;

    // Pass 1: choose a winner per span, preferring the first-seen
    // `RouteHandler` over any `Function` copy.
    let mut winner_by_span: HashMap<(usize, usize), usize> = HashMap::new();
    for (idx, unit) in model.units.iter().enumerate() {
        let key = unit.span;
        match winner_by_span.get(&key) {
            None => {
                winner_by_span.insert(key, idx);
            }
            Some(&existing) => {
                let prev_kind = model.units[existing].kind;
                if prev_kind != AnalysisUnitKind::RouteHandler
                    && unit.kind == AnalysisUnitKind::RouteHandler
                {
                    winner_by_span.insert(key, idx);
                }
            }
        }
    }

    // Resolve every old index to the old index of its span's winner.
    // Route remapping below needs this for discarded units as well as
    // for winners.
    let winner_of: Vec<usize> = model
        .units
        .iter()
        .enumerate()
        .map(|(idx, unit)| winner_by_span.get(&unit.span).copied().unwrap_or(idx))
        .collect();

    // Pass 2: consume the old unit list. Winners move into `surviving`
    // without cloning; discarded units give up their auth checks,
    // tagged with the winner they belong to.
    let mut new_idx_for_old: HashMap<usize, usize> = HashMap::with_capacity(winner_by_span.len());
    let mut surviving: Vec<AnalysisUnit> = Vec::with_capacity(winner_by_span.len());
    let mut orphaned_checks = Vec::new();
    for (old_idx, mut unit) in std::mem::take(&mut model.units).into_iter().enumerate() {
        let winner = winner_of[old_idx];
        if winner == old_idx {
            new_idx_for_old.insert(old_idx, surviving.len());
            surviving.push(unit);
        } else {
            orphaned_checks.push((winner, std::mem::take(&mut unit.auth_checks)));
        }
    }

    // Pass 3: fold orphaned auth checks into their winners, deduping
    // by (span, callee). This runs after pass 2 because a discarded
    // unit may precede its winner in the original order.
    for (winner, checks) in orphaned_checks {
        let Some(&target) = new_idx_for_old.get(&winner) else {
            continue;
        };
        let merged = &mut surviving[target].auth_checks;
        for check in checks {
            let already_present = merged
                .iter()
                .any(|existing| existing.span == check.span && existing.callee == check.callee);
            if !already_present {
                merged.push(check);
            }
        }
    }

    model.units = surviving;

    // Pass 4: remap routes through the span winner, so a route that
    // was registered against a discarded duplicate lands on the
    // surviving unit instead of keeping a stale index.
    for route in &mut model.routes {
        let resolved = winner_of
            .get(route.unit_idx)
            .and_then(|winner| new_idx_for_old.get(winner));
        if let Some(&new_idx) = resolved {
            route.unit_idx = new_idx;
        }
    }
}

View file

@ -137,7 +137,14 @@ fn maybe_collect_controller(
let line = child.start_position().row + 1;
let middleware_calls = applicable_filters(&filter_directives, &action_name);
for call in &middleware_calls {
if let Some(check) = auth_check_from_call_site(call, line, rules) {
if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
// Rails `before_action :authorize_user`-style filter
// callbacks run before the action and authorize the
// entire request, same shape as FastAPI / Flask
// `dependencies=[Depends(...)]`. Mark route-level so
// `auth_check_covers_subject` covers the row-fetches
// and downstream sinks the action body performs.
check.is_route_level = true;
unit.auth_checks.push(check);
}
}

View file

@ -114,7 +114,13 @@ fn maybe_collect_route(
);
let line = block.start_position().row + 1;
for call in before_filters {
if let Some(check) = auth_check_from_call_site(call, line, rules) {
if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
// Sinatra `before` filters run before the route handler
// body and authorize the request as a whole, same shape
// as Rails `before_action`. Route-level so coverage
// applies to the handler's row fetches and downstream
// sinks.
check.is_route_level = true;
unit.auth_checks.push(check);
}
}

View file

@ -111,7 +111,15 @@ fn maybe_collect_controller(
rules,
);
for call in &middleware_calls {
if let Some(check) = auth_check_from_call_site(call, line, rules) {
if let Some(mut check) = auth_check_from_call_site(call, line, rules) {
// Spring `@PreAuthorize` / `@Secured` /
// `@RolesAllowed` annotations are declared at the
// method or class boundary and authorize the entire
// request, same shape as FastAPI / Flask
// `dependencies=[Depends(...)]`. Mark route-level
// so `auth_check_covers_subject` covers row fetches
// and downstream sinks in the handler body.
check.is_route_level = true;
unit.auth_checks.push(check);
}
}

View file

@ -1,3 +1,5 @@
#![doc = include_str!(concat!(env!("OUT_DIR"), "/auth_analysis.md"))]
pub mod checks;
pub mod config;
pub mod extract;
@ -26,7 +28,7 @@ fn byte_offset_to_point(tree: &Tree, byte: usize) -> tree_sitter::Point {
/// source-level variable name. Built at `run_auth_analysis` call sites
/// by merging type facts across all bodies in the file; a variable name
/// with conflicting types in different bodies is dropped (absence is
/// safe the sink gate just falls back to name-based classification).
/// safe, the sink gate just falls back to name-based classification).
pub type VarTypes = HashMap<String, TypeKind>;
#[allow(clippy::too_many_arguments)]
@ -87,7 +89,7 @@ pub fn run_auth_analysis(
/// Used by pass 1 to persist per-file auth summaries for cross-file
/// helper lifting. Only returns summaries for units whose body
/// already proves at least one positional parameter under ownership /
/// membership / admin / authorization check i.e. the exact
/// membership / admin / authorization check, i.e. the exact
/// single-file lift set, so the cross-file variant does not widen what
/// counts as a helper.
pub fn extract_auth_summaries_by_key(
@ -198,7 +200,7 @@ fn build_unit_summary(unit: &model::AnalysisUnit) -> Option<model::AuthCheckSumm
/// Walk every `SensitiveOperation` in the model and, when the call's
/// receiver root variable has a known SSA type, override `sink_class`
/// to the type-implied class. Strictly additive only overrides
/// to the type-implied class. Strictly additive, only overrides
/// when the type map produces a definite class, otherwise leaves the
/// name/prefix-derived classification intact.
fn apply_var_types_to_model(
@ -229,11 +231,11 @@ fn apply_var_types_to_model(
/// reassignment from user input (`let id = req.params.id`) never gets
/// suppressed by accident.
///
/// Phase 6: when a parameter's type is a [`TypeKind::Dto`], lift each
/// When a parameter's type is a [`TypeKind::Dto`], lift each
/// of its `Int`/`Bool` fields as `typed_bounded_dto_fields[<param>]`
/// so member-access subjects like `dto.age` are recognised as
/// payload-incompatible. Only fires when the base param itself was
/// recognised as a typed extractor by a Phase 1-2 matcher — bare
/// recognised as a typed extractor by a typed-extractor matcher, bare
/// parameters with no framework gate never lift their fields.
fn apply_typed_bounded_params(model: &mut model::AuthorizationModel, var_types: &VarTypes) {
for unit in &mut model.units {
@ -310,7 +312,7 @@ fn sink_class_for_type(
///
/// When `global_summaries` is `Some`, cross-file helpers are looked up
/// via [`GlobalSummaries::get_auth`] after the same-file summary
/// gather this recovers the handler-in-file-A calling
/// gather, this recovers the handler-in-file-A calling
/// `require_owner`-in-file-B case that single-file lifting cannot see.
fn apply_helper_lifting(
model: &mut model::AuthorizationModel,
@ -408,7 +410,7 @@ fn build_helper_summaries(
let mut summary = AuthCheckSummary::default();
for check in &unit.auth_checks {
// We only lift checks that actively prove ownership /
// membership / admin-rights / authorize-helper login
// membership / admin-rights / authorize-helper, login
// and token-validity checks don't justify foreign-id
// mutations and we want to keep parity with
// `has_prior_subject_auth`'s filter.
@ -435,7 +437,7 @@ fn build_helper_summaries(
}
}
if !summary.param_auth_kinds.is_empty() {
// Deduplicate by last segment of the function name the
// Deduplicate by last segment of the function name, the
// lifting site matches the call's last segment too.
let last = name.rsplit('.').next().unwrap_or(name).to_string();
summaries
@ -492,7 +494,7 @@ fn stronger_check_kind(a: model::AuthCheckKind, b: model::AuthCheckKind) -> mode
/// For one unit, synthesise an `AuthCheck` at every call site that
/// targets a helper with a non-trivial summary. Subjects are taken
/// from `call_site.args_value_refs[K]` for each auth-checked param
/// position K these are the caller's concrete subjects passed at
/// position K, these are the caller's concrete subjects passed at
/// that arg slot, exactly what `auth_check_covers_subject` needs.
fn synthesise_checks_for_unit(
unit: &model::AnalysisUnit,
@ -501,7 +503,7 @@ fn synthesise_checks_for_unit(
let line_of = |span: (usize, usize)| -> usize {
// Span is byte offsets; we don't have direct access to a Tree
// here. Caller assigns line via `line` field on call_site
// through CallSite metadata absence fall back to the unit's
// through CallSite metadata absence, fall back to the unit's
// line since covers_subject uses `check.line <= op.line` and
// helper calls are typically near the unit start.
let _ = span;
@ -541,6 +543,7 @@ fn synthesise_checks_for_unit(
line,
args: call.args.clone(),
condition_text: None,
is_route_level: false,
});
}
out
@ -563,7 +566,7 @@ fn call_site_line(unit: &model::AnalysisUnit, call: &model::CallSite) -> Option<
None
}
/// Cross-file variant of [`synthesise_checks_for_unit`] for each
/// Cross-file variant of [`synthesise_checks_for_unit`], for each
/// call site in `unit`, resolve the callee against `GlobalSummaries`
/// and look up an `AuthCheckSummary` that was persisted by some other
/// file's pass-1 extraction. Skips call sites already handled by the
@ -589,7 +592,7 @@ fn synthesise_cross_file_checks_for_unit(
if unit.name.as_deref() == Some(last) {
continue;
}
// Skip if the single-file map already handled this callee
// Skip if the single-file map already handled this callee;
// that path has richer same-file context (existing
// summaries from sibling units in this model) and its
// synthesised check is strictly more precise.
@ -636,6 +639,7 @@ fn synthesise_cross_file_checks_for_unit(
line,
args: call.args.clone(),
condition_text: None,
is_route_level: false,
});
}
out
@ -767,7 +771,7 @@ mod tests {
Some(SinkClass::DbCrossTenantRead)
);
// DatabaseConnection: unrecognized verb (`execute`) → DbMutation
// (conservative default treat as write-shaped).
// (conservative default, treat as write-shaped).
assert_eq!(
sink_class_for_type(&TypeKind::DatabaseConnection, "conn.execute", &rules),
Some(SinkClass::DbMutation)
@ -819,7 +823,7 @@ mod tests {
)));
let var_types: VarTypes = HashMap::new();
apply_var_types_to_model(&mut model, &rules, &var_types);
// Unchanged no entry in var_types for `db`.
// Unchanged, no entry in var_types for `db`.
assert_eq!(
model.units[0].operations[0].sink_class,
Some(SinkClass::DbMutation)

View file

@ -55,7 +55,7 @@ pub enum OperationKind {
}
/// Classification of a sensitive operation by the resource it targets.
/// `check_ownership_gaps` only fires on the first five classes
/// `check_ownership_gaps` only fires on the first five classes;
/// `InMemoryLocal` is never authorization-relevant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SinkClass {
@ -76,7 +76,7 @@ pub enum SinkClass {
/// (Redis / memcache / distributed cache client).
CacheCrossTenant,
/// A method call against a local, in-memory collection (HashMap,
/// HashSet, Vec, …) never authorization-relevant.
/// HashSet, Vec, …), never authorization-relevant.
InMemoryLocal,
}
@ -133,6 +133,33 @@ pub struct AuthCheck {
pub line: usize,
pub args: Vec<String>,
pub condition_text: Option<String>,
/// True when the check was declared at the route boundary
/// (decorator / middleware / dependency-injection list) rather
/// than as a per-call check inside the handler body.
///
/// Route-level non-login-guard checks authorize the *entire*
/// handler, they gate every value the handler receives, every
/// row the handler fetches, and every operation downstream. An
/// in-body `auth_check_covers_subject` walk that requires a
/// per-name subject match cannot model that semantics: a
/// FastAPI `dependencies=[Depends(requires_access_dag(method=
/// "POST", access_entity=DagAccessEntity.RUN))]` is opaque to
/// the engine, the inner `requires_access_dag` call carries no
/// per-arg subject ref pointing to `dag_id` or `dag.id`. The
/// flag tells `auth_check_covers_subject` to short-circuit
/// `true` for any non-login-guard route-level check, leaving
/// only the LoginGuard / TokenExpiry / TokenRecipient kinds
/// (already excluded upstream by `has_prior_subject_auth`'s
/// filter) to be ignored.
///
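/// Set wherever an extractor injects a check declared at the route
/// boundary: `inject_middleware_auth` (Django, Flask, FastAPI),
/// Django class-based views, Rails / Sinatra `before` filters,
/// Spring annotations, and the tower / axum guard-layer injection
/// in `inject_guard_checks`. Default `false` for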
/// in-body checks (`require_membership(user, group_id)`,
/// `is_admin(user)`, etc.), those still flow through the
/// per-subject coverage logic so a check on
/// `community.creator_id` doesn't blanket-suppress every other
/// subject in the unit.
pub is_route_level: bool,
}
#[derive(Debug, Clone)]
@ -140,7 +167,7 @@ pub struct SensitiveOperation {
pub kind: OperationKind,
/// Sink classification. `None` means the operation was recorded
/// for taxonomy completeness but does not match any known resource
/// class defensive, and currently unused.
/// class, defensive, and currently unused.
pub sink_class: Option<SinkClass>,
pub callee: String,
pub subjects: Vec<ValueRef>,
@ -183,7 +210,7 @@ pub struct AnalysisUnit {
/// "fetch-then-authorize" exemption in `checks.rs`: if a row-fetch
/// operation produces variable `V` and SOME auth check elsewhere
/// in the unit names `V`, the row-fetch operation is considered
/// authorized even though the check appears textually after the
/// authorized, even though the check appears textually after the
/// fetch. This is the standard idiom in row-level authz code:
/// fetch the row first to extract the resource id, then call
/// `check_<resource>_<role>(&user, &row, ...)` to authorize it.
@ -199,7 +226,7 @@ pub struct AnalysisUnit {
/// copies of `V.id` / `V.user_id` / `V.uid` / `V.userId` for some
/// `V ∈ self_actor_vars`). Populated when the extractor sees
/// `let X = V.id` or `let X = (V.id as ..).into()` / `V.id.into()`
/// shapes anywhere a route-handler reduces the authenticated
/// shapes, anywhere a route-handler reduces the authenticated
/// principal to a scalar id and reuses it as a SQL parameter.
/// Consulted by `is_actor_context_subject` so subjects whose `name`
/// is in this set count as actor context, not foreign scoped IDs.
@ -217,7 +244,7 @@ pub struct AnalysisUnit {
/// one of these names.
pub authorized_sql_vars: HashSet<String>,
/// Local variables bound (by `let`, `:=`, `var`, `const`) to a
/// pure literal string, integer, float, or boolean. These are
/// pure literal, string, integer, float, or boolean. These are
/// developer-chosen constants and cannot be user-controlled, so
/// they must never trip `<lang>.auth.missing_ownership_check`
/// even when the variable name passes `is_id_like`. Closes the
@ -231,22 +258,21 @@ pub struct AnalysisUnit {
/// `is_typed_bounded_subject` so parameters like Spring `Long
/// userId`, Axum `Path<i64>`, or FastAPI `user_id: int` are not
/// classified as scoped-identifier subjects even when their name
/// passes `is_id_like` the framework guarantees the value is a
/// passes `is_id_like`, the framework guarantees the value is a
/// number that cannot carry a SQL/file/shell payload.
pub typed_bounded_vars: HashSet<String>,
/// Phase 6: per-DTO-extractor parameter, the field names whose
/// Per DTO-extractor parameter, the field names whose
/// declared type is a payload-incompatible scalar. Map key is the
/// parameter name (e.g. `dto`), value is the list of field names
/// (e.g. `["age", "count"]`). Populated by
/// [`super::apply_typed_bounded_params`] only when the parameter
/// itself was recognised as a typed extractor by a Phase 1-2
/// matcher — bare parameters with no framework gate never lift
/// their fields.
/// itself was recognised as a typed extractor, bare parameters
/// with no framework gate never lift their fields.
pub typed_bounded_dto_fields: HashMap<String, Vec<String>>,
/// Per-unit dynamic session-base text set, supplementing the
/// hard-coded list in `is_self_scoped_session_base`. Populated by
/// the extractor when a parameter's static type signals a known
/// auth-context shape e.g. TRPC's `Options { ctx: { user:
/// auth-context shape, e.g. TRPC's `Options { ctx: { user:
/// NonNullable<TrpcSessionUser> } }` adds `<localCtx>.user` so
/// downstream `ctx.user.id` accesses count as actor context. Each
/// entry is the dotted base text (e.g. `"ctx.user"`,

View file

@ -28,7 +28,7 @@
pub enum SqlAuthClassification {
/// Query is auth-gated. The JOIN (or direct WHERE) pins returned
/// rows to the bound user. We don't track *which* bind position
/// here the caller treats whichever bind value flows into the
/// here, the caller treats whichever bind value flows into the
/// query as the user-id witness; that's safe because the caller
/// already requires the row binding to come from a `let X = …`
/// site we can name.
@ -37,12 +37,12 @@ pub enum SqlAuthClassification {
/// Classify `sql` as auth-gated under the configured ACL tables.
/// Returns `Some(Authorized)` when one of the recognized patterns
/// holds, `None` otherwise (conservative unknown shapes are treated
/// holds, `None` otherwise (conservative, unknown shapes are treated
/// as unauthorized).
pub fn classify_sql_query(sql: &str, acl_tables: &[String]) -> Option<SqlAuthClassification> {
let normalized = normalize_sql(sql);
if !normalized.trim_start().starts_with("select") {
// For B3 we only authorize SELECT queries INSERT/UPDATE/DELETE
// For B3 we only authorize SELECT queries, INSERT/UPDATE/DELETE
// need their own analysis and aren't in scope. (A literal
// `DELETE … WHERE user_id = ?N` could be safely authorized,
// but the call sites we care about for FP suppression are
@ -60,7 +60,7 @@ pub fn classify_sql_query(sql: &str, acl_tables: &[String]) -> Option<SqlAuthCla
}
/// `SELECT … FROM <T> [AS] <ALIAS>? JOIN <ACL> [AS] <GA>? ON … WHERE
/// <GA?>.user_id = ?N` verifies that an ACL table appears in a JOIN
/// <GA?>.user_id = ?N`, verifies that an ACL table appears in a JOIN
/// clause and that the WHERE clause contains a `<…>.user_id = ?` (or
/// bare `user_id = ?`) predicate. Order of the WHERE predicates
/// doesn't matter; AND/OR connectors are ignored.
@ -87,14 +87,14 @@ fn matches_join_through_acl(sql: &str, acl_tables: &[String]) -> bool {
where_clause_contains_user_id_bind(where_clause)
}
/// Direct ownership: `SELECT … FROM <T> WHERE … user_id = ?N` no
/// Direct ownership: `SELECT … FROM <T> WHERE … user_id = ?N`, no
/// JOIN. Covers single-table reads where the row already carries the
/// owning user id (`SELECT … FROM docs WHERE user_id = ?1`). We do
/// NOT require `id = ?M` to also be present; the `user_id = ?N`
/// predicate alone is sufficient, since any row returned must be
/// owned by the bound user.
///
/// Refuses to fire when a JOIN is present the JOIN target may not
/// Refuses to fire when a JOIN is present, the JOIN target may not
/// be in the ACL list, so the WHERE predicate (which may apply to
/// the joined table, e.g. `WHERE al.user_id = ?N` against an
/// `audit_log` JOIN) doesn't actually pin the primary rows to the
@ -125,7 +125,7 @@ fn where_clause_contains_user_id_bind(where_clause: &str) -> bool {
for (idx, _) in where_only.match_indices(needle) {
// Make sure this is a column boundary on the left side
// (avoid matching `posted_user_id` or `target_user_id`
// those don't pin to the actor).
// since those don't pin to the actor).
let before = where_only[..idx].chars().last();
if !is_column_boundary_left(before) {
continue;
@ -158,11 +158,11 @@ fn looks_like_bind_param(after_eq: &str) -> bool {
return false;
}
match bytes[0] {
// ?N (sqlite/sqlx anonymous) accept ?, ?1, ?2…
// ?N (sqlite/sqlx anonymous), accept ?, ?1, ?2…
b'?' => true,
// $N (postgres style) require a digit after.
// $N (postgres style), require a digit after.
b'$' => bytes.get(1).is_some_and(|b| b.is_ascii_digit()),
// :name (named bind) require an identifier char after.
// :name (named bind), require an identifier char after.
b':' => bytes
.get(1)
.is_some_and(|b| b.is_ascii_alphabetic() || *b == b'_'),
@ -277,7 +277,7 @@ mod tests {
#[test]
fn join_against_non_acl_table_is_not_authorized() {
// `audit_log` is not in the configured ACL list JOIN doesn't
// `audit_log` is not in the configured ACL list, JOIN doesn't
// pin rows to the bound user, so the query is unauthorized.
let sql = "SELECT d.* FROM docs d \
JOIN audit_log al ON al.doc_id = d.id \
@ -301,7 +301,7 @@ mod tests {
#[test]
fn similar_column_names_do_not_trip_user_id_match() {
// `posted_user_id` shouldn't satisfy the `user_id = ?` check
// `posted_user_id` shouldn't satisfy the `user_id = ?` check;
// that column doesn't pin to the actor.
let sql = "SELECT * FROM posts WHERE posted_user_id = ?1";
assert_eq!(classify_sql_query(sql, &acl()), None);