mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
363 lines
14 KiB
Rust
363 lines
14 KiB
Rust
use serde::{Deserialize, Serialize};
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::path::PathBuf;
|
|
|
|
/// Web framework tag carried by a [`RouteRegistration`].
///
/// A payload-free, `Copy` enum: one variant per framework name listed below.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Framework {
    Express,
    Koa,
    Fastify,
    Gin,
    Echo,
    Flask,
    Django,
    Spring,
    Rails,
    Sinatra,
    Axum,
    ActixWeb,
    Rocket,
}
|
|
|
|
/// Method tag carried by a [`RouteRegistration`].
///
/// NOTE(review): `All` and `Use` are not HTTP verbs; presumably they mirror
/// router registration calls such as `app.all(..)` / `app.use(..)`. Confirm
/// against the route extractor.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HttpMethod {
    Get,
    Post,
    Put,
    Delete,
    Patch,
    All,
    Use,
}
|
|
|
|
/// Discriminates the two kinds of [`AnalysisUnit`]: a route handler or a
/// plain function.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AnalysisUnitKind {
    RouteHandler,
    Function,
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
|
pub enum AuthCheckKind {
|
|
LoginGuard,
|
|
AdminGuard,
|
|
Ownership,
|
|
Membership,
|
|
TokenExpiry,
|
|
TokenRecipient,
|
|
Other,
|
|
}
|
|
|
|
/// Kind of a [`SensitiveOperation`]: a read, a mutation, or a token lookup.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OperationKind {
    Read,
    Mutation,
    TokenLookup,
}
|
|
|
|
/// Classifies a sensitive operation by the resource it targets.
///
/// `check_ownership_gaps` fires only on the first five classes;
/// `InMemoryLocal` is never authorization-relevant.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SinkClass {
    /// Write against a persistent datastore (SQL, ORM, or a KV store that
    /// crosses tenant boundaries).
    DbMutation,
    /// Read against a persistent datastore that may return rows belonging
    /// to another tenant unless an explicit ownership check is made.
    DbCrossTenantRead,
    /// Publish / broadcast against a realtime bus (pub/sub, websocket
    /// channel, event stream). Always auth-relevant, because receivers are
    /// typically scoped by tenant id.
    RealtimePublish,
    /// Outbound HTTP / RPC call whose target or payload can encode a
    /// tenant-scoped identifier.
    OutboundNetwork,
    /// Cache read/write whose keys routinely cross tenant boundaries
    /// (Redis / memcache / distributed cache client).
    CacheCrossTenant,
    /// Method call against a local, in-memory collection (HashMap, HashSet,
    /// Vec, …); never authorization-relevant.
    InMemoryLocal,
}
|
|
|
|
impl SinkClass {
|
|
/// Does this sink class participate in the missing-ownership gate?
|
|
/// Only `InMemoryLocal` is excluded; all other classes are treated
|
|
/// as potential cross-tenant sinks.
|
|
pub fn is_auth_relevant(&self) -> bool {
|
|
!matches!(self, SinkClass::InMemoryLocal)
|
|
}
|
|
}
|
|
|
|
/// Where a [`ValueRef`] originates, as stored in [`ValueRef::source_kind`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValueSourceKind {
    RequestParam,
    RequestBody,
    RequestQuery,
    Session,
    Identifier,
    MemberField,
    TokenField,
    ArrayIndex,
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct ValueRef {
|
|
pub source_kind: ValueSourceKind,
|
|
pub name: String,
|
|
pub base: Option<String>,
|
|
pub field: Option<String>,
|
|
pub index: Option<String>,
|
|
pub span: (usize, usize),
|
|
}
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct CallSite {
|
|
pub name: String,
|
|
pub args: Vec<String>,
|
|
pub span: (usize, usize),
|
|
/// Per-positional-argument value-refs. Populated only by the
|
|
/// structured `collect_call` path (the auxiliary
|
|
/// `call_site_from_node` constructor leaves this empty); used to
|
|
/// attribute synthesised helper-call auth checks to the concrete
|
|
/// subjects passed by the caller.
|
|
pub args_value_refs: Vec<Vec<ValueRef>>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct AuthCheck {
|
|
pub kind: AuthCheckKind,
|
|
pub callee: String,
|
|
pub subjects: Vec<ValueRef>,
|
|
pub span: (usize, usize),
|
|
pub line: usize,
|
|
pub args: Vec<String>,
|
|
pub condition_text: Option<String>,
|
|
/// True when the check was declared at the route boundary
|
|
/// (decorator / middleware / dependency-injection list) rather
|
|
/// than as a per-call check inside the handler body.
|
|
///
|
|
/// Route-level non-login-guard checks authorize the *entire*
|
|
/// handler, they gate every value the handler receives, every
|
|
/// row the handler fetches, and every operation downstream. An
|
|
/// in-body `auth_check_covers_subject` walk that requires a
|
|
/// per-name subject match cannot model that semantics: a
|
|
/// FastAPI `dependencies=[Depends(requires_access_dag(method=
|
|
/// "POST", access_entity=DagAccessEntity.RUN))]` is opaque to
|
|
/// the engine, the inner `requires_access_dag` call carries no
|
|
/// per-arg subject ref pointing to `dag_id` or `dag.id`. The
|
|
/// flag tells `auth_check_covers_subject` to short-circuit
|
|
/// `true` for any non-login-guard route-level check, leaving
|
|
/// only the LoginGuard / TokenExpiry / TokenRecipient kinds
|
|
/// (already excluded upstream by `has_prior_subject_auth`'s
|
|
/// filter) to be ignored.
|
|
///
|
|
/// Set by `inject_middleware_auth` (Django, Flask, FastAPI) at
|
|
/// the route-decorator entry point. Default `false` for
|
|
/// in-body checks (`require_membership(user, group_id)`,
|
|
/// `is_admin(user)`, etc.), those still flow through the
|
|
/// per-subject coverage logic so a check on
|
|
/// `community.creator_id` doesn't blanket-suppress every other
|
|
/// subject in the unit.
|
|
pub is_route_level: bool,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct SensitiveOperation {
|
|
pub kind: OperationKind,
|
|
/// Sink classification. `None` means the operation was recorded
|
|
/// for taxonomy completeness but does not match any known resource
|
|
/// class, defensive, and currently unused.
|
|
pub sink_class: Option<SinkClass>,
|
|
pub callee: String,
|
|
pub subjects: Vec<ValueRef>,
|
|
pub span: (usize, usize),
|
|
pub line: usize,
|
|
pub text: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct AnalysisUnit {
|
|
pub kind: AnalysisUnitKind,
|
|
pub name: Option<String>,
|
|
pub span: (usize, usize),
|
|
pub params: Vec<String>,
|
|
pub context_inputs: Vec<ValueRef>,
|
|
pub call_sites: Vec<CallSite>,
|
|
pub auth_checks: Vec<AuthCheck>,
|
|
pub operations: Vec<SensitiveOperation>,
|
|
pub value_refs: Vec<ValueRef>,
|
|
pub condition_texts: Vec<String>,
|
|
pub line: usize,
|
|
/// Map from local variable name to the row binding it was read from.
|
|
/// Populated when the extractor sees `let V = ROW.method(..)` or
|
|
/// `let V = ROW.field`. Used by `auth_check_covers_subject` so a
|
|
/// row-level ownership-equality check on the row implicitly covers
|
|
/// downstream uses of fields read from the same row.
|
|
pub row_field_vars: HashMap<String, String>,
|
|
/// Map from local variable name to the full member-chain expression
|
|
/// it was bound from (`let community_id = req.community_id` →
|
|
/// `community_id → "req.community_id"`). Distinct from
|
|
/// `row_field_vars`, which records only the receiver (loses the
|
|
/// field name). Powers the row-population reverse-walk's local-
|
|
/// alias case: when a sink subject is a plain identifier, the
|
|
/// reverse walk consults this map to also accept rows whose
|
|
/// population args contain the aliased chain.
|
|
pub var_alias_chain: HashMap<String, String>,
|
|
/// Per row-binding metadata: the `let ROW = CALL(..)` declaration
|
|
/// line and the value-refs appearing in the call's arguments.
|
|
/// Populated for every `let V = call(..)` shape. Powers the
|
|
/// "fetch-then-authorize" exemption in `checks.rs`: if a row-fetch
|
|
/// operation produces variable `V` and SOME auth check elsewhere
|
|
/// in the unit names `V`, the row-fetch operation is considered
|
|
/// authorized, even though the check appears textually after the
|
|
/// fetch. This is the standard idiom in row-level authz code:
|
|
/// fetch the row first to extract the resource id, then call
|
|
/// `check_<resource>_<role>(&user, &row, ...)` to authorize it.
|
|
pub row_population_data: HashMap<String, (usize, Vec<ValueRef>)>,
|
|
/// Variables bound to an authenticated-user value. Populated from
|
|
/// `let V = require_auth(..).await?` (or any call matching the
|
|
/// configured login-guard / authorization-check names) and from
|
|
/// typed route-handler parameters (`CurrentUser`, `AuthUser`, …).
|
|
/// Consulted by `is_actor_context_subject` so `V.id`-shaped subjects
|
|
/// are treated as the caller's own id, not as a scoped foreign id.
|
|
pub self_actor_vars: HashSet<String>,
|
|
/// Variables holding the authenticated actor's identifier (transitive
|
|
/// copies of `V.id` / `V.user_id` / `V.uid` / `V.userId` for some
|
|
/// `V ∈ self_actor_vars`). Populated when the extractor sees
|
|
/// `let X = V.id` or `let X = (V.id as ..).into()` / `V.id.into()`
|
|
/// shapes, anywhere a route-handler reduces the authenticated
|
|
/// principal to a scalar id and reuses it as a SQL parameter.
|
|
/// Consulted by `is_actor_context_subject` so subjects whose `name`
|
|
/// is in this set count as actor context, not foreign scoped IDs.
|
|
pub self_actor_id_vars: HashSet<String>,
|
|
/// Local variables bound (directly or transitively) to a SQL query
|
|
/// whose literal text classifies as authorization-gated by
|
|
/// `sql_semantics::classify_sql_query`. Includes:
|
|
/// * the `let X = db.prepare(LIT)…` result var,
|
|
/// * the loop var of `for ROW in X`,
|
|
/// * column-binding vars `let Y = ROW.get(..)` whose receiver is
|
|
/// itself in this set.
|
|
///
|
|
/// `auth_check_covers_subject` walks `row_field_vars` transitively
|
|
/// and treats a subject as covered when the chain terminates in
|
|
/// one of these names.
|
|
pub authorized_sql_vars: HashSet<String>,
|
|
/// Local variables bound (by `let`, `:=`, `var`, `const`) to a
|
|
/// pure literal, string, integer, float, or boolean. These are
|
|
/// developer-chosen constants and cannot be user-controlled, so
|
|
/// they must never trip `<lang>.auth.missing_ownership_check`
|
|
/// even when the variable name passes `is_id_like`. Closes the
|
|
/// gin/context_test.go FP where `id := "id"` triggered the rule.
|
|
pub const_bound_vars: HashSet<String>,
|
|
/// Function parameter names whose static type maps to a
|
|
/// payload-incompatible scalar ([`crate::ssa::type_facts::TypeKind::Int`]
|
|
/// or [`crate::ssa::type_facts::TypeKind::Bool`]). Populated
|
|
/// per-file by [`super::apply_typed_bounded_params`] using the
|
|
/// SSA-derived `VarTypes` map. Consulted by
|
|
/// `is_typed_bounded_subject` so parameters like Spring `Long
|
|
/// userId`, Axum `Path<i64>`, or FastAPI `user_id: int` are not
|
|
/// classified as scoped-identifier subjects even when their name
|
|
/// passes `is_id_like`, the framework guarantees the value is a
|
|
/// number that cannot carry a SQL/file/shell payload.
|
|
pub typed_bounded_vars: HashSet<String>,
|
|
/// per-DTO-extractor parameter, the field names whose
|
|
/// declared type is a payload-incompatible scalar. Map key is the
|
|
/// parameter name (e.g. `dto`), value is the list of field names
|
|
/// (e.g. `["age", "count"]`). Populated by
|
|
/// [`super::apply_typed_bounded_params`] only when the parameter
|
|
/// itself was recognised as a typed extractor, bare parameters
|
|
/// with no framework gate never lift their fields.
|
|
pub typed_bounded_dto_fields: HashMap<String, Vec<String>>,
|
|
/// Per-unit dynamic session-base text set, supplementing the
|
|
/// hard-coded list in `is_self_scoped_session_base`. Populated by
|
|
/// the extractor when a parameter's static type signals a known
|
|
/// auth-context shape, e.g. TRPC's `Options { ctx: { user:
|
|
/// NonNullable<TrpcSessionUser> } }` adds `<localCtx>.user` so
|
|
/// downstream `ctx.user.id` accesses count as actor context. Each
|
|
/// entry is the dotted base text (e.g. `"ctx.user"`,
|
|
/// `"opts.ctx.user"`) that should match a subject's `base` when
|
|
/// the subject's `field` is an id-like field name. Distinct from
|
|
/// `self_actor_vars` (single-segment locals) because TRPC
|
|
/// destructures route through a base chain, not a top-level
|
|
/// binding.
|
|
pub self_scoped_session_bases: HashSet<String>,
|
|
}
|
|
|
|
/// Per-function summary of which positional parameters are
|
|
/// auth-checked inside the function body. When a caller invokes this
|
|
/// function with `subject` at position K, and the summary says param
|
|
/// K has an auth check of kind `kind`, the caller's subject is
|
|
/// considered covered as if it were checked at the call site.
|
|
///
|
|
/// Serialises as a `Vec<(usize, AuthCheckKind)>` so same-shape on-disk
|
|
/// rows survive across HashMap iteration-order changes; the in-memory
|
|
/// type stays a HashMap for point-lookup efficiency.
|
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
pub struct AuthCheckSummary {
|
|
#[serde(
|
|
serialize_with = "serialize_param_auth_kinds",
|
|
deserialize_with = "deserialize_param_auth_kinds"
|
|
)]
|
|
pub param_auth_kinds: HashMap<usize, AuthCheckKind>,
|
|
}
|
|
|
|
fn serialize_param_auth_kinds<S>(
|
|
map: &HashMap<usize, AuthCheckKind>,
|
|
serializer: S,
|
|
) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: serde::Serializer,
|
|
{
|
|
use serde::ser::SerializeSeq;
|
|
let mut entries: Vec<(usize, AuthCheckKind)> =
|
|
map.iter().map(|(idx, kind)| (*idx, *kind)).collect();
|
|
entries.sort_by_key(|(idx, _)| *idx);
|
|
let mut seq = serializer.serialize_seq(Some(entries.len()))?;
|
|
for entry in entries {
|
|
seq.serialize_element(&entry)?;
|
|
}
|
|
seq.end()
|
|
}
|
|
|
|
fn deserialize_param_auth_kinds<'de, D>(
|
|
deserializer: D,
|
|
) -> Result<HashMap<usize, AuthCheckKind>, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
let entries: Vec<(usize, AuthCheckKind)> = Vec::deserialize(deserializer)?;
|
|
Ok(entries.into_iter().collect())
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct RouteRegistration {
|
|
pub framework: Framework,
|
|
pub method: HttpMethod,
|
|
pub path: String,
|
|
pub middleware: Vec<String>,
|
|
pub handler_span: (usize, usize),
|
|
pub handler_params: Vec<String>,
|
|
pub file: PathBuf,
|
|
pub line: usize,
|
|
pub unit_idx: usize,
|
|
pub middleware_calls: Vec<CallSite>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Default)]
|
|
pub struct AuthorizationModel {
|
|
pub routes: Vec<RouteRegistration>,
|
|
pub units: Vec<AnalysisUnit>,
|
|
}
|
|
|
|
impl AuthorizationModel {
|
|
pub fn extend(&mut self, other: AuthorizationModel) {
|
|
let unit_offset = self.units.len();
|
|
self.units.extend(other.units);
|
|
self.routes
|
|
.extend(other.routes.into_iter().map(|mut route| {
|
|
route.unit_idx += unit_offset;
|
|
route
|
|
}));
|
|
}
|
|
}
|