From 200fcbb21506826e0d7d7be8842310ba3fdded99 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Mon, 1 Jun 2026 11:28:00 +0200 Subject: [PATCH] Surface policy-engine errors from stored-query invoke The invoke handler mapped every authorize_request failure to 404 ('stored query not found'), which collapsed the authorization decision (deny -> 403) together with operational failures (no actor -> 401, Cedar evaluation error -> 500). A real policy-engine 500 was hidden as a missing query. Separate the two concerns instead of sniffing the masked status. Extract authorize() returning an Authz { Allowed, Denied(msg) } decision and reserve Err for operational failures only; authorize_request becomes a thin wrapper that maps Denied -> 403, so the 16 deny-as-403 callers are unchanged. The invoke handler now matches the decision directly: a denial stays 404 (deny == missing, so the catalog can't be probed without the grant), while a 401/500 propagates with its true status. 500 is now a reachable outcome on POST /queries/{name}; document it in the endpoint responses and regenerate openapi.json. --- crates/omnigraph-server/src/lib.rs | 140 ++++++++++++++++++++++++++--- openapi.json | 10 +++ 2 files changed, 137 insertions(+), 13 deletions(-) diff --git a/crates/omnigraph-server/src/lib.rs b/crates/omnigraph-server/src/lib.rs index ce62c52..64e7570 100644 --- a/crates/omnigraph-server/src/lib.rs +++ b/crates/omnigraph-server/src/lib.rs @@ -1663,7 +1663,21 @@ fn log_policy_decision(actor_id: &str, request: &PolicyRequest, decision: &Polic ); } -/// HTTP-layer Cedar policy gate. Two sources of the policy engine: +/// The allow/deny **decision** an authorization check produces, kept +/// separate from the operational failures (`Err`) that can occur while +/// computing it. [`authorize_request`] collapses `Denied` to a 403; a caller +/// that needs to remap a denial without also remapping operational failures +/// (the stored-query invoke handler hides a denial as a 404) matches on this +/// directly, so a real 401 (missing bearer) or 500 (policy-evaluation error) +/// keeps its true status instead of being masked as the denial's response. +enum Authz { + Allowed, + Denied(String), +} + +/// HTTP-layer Cedar policy gate, returning the allow/deny [`Authz`] decision +/// and reserving `Err` for operational failures (401 missing bearer, 500 +/// policy-evaluation error). Two sources of the policy engine: /// * Per-graph handler — passes `handle.policy.as_deref()` so the /// graph's Cedar rules govern read/change/branch_*/schema_apply. /// * Management handler — passes `state.server_policy.as_deref()` so @@ -1677,11 +1691,11 @@ fn log_policy_decision(actor_id: &str, request: &PolicyRequest, decision: &Polic /// dropped from the type), so handlers cannot smuggle it through the /// request. See `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers` /// at `tests/server.rs`. -fn authorize_request( +fn authorize( actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest, -) -> std::result::Result<(), ApiError> { +) -> std::result::Result { let Some(engine) = policy else { // No PolicyEngine installed. Three runtime states can reach this: // @@ -1708,21 +1722,23 @@ fn authorize_request( // operator's only path to enabling it is configuring an // explicit `server.policy.file` in omnigraph.yaml. if request.action.resource_kind() == PolicyResourceKind::Server { - return Err(ApiError::forbidden( + return Ok(Authz::Denied( "server-scoped actions require an explicit `server.policy.file` \ configured in omnigraph.yaml — the management surface is closed \ by default in every runtime state, including --unauthenticated, \ - so that server topology is never exposed without operator opt-in.", + so that server topology is never exposed without operator opt-in." + .to_string(), )); } if actor.is_some() && request.action != PolicyAction::Read { - return Err(ApiError::forbidden( + return Ok(Authz::Denied( "server runs in default-deny mode (bearer tokens configured but no \ policy file). Only `read` actions are permitted; configure \ - `policy.file` in omnigraph.yaml to enable other actions.", + `policy.file` in omnigraph.yaml to enable other actions." + .to_string(), )); } - return Ok(()); + return Ok(Authz::Allowed); }; let Some(actor) = actor else { return Err(ApiError::unauthorized("missing bearer token")); @@ -1744,9 +1760,26 @@ fn authorize_request( .map_err(|err| ApiError::internal(format!("policy: {err}")))?; log_policy_decision(actor_id, &request, &decision); if decision.allowed { - Ok(()) + Ok(Authz::Allowed) } else { - Err(ApiError::forbidden(decision.message)) + Ok(Authz::Denied(decision.message)) + } +} + +/// Thin wrapper over [`authorize`] for the handlers that treat any denial as a +/// 403: a denial becomes `ApiError::forbidden`, and operational failures +/// (401 missing bearer, 500 policy-evaluation error) propagate unchanged. The +/// stored-query invoke handler does **not** use this — it consumes the +/// [`Authz`] decision directly to hide a denial as a 404 while letting an +/// operational failure keep its true status. +fn authorize_request( + actor: Option<&ResolvedActor>, + policy: Option<&PolicyEngine>, + request: PolicyRequest, +) -> std::result::Result<(), ApiError> { + match authorize(actor, policy, request)? { + Authz::Allowed => Ok(()), + Authz::Denied(message) => Err(ApiError::forbidden(message)), } } @@ -2206,6 +2239,7 @@ struct QueryNamePath { (status = 404, description = "Unknown stored query, or `invoke_query` denied — indistinguishable to a caller without the grant", body = ErrorOutput), (status = 409, description = "Merge conflict", body = ErrorOutput), (status = 429, description = "Per-actor admission cap exceeded; honor `Retry-After` header", body = ErrorOutput), + (status = 500, description = "Policy evaluation error (a denial is reported as 404, not 500)", body = ErrorOutput), ), security(("bearer_token" = [])), )] @@ -2235,7 +2269,11 @@ async fn server_invoke_query( let actor_ref = actor.as_ref().map(|Extension(actor)| actor); // Boundary gate (authentication already ran in `require_bearer_auth`). - authorize_request( + // A denial is hidden as 404 (deny == missing, so the catalog can't be + // probed without the grant), but operational failures (401 missing bearer, + // 500 policy-evaluation error) propagate with their true status via `?` + // rather than being masked as a missing query. + match authorize( actor_ref, handle.policy.as_deref(), PolicyRequest { @@ -2247,8 +2285,10 @@ async fn server_invoke_query( branch: None, target_branch: None, }, - ) - .map_err(|_| ApiError::not_found(NOT_FOUND))?; + )? { + Authz::Allowed => {} + Authz::Denied(_) => return Err(ApiError::not_found(NOT_FOUND)), + } // Resolve against the per-graph registry (same 404 on a miss). let stored = handle @@ -3009,6 +3049,80 @@ mod tests { use std::fs; use tempfile::tempdir; + /// `authorize` returns the allow/deny **decision** (`Authz`) and reserves + /// `Err` for operational failures, so the invoke handler can hide a denial + /// as 404 without also masking a 401/500. Pins each outcome. + #[test] + fn authorize_splits_decision_from_operational_error() { + use super::{Authz, PolicyAction, PolicyCompiler, PolicyConfig, PolicyRequest, ResolvedActor, authorize}; + use std::sync::Arc; + + fn req(action: PolicyAction) -> PolicyRequest { + PolicyRequest { action, branch: None, target_branch: None } + } + let actor = ResolvedActor::cluster_static(Arc::from("act-alice")); + + // --- No policy engine installed (open / default-deny modes) --- + // A server-scoped action is denied in every no-policy state. + assert!(matches!( + authorize(Some(&actor), None, req(PolicyAction::GraphList)).unwrap(), + Authz::Denied(_) + )); + // Authenticated actor + a non-read per-graph action → default-deny. + assert!(matches!( + authorize(Some(&actor), None, req(PolicyAction::Change)).unwrap(), + Authz::Denied(_) + )); + // `read` is the one per-graph action permitted without a policy. + assert!(matches!( + authorize(Some(&actor), None, req(PolicyAction::Read)).unwrap(), + Authz::Allowed + )); + // Open mode (no actor, no policy) → allowed. + assert!(matches!( + authorize(None, None, req(PolicyAction::Read)).unwrap(), + Authz::Allowed + )); + + // --- Policy engine installed --- + let policy: PolicyConfig = serde_yaml::from_str( + "version: 1\n\ + groups:\n team: [act-alice]\n\ + rules:\n - id: team-read\n allow:\n actors: { group: team }\n actions: [read]\n branch_scope: any\n", + ) + .unwrap(); + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); + + // A matched allow rule → Allowed. + assert!(matches!( + authorize( + Some(&actor), + Some(&engine), + PolicyRequest { action: PolicyAction::Read, branch: Some("main".to_string()), target_branch: None }, + ) + .unwrap(), + Authz::Allowed + )); + // Known actor, no matching allow rule → Denied, carrying the decision message. + match authorize( + Some(&actor), + Some(&engine), + PolicyRequest { action: PolicyAction::Change, branch: Some("main".to_string()), target_branch: None }, + ) + .unwrap() + { + Authz::Denied(message) => assert!(!message.is_empty(), "a deny carries its decision message"), + Authz::Allowed => panic!("change must be denied: only read is allowed"), + } + // Policy installed but no actor → operational failure (`Err`), NOT a + // decision. This is the split that keeps a 401/500 from being masked + // as the denial's response in the invoke handler. + assert!( + authorize(None, Some(&engine), req(PolicyAction::Read)).is_err(), + "a missing actor with a policy installed is an operational error, not a deny" + ); + } + #[test] fn hash_bearer_token_produces_32_byte_output() { let hash = hash_bearer_token("any-token"); diff --git a/openapi.json b/openapi.json index d71a89e..f794904 100644 --- a/openapi.json +++ b/openapi.json @@ -975,6 +975,16 @@ } } } + }, + "500": { + "description": "Policy evaluation error (a denial is reported as 404, not 500)", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } } }, "security": [