mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-12 01:45:14 +02:00
mr-668: centralize policy-requires-tokens check in the runtime classifier
Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380 when policy was installed and no tokens. Multi-mode `open_multi_graph_state` had no equivalent: the server started, every request 401'd because no token could ever match, and the operator spent time debugging a misconfiguration the single-mode path would have caught at startup. The doc/code contradiction made the gap easy to miss: the `ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not was "unusual but valid — every request fails 401 without a bearer, which is effectively 'locked'." The single-mode bail contradicted that. In practice, silent-401-on-every-request is bug-shaped, not feature-shaped (operators wanting deny-all should configure tokens plus a deny-all Cedar rule to get meaningful 403s with policy-decision logging). Symptomatic fix: add a copy of the bail to multi-mode. Two copies that can drift again the next time a startup path is added. Correct-by-design fix: hoist the check into `classify_server_runtime_state` so both modes get the same enforcement from one source of truth. The classifier becomes the single source of truth for "should we start?" and adding a startup invariant there is now the natural extension point for any future mode. Classifier matrix is now complete: | has_tokens | has_policy | allow_unauthenticated | Result | |---|---|---|---| | F | F | F | bail (existing) | | F | F | T | Open (existing) | | T | F | * | DefaultDeny (existing) | | F | T | * | bail (NEW — closes the gap) | | T | T | * | PolicyEnabled (existing) | Changes: * `classify_server_runtime_state` (lib.rs:870-890) gains the `(false, true, _) => bail!(…)` arm with a clear message naming the failure mode and the two valid resolutions. * `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its redundant local bail — the classifier rejected the invalid case before construction was reached. * `ServerRuntimeState::PolicyEnabled` docstring is rewritten: drops the "(unusual but valid)" carve-out and states plainly that PolicyEnabled requires tokens. Names the explicit alternative (tokens + deny-all Cedar rule) for operators who want the all-requests-denied behavior. * `classify_policy_enabled_always_wins` test is renamed to `classify_policy_enabled_requires_tokens` and the now-invalid `(false, true, _)` assertion is removed (covered by the new rejection test). * New `classify_policy_without_tokens_is_rejected` test covers the new arm. * New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode` integration test pins the multi-mode propagation path — symmetric with the existing single-mode `serve_refuses_to_start_in_state_1_without_unauthenticated`. Closes the "single mode and multi mode startup branches can drift on safety invariants" class. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8f558e6ab9
commit
4120448431
1 changed files with 111 additions and 21 deletions
|
|
@ -371,15 +371,17 @@ impl AppState {
|
|||
bearer_tokens: Vec<(String, String)>,
|
||||
policy_file: Option<&PathBuf>,
|
||||
) -> Result<Self> {
|
||||
// The "policy requires tokens" invariant is enforced once by
|
||||
// `classify_server_runtime_state` in `serve()`, before either
|
||||
// single-mode or multi-mode construction is reached. By the
|
||||
// time we get here, the (policy, no-tokens) combination has
|
||||
// already been rejected — no second bail needed.
|
||||
let uri = uri.into();
|
||||
let db = Omnigraph::open(&uri).await?;
|
||||
let policy_engine = match policy_file {
|
||||
Some(path) => Some(PolicyEngine::load_graph(path, &uri)?),
|
||||
None => None,
|
||||
};
|
||||
if policy_engine.is_some() && bearer_tokens.is_empty() {
|
||||
bail!("policy requires at least one configured bearer token actor");
|
||||
}
|
||||
Ok(Self::new_with_bearer_tokens_and_policy(
|
||||
uri,
|
||||
db,
|
||||
|
|
@ -854,10 +856,14 @@ pub fn server_config_is_multi(config: &ServerConfig) -> bool {
|
|||
/// server requires a valid bearer token; once authenticated, every
|
||||
/// action except `Read` is denied with 403. Closes the "tokens but
|
||||
/// forgot the policy file" trap.
|
||||
/// * **PolicyEnabled** — policy file configured. Cedar evaluates every
|
||||
/// authenticated request. Tokens may also be configured (typical) or
|
||||
/// not (unusual but valid — every request fails 401 without a
|
||||
/// bearer, which is effectively "locked").
|
||||
/// * **PolicyEnabled** — policy file configured and at least one
|
||||
/// bearer token configured. Cedar evaluates every authenticated
|
||||
/// request. Policy without tokens is rejected at startup —
|
||||
/// such a server would 401 every request, which is bug-shaped
|
||||
/// rather than feature-shaped (operators wanting "deny all
|
||||
/// unauthenticated traffic" should configure tokens plus a
|
||||
/// deny-all policy to get meaningful 403s with policy-decision
|
||||
/// logging instead).
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
||||
pub enum ServerRuntimeState {
|
||||
Open,
|
||||
|
|
@ -866,8 +872,15 @@ pub enum ServerRuntimeState {
|
|||
}
|
||||
|
||||
/// Compute the [`ServerRuntimeState`] from the configured inputs.
|
||||
/// Pulled out as a pure function so the 3-state matrix is unit-testable
|
||||
/// Pulled out as a pure function so the matrix is unit-testable
|
||||
/// without standing up the full server.
|
||||
///
|
||||
/// The classifier is the **single source of truth** for "should we
|
||||
/// start?" — both `serve()`'s single-mode and multi-mode branches
|
||||
/// call this before constructing their `AppState`. Adding a startup
|
||||
/// invariant here means both modes enforce it automatically; the
|
||||
/// alternative (per-constructor `bail!`) drifts the moment a third
|
||||
/// mode is added.
|
||||
pub fn classify_server_runtime_state(
|
||||
has_tokens: bool,
|
||||
has_policy: bool,
|
||||
|
|
@ -882,7 +895,14 @@ pub fn classify_server_runtime_state(
|
|||
),
|
||||
(false, false, true) => Ok(ServerRuntimeState::Open),
|
||||
(true, false, _) => Ok(ServerRuntimeState::DefaultDeny),
|
||||
(_, true, _) => Ok(ServerRuntimeState::PolicyEnabled),
|
||||
(false, true, _) => bail!(
|
||||
"policy file is configured but no bearer tokens — every request would 401 \
|
||||
because no token can ever match. Configure at least one bearer token (see \
|
||||
docs/user/server.md), or remove the policy file. To deny all unauthenticated \
|
||||
traffic deliberately, configure tokens plus a deny-all Cedar rule — that \
|
||||
produces meaningful 403s with policy-decision logging instead of silent 401s."
|
||||
),
|
||||
(true, true, _) => Ok(ServerRuntimeState::PolicyEnabled),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2377,9 +2397,10 @@ fn server_bearer_tokens_from_env() -> Result<Vec<(String, String)>> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
ServerConfig, ServerConfigMode, ServerRuntimeState, classify_server_runtime_state,
|
||||
hash_bearer_token, load_server_settings, normalize_bearer_token,
|
||||
parse_bearer_tokens_json, serve, server_bearer_tokens_from_env,
|
||||
GraphStartupConfig, ServerConfig, ServerConfigMode, ServerRuntimeState,
|
||||
classify_server_runtime_state, hash_bearer_token, load_server_settings,
|
||||
normalize_bearer_token, parse_bearer_tokens_json, serve,
|
||||
server_bearer_tokens_from_env,
|
||||
};
|
||||
use serial_test::serial;
|
||||
use std::env;
|
||||
|
|
@ -2542,6 +2563,57 @@ server:
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn serve_refuses_to_start_with_policy_but_no_tokens_multi_mode() {
|
||||
// Bug 2 from the bot-review pass: multi-mode startup was missing
|
||||
// the "policy requires tokens" check that single-mode enforces.
|
||||
// After centralizing the check in `classify_server_runtime_state`,
|
||||
// both modes get the same enforcement. This test guards the
|
||||
// multi-mode propagation path.
|
||||
//
|
||||
// Sibling test below pins single mode. Together they pin that
|
||||
// the classifier is called from both branches of `serve()`.
|
||||
let _guard = EnvGuard::set(&[
|
||||
("OMNIGRAPH_SERVER_BEARER_TOKEN", None),
|
||||
("OMNIGRAPH_SERVER_BEARER_TOKENS_FILE", None),
|
||||
("OMNIGRAPH_SERVER_BEARER_TOKENS_JSON", None),
|
||||
("OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET", None),
|
||||
("OMNIGRAPH_UNAUTHENTICATED", None),
|
||||
]);
|
||||
let temp = tempdir().unwrap();
|
||||
// The classifier reads `has_policy_configured` from the config
|
||||
// shape (does the Option contain a path?), not from file
|
||||
// existence, so we can hand it a path without writing a real
|
||||
// policy file — the bail fires before policy load.
|
||||
let policy_path = temp.path().join("server-policy.yaml");
|
||||
let config = ServerConfig {
|
||||
mode: ServerConfigMode::Multi {
|
||||
graphs: vec![GraphStartupConfig {
|
||||
graph_id: "alpha".to_string(),
|
||||
uri: temp
|
||||
.path()
|
||||
.join("alpha.omni")
|
||||
.to_string_lossy()
|
||||
.into_owned(),
|
||||
policy_file: None,
|
||||
}],
|
||||
config_path: temp.path().join("omnigraph.yaml"),
|
||||
server_policy_file: Some(policy_path),
|
||||
},
|
||||
bind: "127.0.0.1:0".to_string(),
|
||||
allow_unauthenticated: false,
|
||||
};
|
||||
let result = serve(config).await;
|
||||
let err = result
|
||||
.expect_err("serve should refuse to start in multi mode with policy but no tokens");
|
||||
let msg = format!("{:?}", err);
|
||||
assert!(
|
||||
msg.contains("policy file is configured but no bearer tokens"),
|
||||
"expected policy-without-tokens rejection in multi mode, got: {msg}",
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn serve_refuses_to_start_in_state_1_without_unauthenticated() {
|
||||
|
|
@ -2657,25 +2729,43 @@ server:
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn classify_policy_enabled_always_wins() {
|
||||
// State 3: any setup with a policy file → PolicyEnabled. The
|
||||
// flag doesn't matter and tokens-or-not doesn't matter (no
|
||||
// tokens + policy is unusual but valid — every request fails
|
||||
// 401 without a bearer, which is effectively "locked").
|
||||
fn classify_policy_enabled_requires_tokens() {
|
||||
// State 3: tokens + policy → PolicyEnabled, regardless of the
|
||||
// `allow_unauthenticated` flag (Cedar evaluates the bearer,
|
||||
// the flag is moot once tokens exist).
|
||||
assert_eq!(
|
||||
classify_server_runtime_state(true, true, false).unwrap(),
|
||||
ServerRuntimeState::PolicyEnabled
|
||||
);
|
||||
assert_eq!(
|
||||
classify_server_runtime_state(false, true, false).unwrap(),
|
||||
ServerRuntimeState::PolicyEnabled
|
||||
);
|
||||
assert_eq!(
|
||||
classify_server_runtime_state(true, true, true).unwrap(),
|
||||
ServerRuntimeState::PolicyEnabled
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_policy_without_tokens_is_rejected() {
|
||||
// Closes the "policy installed but no tokens → silent 401 on
|
||||
// every request" footgun. The same shape that single-mode
|
||||
// `open_with_bearer_tokens_and_policy` used to bail on
|
||||
// privately is now rejected by the classifier so both single
|
||||
// and multi mode get the same enforcement from one source of
|
||||
// truth.
|
||||
for allow_unauthenticated in [false, true] {
|
||||
let err = classify_server_runtime_state(false, true, allow_unauthenticated)
|
||||
.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("policy file is configured but no bearer tokens"),
|
||||
"expected policy-without-tokens rejection message; got: {msg}"
|
||||
);
|
||||
assert!(
|
||||
msg.contains("every request would 401"),
|
||||
"rejection message must name the failure mode; got: {msg}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_bearer_token_trims_and_filters_blank_values() {
|
||||
assert_eq!(normalize_bearer_token(None), None);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue