mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ragnor Comerford 2026-05-25 18:51:49 +02:00
parent cc2412dc65
commit 1ed3eea26d
No known key found for this signature in database
5 changed files with 568 additions and 0 deletions

View file

@ -38,6 +38,7 @@ sha2 = { workspace = true }
subtle = { workspace = true }
async-trait = { workspace = true }
dashmap = "6"
regex = { workspace = true }
aws-config = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio", "credentials-process", "sso"] }
aws-sdk-secretsmanager = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio"] }

View file

@ -0,0 +1,250 @@
//! `GraphId` — registry-level identity for a graph in multi-graph mode (MR-668).
//!
//! Validation lives in `GraphId::try_from(String)`; nothing else can construct a
//! `GraphId`. The newtype prevents `graph_id` strings from escaping the storage
//! root via path traversal or colliding with engine-reserved filenames.
//!
//! Regex: `^[a-zA-Z0-9-]{1,64}$`
//!
//! The engine reserves every filename starting with `_` at the graph root
//! (`_schema.pg`, `_schema.ir.json`, `__schema_state.json`, `__manifest/`,
//! `__recovery/`, etc.). Disallowing leading underscores at the regex level
//! means a `graph_id` can never collide with engine-managed files. Path
//! traversal (`..`, `/`) is unrepresentable.
//!
//! `policies` is additionally reserved as a future-proofing measure for a
//! potential `/graphs/policies/...` cluster route.
use std::fmt;
use std::sync::OnceLock;
use color_eyre::eyre::{Result, bail};
use regex::Regex;
use serde::{Deserialize, Serialize};
/// Maximum length of a `GraphId` value.
pub const GRAPH_ID_MAX_LEN: usize = 64;
/// Validated registry-level identity for a graph.
///
/// Constructed only via `GraphId::try_from(String)` or
/// `GraphId::try_from(&str)`. The inner `String` is private to enforce the
/// validation contract.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)]
#[serde(transparent)]
pub struct GraphId(String);
impl GraphId {
/// View the validated identifier as `&str`.
pub fn as_str(&self) -> &str {
&self.0
}
}
impl fmt::Display for GraphId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
impl AsRef<str> for GraphId {
fn as_ref(&self) -> &str {
&self.0
}
}
impl TryFrom<String> for GraphId {
type Error = color_eyre::eyre::Error;
fn try_from(value: String) -> Result<Self> {
validate(value.as_str())?;
Ok(Self(value))
}
}
impl TryFrom<&str> for GraphId {
type Error = color_eyre::eyre::Error;
fn try_from(value: &str) -> Result<Self> {
validate(value)?;
Ok(Self(value.to_string()))
}
}
// Custom Deserialize that re-runs validation. Otherwise a serde-derived impl
// would accept any String, defeating the newtype's guarantee.
impl<'de> Deserialize<'de> for GraphId {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Self::try_from(s).map_err(serde::de::Error::custom)
}
}
fn validate(value: &str) -> Result<()> {
if value.is_empty() {
bail!("graph_id must not be empty");
}
if value.len() > GRAPH_ID_MAX_LEN {
bail!(
"graph_id '{}' is {} chars; max {}",
value,
value.len(),
GRAPH_ID_MAX_LEN
);
}
if !regex().is_match(value) {
bail!(
"graph_id '{}' must match ^[a-zA-Z0-9-]{{1,64}}$ — \
no underscores (engine reserves them), no path separators, no unicode",
value
);
}
if is_reserved(value) {
bail!(
"graph_id '{}' is reserved (would collide with engine-managed names or \
future cluster routes)",
value
);
}
Ok(())
}
fn regex() -> &'static Regex {
static RE: OnceLock<Regex> = OnceLock::new();
RE.get_or_init(|| Regex::new(r"^[a-zA-Z0-9-]{1,64}$").expect("regex literal"))
}
/// Reserved `graph_id` values that the regex alone wouldn't catch.
/// The leading-underscore rule already excludes every engine-managed
/// filename pattern (`_schema.pg`, `__manifest`, etc.); this list covers
/// route-prefix collisions and standard endpoint names.
fn is_reserved(value: &str) -> bool {
matches!(
value,
"policies" | "healthz" | "openapi" | "openapi.json" | "graphs"
)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn accepts_simple_alphanumeric_ids() {
for ok in ["alpha", "beta", "tenant-001", "A", "g", "X-9-z"] {
GraphId::try_from(ok).unwrap_or_else(|_| panic!("expected accept: {ok}"));
}
}
#[test]
fn accepts_64_char_max() {
let max = "a".repeat(64);
GraphId::try_from(max.as_str()).unwrap();
}
#[test]
fn rejects_empty() {
assert!(GraphId::try_from("").is_err());
}
#[test]
fn rejects_over_64_chars() {
let too_long = "a".repeat(65);
assert!(GraphId::try_from(too_long.as_str()).is_err());
}
#[test]
fn rejects_leading_underscore() {
// Engine reserves every `_*` filename at the graph root.
assert!(GraphId::try_from("_internal").is_err());
assert!(GraphId::try_from("__manifest").is_err());
}
#[test]
fn rejects_underscores_anywhere() {
// The regex doesn't allow `_` at all — keeps the disallow-leading-`_`
// rule cheap to enforce. If the rule changes later, we'd need to
// distinguish "starts with `_`" from "contains `_`".
assert!(GraphId::try_from("tenant_alpha").is_err());
}
#[test]
fn rejects_path_separators() {
for bad in ["alpha/beta", "../etc", "..", "alpha\\beta"] {
assert!(
GraphId::try_from(bad).is_err(),
"expected reject: {bad}"
);
}
}
#[test]
fn rejects_unicode() {
assert!(GraphId::try_from("αlpha").is_err());
assert!(GraphId::try_from("graph-✨").is_err());
}
#[test]
fn rejects_whitespace() {
assert!(GraphId::try_from(" alpha").is_err());
assert!(GraphId::try_from("alpha ").is_err());
assert!(GraphId::try_from("alpha beta").is_err());
assert!(GraphId::try_from("\talpha").is_err());
}
#[test]
fn rejects_dots() {
// Reserves the "extension"-shaped ids that look like filenames.
assert!(GraphId::try_from(".").is_err());
assert!(GraphId::try_from("alpha.beta").is_err());
assert!(GraphId::try_from("alpha.").is_err());
}
#[test]
fn rejects_reserved_route_names() {
for bad in ["policies", "healthz", "openapi", "openapi.json", "graphs"] {
assert!(
GraphId::try_from(bad).is_err(),
"expected reject (reserved): {bad}"
);
}
}
#[test]
fn display_returns_inner_string() {
let id = GraphId::try_from("alpha").unwrap();
assert_eq!(format!("{id}"), "alpha");
assert_eq!(id.as_str(), "alpha");
}
#[test]
fn serialize_round_trips_via_json() {
let id = GraphId::try_from("tenant-007").unwrap();
let json = serde_json::to_string(&id).unwrap();
assert_eq!(json, "\"tenant-007\"");
let back: GraphId = serde_json::from_str(&json).unwrap();
assert_eq!(back, id);
}
#[test]
fn deserialize_runs_validation() {
// Hostile payload must not produce a GraphId.
let bad = serde_json::from_str::<GraphId>("\"_evil\"");
assert!(bad.is_err());
let bad = serde_json::from_str::<GraphId>("\"../../etc\"");
assert!(bad.is_err());
}
#[test]
fn hash_equality_works_for_use_as_map_key() {
use std::collections::HashMap;
let a = GraphId::try_from("alpha").unwrap();
let b = GraphId::try_from("alpha").unwrap();
let mut m = HashMap::new();
m.insert(a, 1u32);
assert_eq!(m.get(&b), Some(&1));
}
}

View file

@ -0,0 +1,311 @@
//! Identity types for the multi-graph server (MR-668) + forward-compatible
//! shapes for Cloud mode (RFC 0003) and OAuth provider (RFC 0004).
//!
//! Per decision 13 in the implementation plan: ship the type shapes that
//! Cloud mode will consume, without committing to any trait shape
//! (`TokenVerifier` stays draft in RFC 0001). Every Cluster-mode call site
//! constructs these types with their Cluster-mode-specific values:
//!
//! - `tenant_id: None` (Cloud will set `Some(...)` from the OAuth `org_id` claim)
//! - `scopes: vec![Scope::Full]` (Cloud will populate from the OAuth `scope` claim)
//! - `source: AuthSource::Static` (Cloud / OIDC will set `AuthSource::Oidc`)
//!
//! The enums use `#[non_exhaustive]` so RFC 0001 step 1 / RFC 0004 can
//! add variants without breaking exhaustive matches in callers.
use std::fmt;
use std::sync::Arc;
use std::sync::OnceLock;
use color_eyre::eyre::{Result, bail};
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::graph_id::GraphId;
/// Maximum length of a `TenantId` value.
pub const TENANT_ID_MAX_LEN: usize = 64;
/// Cloud-mode tenant identifier. Validated with the same regex as
/// `GraphId` so the two interchange syntactically.
///
/// `None` in Cluster mode; Cloud mode (RFC 0003) sets `Some(...)` from
/// the OAuth `org_id` claim. Constructed only via `try_from` so callers
/// cannot bypass validation.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)]
#[serde(transparent)]
pub struct TenantId(String);
impl TenantId {
pub fn as_str(&self) -> &str {
&self.0
}
}
impl fmt::Display for TenantId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
impl AsRef<str> for TenantId {
fn as_ref(&self) -> &str {
&self.0
}
}
impl TryFrom<String> for TenantId {
type Error = color_eyre::eyre::Error;
fn try_from(value: String) -> Result<Self> {
validate_tenant_id(value.as_str())?;
Ok(Self(value))
}
}
impl TryFrom<&str> for TenantId {
type Error = color_eyre::eyre::Error;
fn try_from(value: &str) -> Result<Self> {
validate_tenant_id(value)?;
Ok(Self(value.to_string()))
}
}
impl<'de> Deserialize<'de> for TenantId {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Self::try_from(s).map_err(serde::de::Error::custom)
}
}
fn validate_tenant_id(value: &str) -> Result<()> {
if value.is_empty() {
bail!("tenant_id must not be empty");
}
if value.len() > TENANT_ID_MAX_LEN {
bail!(
"tenant_id '{}' is {} chars; max {}",
value,
value.len(),
TENANT_ID_MAX_LEN
);
}
if !tenant_id_regex().is_match(value) {
bail!(
"tenant_id '{}' must match ^[a-zA-Z0-9-]{{1,64}}$",
value
);
}
Ok(())
}
fn tenant_id_regex() -> &'static Regex {
static RE: OnceLock<Regex> = OnceLock::new();
RE.get_or_init(|| Regex::new(r"^[a-zA-Z0-9-]{1,64}$").expect("regex literal"))
}
/// Registry HashMap key. Cluster mode populates `tenant_id: None`;
/// Cloud mode (RFC 0003) populates `tenant_id: Some(...)`.
///
/// The `Option<TenantId>` field is the **single forward-compatibility seam**
/// between Cluster and Cloud modes. Every handler reaches the engine via
/// `state.registry.get(&key)` — the key shape stays stable, so handlers
/// don't get re-touched when Cloud mode lands.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct GraphKey {
pub tenant_id: Option<TenantId>,
pub graph_id: GraphId,
}
impl GraphKey {
/// Cluster-mode constructor (`tenant_id: None`).
pub fn cluster(graph_id: GraphId) -> Self {
Self {
tenant_id: None,
graph_id,
}
}
/// Cloud-mode constructor — reserved for RFC 0003; included here so
/// the seam is visible even though no Cluster-mode code path calls it.
pub fn cloud(tenant_id: TenantId, graph_id: GraphId) -> Self {
Self {
tenant_id: Some(tenant_id),
graph_id,
}
}
}
impl fmt::Display for GraphKey {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.tenant_id {
Some(t) => write!(f, "{}/{}", t, self.graph_id),
None => write!(f, "{}", self.graph_id),
}
}
}
/// Authorization scope. Cluster mode: every authenticated actor gets
/// `Scope::Full`. Cloud mode (RFC 0004) adds OAuth-style scopes via the
/// dashboard-configured `graph:read`, `graph:write`, `graph:admin`,
/// `graph:*` set; those become additional variants here.
///
/// `#[non_exhaustive]` so RFC 0004 can extend without breaking matches.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum Scope {
/// Full access. The Cluster-mode default — every authenticated actor
/// has unrestricted access subject to Cedar policy.
Full,
}
/// How the actor was authenticated. Cluster mode: every actor authenticates
/// via the existing SHA-256 hash compare against a static token set, so
/// `AuthSource::Static`. RFC 0001 step 1 adds `AuthSource::Oidc` when the
/// `OidcJwtVerifier` ships.
///
/// `#[non_exhaustive]` so RFC 0001 can extend without breaking matches.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum AuthSource {
/// Authenticated via the static bearer-token hash table.
Static,
}
/// Server-resolved actor identity. Replaces the previous
/// `AuthenticatedActor(Arc<str>)` from `lib.rs`.
///
/// The fields are populated by `authenticate_bearer_token` after a successful
/// constant-time hash match. **Clients cannot set any of these fields directly**
/// — this is the MR-731 invariant. See `authorize_request` in `lib.rs` for the
/// chokepoint that overwrites any client-supplied actor identity.
///
/// Cluster mode constructs this with `tenant_id: None`, `scopes: vec![Scope::Full]`,
/// `source: AuthSource::Static` via the convenience constructor below.
#[derive(Debug, Clone)]
pub struct ResolvedActor {
pub actor_id: Arc<str>,
pub tenant_id: Option<TenantId>,
pub scopes: Vec<Scope>,
pub source: AuthSource,
}
impl ResolvedActor {
/// Cluster-mode constructor — Static auth, no tenant, Full scope.
/// Used by `authenticate_bearer_token` after a successful hash match.
pub fn cluster_static(actor_id: Arc<str>) -> Self {
Self {
actor_id,
tenant_id: None,
scopes: vec![Scope::Full],
source: AuthSource::Static,
}
}
/// View the actor identifier as `&str`. Stable across the Cluster/Cloud
/// boundary — Cedar always sees this value as the principal.
pub fn actor_id_str(&self) -> &str {
&self.actor_id
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn tenant_id_accepts_simple_values() {
for ok in ["alpha", "tenant-001", "X", "01HZWA0KT0H0V0V0V0V0V0V0V0"] {
TenantId::try_from(ok).unwrap_or_else(|_| panic!("expected accept: {ok}"));
}
}
#[test]
fn tenant_id_rejects_empty_and_over_max() {
assert!(TenantId::try_from("").is_err());
let too_long = "a".repeat(65);
assert!(TenantId::try_from(too_long.as_str()).is_err());
}
#[test]
fn tenant_id_rejects_path_traversal() {
assert!(TenantId::try_from("../etc").is_err());
assert!(TenantId::try_from("alpha/beta").is_err());
}
#[test]
fn tenant_id_deserialize_runs_validation() {
let bad: Result<TenantId, _> = serde_json::from_str("\"../evil\"");
assert!(bad.is_err());
}
#[test]
fn graph_key_cluster_constructor_sets_no_tenant() {
let id = GraphId::try_from("alpha").unwrap();
let key = GraphKey::cluster(id.clone());
assert!(key.tenant_id.is_none());
assert_eq!(key.graph_id, id);
}
#[test]
fn graph_key_cloud_constructor_sets_tenant() {
let tenant = TenantId::try_from("acme").unwrap();
let id = GraphId::try_from("alpha").unwrap();
let key = GraphKey::cloud(tenant.clone(), id.clone());
assert_eq!(key.tenant_id.as_ref(), Some(&tenant));
assert_eq!(key.graph_id, id);
}
#[test]
fn graph_key_displays_with_or_without_tenant() {
let id = GraphId::try_from("alpha").unwrap();
let cluster_key = GraphKey::cluster(id.clone());
assert_eq!(format!("{cluster_key}"), "alpha");
let tenant = TenantId::try_from("acme").unwrap();
let cloud_key = GraphKey::cloud(tenant, id);
assert_eq!(format!("{cloud_key}"), "acme/alpha");
}
#[test]
fn graph_key_is_hashable_for_map_use() {
use std::collections::HashMap;
let a = GraphKey::cluster(GraphId::try_from("alpha").unwrap());
let b = GraphKey::cluster(GraphId::try_from("alpha").unwrap());
let mut m: HashMap<GraphKey, u32> = HashMap::new();
m.insert(a, 1);
assert_eq!(m.get(&b), Some(&1));
}
#[test]
fn graph_key_distinguishes_tenants() {
let id = GraphId::try_from("alpha").unwrap();
let t1 = TenantId::try_from("acme").unwrap();
let t2 = TenantId::try_from("globex").unwrap();
let k1 = GraphKey::cloud(t1, id.clone());
let k2 = GraphKey::cloud(t2, id);
assert_ne!(k1, k2);
}
#[test]
fn resolved_actor_cluster_defaults() {
let actor = ResolvedActor::cluster_static(Arc::<str>::from("act-alice"));
assert_eq!(actor.actor_id_str(), "act-alice");
assert!(actor.tenant_id.is_none());
assert_eq!(actor.scopes, vec![Scope::Full]);
assert_eq!(actor.source, AuthSource::Static);
}
#[test]
fn scope_and_auth_source_are_non_exhaustive() {
// Regression: keep the `#[non_exhaustive]` annotation. If someone
// removes it, this test still passes (matches are still legal); it's
// the cross-crate compile that catches it. Document the contract here.
let _scope = Scope::Full;
let _src = AuthSource::Static;
}
}

View file

@ -1,9 +1,14 @@
pub mod api;
pub mod auth;
pub mod config;
pub mod graph_id;
pub mod identity;
pub mod policy;
pub mod workload;
pub use graph_id::GraphId;
pub use identity::{AuthSource, GraphKey, ResolvedActor, Scope, TenantId};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io;