omnigraph/crates/omnigraph-server/tests/server.rs

use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use axum::Router;
use axum::body::{Body, to_bytes};
use axum::http::header::AUTHORIZATION;
use axum::http::{Method, Request, StatusCode};
use lance::index::DatasetIndexExt;
use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions};
use omnigraph::error::OmniError;
use omnigraph::loader::{LoadMode, load_jsonl};
use omnigraph_policy::{PolicyChecker, PolicyEngine};
use omnigraph_server::api::{
    BranchCreateRequest, BranchMergeRequest, ChangeRequest, ErrorOutput, ExportRequest,
    IngestRequest, QueryRequest, ReadRequest, SchemaApplyRequest, SchemaOutput,
};
use omnigraph_server::{AppState, build_app};
use serde_json::{Value, json};
use serial_test::serial;
use tower::ServiceExt;

const MUTATION_QUERIES: &str = r#"
query insert_person($name: String, $age: I32) {
    insert Person { name: $name, age: $age }
}

query set_age($name: String, $age: I32) {
    update Person set { age: $age } where name = $name
}
"#;

const POLICY_YAML: &str = r#"
version: 1
groups:
  team: [act-andrew, act-bruno, act-ragnor]
  admins: [act-ragnor]
protected_branches: [main]
rules:
  - id: team-read
    allow:
      actors: { group: team }
      actions: [read]
      branch_scope: any
  - id: admins-export
    allow:
      actors: { group: admins }
      actions: [export]
      branch_scope: any
  - id: team-write-unprotected
    allow:
      actors: { group: team }
      actions: [change]
      branch_scope: unprotected
  - id: admins-merge
    allow:
      actors: { group: admins }
      actions: [branch_delete, branch_merge]
      target_branch_scope: protected
"#;

const POLICY_PROTECTED_READ_YAML: &str = r#"
version: 1
groups:
  team: [act-bruno]
protected_branches: [main]
rules:
  - id: protected-read
    allow:
      actors: { group: team }
      actions: [read]
      branch_scope: protected
"#;

const INGEST_CREATE_ONLY_POLICY_YAML: &str = r#"
version: 1
groups:
  team: [act-bruno]
protected_branches: [main]
rules:
  - id: team-branch-create
    allow:
      actors: { group: team }
      actions: [branch_create]
      target_branch_scope: unprotected
"#;

const SCHEMA_APPLY_POLICY_YAML: &str = r#"
version: 1
groups:
  admins: [act-ragnor]
protected_branches: [main]
rules:
  - id: admins-schema-apply
    allow:
      actors: { group: admins }
      actions: [schema_apply]
      target_branch_scope: protected
"#;

fn fixture(name: &str) -> PathBuf {
    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("../omnigraph/tests/fixtures")
        .join(name)
}

async fn init_loaded_graph() -> tempfile::TempDir {
    init_graph_with_schema_and_data(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &fs::read_to_string(fixture("test.jsonl")).unwrap(),
    )
    .await
}

async fn init_graph_with_schema_and_data(schema: &str, data: &str) -> tempfile::TempDir {
    let temp = tempfile::tempdir().unwrap();
    let graph = graph_path(temp.path());
    fs::create_dir_all(&graph).unwrap();
    Omnigraph::init(graph.to_str().unwrap(), schema)
        .await
        .unwrap();
    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    load_jsonl(&mut db, data, LoadMode::Overwrite)
        .await
        .unwrap();
    temp
}

async fn init_graph_with_schema(schema: &str) -> tempfile::TempDir {
    let temp = tempfile::tempdir().unwrap();
    let graph = graph_path(temp.path());
    fs::create_dir_all(&graph).unwrap();
    Omnigraph::init(graph.to_str().unwrap(), schema)
        .await
        .unwrap();
    temp
}

fn graph_path(root: &Path) -> PathBuf {
    root.join("server.omni")
}

fn drifted_test_schema() -> String {
    fs::read_to_string(fixture("test.pg"))
        .unwrap()
        .replace("age: I32?", "age: I64?")
}

async fn manifest_dataset_version(graph: &Path) -> u64 {
    Omnigraph::open(graph.to_string_lossy().as_ref())
        .await
        .unwrap()
        .snapshot_of(ReadTarget::branch("main"))
        .await
        .unwrap()
        .version()
}

fn s3_test_graph_uri(suite: &str) -> Option<String> {
    let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
    let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| "omnigraph-itests".to_string());
    let unique = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .ok()?
        .as_nanos();
    Some(format!("s3://{}/{}/{}/{}", bucket, prefix, suite, unique))
}

async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let state = AppState::open(graph.to_string_lossy().to_string())
        .await
        .unwrap();
    (temp, build_app(state))
}

/// Build a permit-all policy YAML that grants every action used by the
/// HTTP-layer tests to the listed actor names. MR-723 default-deny
/// closed the "tokens but no policy" loophole; helpers that used to
/// represent "auth without policy" now install this permit-all policy
/// so test cases retain their pre-MR-723 semantics ("auth required,
/// every action permitted") without conflicting with the new state
/// matrix. Tests that specifically need the State-2 deny path use
/// `app_for_graph_with_auth_tokens_only` instead.
fn permit_all_policy_yaml(actors: &[&str]) -> String {
    let members = actors
        .iter()
        .map(|a| format!("\"{a}\""))
        .collect::<Vec<_>>()
        .join(", ");
    format!(
        r#"
version: 1
groups:
  permitted: [{members}]
protected_branches: [main]
rules:
  - id: permit-data
    allow:
      actors: {{ group: permitted }}
      actions: [read, change, export]
      branch_scope: any
  - id: permit-protected-target-actions
    allow:
      actors: {{ group: permitted }}
      actions: [schema_apply, branch_create, branch_delete, branch_merge]
      target_branch_scope: any
"#
    )
}

async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) {
    // `AppState::new_with_bearer_token(token)` maps the token to actor "default";
    // permit-all policy needs to include that actor.
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![("default".to_string(), token.to_string())],
        Some(&policy_path),
    )
    .await
    .unwrap();
    (temp, build_app(state))
}

async fn app_for_loaded_graph_with_auth_tokens(
    tokens: &[(&str, &str)],
) -> (tempfile::TempDir, Router) {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let policy_path = temp.path().join("policy.yaml");
    let actors: Vec<&str> = tokens.iter().map(|(actor, _)| *actor).collect();
    fs::write(&policy_path, permit_all_policy_yaml(&actors)).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        tokens
            .iter()
            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
            .collect(),
        Some(&policy_path),
    )
    .await
    .unwrap();
    (temp, build_app(state))
}

async fn app_for_loaded_graph_with_auth_tokens_and_policy(
    tokens: &[(&str, &str)],
    policy: &str,
) -> (tempfile::TempDir, Router) {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, policy).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        tokens
            .iter()
            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
            .collect(),
        Some(&policy_path),
    )
    .await
    .unwrap();
    (temp, build_app(state))
}

async fn app_for_graph_with_auth_tokens_and_policy(
    schema: &str,
    tokens: &[(&str, &str)],
    policy: &str,
) -> (tempfile::TempDir, Router) {
    let temp = init_graph_with_schema(schema).await;
    let graph = graph_path(temp.path());
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, policy).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        tokens
            .iter()
            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
            .collect(),
        Some(&policy_path),
    )
    .await
    .unwrap();
    (temp, build_app(state))
}

/// MR-723 default-deny mode: bearer tokens configured, no policy file.
/// Exercises ServerRuntimeState::DefaultDeny — authenticated requests
/// for Read succeed, every other action is rejected with 403 from
/// `authorize_request`'s state-2 branch.
async fn app_for_graph_with_auth_tokens_only(
    schema: &str,
    tokens: &[(&str, &str)],
) -> (tempfile::TempDir, Router) {
    let temp = init_graph_with_schema(schema).await;
    let graph = graph_path(temp.path());
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        tokens
            .iter()
            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
            .collect(),
        None,
    )
    .await
    .unwrap();
    (temp, build_app(state))
}

fn additive_schema_with_nickname() -> String {
    fs::read_to_string(fixture("test.pg")).unwrap().replace(
        "    age: I32?\n}",
        "    age: I32?\n    nickname: String?\n}",
    )
}

fn schema_without_age() -> String {
    // Drop the nullable `age` column from the test schema. Used by the
    // HTTP soft/hard drop tests below.
    fs::read_to_string(fixture("test.pg"))
        .unwrap()
        .replace("    age: I32?\n", "")
}

fn schema_without_company() -> String {
    // Drop the `Company` node type and the edge referencing it. Used
    // by the HTTP DropType test below. Hand-crafted (no template
    // string replace) because the fixture interleaves the type and
    // its edge.
    r#"node Person {
    name: String @key
    age: I32?
}

edge Knows: Person -> Person {
    since: Date?
}
"#
    .to_string()
}

fn renamed_person_schema() -> String {
    fs::read_to_string(fixture("test.pg"))
        .unwrap()
        .replace("node Person {\n", "node Human @rename_from(\"Person\") {\n")
        .replace("edge Knows: Person -> Person", "edge Knows: Human -> Human")
        .replace(
            "edge WorksAt: Person -> Company",
            "edge WorksAt: Human -> Company",
        )
}

fn renamed_age_schema() -> String {
    fs::read_to_string(fixture("test.pg"))
        .unwrap()
        .replace("age: I32?", "years: I32? @rename_from(\"age\")")
}

fn indexed_name_schema() -> String {
    fs::read_to_string(fixture("test.pg"))
        .unwrap()
        .replace("name: String @key", "name: String @key @index")
}

fn unsupported_schema_change() -> String {
    fs::read_to_string(fixture("test.pg"))
        .unwrap()
        .replace("age: I32?", "age: I64?")
}

async fn json_response(app: &Router, request: Request<Body>) -> (StatusCode, Value) {
    let response = app.clone().oneshot(request).await.unwrap();
    let status = response.status();
    let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
    let value = serde_json::from_slice(&body).unwrap();
    (status, value)
}

#[tokio::test]
async fn schema_apply_route_updates_graph_for_authorized_admin() {
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;
    let schema = additive_schema_with_nickname();

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .header("authorization", "Bearer admin-token")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: schema,
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);
    let graph = graph_path(temp.path());
    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    assert!(
        reopened.catalog().node_types["Person"]
            .properties
            .contains_key("nickname")
    );
}

#[tokio::test]
async fn schema_apply_route_requires_schema_apply_policy_permission() {
    let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        POLICY_YAML,
    )
    .await;

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .header("authorization", "Bearer admin-token")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: additive_schema_with_nickname(),
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::FORBIDDEN);
    assert_eq!(
        payload["code"],
        serde_json::to_value(omnigraph_server::api::ErrorCode::Forbidden).unwrap()
    );
}

#[tokio::test]
async fn schema_apply_route_requires_bearer_token_when_policy_enabled() {
    let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: additive_schema_with_nickname(),
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::UNAUTHORIZED);
    assert_eq!(
        payload["code"],
        serde_json::to_value(omnigraph_server::api::ErrorCode::Unauthorized).unwrap()
    );
}

#[tokio::test]
async fn schema_apply_route_can_rename_type() {
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .header("authorization", "Bearer admin-token")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: renamed_person_schema(),
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);
    let graph = graph_path(temp.path());
    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    let snapshot = reopened
        .snapshot_of(ReadTarget::branch("main"))
        .await
        .unwrap();
    assert!(snapshot.entry("node:Human").is_some());
    assert!(snapshot.entry("node:Person").is_none());
}

#[tokio::test]
async fn schema_apply_route_can_rename_property() {
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .header("authorization", "Bearer admin-token")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: renamed_age_schema(),
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);
    let graph = graph_path(temp.path());
    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    let person = &reopened.catalog().node_types["Person"];
    assert!(person.properties.contains_key("years"));
    assert!(!person.properties.contains_key("age"));
}

#[tokio::test]
async fn schema_apply_route_can_add_index() {
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;
    let graph = graph_path(temp.path());
    let before_index_count = {
        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap();
        let dataset = snapshot.open("node:Person").await.unwrap();
        dataset.load_indices().await.unwrap().len()
    };

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .header("authorization", "Bearer admin-token")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: indexed_name_schema(),
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);
    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    let snapshot = reopened
        .snapshot_of(ReadTarget::branch("main"))
        .await
        .unwrap();
    let dataset = snapshot.open("node:Person").await.unwrap();
    let after_index_count = dataset.load_indices().await.unwrap().len();
    assert!(after_index_count > before_index_count);
}

#[tokio::test]
async fn schema_apply_route_rejects_unsupported_plan() {
    let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .header("authorization", "Bearer admin-token")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: unsupported_schema_change(),
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::BAD_REQUEST);
    assert_eq!(
        payload["code"],
        serde_json::to_value(omnigraph_server::api::ErrorCode::BadRequest).unwrap()
    );
}

#[tokio::test]
async fn schema_apply_route_rejects_when_non_main_branch_exists() {
    let temp = init_graph_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await;
    let graph = graph_path(temp.path());
    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    db.branch_create("feature").await.unwrap();
    drop(db);

    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, SCHEMA_APPLY_POLICY_YAML).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![("act-ragnor".to_string(), "admin-token".to_string())],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    let request = Request::builder()
        .method(Method::POST)
        .uri("/schema/apply")
        .header("content-type", "application/json")
        .header("authorization", "Bearer admin-token")
        .body(Body::from(
            serde_json::to_vec(&SchemaApplyRequest {
                schema_source: additive_schema_with_nickname(),
                ..Default::default()
            })
            .unwrap(),
        ))
        .unwrap();
    let (status, payload) = json_response(&app, request).await;

    assert_eq!(status, StatusCode::CONFLICT);
    assert_eq!(
        payload["code"],
        serde_json::to_value(omnigraph_server::api::ErrorCode::Conflict).unwrap()
    );
}

struct EnvGuard {
    saved: Vec<(&'static str, Option<String>)>,
}

impl EnvGuard {
    fn set(vars: &[(&'static str, Option<&str>)]) -> Self {
        let saved = vars
            .iter()
            .map(|(name, _)| (*name, env::var(name).ok()))
            .collect::<Vec<_>>();
        for (name, value) in vars {
            unsafe {
                match value {
                    Some(value) => env::set_var(name, value),
                    None => env::remove_var(name),
                }
            }
        }
        Self { saved }
    }
}

impl Drop for EnvGuard {
    fn drop(&mut self) {
        for (name, value) in self.saved.drain(..) {
            unsafe {
                match value {
                    Some(value) => env::set_var(name, value),
                    None => env::remove_var(name),
                }
            }
        }
    }
}

fn format_vector(values: &[f32]) -> String {
    values
        .iter()
        .map(|value| format!("{:.8}", value))
        .collect::<Vec<_>>()
        .join(", ")
}

fn normalize_vector(mut values: Vec<f32>) -> Vec<f32> {
    let norm = values
        .iter()
        .map(|value| (*value as f64) * (*value as f64))
        .sum::<f64>()
        .sqrt() as f32;
    if norm > f32::EPSILON {
        for value in &mut values {
            *value /= norm;
        }
    }
    values
}

fn fnv1a64(bytes: &[u8]) -> u64 {
    let mut hash = 14695981039346656037u64;
    for byte in bytes {
        hash ^= *byte as u64;
        hash = hash.wrapping_mul(1099511628211u64);
    }
    hash
}

fn xorshift64(mut x: u64) -> u64 {
    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;
    x
}

fn mock_embedding(input: &str, dim: usize) -> Vec<f32> {
    let mut seed = fnv1a64(input.as_bytes());
    let mut out = Vec::with_capacity(dim);
    for _ in 0..dim {
        seed = xorshift64(seed);
        let ratio = (seed as f64 / u64::MAX as f64) as f32;
        out.push((ratio * 2.0) - 1.0);
    }
    normalize_vector(out)
}

#[tokio::test(flavor = "multi_thread")]
async fn healthz_succeeds_after_startup() {
    let (_temp, app) = app_for_loaded_graph().await;
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/healthz")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;

    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["status"], "ok");
    assert_eq!(body["version"], env!("CARGO_PKG_VERSION"));
    match option_env!("OMNIGRAPH_SOURCE_VERSION") {
        Some(source_version) => assert_eq!(body["source_version"], source_version),
        None => assert!(body.get("source_version").is_none()),
    }
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_drift_returns_conflict_for_snapshot_read_and_change() {
    let (temp, app) = app_for_loaded_graph().await;
    let graph = graph_path(temp.path());
    fs::write(graph.join("_schema.pg"), drifted_test_schema()).unwrap();

    let (snapshot_status, snapshot_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    let snapshot_error: ErrorOutput = serde_json::from_value(snapshot_body).unwrap();
    assert_eq!(snapshot_status, StatusCode::CONFLICT);
    assert_eq!(
        snapshot_error.code,
        Some(omnigraph_server::api::ErrorCode::Conflict)
    );
    assert!(
        snapshot_error
            .error
            .contains("schema evolution is locked down in phase 1")
    );

    let read = ReadRequest {
        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
        query_name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Alice" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (read_status, read_body) = json_response(
        &app,
        Request::builder()
            .uri("/read")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&read).unwrap()))
            .unwrap(),
    )
    .await;
    let read_error: ErrorOutput = serde_json::from_value(read_body).unwrap();
    assert_eq!(read_status, StatusCode::CONFLICT);
    assert_eq!(
        read_error.code,
        Some(omnigraph_server::api::ErrorCode::Conflict)
    );
    assert!(
        read_error
            .error
            .contains("schema evolution is locked down in phase 1")
    );

    let change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Mina", "age": 28 })),
        branch: Some("main".to_string()),
    };
    let (change_status, change_body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&change).unwrap()))
            .unwrap(),
    )
    .await;
    let change_error: ErrorOutput = serde_json::from_value(change_body).unwrap();
    assert_eq!(change_status, StatusCode::CONFLICT);
    assert_eq!(
        change_error.code,
        Some(omnigraph_server::api::ErrorCode::Conflict)
    );
    assert!(
        change_error
            .error
            .contains("schema evolution is locked down in phase 1")
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn protected_routes_require_bearer_token() {
    let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;

    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert_eq!(status, StatusCode::UNAUTHORIZED);
    assert_eq!(
        error.code,
        Some(omnigraph_server::api::ErrorCode::Unauthorized)
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn protected_routes_accept_valid_bearer_token_while_healthz_stays_open() {
    let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;

    let health = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/healthz")
                .method(Method::GET)
                .body(Body::empty())
                .unwrap(),
        )
        .await
        .unwrap();
    assert_eq!(health.status(), StatusCode::OK);

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::GET)
            .header("authorization", "Bearer demo-token")
            .body(Body::empty())
            .unwrap(),
    )
    .await;

    assert_eq!(status, StatusCode::OK);
    assert!(body["branches"].is_array());
}

#[tokio::test(flavor = "multi_thread")]
async fn export_route_returns_jsonl_for_branch_snapshot() {
    let token = "demo-token";
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    db.branch_create_from(ReadTarget::branch("main"), "feature")
        .await
        .unwrap();
    db.load(
        "feature",
        r#"{"type":"Person","data":{"name":"Eve","age":29}}"#,
        LoadMode::Append,
    )
    .await
    .unwrap();
    let expected = db
        .export_jsonl("feature", &["Person".to_string()], &[])
        .await
        .unwrap();
    drop(db);

    // MR-723: tokens-without-policy is now default-deny. Install a
    // permit-all policy alongside the bearer token so /export
    // (action=Export) passes Cedar evaluation. The test is exercising
    // export semantics, not policy — the policy is just enough to clear
    // the State 3 path.
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![("default".to_string(), token.to_string())],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/export")
                .method(Method::POST)
                .header("content-type", "application/json")
                .header("authorization", format!("Bearer {}", token))
                .body(Body::from(
                    serde_json::to_vec(&ExportRequest {
                        branch: Some("feature".to_string()),
                        type_names: vec!["Person".to_string()],
                        table_keys: Vec::new(),
                    })
                    .unwrap(),
                ))
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);
    assert_eq!(
        response.headers().get("content-type").unwrap(),
        "application/x-ndjson; charset=utf-8"
    );
    let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
    let text = String::from_utf8(body.to_vec()).unwrap();
    assert_eq!(text, expected);
}

#[tokio::test(flavor = "multi_thread")]
async fn protected_routes_accept_any_configured_team_bearer_token() {
    let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[
        ("team-01", "token-one"),
        ("team-02", "token-two"),
    ])
    .await;

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::GET)
            .header("authorization", "Bearer token-two")
            .body(Body::empty())
            .unwrap(),
    )
    .await;

    assert_eq!(status, StatusCode::OK);
    assert!(body["branches"].is_array());
}

/// Verifies the hashed-token lookup correctly resolves each bearer to its
/// associated actor, and that the resolved actor — not the handler-supplied
/// default — is what the policy engine sees. Two tokens for two distinct
/// actors; policy grants read to actor-A only. Swapping tokens must swap
/// the policy outcome.
#[tokio::test(flavor = "multi_thread")]
async fn bearer_token_resolves_to_correct_actor_for_policy_decisions() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let policy_path = temp.path().join("policy.yaml");
    fs::write(
        &policy_path,
        r#"
version: 1
groups:
  readers: [act-a]
  writers: [act-b]
protected_branches: [main]
rules:
  - id: readers-only
    allow:
      actors: { group: readers }
      actions: [read]
      branch_scope: any
"#,
    )
    .unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![
            ("act-a".to_string(), "token-a".to_string()),
            ("act-b".to_string(), "token-b".to_string()),
        ],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    // act-a is authenticated AND authorized.
    let (ok_status, _) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer token-a")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(ok_status, StatusCode::OK);

    // act-b is authenticated but policy rejects — proves the resolved actor
    // (not some default) was the policy subject.
    let (denied_status, denied_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer token-b")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    let denied_error: ErrorOutput = serde_json::from_value(denied_body).unwrap();
    assert_eq!(denied_status, StatusCode::FORBIDDEN);
    assert_eq!(
        denied_error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden)
    );

    // Unknown token: 401, never reaches the policy engine.
    let (bad_status, _) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer wrong-token")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(bad_status, StatusCode::UNAUTHORIZED);
}

/// Regression test for MR-731: actor identity comes from the matched
/// bearer token, never from a client-supplied request header. A future
/// "convenience" PR that lets clients override `actor_id` to spoof
/// another identity must break this test. The principle is named in
/// `docs/dev/invariants.md` Hard Invariant 11 and at the actor-resolution
/// site in `omnigraph-server/src/lib.rs::authorize_request`.
///
/// Two assertions in one fixture:
/// 1. Spoof-up: bearer for a *denied* actor + X-Actor-Id naming an
///    *allowed* actor — policy still denies (proves the spoof header
///    doesn't promote the request).
/// 2. Spoof-down: bearer for an *allowed* actor + X-Actor-Id naming a
///    *denied* actor — policy still allows (proves the server-resolved
///    identity wins; the spoof can't trick the request into a denial
///    either, which would otherwise be a confusing UX trap).
///
/// Cross-reference: MR-777 covers boundary cases like actor-id
/// *collision* (two distinct tokens minting the same actor_id) and
/// malformed bearer header parsing. See `auth_boundary_case_coverage`
/// suite when it lands; the two tests together pin the full bearer-token
/// → actor identity contract.
#[tokio::test(flavor = "multi_thread")]
async fn actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let policy_path = temp.path().join("policy.yaml");
    // Same readers/writers split as
    // `bearer_token_resolves_to_correct_actor_for_policy_decisions` —
    // `act-a` can read main, `act-b` cannot. The asymmetry is what
    // makes the spoof-up/spoof-down distinction observable.
    fs::write(
        &policy_path,
        r#"
version: 1
groups:
  readers: [act-a]
  writers: [act-b]
protected_branches: [main]
rules:
  - id: readers-only
    allow:
      actors: { group: readers }
      actions: [read]
      branch_scope: any
"#,
    )
    .unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![
            ("act-a".to_string(), "token-a".to_string()),
            ("act-b".to_string(), "token-b".to_string()),
        ],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    // (1) Spoof-up: bearer for act-b (denied) + X-Actor-Id: act-a (allowed).
    // If the server were trusting the header, this would succeed as
    // act-a. The contract is: the bearer wins. Expect 403 because
    // act-b can't read.
    let (spoof_up_status, spoof_up_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer token-b")
            .header("x-actor-id", "act-a")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    let spoof_up_error: ErrorOutput = serde_json::from_value(spoof_up_body).unwrap();
    assert_eq!(
        spoof_up_status,
        StatusCode::FORBIDDEN,
        "X-Actor-Id must not promote a denied bearer to an allowed actor",
    );
    assert_eq!(
        spoof_up_error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden),
    );

    // (2) Spoof-down: bearer for act-a (allowed) + X-Actor-Id: act-b (denied).
    // If the server were trusting the header, this would fail as act-b.
    // The contract is: the bearer wins. Expect 200 because act-a can read.
    let (spoof_down_status, _) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer token-a")
            .header("x-actor-id", "act-b")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(
        spoof_down_status,
        StatusCode::OK,
        "X-Actor-Id must not demote an allowed bearer to a denied actor",
    );

    // (3) Empty-string spoof attempt: an X-Actor-Id of "" must not
    // leak through as the policy subject. Same expectation as (1):
    // bearer for act-b is denied regardless of what the header tries.
    let (empty_spoof_status, _) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer token-b")
            .header("x-actor-id", "")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(
        empty_spoof_status,
        StatusCode::FORBIDDEN,
        "empty X-Actor-Id must not clear the resolved actor",
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_allows_read_but_distinguishes_401_from_403() {
    let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
        &[("act-bruno", "team-token"), ("act-ragnor", "admin-token")],
        POLICY_YAML,
    )
    .await;

    let (missing_status, missing_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    let missing_error: ErrorOutput = serde_json::from_value(missing_body).unwrap();
    assert_eq!(missing_status, StatusCode::UNAUTHORIZED);
    assert_eq!(
        missing_error.code,
        Some(omnigraph_server::api::ErrorCode::Unauthorized)
    );

    let (snapshot_status, snapshot_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer team-token")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(snapshot_status, StatusCode::OK);
    assert_eq!(snapshot_body["branch"], "main");

    let export_request = ExportRequest {
        branch: Some("main".to_string()),
        type_names: Vec::new(),
        table_keys: Vec::new(),
    };
    let (forbidden_status, forbidden_body) = json_response(
        &app,
        Request::builder()
            .uri("/export")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&export_request).unwrap()))
            .unwrap(),
    )
    .await;
    let forbidden_error: ErrorOutput = serde_json::from_value(forbidden_body).unwrap();
    assert_eq!(forbidden_status, StatusCode::FORBIDDEN);
    assert_eq!(
        forbidden_error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden)
    );

    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/export")
                .method(Method::POST)
                .header("authorization", "Bearer admin-token")
                .header("content-type", "application/json")
                .body(Body::from(serde_json::to_vec(&export_request).unwrap()))
                .unwrap(),
        )
        .await
        .unwrap();
    assert_eq!(response.status(), StatusCode::OK);
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_uses_resolved_branch_for_snapshot_reads() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let snapshot_id = {
        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        db.resolve_snapshot("main").await.unwrap().to_string()
    };
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, POLICY_PROTECTED_READ_YAML).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![("act-bruno".to_string(), "team-token".to_string())],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    let read = ReadRequest {
        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
        query_name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Alice" })),
        branch: None,
        snapshot: Some(snapshot_id),
    };
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/read")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&read).unwrap()))
            .unwrap(),
    )
    .await;

    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["target"]["branch"], Value::Null);
    assert_eq!(
        body["target"]["snapshot"].as_str(),
        read.snapshot.as_deref()
    );
    assert_eq!(body["row_count"], 1);
}

#[tokio::test(flavor = "multi_thread")]
async fn snapshot_route_returns_manifest_dataset_version() {
    let (temp, app) = app_for_loaded_graph().await;
    let graph = graph_path(temp.path());
    let expected_manifest_version = manifest_dataset_version(&graph).await;

    let (snapshot_status, snapshot_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;

    assert_eq!(snapshot_status, StatusCode::OK);
    assert_eq!(snapshot_body["branch"], "main");
    assert_eq!(
        snapshot_body["manifest_version"].as_u64().unwrap(),
        expected_manifest_version
    );
    assert!(snapshot_body["tables"].is_array());
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_route_returns_current_source() {
    let (_temp, app) = app_for_loaded_graph().await;
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/schema")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;

    assert_eq!(status, StatusCode::OK);
    let output: SchemaOutput = serde_json::from_value(body).unwrap();
    assert!(output.schema_source.contains("node Person"));
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_route_requires_bearer_token_when_auth_configured() {
    let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;

    let (missing_status, missing_body) = json_response(
        &app,
        Request::builder()
            .uri("/schema")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    let missing_error: ErrorOutput = serde_json::from_value(missing_body).unwrap();
    assert_eq!(missing_status, StatusCode::UNAUTHORIZED);
    assert_eq!(
        missing_error.code,
        Some(omnigraph_server::api::ErrorCode::Unauthorized)
    );

    let (ok_status, ok_body) = json_response(
        &app,
        Request::builder()
            .uri("/schema")
            .method(Method::GET)
            .header("authorization", "Bearer demo-token")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(ok_status, StatusCode::OK);
    let output: SchemaOutput = serde_json::from_value(ok_body).unwrap();
    assert!(!output.schema_source.is_empty());
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_route_denied_when_actor_lacks_read_permission() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let policy_path = temp.path().join("policy.yaml");
    // Policy grants branch_create only — no read action for act-bruno.
    fs::write(&policy_path, INGEST_CREATE_ONLY_POLICY_YAML).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![("act-bruno".to_string(), "team-token".to_string())],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/schema")
            .method(Method::GET)
            .header("authorization", "Bearer team-token")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert_eq!(status, StatusCode::FORBIDDEN);
    assert_eq!(
        error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden)
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    db.branch_create_from(ReadTarget::branch("main"), "feature")
        .await
        .unwrap();
    drop(db);

    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, POLICY_YAML).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![("act-bruno".to_string(), "team-token".to_string())],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    let main_change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Mina", "age": 28 })),
        branch: Some("main".to_string()),
    };
    let (main_status, main_body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&main_change).unwrap()))
            .unwrap(),
    )
    .await;
    let main_error: ErrorOutput = serde_json::from_value(main_body).unwrap();
    assert_eq!(main_status, StatusCode::FORBIDDEN);
    assert_eq!(
        main_error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden)
    );

    let feature_change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Mina", "age": 28 })),
        branch: Some("feature".to_string()),
    };
    let (feature_status, feature_body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&feature_change).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(feature_status, StatusCode::OK);
    assert_eq!(feature_body["branch"], "feature");
    assert_eq!(feature_body["affected_nodes"], 1);
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    db.branch_create_from(ReadTarget::branch("main"), "feature")
        .await
        .unwrap();
    db.load(
        "feature",
        r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#,
        LoadMode::Append,
    )
    .await
    .unwrap();
    drop(db);

    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, POLICY_YAML).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![
            ("act-bruno".to_string(), "team-token".to_string()),
            ("act-ragnor".to_string(), "admin-token".to_string()),
        ],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);

    let merge = BranchMergeRequest {
        source: "feature".to_string(),
        target: Some("main".to_string()),
    };
    let (deny_status, deny_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/merge")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
            .unwrap(),
    )
    .await;
    let deny_error: ErrorOutput = serde_json::from_value(deny_body).unwrap();
    assert_eq!(deny_status, StatusCode::FORBIDDEN);
    assert_eq!(
        deny_error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden)
    );

    let (allow_status, allow_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/merge")
            .method(Method::POST)
            .header("authorization", "Bearer admin-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(allow_status, StatusCode::OK);
    assert_eq!(allow_body["actor_id"], "act-ragnor");
}

#[tokio::test(flavor = "multi_thread")]
async fn authenticated_change_stamps_actor_on_commits() {
    // With the Run state machine removed, actor_id is recorded
    // directly on the commit graph (no intermediate run record).
    let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await;

    let change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Mina", "age": 28 })),
        branch: Some("main".to_string()),
    };
    let (change_status, change_body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("authorization", "Bearer token-one")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&change).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(change_status, StatusCode::OK);
    assert_eq!(change_body["actor_id"], "act-andrew");

    let (commits_status, commits_body) = json_response(
        &app,
        Request::builder()
            .uri("/commits?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer token-one")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(commits_status, StatusCode::OK);
    let head = commits_body["commits"]
        .as_array()
        .unwrap()
        .last()
        .expect("head commit should exist");
    assert_eq!(head["actor_id"], "act-andrew");
}

#[tokio::test(flavor = "multi_thread")]
async fn ingest_creates_branch_returns_metadata_and_stamps_actor() {
    let (temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await;
    let graph = graph_path(temp.path());
    let ingest = IngestRequest {
        branch: Some("feature-ingest".to_string()),
        from: Some("main".to_string()),
        mode: Some(LoadMode::Merge),
        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}
{"type":"Person","data":{"name":"Bob","age":26}}"#
            .to_string(),
    };

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/ingest")
            .method(Method::POST)
            .header("authorization", "Bearer token-one")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["branch"], "feature-ingest");
    assert_eq!(body["base_branch"], "main");
    assert_eq!(body["branch_created"], true);
    assert_eq!(body["mode"], "merge");
    assert_eq!(body["actor_id"], "act-andrew");
    assert_eq!(body["tables"][0]["table_key"], "node:Person");
    assert_eq!(body["tables"][0]["rows_loaded"], 2);

    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    let snapshot = db
        .snapshot_of(ReadTarget::branch("feature-ingest"))
        .await
        .unwrap();
    let person_ds = snapshot.open("node:Person").await.unwrap();
    assert_eq!(person_ds.count_rows(None).await.unwrap(), 5);
    let head = db
        .list_commits(Some("feature-ingest"))
        .await
        .unwrap()
        .into_iter()
        .last()
        .unwrap();
    assert_eq!(head.actor_id.as_deref(), Some("act-andrew"));
}

#[tokio::test(flavor = "multi_thread")]
async fn ingest_existing_branch_skips_branch_create_policy_check() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    {
        let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        db.branch_create_from(ReadTarget::branch("main"), "feature")
            .await
            .unwrap();
    }
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, POLICY_YAML).unwrap();
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![("act-bruno".to_string(), "team-token".to_string())],
        Some(&policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);
    let ingest = IngestRequest {
        branch: Some("feature".to_string()),
        from: Some("other-base".to_string()),
        mode: Some(LoadMode::Merge),
        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#.to_string(),
    };

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/ingest")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["branch"], "feature");
    assert_eq!(body["branch_created"], false);
    assert_eq!(body["base_branch"], "other-base");
}

#[tokio::test(flavor = "multi_thread")]
async fn ingest_denies_missing_branch_without_branch_create_permission() {
    let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
        &[("act-bruno", "team-token")],
        POLICY_YAML,
    )
    .await;
    let ingest = IngestRequest {
        branch: Some("feature".to_string()),
        from: Some("main".to_string()),
        mode: Some(LoadMode::Merge),
        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#.to_string(),
    };

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/ingest")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
            .unwrap(),
    )
    .await;
    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert_eq!(status, StatusCode::FORBIDDEN);
    assert_eq!(
        error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden)
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn ingest_denies_when_actor_lacks_change_permission() {
    let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
        &[("act-bruno", "team-token")],
        INGEST_CREATE_ONLY_POLICY_YAML,
    )
    .await;
    let ingest = IngestRequest {
        branch: Some("feature".to_string()),
        from: Some("main".to_string()),
        mode: Some(LoadMode::Merge),
        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#.to_string(),
    };

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/ingest")
            .method(Method::POST)
            .header("authorization", "Bearer team-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
            .unwrap(),
    )
    .await;
    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert_eq!(status, StatusCode::FORBIDDEN);
    assert_eq!(
        error.code,
        Some(omnigraph_server::api::ErrorCode::Forbidden)
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn ingest_rejects_payloads_over_32_mib() {
    let (_temp, app) = app_for_loaded_graph().await;
    let oversize = IngestRequest {
        branch: Some("feature".to_string()),
        from: Some("main".to_string()),
        mode: Some(LoadMode::Merge),
        data: "x".repeat(33 * 1024 * 1024),
    };

    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/ingest")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(serde_json::to_vec(&oversize).unwrap()))
                .unwrap(),
        )
        .await
        .unwrap();
    assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE);
}

#[tokio::test(flavor = "multi_thread")]
async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() {
    let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[
        ("act-andrew", "token-one"),
        ("act-ragnor", "token-two"),
    ])
    .await;

    let create = BranchCreateRequest {
        from: Some("main".to_string()),
        name: "feature".to_string(),
    };
    let (create_status, _) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::POST)
            .header("authorization", "Bearer token-one")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&create).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(create_status, StatusCode::OK);

    let change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Zoe", "age": 33 })),
        branch: Some("feature".to_string()),
    };
    let (change_status, _) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("authorization", "Bearer token-one")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&change).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(change_status, StatusCode::OK);

    let merge = BranchMergeRequest {
        source: "feature".to_string(),
        target: Some("main".to_string()),
    };
    let (merge_status, merge_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/merge")
            .method(Method::POST)
            .header("authorization", "Bearer token-two")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(merge_status, StatusCode::OK);
    assert_eq!(merge_body["actor_id"], "act-ragnor");

    let (commit_status, commit_body) = json_response(
        &app,
        Request::builder()
            .uri("/commits?branch=main")
            .method(Method::GET)
            .header("authorization", "Bearer token-two")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(commit_status, StatusCode::OK);
    let head = commit_body["commits"]
        .as_array()
        .unwrap()
        .last()
        .expect("head commit should exist");
    assert_eq!(head["actor_id"], "act-ragnor");
}

#[tokio::test(flavor = "multi_thread")]
async fn branch_merge_conflict_response_includes_structured_conflicts() {
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    db.branch_create_from(ReadTarget::branch("main"), "feature")
        .await
        .unwrap();
    db.mutate(
        "main",
        MUTATION_QUERIES,
        "set_age",
        &omnigraph_compiler::json_params_to_param_map(
            Some(&json!({"name": "Alice", "age": 31 })),
            &omnigraph_compiler::find_named_query(MUTATION_QUERIES, "set_age")
                .unwrap()
                .params,
            omnigraph_compiler::JsonParamMode::Standard,
        )
        .unwrap(),
    )
    .await
    .unwrap();
    db.mutate(
        "feature",
        MUTATION_QUERIES,
        "set_age",
        &omnigraph_compiler::json_params_to_param_map(
            Some(&json!({"name": "Alice", "age": 32 })),
            &omnigraph_compiler::find_named_query(MUTATION_QUERIES, "set_age")
                .unwrap()
                .params,
            omnigraph_compiler::JsonParamMode::Standard,
        )
        .unwrap(),
    )
    .await
    .unwrap();
    drop(db);

    let state = AppState::open(graph.to_string_lossy().to_string())
        .await
        .unwrap();
    let app = build_app(state);
    let merge = BranchMergeRequest {
        source: "feature".to_string(),
        target: Some("main".to_string()),
    };
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/merge")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
            .unwrap(),
    )
    .await;

    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert_eq!(status, StatusCode::CONFLICT);
    assert_eq!(error.code, Some(omnigraph_server::api::ErrorCode::Conflict));
    assert!(error.error.contains("merge conflict"));
    assert!(error.merge_conflicts.iter().any(|conflict| {
        conflict.table_key == "node:Person"
            && conflict.row_id.as_deref() == Some("Alice")
            && conflict.kind == omnigraph_server::api::MergeConflictKindOutput::DivergentUpdate
    }));
}

#[tokio::test(flavor = "multi_thread")]
async fn repeated_read_after_change_sees_updated_state_from_same_app() {
    let (_temp, app) = app_for_loaded_graph().await;

    let change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Mina", "age": 28 })),
        branch: Some("main".to_string()),
    };
    let (change_status, change_body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&change).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(change_status, StatusCode::OK);
    assert_eq!(change_body["affected_nodes"], 1);

    let read = ReadRequest {
        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
        query_name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Mina" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (read_status, read_body) = json_response(
        &app,
        Request::builder()
            .uri("/read")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&read).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(read_status, StatusCode::OK);
    assert_eq!(read_body["row_count"], 1);
    assert_eq!(read_body["rows"][0]["p.name"], "Mina");
}

#[tokio::test(flavor = "multi_thread")]
async fn query_endpoint_runs_inline_read() {
    let (_temp, app) = app_for_loaded_graph().await;

    let query = QueryRequest {
        query: fs::read_to_string(fixture("test.gq")).unwrap(),
        name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Alice" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/query")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&query).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["query_name"], "get_person");
    assert_eq!(body["row_count"], 1);
    assert_eq!(body["rows"][0]["p.name"], "Alice");
}

#[tokio::test(flavor = "multi_thread")]
async fn query_endpoint_rejects_mutation_with_400() {
    let (_temp, app) = app_for_loaded_graph().await;

    let query = QueryRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Should", "age": 1 })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/query")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&query).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::BAD_REQUEST);
    let err = body["error"].as_str().unwrap_or_default();
    assert!(
        err.contains("contains mutations") && err.contains("POST /mutate"),
        "expected mutation-rejection message pointing at canonical /mutate, got: {err}"
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn mutate_endpoint_runs_inline_mutation() {
    // Canonical mutation endpoint. Pairs with `/query` on the read side.
    // Same wire shape as `/change`, no deprecation signal.
    let (_temp, app) = app_for_loaded_graph().await;

    let request = json!({
        "query": MUTATION_QUERIES,
        "name": "insert_person",
        "params": { "name": "Mutie", "age": 30 },
        "branch": "main",
    });
    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/mutate")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(serde_json::to_vec(&request).unwrap()))
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);
    // Canonical route is NOT deprecated; no Deprecation header expected.
    assert!(
        response.headers().get("deprecation").is_none(),
        "POST /mutate must not advertise itself as deprecated"
    );
    let body_bytes = to_bytes(response.into_body(), usize::MAX).await.unwrap();
    let body: Value = serde_json::from_slice(&body_bytes).unwrap();
    assert_eq!(body["affected_nodes"], 1);
    assert_eq!(body["query_name"], "insert_person");
    assert_eq!(body["branch"], "main");
}

#[tokio::test(flavor = "multi_thread")]
async fn change_endpoint_emits_deprecation_headers() {
    // `/change` is kept indefinitely for back-compat but flagged at runtime
    // per RFC 9745 (`Deprecation: true`) + RFC 8288 (`Link: </mutate>;
    // rel="successor-version"`). The OpenAPI side is covered by
    // `openapi_change_is_deprecated` in tests/openapi.rs.
    let (_temp, app) = app_for_loaded_graph().await;

    let request = json!({
        "query": MUTATION_QUERIES,
        "name": "insert_person",
        "params": { "name": "Legacyer", "age": 33 },
        "branch": "main",
    });
    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/change")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(serde_json::to_vec(&request).unwrap()))
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);
    assert_eq!(
        response
            .headers()
            .get("deprecation")
            .and_then(|v| v.to_str().ok()),
        Some("true"),
        "POST /change must advertise `Deprecation: true` (RFC 9745)"
    );
    assert_eq!(
        response.headers().get("link").and_then(|v| v.to_str().ok()),
        Some("</mutate>; rel=\"successor-version\""),
        "POST /change must point at /mutate via `Link` rel=successor-version (RFC 8288)"
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn read_endpoint_emits_deprecation_headers() {
    // `/read` is kept indefinitely for byte-stable back-compat but flagged
    // at runtime per RFC 9745 + RFC 8288. Successor is `/query`.
    let (_temp, app) = app_for_loaded_graph().await;

    let request = ReadRequest {
        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
        query_name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Alice" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/read")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(serde_json::to_vec(&request).unwrap()))
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);
    assert_eq!(
        response
            .headers()
            .get("deprecation")
            .and_then(|v| v.to_str().ok()),
        Some("true"),
        "POST /read must advertise `Deprecation: true` (RFC 9745)"
    );
    assert_eq!(
        response.headers().get("link").and_then(|v| v.to_str().ok()),
        Some("</query>; rel=\"successor-version\""),
        "POST /read must point at /query via `Link` rel=successor-version (RFC 8288)"
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn query_endpoint_does_not_emit_deprecation_headers() {
    // Sanity check the inverse: the canonical `/query` endpoint must not
    // carry deprecation signaling, so SDK codegens don't propagate a
    // bogus `@deprecated` marker.
    let (_temp, app) = app_for_loaded_graph().await;

    let request = QueryRequest {
        query: fs::read_to_string(fixture("test.gq")).unwrap(),
        name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Alice" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/query")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(serde_json::to_vec(&request).unwrap()))
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);
    assert!(
        response.headers().get("deprecation").is_none(),
        "POST /query is canonical and must not advertise itself as deprecated"
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn change_endpoint_accepts_legacy_field_names() {
    // The canonical wire field names on /change are `query` and `name`, but
    // serde aliases keep the legacy `query_source`/`query_name` payload
    // shape working for clients that haven't migrated yet. Pin both shapes.
    let (_temp, app) = app_for_loaded_graph().await;

    let legacy_body = json!({
        "query_source": MUTATION_QUERIES,
        "query_name": "insert_person",
        "params": { "name": "Legacy", "age": 21 },
        "branch": "main",
    });
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&legacy_body).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["affected_nodes"], 1);

    let canonical_body = json!({
        "query": MUTATION_QUERIES,
        "name": "insert_person",
        "params": { "name": "Canonical", "age": 22 },
        "branch": "main",
    });
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&canonical_body).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["affected_nodes"], 1);
}

#[tokio::test(flavor = "multi_thread")]
async fn remote_branch_list_create_merge_flow_works() {
    let (_temp, app) = app_for_loaded_graph().await;

    let (list_status, list_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(list_status, StatusCode::OK);
    assert_eq!(list_body["branches"], json!(["main"]));

    let create = BranchCreateRequest {
        from: Some("main".to_string()),
        name: "feature".to_string(),
    };
    let (create_status, create_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&create).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(create_status, StatusCode::OK);
    assert_eq!(create_body["from"], "main");
    assert_eq!(create_body["name"], "feature");

    let (list_status, list_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(list_status, StatusCode::OK);
    assert_eq!(list_body["branches"], json!(["feature", "main"]));

    let change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "Zoe", "age": 33 })),
        branch: Some("feature".to_string()),
    };
    let (change_status, change_body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&change).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(change_status, StatusCode::OK);
    assert_eq!(change_body["branch"], "feature");
    assert_eq!(change_body["affected_nodes"], 1);

    let read_main_before = ReadRequest {
        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
        query_name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Zoe" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (read_status, read_body) = json_response(
        &app,
        Request::builder()
            .uri("/read")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&read_main_before).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(read_status, StatusCode::OK);
    assert_eq!(read_body["row_count"], 0);

    let merge = BranchMergeRequest {
        source: "feature".to_string(),
        target: Some("main".to_string()),
    };
    let (merge_status, merge_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/merge")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(merge_status, StatusCode::OK);
    assert_eq!(merge_body["source"], "feature");
    assert_eq!(merge_body["target"], "main");
    assert_eq!(merge_body["outcome"], "fast_forward");

    let read_main_after = ReadRequest {
        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
        query_name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Zoe" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (read_status, read_body) = json_response(
        &app,
        Request::builder()
            .uri("/read")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&read_main_after).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(read_status, StatusCode::OK);
    assert_eq!(read_body["row_count"], 1);
    assert_eq!(read_body["rows"][0]["p.name"], "Zoe");
}

#[tokio::test(flavor = "multi_thread")]
async fn remote_branch_delete_flow_works() {
    let (_temp, app) = app_for_loaded_graph().await;

    let create = BranchCreateRequest {
        from: Some("main".to_string()),
        name: "feature".to_string(),
    };
    let (create_status, _) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&create).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(create_status, StatusCode::OK);

    let (delete_status, delete_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/feature")
            .method(Method::DELETE)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(delete_status, StatusCode::OK);
    assert_eq!(delete_body["name"], "feature");

    let (list_status, list_body) = json_response(
        &app,
        Request::builder()
            .uri("/branches")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(list_status, StatusCode::OK);
    assert_eq!(list_body["branches"], json!(["main"]));
}

#[tokio::test(flavor = "multi_thread")]
async fn branch_delete_denies_without_policy_permission() {
    let (temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
        &[("act-andrew", "token-admin"), ("act-bruno", "token-team")],
        POLICY_YAML,
    )
    .await;
    let graph = graph_path(temp.path());

    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    db.branch_create_from(ReadTarget::branch("main"), "feature")
        .await
        .unwrap();
    drop(db);

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/feature")
            .method(Method::DELETE)
            .header("authorization", "Bearer token-team")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::FORBIDDEN);
    assert!(
        body["error"]
            .as_str()
            .unwrap()
            .contains("policy denied action 'branch_delete'")
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn server_opens_s3_graph_directly_and_serves_snapshot_and_read() {
    let Some(uri) = s3_test_graph_uri("server") else {
        eprintln!("skipping s3 server test: OMNIGRAPH_S3_TEST_BUCKET is not set");
        return;
    };

    Omnigraph::init(&uri, &fs::read_to_string(fixture("test.pg")).unwrap())
        .await
        .unwrap();
    let mut db = Omnigraph::open(&uri).await.unwrap();
    load_jsonl(
        &mut db,
        &fs::read_to_string(fixture("test.jsonl")).unwrap(),
        LoadMode::Overwrite,
    )
    .await
    .unwrap();

    let app = build_app(
        AppState::open_with_bearer_token(uri.clone(), Some("s3-token".to_string()))
            .await
            .unwrap(),
    );

    let (snapshot_status, snapshot_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot")
            .method(Method::GET)
            .header("authorization", "Bearer s3-token")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(snapshot_status, StatusCode::OK);
    assert!(snapshot_body["tables"].is_array());

    let read = ReadRequest {
        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
        query_name: Some("get_person".to_string()),
        params: Some(json!({ "name": "Alice" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (read_status, read_body) = json_response(
        &app,
        Request::builder()
            .uri("/read")
            .method(Method::POST)
            .header("authorization", "Bearer s3-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&read).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(read_status, StatusCode::OK);
    assert_eq!(read_body["row_count"], 1);
    assert_eq!(read_body["rows"][0]["p.name"], "Alice");
}

#[tokio::test(flavor = "multi_thread")]
#[serial]
async fn remote_read_embeds_string_nearest_queries_with_mock_runtime() {
    const EMBED_SCHEMA: &str = r#"
node Doc {
    slug: String @key
    title: String @index
    embedding: Vector(4) @index
}
"#;
    const EMBED_QUERY: &str = r#"
query vector_search_string($q: String) {
    match { $d: Doc }
    return { $d.slug, $d.title }
    order { nearest($d.embedding, $q) }
    limit 3
}
"#;

    let alpha = mock_embedding("alpha", 4);
    let beta = mock_embedding("beta", 4);
    let gamma = mock_embedding("gamma", 4);
    let data = format!(
        concat!(
            r#"{{"type":"Doc","data":{{"slug":"alpha-doc","title":"alpha guide","embedding":[{}]}}}}"#,
            "\n",
            r#"{{"type":"Doc","data":{{"slug":"beta-doc","title":"beta guide","embedding":[{}]}}}}"#,
            "\n",
            r#"{{"type":"Doc","data":{{"slug":"gamma-doc","title":"gamma handbook","embedding":[{}]}}}}"#
        ),
        format_vector(&alpha),
        format_vector(&beta),
        format_vector(&gamma),
    );

    let _guard = EnvGuard::set(&[
        ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")),
        ("GEMINI_API_KEY", None),
    ]);
    let temp = init_graph_with_schema_and_data(EMBED_SCHEMA, &data).await;
    let graph = graph_path(temp.path());
    let state = AppState::open(graph.to_string_lossy().to_string())
        .await
        .unwrap();
    let app = build_app(state);

    let read = ReadRequest {
        query_source: EMBED_QUERY.to_string(),
        query_name: Some("vector_search_string".to_string()),
        params: Some(json!({ "q": "alpha" })),
        branch: Some("main".to_string()),
        snapshot: None,
    };
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/read")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&read).unwrap()))
            .unwrap(),
    )
    .await;

    assert_eq!(status, StatusCode::OK);
    assert_eq!(body["row_count"], 3);
    assert_eq!(body["rows"][0]["d.slug"], "alpha-doc");
}

#[tokio::test(flavor = "multi_thread")]
async fn change_conflict_returns_manifest_conflict_409() {
    // A write that races with another writer surfaces as HTTP 409 with
    // a structured `manifest_conflict` body — `table_key`, `expected`,
    // and `actual` — so clients can detect-and-retry without parsing
    // the message.
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());

    // Build the server first so its handle pins the pre-mutation manifest
    // version. Then advance the manifest from outside the server. The
    // server's next /change call will capture stale `expected_versions`
    // (from its still-pinned snapshot) and the publisher's CAS rejects.
    let state = AppState::open(graph.to_string_lossy().to_string())
        .await
        .unwrap();
    let app = build_app(state);

    {
        let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        db.mutate(
            "main",
            MUTATION_QUERIES,
            "set_age",
            &omnigraph_compiler::json_params_to_param_map(
                Some(&json!({"name": "Alice", "age": 31 })),
                &omnigraph_compiler::find_named_query(MUTATION_QUERIES, "set_age")
                    .unwrap()
                    .params,
                omnigraph_compiler::JsonParamMode::Standard,
            )
            .unwrap(),
        )
        .await
        .unwrap();
    }

    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header("content-type", "application/json")
            .body(Body::from(
                serde_json::to_vec(&ChangeRequest {
                    query: MUTATION_QUERIES.to_string(),
                    name: Some("set_age".to_string()),
                    params: Some(json!({ "name": "Alice", "age": 33 })),
                    branch: Some("main".to_string()),
                })
                .unwrap(),
            ))
            .unwrap(),
    )
    .await;

    assert_eq!(status, StatusCode::CONFLICT);
    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert_eq!(error.code, Some(omnigraph_server::api::ErrorCode::Conflict));
    let conflict = error
        .manifest_conflict
        .expect("publisher CAS rejection must populate manifest_conflict body");
    assert_eq!(conflict.table_key, "node:Person");
    assert!(
        conflict.actual > conflict.expected,
        "actual ({}) should be ahead of expected ({})",
        conflict.actual,
        conflict.expected,
    );
}

#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn change_concurrent_inserts_same_key_serialize_without_409() {
    // PR 2 Phase 2 (MR-686): pin the design fix for the same-key
    // concurrency hazard. Pre-fix, in-process concurrent inserts on
    // the same `(table, branch)` rejected with 409 manifest_conflict
    // because `ensure_expected_version` fired before the per-table
    // queue was acquired and saw Lance HEAD already advanced by a
    // peer writer. Post-fix, Insert/Merge skip the strict pre-stage
    // check (see `MutationOpKind::strict_pre_stage_version_check`);
    // the queue serializes commit_staged; Lance's natural rebase
    // handles the in-flight stage; the publisher's CAS on a fresh
    // per-branch snapshot under the queue catches genuine cross-
    // process drift.
    //
    // This test spawns N concurrent /change inserts on a single
    // node type and asserts: every request returns 200 (no 409),
    // and the final row count equals the seed count + N (every
    // staged batch actually committed).
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let state = AppState::open(graph.to_string_lossy().to_string())
        .await
        .unwrap();
    let app = build_app(state);

    // test.jsonl seeds 4 Persons (Alice, Bob, Charlie, Diana).
    const SEED_PERSON_ROWS: u64 = 4;
    const N: usize = 12;

    let mut handles = Vec::with_capacity(N);
    for i in 0..N {
        let app = app.clone();
        handles.push(tokio::spawn(async move {
            let body = serde_json::to_vec(&ChangeRequest {
                query: MUTATION_QUERIES.to_string(),
                name: Some("insert_person".to_string()),
                params: Some(json!({ "name": format!("racer-{i}"), "age": i as i32 })),
                branch: Some("main".to_string()),
            })
            .unwrap();
            let req = Request::builder()
                .uri("/change")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(body))
                .unwrap();
            let response = app.oneshot(req).await.unwrap();
            response.status()
        }));
    }

    let mut statuses = Vec::with_capacity(N);
    for h in handles {
        statuses.push(h.await.unwrap());
    }

    let bad: Vec<_> = statuses
        .iter()
        .enumerate()
        .filter(|(_, s)| **s != StatusCode::OK)
        .collect();
    assert!(
        bad.is_empty(),
        "expected every concurrent insert to return 200, got non-200 for: {:?}",
        bad
    );

    // Verify the inserts actually landed. The status check above only proves
    // the publisher CAS didn't reject; the row count proves none of the
    // concurrent commits silently overwrote a peer.
    let (snapshot_status, snapshot_body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(snapshot_status, StatusCode::OK);
    let person_rows = snapshot_body["tables"]
        .as_array()
        .and_then(|tables| {
            tables
                .iter()
                .find(|t| t["table_key"].as_str() == Some("node:Person"))
        })
        .and_then(|t| t["row_count"].as_u64())
        .expect("snapshot must include node:Person row_count");
    assert_eq!(
        person_rows,
        SEED_PERSON_ROWS + N as u64,
        "expected {} seeded + {} concurrent inserts = {} Person rows; got {}",
        SEED_PERSON_ROWS,
        N,
        SEED_PERSON_ROWS + N as u64,
        person_rows,
    );
}

#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() {
    // Pin Update RYW semantics under in-process concurrency on the same
    // `(table, branch)`. With per-table queue serialization and op-kind-aware
    // drift detection at commit time, exactly one of N concurrent UPDATEs
    // on the same row commits; the rest are rejected as 409 manifest_conflict.
    //
    // Pre-fix bug class: in `MutationStaging::commit_all`, after queue
    // acquisition, the staged Lance transaction is handed straight to
    // `commit_staged`. For a writer whose staged dataset is at V0 but
    // Lance HEAD has advanced to V1 (because the queue's prior winner
    // already published), Lance's transaction conflict resolver fires
    // `RetryableCommitConflict` on Update vs Update on the same row.
    // That error gets wrapped as `OmniError::Lance(<string>)` and the
    // API surfaces it as **500 internal**, not 409. Users see "internal
    // server error" instead of a retryable conflict, breaking the
    // documented 409 contract for in-process drift.
    //
    // Post-fix invariant: `commit_all` does an op-kind-aware drift check
    // before each `commit_staged`. For tables whose tracked op_kind has
    // `strict_pre_stage_version_check() == true` (Update / Delete /
    // SchemaRewrite), if the staged dataset's version doesn't match the
    // fresh manifest pin, return `OmniError::manifest_expected_version_mismatch`
    // → 409 ExpectedVersionMismatch. The N-1 losers see a clean 409
    // before Lance's commit_staged ever runs.
    //
    // Why correct-by-design: closing the class "Lance internal conflict
    // surfaces as 500 instead of 409" rather than mapping the specific
    // Lance error variant. The drift check fires at the right architectural
    // layer (engine boundary, under the queue) and respects the existing
    // `MutationOpKind` policy.
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let state = AppState::open(graph.to_string_lossy().to_string())
        .await
        .unwrap();
    let app = build_app(state);

    // Spawn N=8 concurrent UPDATEs on Alice (from test.jsonl, age=30 at V0)
    // writing distinct ages.
    const N: usize = 8;
    let mut handles = Vec::with_capacity(N);
    for i in 0..N {
        let app = app.clone();
        let target_age = 100 + i as i32;
        handles.push(tokio::spawn(async move {
            let body = serde_json::to_vec(&ChangeRequest {
                query: MUTATION_QUERIES.to_string(),
                name: Some("set_age".to_string()),
                params: Some(json!({ "name": "Alice", "age": target_age })),
                branch: Some("main".to_string()),
            })
            .unwrap();
            let req = Request::builder()
                .uri("/change")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(body))
                .unwrap();
            let response = app.oneshot(req).await.unwrap();
            let status = response.status();
            let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
            (status, body.to_vec())
        }));
    }

    let mut results = Vec::with_capacity(N);
    for h in handles {
        results.push(h.await.unwrap());
    }
    let statuses: Vec<StatusCode> = results.iter().map(|(s, _)| *s).collect();

    let ok_count = statuses.iter().filter(|s| **s == StatusCode::OK).count();
    let conflict_count = statuses
        .iter()
        .filter(|s| **s == StatusCode::CONFLICT)
        .count();
    let other: Vec<_> = statuses
        .iter()
        .enumerate()
        .filter(|(_, s)| **s != StatusCode::OK && **s != StatusCode::CONFLICT)
        .collect();

    let other_bodies: Vec<(usize, StatusCode, String)> = other
        .iter()
        .map(|(i, s)| {
            let body_str = String::from_utf8_lossy(&results[*i].1).to_string();
            (*i, **s, body_str)
        })
        .collect();
    assert!(
        other.is_empty(),
        "expected only 200 or 409 statuses, got non-200/409 entries: {:?}",
        other_bodies
    );
    assert_eq!(
        ok_count + conflict_count,
        N,
        "all responses must be 200 or 409 to satisfy the RYW invariant; statuses: {:?}",
        statuses
    );
    assert_eq!(
        ok_count,
        1,
        "expected exactly one update to commit and N-1 to receive 409 manifest_conflict \
         (op-kind-aware drift check rejects stale-V0 staged datasets at commit_all entry). \
         Got {} OK + {} 409 + {} other. \
         Pre-fix symptom: 1 OK + (N-1) x 500 because Lance's RetryableCommitConflict for \
         Update vs Update on the same row bubbles up as `OmniError::Lance(<string>)` and \
         the API maps it to 500 internal, not 409. Statuses: {:?}",
        ok_count,
        conflict_count,
        statuses.len() - ok_count - conflict_count,
        statuses,
    );
}

// ─────────────────────────────────────────────────────────────────────────
// Branch-ops morphological matrix
//
// Table-driven test covering all interesting (op_a, op_b, target_overlap)
// concurrent-pair cells with the C1-C6 invariants asserted uniformly:
//
//   C1 — both complete (no deadlock, no hang)
//   C2 — status: both 200, or exactly one clean conflict (409/429), no 500
//   C3 — per-target row count
//   C4 — per-target row identity (present + absent named persons)
//   C5 — engine state remains coherent (subsequent /snapshot is consistent)
//   C6 — post-op /change on main succeeds (engine state isn't poisoned)
//
// Cell list (a-k) below. Each cell uses a fresh tempdir + AppState so a
// failure in one doesn't leak into the next. Within a cell, ops align at
// a tokio::sync::Barrier so both reach the engine close in time, and the
// pair is wrapped in tokio::time::timeout(15s) so a deadlock surfaces
// as a clean panic.
//
// Replaces the three narrow concurrent_branch_* tests below; their
// scenarios are folded into cells f, h, i (branch_create_from race),
// cell a (merge race with C4 identity assertions), and cell d
// (concurrent change-during-merge).
// ─────────────────────────────────────────────────────────────────────────

mod matrix {
    use super::*;
    use std::time::Duration;
    use tokio::sync::Barrier;

    #[derive(Debug)]
    pub(super) struct OpStatus {
        pub status: StatusCode,
        pub body: Vec<u8>,
    }

    pub(super) struct Harness {
        pub _temp: tempfile::TempDir,
        pub app: Router,
    }

    impl Harness {
        pub async fn new() -> Self {
            let temp = init_loaded_graph().await;
            let graph = graph_path(temp.path());
            // Build the WorkloadController explicitly with defaults rather
            // than letting `AppState::open` call
            // `WorkloadController::from_env()`. The admission-gate test
            // (`ingest_per_actor_admission_cap_returns_429`) sets
            // OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1 inside an EnvGuard while
            // it runs. Process-wide env vars are visible to
            // concurrently-running tests; if a matrix cell reads env at
            // AppState construction time during that window it picks up
            // cap=1 and the second concurrent merge in cell b surfaces
            // 429 instead of the expected 200. Constructing the
            // controller here with explicit defaults makes cells
            // independent of any env mutation other tests perform.
            let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
            let workload = omnigraph_server::workload::WorkloadController::with_defaults();
            let state = AppState::new_with_workload(
                graph.to_string_lossy().to_string(),
                db,
                Vec::new(),
                workload,
            );
            let app = build_app(state);
            Self { _temp: temp, app }
        }

        pub async fn create_branch(&self, from: &str, name: &str) {
            let body = serde_json::to_vec(&BranchCreateRequest {
                from: Some(from.to_string()),
                name: name.to_string(),
            })
            .unwrap();
            let r = self
                .app
                .clone()
                .oneshot(
                    Request::builder()
                        .uri("/branches")
                        .method(Method::POST)
                        .header("content-type", "application/json")
                        .body(Body::from(body))
                        .unwrap(),
                )
                .await
                .unwrap();
            assert_eq!(
                r.status(),
                StatusCode::OK,
                "setup create_branch {} from {} failed",
                name,
                from
            );
        }

        pub async fn insert_person(&self, branch: &str, name: &str, age: i32) {
            let body = serde_json::to_vec(&ChangeRequest {
                query: MUTATION_QUERIES.to_string(),
                name: Some("insert_person".to_string()),
                params: Some(json!({ "name": name, "age": age })),
                branch: Some(branch.to_string()),
            })
            .unwrap();
            let r = self
                .app
                .clone()
                .oneshot(
                    Request::builder()
                        .uri("/change")
                        .method(Method::POST)
                        .header("content-type", "application/json")
                        .body(Body::from(body))
                        .unwrap(),
                )
                .await
                .unwrap();
            assert_eq!(
                r.status(),
                StatusCode::OK,
                "setup insert {} on {} failed",
                name,
                branch
            );
        }

        /// Run two ops concurrently with barrier alignment + 15s deadlock
        /// timeout. Returns `(op_a, op_b)`. Panics on timeout.
        pub async fn run_pair(
            &self,
            op_a: impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus>,
            op_b: impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus>,
        ) -> (OpStatus, OpStatus) {
            let barrier = Arc::new(Barrier::new(2));
            let h_a = op_a(self.app.clone(), Arc::clone(&barrier));
            let h_b = op_b(self.app.clone(), Arc::clone(&barrier));
            let result = tokio::time::timeout(Duration::from_secs(15), async {
                let a = h_a.await.unwrap();
                let b = h_b.await.unwrap();
                (a, b)
            })
            .await;
            result.expect("concurrent op pair deadlocked (>15s)")
        }

        pub async fn person_count(&self, branch: &str) -> u64 {
            let r = self
                .app
                .clone()
                .oneshot(
                    Request::builder()
                        .uri(format!("/snapshot?branch={}", branch))
                        .method(Method::GET)
                        .body(Body::empty())
                        .unwrap(),
                )
                .await
                .unwrap();
            assert_eq!(r.status(), StatusCode::OK, "snapshot {} failed", branch);
            let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
            let v: Value = serde_json::from_slice(&body).unwrap();
            v["tables"]
                .as_array()
                .and_then(|tables| {
                    tables
                        .iter()
                        .find(|t| t["table_key"].as_str() == Some("node:Person"))
                })
                .and_then(|t| t["row_count"].as_u64())
                .unwrap_or_else(|| panic!("snapshot {} missing node:Person", branch))
        }

        /// True iff the named Person exists on `branch`. Uses the
        /// `get_person` query from `test.gq` for identity rather than
        /// just count.
        pub async fn person_exists(&self, branch: &str, name: &str) -> bool {
            let body = serde_json::to_vec(&ReadRequest {
                query_source: include_str!("../../omnigraph/tests/fixtures/test.gq").to_string(),
                query_name: Some("get_person".to_string()),
                params: Some(json!({ "name": name })),
                branch: Some(branch.to_string()),
                snapshot: None,
            })
            .unwrap();
            let r = self
                .app
                .clone()
                .oneshot(
                    Request::builder()
                        .uri("/read")
                        .method(Method::POST)
                        .header("content-type", "application/json")
                        .body(Body::from(body))
                        .unwrap(),
                )
                .await
                .unwrap();
            assert_eq!(
                r.status(),
                StatusCode::OK,
                "person_exists query for {} on {} failed",
                name,
                branch
            );
            let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
            let v: Value = serde_json::from_slice(&body).unwrap();
            v["row_count"].as_u64().unwrap_or(0) > 0
        }

        /// Asserts each name in `present` exists on `branch` and each in
        /// `absent` does not. Identity-grade check that catches symmetric
        /// swap races a row-count assertion would miss.
        pub async fn assert_persons(
            &self,
            branch: &str,
            cell: &str,
            present: &[&str],
            absent: &[&str],
        ) {
            for name in present {
                assert!(
                    self.person_exists(branch, name).await,
                    "[{}] expected {} to be present on {}",
                    cell,
                    name,
                    branch
                );
            }
            for name in absent {
                assert!(
                    !self.person_exists(branch, name).await,
                    "[{}] expected {} to be absent from {}",
                    cell,
                    name,
                    branch
                );
            }
        }

        /// C6: insert a uniquely-named sentinel on main and verify it
        /// landed. Catches engine-state poisoning where a cell's
        /// concurrent ops left the engine half-broken — subsequent
        /// /change either deadlocks or returns a non-200.
        pub async fn assert_post_op_sentinel(&self, cell: &str, sentinel: &str) {
            let body = serde_json::to_vec(&ChangeRequest {
                query: MUTATION_QUERIES.to_string(),
                name: Some("insert_person".to_string()),
                params: Some(json!({ "name": sentinel, "age": 99 })),
                branch: Some("main".to_string()),
            })
            .unwrap();
            let r = self
                .app
                .clone()
                .oneshot(
                    Request::builder()
                        .uri("/change")
                        .method(Method::POST)
                        .header("content-type", "application/json")
                        .body(Body::from(body))
                        .unwrap(),
                )
                .await
                .unwrap();
            assert_eq!(
                r.status(),
                StatusCode::OK,
                "[{}] post-op sentinel /change on main failed (engine poisoned?)",
                cell
            );
            assert!(
                self.person_exists("main", sentinel).await,
                "[{}] sentinel {} did not land on main",
                cell,
                sentinel
            );
        }
    }

    // Helpers that build the closures for `run_pair`. Each takes a
    // Router + Barrier and returns a JoinHandle yielding the status/body.

    pub(super) fn op_merge(
        source: String,
        target: String,
    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
        move |app: Router, barrier: Arc<Barrier>| {
            tokio::spawn(async move {
                barrier.wait().await;
                let body = serde_json::to_vec(&BranchMergeRequest {
                    source,
                    target: Some(target),
                })
                .unwrap();
                let response = app
                    .oneshot(
                        Request::builder()
                            .uri("/branches/merge")
                            .method(Method::POST)
                            .header("content-type", "application/json")
                            .body(Body::from(body))
                            .unwrap(),
                    )
                    .await
                    .unwrap();
                let status = response.status();
                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
                OpStatus {
                    status,
                    body: body.to_vec(),
                }
            })
        }
    }

    pub(super) fn op_change_insert(
        branch: String,
        name: String,
        age: i32,
    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
        move |app: Router, barrier: Arc<Barrier>| {
            tokio::spawn(async move {
                barrier.wait().await;
                let body = serde_json::to_vec(&ChangeRequest {
                    query: MUTATION_QUERIES.to_string(),
                    name: Some("insert_person".to_string()),
                    params: Some(json!({ "name": name, "age": age })),
                    branch: Some(branch),
                })
                .unwrap();
                let response = app
                    .oneshot(
                        Request::builder()
                            .uri("/change")
                            .method(Method::POST)
                            .header("content-type", "application/json")
                            .body(Body::from(body))
                            .unwrap(),
                    )
                    .await
                    .unwrap();
                let status = response.status();
                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
                OpStatus {
                    status,
                    body: body.to_vec(),
                }
            })
        }
    }

    pub(super) fn op_branch_create(
        from: String,
        name: String,
    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
        move |app: Router, barrier: Arc<Barrier>| {
            tokio::spawn(async move {
                barrier.wait().await;
                let body = serde_json::to_vec(&BranchCreateRequest {
                    from: Some(from),
                    name,
                })
                .unwrap();
                let response = app
                    .oneshot(
                        Request::builder()
                            .uri("/branches")
                            .method(Method::POST)
                            .header("content-type", "application/json")
                            .body(Body::from(body))
                            .unwrap(),
                    )
                    .await
                    .unwrap();
                let status = response.status();
                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
                OpStatus {
                    status,
                    body: body.to_vec(),
                }
            })
        }
    }

    pub(super) fn op_branch_delete(
        name: String,
    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
        move |app: Router, barrier: Arc<Barrier>| {
            tokio::spawn(async move {
                barrier.wait().await;
                let response = app
                    .oneshot(
                        Request::builder()
                            .uri(format!("/branches/{}", name))
                            .method(Method::DELETE)
                            .body(Body::empty())
                            .unwrap(),
                    )
                    .await
                    .unwrap();
                let status = response.status();
                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
                OpStatus {
                    status,
                    body: body.to_vec(),
                }
            })
        }
    }
}

#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn concurrent_branch_ops_morphological_matrix() {
    // Cell a: Merge × Merge, distinct targets.
    // Pre-fix on b09a097/22d76db: branch_merge_impl's swap-restore race
    // landed feature_a's content in target_b instead of target_a (and
    // vice versa — symmetric swap). Identity asserts catch both
    // asymmetric and symmetric variants.
    {
        let cell = "a:merge×merge:distinct-targets";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "feature-a-cella").await;
        h.insert_person("feature-a-cella", "EveA-cella", 22).await;
        h.create_branch("main", "feature-b-cella").await;
        h.insert_person("feature-b-cella", "FrankB-cella", 33).await;
        h.create_branch("main", "target-a-cella").await;
        h.create_branch("main", "target-b-cella").await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_merge("feature-a-cella".to_string(), "target-a-cella".to_string()),
                matrix::op_merge("feature-b-cella".to_string(), "target-b-cella".to_string()),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] merge a", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] merge b", cell);
        h.assert_persons("target-a-cella", cell, &["EveA-cella"], &["FrankB-cella"])
            .await;
        h.assert_persons("target-b-cella", cell, &["FrankB-cella"], &["EveA-cella"])
            .await;
        h.assert_post_op_sentinel(cell, "sentinel-cella").await;
    }

    // Cell b: Merge × Merge, same target / distinct sources.
    // Both want to land in main. merge_exclusive serializes; both should
    // succeed and main should contain BOTH sources' contributions.
    {
        let cell = "b:merge×merge:same-target-distinct-sources";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "src-x-cellb").await;
        h.insert_person("src-x-cellb", "Xavier-cellb", 41).await;
        h.create_branch("main", "src-y-cellb").await;
        h.insert_person("src-y-cellb", "Yvonne-cellb", 42).await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_merge("src-x-cellb".to_string(), "main".to_string()),
                matrix::op_merge("src-y-cellb".to_string(), "main".to_string()),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] merge x", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] merge y", cell);
        h.assert_persons("main", cell, &["Xavier-cellb", "Yvonne-cellb"], &[])
            .await;
        h.assert_post_op_sentinel(cell, "sentinel-cellb").await;
    }

    // Cell c: Merge × Merge, same source / distinct targets (fanout).
    // One source merged into two targets simultaneously. merge_exclusive
    // serializes; both targets should reflect the source's content.
    {
        let cell = "c:merge×merge:same-source-distinct-targets";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "src-shared-cellc").await;
        h.insert_person("src-shared-cellc", "Sharon-cellc", 50)
            .await;
        h.create_branch("main", "tgt-1-cellc").await;
        h.create_branch("main", "tgt-2-cellc").await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_merge("src-shared-cellc".to_string(), "tgt-1-cellc".to_string()),
                matrix::op_merge("src-shared-cellc".to_string(), "tgt-2-cellc".to_string()),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] merge into tgt-1", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] merge into tgt-2", cell);
        h.assert_persons("tgt-1-cellc", cell, &["Sharon-cellc"], &[])
            .await;
        h.assert_persons("tgt-2-cellc", cell, &["Sharon-cellc"], &[])
            .await;
        h.assert_post_op_sentinel(cell, "sentinel-cellc").await;
    }

    // Cell d: Merge × Change, both touching main. C2 permits both
    // succeed, or exactly one clean 409 if the merge detects target
    // movement after planning but before acquiring the queue.
    {
        let cell = "d:merge×change:into-target";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "feature-celld").await;
        h.insert_person("feature-celld", "EveD-celld", 22).await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_merge("feature-celld".to_string(), "main".to_string()),
                matrix::op_change_insert("main".to_string(), "FrankD-celld".to_string(), 33),
            )
            .await;
        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
        assert!(
            sa.status == StatusCode::OK || sa.status == StatusCode::CONFLICT,
            "[{}] merge must be 200 or clean 409, got {}",
            cell,
            sa.status
        );
        if sa.status == StatusCode::OK {
            h.assert_persons("main", cell, &["EveD-celld", "FrankD-celld"], &[])
                .await;
        } else {
            let error: ErrorOutput = serde_json::from_slice(&sa.body).unwrap();
            let conflict = error
                .manifest_conflict
                .expect("merge 409 must include manifest_conflict");
            assert_eq!(
                conflict.table_key, "node:Person",
                "[{}] conflict table",
                cell
            );
            h.assert_persons("main", cell, &["FrankD-celld"], &["EveD-celld"])
                .await;
        }
        h.assert_post_op_sentinel(cell, "sentinel-celld").await;
    }

    // Cell e: Merge × BranchCreateFrom-target. Concurrent fork off the
    // merge target while the merge runs. Both should succeed; the new
    // branch should have a coherent view (either pre- or post-merge,
    // both valid). After both, target = main has the merged content.
    {
        let cell = "e:merge×branch_create_from:target";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "src-celle").await;
        h.insert_person("src-celle", "Eve-celle", 22).await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_merge("src-celle".to_string(), "main".to_string()),
                matrix::op_branch_create("main".to_string(), "fork-celle".to_string()),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] merge", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] branch_create_from", cell);
        // Main definitely has Eve.
        h.assert_persons("main", cell, &["Eve-celle"], &[]).await;
        // fork-celle was forked off main at SOME version; main's current
        // count is 5 (4 seeded + Eve). fork-celle has either 4 (pre-merge
        // snapshot) or 5 (post-merge snapshot); both are valid timings.
        let fork_count = h.person_count("fork-celle").await;
        assert!(
            fork_count == 4 || fork_count == 5,
            "[{}] fork-celle row count must be pre- or post-merge view (4 or 5), got {}",
            cell,
            fork_count
        );
        h.assert_post_op_sentinel(cell, "sentinel-celle").await;
    }

    // Cell f: BranchCreateFrom × BranchCreateFrom, distinct parents.
    // Pre-fix on f925ad1: swap-restore race in branch_create_from_impl
    // forked the new branch off the wrong parent. Identity asserts pin
    // that fork-from-A inherits A's content, fork-from-B inherits B's.
    {
        let cell = "f:branch_create_from×branch_create_from:distinct-parents";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "alpha-cellf").await;
        h.insert_person("alpha-cellf", "Eve-cellf", 22).await;
        h.create_branch("main", "beta-cellf").await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_branch_create("alpha-cellf".to_string(), "gamma-cellf".to_string()),
                matrix::op_branch_create("beta-cellf".to_string(), "delta-cellf".to_string()),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] gamma create", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] delta create", cell);
        // gamma forks off alpha → must contain Eve.
        h.assert_persons("gamma-cellf", cell, &["Eve-cellf"], &[])
            .await;
        // delta forks off beta → must NOT contain Eve.
        h.assert_persons("delta-cellf", cell, &[], &["Eve-cellf"])
            .await;
        h.assert_post_op_sentinel(cell, "sentinel-cellf").await;
    }

    // Cell g: BranchCreateFrom × BranchDelete, unrelated branches.
    // Disjoint branches; both should complete cleanly without
    // interference.
    {
        let cell = "g:branch_create_from×branch_delete:unrelated";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "doomed-cellg").await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_branch_create("main".to_string(), "newborn-cellg".to_string()),
                matrix::op_branch_delete("doomed-cellg".to_string()),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] create newborn", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] delete doomed", cell);
        // newborn-cellg exists with main's content.
        h.assert_persons("newborn-cellg", cell, &["Alice"], &[])
            .await;
        h.assert_post_op_sentinel(cell, "sentinel-cellg").await;
    }

    // Cell h: BranchDelete × BranchDelete, distinct branches. Both call
    // refresh() internally; verify no deadlock and both deletes land.
    {
        let cell = "h:branch_delete×branch_delete:distinct";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "doomed1-cellh").await;
        h.create_branch("main", "doomed2-cellh").await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_branch_delete("doomed1-cellh".to_string()),
                matrix::op_branch_delete("doomed2-cellh".to_string()),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] delete 1", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] delete 2", cell);
        // Verify both gone via /branches list (snapshot would still work
        // for a deleted branch via parent fallback in some paths, so we
        // use the explicit list).
        let r = h
            .app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/branches")
                    .method(Method::GET)
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(r.status(), StatusCode::OK);
        let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
        let list_body: Value = serde_json::from_slice(&body).unwrap();
        let branches: Vec<&str> = list_body["branches"]
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|v| v.as_str())
            .collect();
        assert!(
            !branches.contains(&"doomed1-cellh"),
            "[{}] doomed1 still in branch list: {:?}",
            cell,
            branches
        );
        assert!(
            !branches.contains(&"doomed2-cellh"),
            "[{}] doomed2 still in branch list: {:?}",
            cell,
            branches
        );
        h.assert_post_op_sentinel(cell, "sentinel-cellh").await;
    }

    // Cell i: BranchDelete × Change, on a different branch. Delete one
    // branch while a /change runs on main. Both should succeed.
    {
        let cell = "i:branch_delete×change:distinct-branch";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "doomed-celli").await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_branch_delete("doomed-celli".to_string()),
                matrix::op_change_insert("main".to_string(), "Pat-celli".to_string(), 44),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] delete", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
        h.assert_persons("main", cell, &["Pat-celli"], &[]).await;
        h.assert_post_op_sentinel(cell, "sentinel-celli").await;
    }

    // Cell j: BranchCreateFrom × Change, both on main. The fork timing
    // determines whether the new branch sees the change (pre or post).
    // Both valid. Main must contain the inserted row.
    {
        let cell = "j:branch_create_from×change:on-source";
        let h = matrix::Harness::new().await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_branch_create("main".to_string(), "twin-cellj".to_string()),
                matrix::op_change_insert("main".to_string(), "Quincy-cellj".to_string(), 55),
            )
            .await;
        assert_eq!(sa.status, StatusCode::OK, "[{}] branch_create", cell);
        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
        h.assert_persons("main", cell, &["Quincy-cellj"], &[]).await;
        // twin-cellj has either pre-change view (no Quincy) or
        // post-change view (with Quincy); either is valid.
        let twin_has_quincy = h.person_exists("twin-cellj", "Quincy-cellj").await;
        let _ = twin_has_quincy; // either valid timing — just ensure no panic
        h.assert_post_op_sentinel(cell, "sentinel-cellj").await;
    }

    // Cell k: reopen consistency. Run a representative concurrent pair,
    // drop the engine, reopen on a separate handle, verify state matches.
    {
        let cell = "k:reopen-after-pair";
        let h = matrix::Harness::new().await;
        h.create_branch("main", "src-cellk").await;
        h.insert_person("src-cellk", "Rita-cellk", 36).await;

        let (sa, sb) = h
            .run_pair(
                matrix::op_merge("src-cellk".to_string(), "main".to_string()),
                matrix::op_change_insert("main".to_string(), "Steve-cellk".to_string(), 37),
            )
            .await;
        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
        assert!(
            sa.status == StatusCode::OK || sa.status == StatusCode::CONFLICT,
            "[{}] merge must be 200 or clean 409, got {}",
            cell,
            sa.status
        );
        if sa.status == StatusCode::OK {
            h.assert_persons("main", cell, &["Rita-cellk", "Steve-cellk"], &[])
                .await;
        } else {
            let error: ErrorOutput = serde_json::from_slice(&sa.body).unwrap();
            let conflict = error
                .manifest_conflict
                .expect("merge 409 must include manifest_conflict");
            assert_eq!(
                conflict.table_key, "node:Person",
                "[{}] conflict table",
                cell
            );
            h.assert_persons("main", cell, &["Steve-cellk"], &["Rita-cellk"])
                .await;
        }

        // Reopen via a fresh AppState on the same graph.
        let graph_uri = format!("{}/server.omni", h._temp.path().display());
        let reopened = AppState::open(graph_uri.clone()).await.unwrap();
        let app2 = build_app(reopened);
        // Sanity: the same identity check via the new app must see
        // Rita and Steve.
        let r = app2
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/snapshot?branch=main")
                    .method(Method::GET)
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(r.status(), StatusCode::OK, "[{}] reopen snapshot", cell);
        let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
        let v: Value = serde_json::from_slice(&body).unwrap();
        let person_rows = v["tables"]
            .as_array()
            .and_then(|tables| {
                tables
                    .iter()
                    .find(|t| t["table_key"].as_str() == Some("node:Person"))
            })
            .and_then(|t| t["row_count"].as_u64())
            .expect("reopen snapshot must include node:Person row_count");
        let expected_rows = if sa.status == StatusCode::OK { 6 } else { 5 };
        assert_eq!(
            person_rows, expected_rows,
            "[{}] reopened main should include seed (4) + committed concurrent writes",
            cell,
        );
    }
}

#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn change_disjoint_table_concurrency_succeeds_at_http_level() {
    // HTTP-level pin for MR-686's disjoint-table promise: concurrent /change
    // requests touching different node types must coexist without admission
    // rejection or publisher-CAS conflict. The bench harness measures
    // throughput; this test is the regression sentinel that catches a
    // future change which accidentally re-introduces graph-wide
    // serialization on the disjoint path.
    //
    // Setup: test.jsonl seeds 4 Persons + 2 Companies. Spawn N=4 concurrent
    // /change inserts on `node:Person` and N=4 concurrent inserts on
    // `node:Company`. All 8 must return 200, and the post-test row counts
    // must reflect every insert.
    const PERSON_QUERY: &str = r#"
query insert_p($name: String, $age: I32) {
    insert Person { name: $name, age: $age }
}
"#;
    const COMPANY_QUERY: &str = r#"
query insert_c($name: String) {
    insert Company { name: $name }
}
"#;
    const SEED_PERSONS: u64 = 4;
    const SEED_COMPANIES: u64 = 2;
    const PER_TYPE: usize = 4;

    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let state = AppState::open(graph.to_string_lossy().to_string())
        .await
        .unwrap();
    let app = build_app(state);

    let mut handles = Vec::with_capacity(PER_TYPE * 2);
    for i in 0..PER_TYPE {
        let app_p = app.clone();
        handles.push(tokio::spawn(async move {
            let body = serde_json::to_vec(&ChangeRequest {
                query: PERSON_QUERY.to_string(),
                name: Some("insert_p".to_string()),
                params: Some(json!({ "name": format!("p-{i}"), "age": i as i32 })),
                branch: Some("main".to_string()),
            })
            .unwrap();
            let req = Request::builder()
                .uri("/change")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(body))
                .unwrap();
            app_p.oneshot(req).await.unwrap().status()
        }));
        let app_c = app.clone();
        handles.push(tokio::spawn(async move {
            let body = serde_json::to_vec(&ChangeRequest {
                query: COMPANY_QUERY.to_string(),
                name: Some("insert_c".to_string()),
                params: Some(json!({ "name": format!("c-{i}") })),
                branch: Some("main".to_string()),
            })
            .unwrap();
            let req = Request::builder()
                .uri("/change")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(body))
                .unwrap();
            app_c.oneshot(req).await.unwrap().status()
        }));
    }

    let mut statuses = Vec::with_capacity(PER_TYPE * 2);
    for h in handles {
        statuses.push(h.await.unwrap());
    }

    let bad: Vec<_> = statuses
        .iter()
        .enumerate()
        .filter(|(_, s)| **s != StatusCode::OK)
        .collect();
    assert!(
        bad.is_empty(),
        "expected every disjoint /change insert to return 200, got non-200 for: {:?}",
        bad,
    );

    // Verify both tables landed every insert.
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot?branch=main")
            .method(Method::GET)
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let lookup_count = |table_key: &str| -> u64 {
        body["tables"]
            .as_array()
            .and_then(|tables| {
                tables
                    .iter()
                    .find(|t| t["table_key"].as_str() == Some(table_key))
            })
            .and_then(|t| t["row_count"].as_u64())
            .unwrap_or_else(|| panic!("snapshot missing {}", table_key))
    };
    assert_eq!(
        lookup_count("node:Person"),
        SEED_PERSONS + PER_TYPE as u64,
        "Person row count after concurrent inserts",
    );
    assert_eq!(
        lookup_count("node:Company"),
        SEED_COMPANIES + PER_TYPE as u64,
        "Company row count after concurrent inserts",
    );
}

#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn ingest_per_actor_admission_cap_returns_429() {
    // Pin the admission gate on `/ingest`. With per-actor in-flight cap of 1
    // and 8 concurrent requests from the same actor, at least one request
    // must be rejected with HTTP 429 and `code: too_many_requests`.
    //
    // Pre-fix bug class: the admission pattern at `server_change`
    // (`crates/omnigraph-server/src/lib.rs:932`) was the only handler
    // that called `WorkloadController::try_admit`. A heavy actor sending
    // bulk-ingest traffic would exhaust shared engine capacity (Lance I/O
    // threads, manifest churn) without ever hitting an admission cap.
    // Pinned at the HTTP boundary so future refactors that drop the
    // try_admit call from a mutating handler turn this red.
    //
    // Post-fix invariant: `/ingest`, `/branches/create`, `/branches/delete`,
    // `/branches/merge`, and `/schema/apply` all gate on
    // `state.workload.try_admit(&actor_arc, est_bytes)` after Cedar
    // authorization and before the engine call. Cap exhaustion surfaces as
    // 429 with `code: too_many_requests`.
    //
    // Construct the WorkloadController directly with cap=1 instead of
    // mutating `OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX` via EnvGuard. Process-wide
    // env vars are visible to concurrently-running tests; the previous
    // `EnvGuard + #[serial]` pair leaked the override into any other test
    // that called `AppState::open` during the guard's window
    // (matrix CI failure on commit 99b0941). Using the explicit
    // `AppState::new_with_workload` constructor closes that bug class —
    // this test no longer mutates global state and no longer needs
    // `#[serial]`.
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    let workload = omnigraph_server::workload::WorkloadController::new(
        1,             // per-actor in-flight cap (the fixture under test)
        1_000_000_000, // per-actor byte budget — large so it never bottlenecks
    );
    // MR-723: install a permit-all policy alongside the bearer token so
    // /ingest (action=Change) passes Cedar evaluation. The test is
    // exercising the admission cap, not policy — the policy is just
    // enough to clear the State 3 path so the test reaches workload.
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, permit_all_policy_yaml(&["act-flooder"])).unwrap();
    let policy_engine =
        omnigraph_server::PolicyEngine::load_graph(&policy_path, graph.to_string_lossy().as_ref())
            .unwrap();
    let state = AppState::new_single(
        graph.to_string_lossy().to_string(),
        db,
        vec![("act-flooder".to_string(), "flooder-token".to_string())],
        Some(policy_engine),
        workload,
    );
    let app = build_app(state);
    let _temp = temp;

    // Eight concurrent ingests, all from act-flooder. Only one fits in a
    // cap=1 in-flight semaphore; the others must 429.
    const N: usize = 8;
    let barrier = Arc::new(tokio::sync::Barrier::new(N));
    let mut handles = Vec::with_capacity(N);
    for i in 0..N {
        let app = app.clone();
        let barrier = Arc::clone(&barrier);
        handles.push(tokio::spawn(async move {
            // Align the 8 tasks at the barrier so they all attempt
            // try_admit close in time.
            barrier.wait().await;

            let body = serde_json::to_vec(&IngestRequest {
                data: format!(
                    "{{\"type\":\"Person\",\"data\":{{\"name\":\"flooder-{i}\",\"age\":{i}}}}}\n"
                ),
                branch: Some("main".to_string()),
                from: Some("main".to_string()),
                mode: Some(omnigraph::loader::LoadMode::Merge),
            })
            .unwrap();
            let req = Request::builder()
                .uri("/ingest")
                .method(Method::POST)
                .header("authorization", "Bearer flooder-token")
                .header("content-type", "application/json")
                .body(Body::from(body))
                .unwrap();
            let response = app.oneshot(req).await.unwrap();
            let status = response.status();
            let headers = response.headers().clone();
            let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
            (status, headers, body.to_vec())
        }));
    }

    let mut results = Vec::with_capacity(N);
    for h in handles {
        results.push(h.await.unwrap());
    }
    let statuses: Vec<StatusCode> = results.iter().map(|(s, _, _)| *s).collect();

    let too_many: Vec<usize> = statuses
        .iter()
        .enumerate()
        .filter(|(_, s)| **s == StatusCode::TOO_MANY_REQUESTS)
        .map(|(i, _)| i)
        .collect();
    assert!(
        !too_many.is_empty(),
        "expected at least one /ingest under cap=1 to return 429; got statuses: {:?}",
        statuses,
    );

    // Validate the structured error body for each 429 (body must carry
    // the `too_many_requests` code so clients can distinguish it from
    // generic conflicts).
    for i in &too_many {
        let body_value: Value = serde_json::from_slice(&results[*i].2).unwrap();
        let error: ErrorOutput = serde_json::from_value(body_value).unwrap();
        assert_eq!(
            error.code,
            Some(omnigraph_server::api::ErrorCode::TooManyRequests),
            "429 body must carry code=too_many_requests; idx {} got {:?}",
            i,
            error.code,
        );
    }

    // Validate the `Retry-After` header is set on every 429. Pinned by
    // the same test so a future refactor that drops the header from
    // `IntoResponse for ApiError` turns this red. The constant
    // matches `crates/omnigraph-server/src/lib.rs::ApiError::into_response`.
    for i in &too_many {
        let retry_after = results[*i]
            .1
            .get(axum::http::header::RETRY_AFTER)
            .and_then(|v| v.to_str().ok())
            .map(str::to_string);
        assert!(
            retry_after.is_some(),
            "429 response must include a Retry-After header; idx {} headers were: {:?}",
            i,
            results[*i].1,
        );
    }
}

/// Regression for B2 (MR-668): when an `AppState` is built with a
/// per-graph policy and a custom workload, the engine inside the
/// routing's `GraphHandle` MUST have the same policy applied via
/// `Omnigraph::with_policy`. Pre-fix, `new_with_workload(...).with_policy_engine(p)`
/// installed the policy only on the HTTP-layer `handle.policy`; the
/// underlying `Arc<Omnigraph>` was reused without `with_policy`, so any
/// caller reaching through `state.routing()` could bypass Cedar.
///
/// This test reaches the engine the same way an embedded SDK consumer
/// or future routing code path would, and asserts the policy still
/// fires. The deny path is "act-blocked has a valid bearer but isn't in
/// the policy's allowed group" — i.e., authenticated-but-unauthorised.
#[tokio::test(flavor = "multi_thread")]
async fn engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single() {
    use omnigraph_server::GraphRouting;
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();

    // Permit `act-allowed` for change actions; `act-blocked` is not in
    // any allowed group — every change request from them must deny.
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, permit_all_policy_yaml(&["act-allowed"])).unwrap();
    let policy_engine =
        omnigraph_server::PolicyEngine::load_graph(&policy_path, graph.to_string_lossy().as_ref())
            .unwrap();

    let workload = omnigraph_server::workload::WorkloadController::new(100, 1_000_000_000);
    let state = AppState::new_single(
        graph.to_string_lossy().to_string(),
        db,
        vec![("act-blocked".to_string(), "block-token".to_string())],
        Some(policy_engine),
        workload,
    );

    // Reach into the routing and pull the engine the same way an
    // embedded consumer holding `Arc<Omnigraph>` would. If `new_single`
    // failed to apply `with_policy` to the engine, this `mutate_as`
    // would succeed — the HTTP-layer is bypassed entirely.
    let handle = match state.routing() {
        GraphRouting::Single { handle } => Arc::clone(handle),
        GraphRouting::Multi { .. } => panic!("expected single-mode routing"),
    };
    let engine = Arc::clone(&handle.engine);

    let mut params: omnigraph_compiler::ParamMap = Default::default();
    params.insert(
        "name".to_string(),
        omnigraph_compiler::Literal::String("EngineLayerBlocked".to_string()),
    );
    params.insert("age".to_string(), omnigraph_compiler::Literal::Integer(30));
    let result = engine
        .mutate_as(
            "main",
            MUTATION_QUERIES,
            "insert_person",
            &params,
            Some("act-blocked"),
        )
        .await;
    match result {
        Err(OmniError::Policy(_)) => { /* expected — engine-layer gate fired */ }
        Ok(_) => panic!(
            "engine-layer policy did NOT fire — act-blocked successfully ran mutate_as via \
             the engine pulled from the registry handle. AppState::new_single failed to apply \
             with_policy to the underlying Omnigraph engine. This is the B2 footgun the \
             with_policy_engine deletion was supposed to close."
        ),
        Err(other) => panic!("expected OmniError::Policy, got: {other:?}"),
    }
}

#[tokio::test(flavor = "multi_thread")]
async fn oversized_request_body_returns_payload_too_large() {
    let (_temp, app) = app_for_loaded_graph().await;
    let oversized = "x".repeat(1_100_000);
    let response = app
        .clone()
        .oneshot(
            Request::builder()
                .uri("/read")
                .method(Method::POST)
                .header("content-type", "application/json")
                .body(Body::from(oversized))
                .unwrap(),
        )
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE);
}

// ─── MR-723 default-deny mode (State 2: tokens without policy) ──────────
//
// `authorize_request` returns 403 for every action except `Read` when a
// PolicyEngine is not installed but bearer tokens are configured. Pinned
// by the three tests below — Read allowed, Change/SchemaApply denied —
// to prevent regressing back to the pre-MR-723 "tokens configured but
// no policy = fully open" trap.

#[tokio::test(flavor = "multi_thread")]
async fn default_deny_mode_allows_read_for_authenticated_actor() {
    let (_temp, app) = app_for_graph_with_auth_tokens_only(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-andrew", "demo-token")],
    )
    .await;

    let (status, _body) = json_response(
        &app,
        Request::builder()
            .uri("/snapshot")
            .method(Method::GET)
            .header(AUTHORIZATION, "Bearer demo-token")
            .body(Body::empty())
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
}

#[tokio::test(flavor = "multi_thread")]
async fn default_deny_mode_rejects_change_with_forbidden() {
    let (_temp, app) = app_for_graph_with_auth_tokens_only(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-andrew", "demo-token")],
    )
    .await;

    let change = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "DefaultDeny", "age": 1 })),
        branch: Some("main".to_string()),
    };
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header(AUTHORIZATION, "Bearer demo-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&change).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::FORBIDDEN);
    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert!(
        error.error.contains("default-deny"),
        "expected default-deny in error message, got: {}",
        error.error
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn default_deny_mode_rejects_schema_apply_with_forbidden() {
    let (_temp, app) = app_for_graph_with_auth_tokens_only(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-andrew", "demo-token")],
    )
    .await;

    let req = SchemaApplyRequest {
        schema_source: additive_schema_with_nickname(),
        ..Default::default()
    };
    let (status, body) = json_response(
        &app,
        Request::builder()
            .uri("/schema/apply")
            .method(Method::POST)
            .header(AUTHORIZATION, "Bearer demo-token")
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&req).unwrap()))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::FORBIDDEN);
    let error: ErrorOutput = serde_json::from_value(body).unwrap();
    assert!(
        error.error.contains("default-deny"),
        "expected default-deny in error message, got: {}",
        error.error
    );
}

// ─── SDK ↔ HTTP decision parity (MR-722 PR A) ─────────────────────────────
//
// Engine and HTTP both consult Cedar via `PolicyChecker::check()`; by
// construction they cannot disagree on a decision. These tests pin that
// property explicitly so a future refactor that introduces a separate
// auth path (or copy-pastes Cedar evaluation logic) turns red.
//
// Four cases cover the per-action scope shapes:
// * Change on a protected branch via `mutate_as` / POST /change
// * Change with an actor that has no permit
// * BranchMerge to a protected target via `branch_merge_as` / POST /branches/merge
// * BranchMerge with an actor that has no permit

const PARITY_POLICY_YAML: &str = r#"
version: 1
groups:
  team: [act-bruno]
  admins: [act-ragnor]
protected_branches: [main]
rules:
  - id: admins-change-anywhere
    allow:
      actors: { group: admins }
      actions: [change]
      branch_scope: any
  - id: admins-merge-to-protected
    allow:
      actors: { group: admins }
      actions: [branch_merge]
      target_branch_scope: protected
"#;

#[derive(Clone, Copy, Debug)]
enum ParityDecision {
    Allow,
    Deny,
}

async fn build_parity_graph() -> (tempfile::TempDir, PathBuf, PathBuf) {
    // Build a graph with `main` loaded and a `feature` branch ready for
    // merge. Returns the graph path and a written policy.yaml path.
    let temp = init_loaded_graph().await;
    let graph = graph_path(temp.path());
    {
        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        db.branch_create_from(ReadTarget::branch("main"), "feature")
            .await
            .unwrap();
        db.load_as(
            "feature",
            r#"{"type":"Person","data":{"name":"ParityEve","age":29}}"#,
            LoadMode::Append,
            None,
        )
        .await
        .unwrap();
    }
    let policy_path = temp.path().join("policy.yaml");
    fs::write(&policy_path, PARITY_POLICY_YAML).unwrap();
    (temp, graph, policy_path)
}

async fn sdk_change_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision {
    let policy = PolicyEngine::load_graph(policy_path, graph.to_string_lossy().as_ref()).unwrap();
    let db = Omnigraph::open(graph.to_str().unwrap())
        .await
        .unwrap()
        .with_policy(Arc::new(policy) as Arc<dyn PolicyChecker>);
    let mut params: omnigraph_compiler::ParamMap = Default::default();
    // Parameter keys are bare names (no `$` prefix); the runtime resolves
    // `$name` references in the query body to `params["name"]`.
    params.insert(
        "name".to_string(),
        omnigraph_compiler::Literal::String("ParityCharlie".to_string()),
    );
    params.insert("age".to_string(), omnigraph_compiler::Literal::Integer(30));
    let result = db
        .mutate_as(
            "main",
            MUTATION_QUERIES,
            "insert_person",
            &params,
            Some(actor),
        )
        .await;
    match result {
        Ok(_) => ParityDecision::Allow,
        Err(OmniError::Policy(_)) => ParityDecision::Deny,
        Err(other) => panic!("unexpected SDK error for change: {other:?}"),
    }
}

async fn http_change_decision(
    graph: &Path,
    policy_path: &PathBuf,
    actor: &str,
    token: &str,
) -> ParityDecision {
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![(actor.to_string(), token.to_string())],
        Some(policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);
    let req = ChangeRequest {
        query: MUTATION_QUERIES.to_string(),
        name: Some("insert_person".to_string()),
        params: Some(json!({ "name": "ParityCharlie", "age": 30 })),
        branch: Some("main".to_string()),
    };
    let (status, _body) = json_response(
        &app,
        Request::builder()
            .uri("/change")
            .method(Method::POST)
            .header(AUTHORIZATION, format!("Bearer {token}"))
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&req).unwrap()))
            .unwrap(),
    )
    .await;
    match status {
        StatusCode::OK => ParityDecision::Allow,
        StatusCode::FORBIDDEN => ParityDecision::Deny,
        other => panic!("unexpected HTTP status for change: {other}"),
    }
}

async fn sdk_merge_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision {
    let policy = PolicyEngine::load_graph(policy_path, graph.to_string_lossy().as_ref()).unwrap();
    let db = Omnigraph::open(graph.to_str().unwrap())
        .await
        .unwrap()
        .with_policy(Arc::new(policy) as Arc<dyn PolicyChecker>);
    let result = db.branch_merge_as("feature", "main", Some(actor)).await;
    match result {
        Ok(_) => ParityDecision::Allow,
        Err(OmniError::Policy(_)) => ParityDecision::Deny,
        Err(other) => panic!("unexpected SDK error for branch_merge: {other:?}"),
    }
}

async fn http_merge_decision(
    graph: &Path,
    policy_path: &PathBuf,
    actor: &str,
    token: &str,
) -> ParityDecision {
    let state = AppState::open_with_bearer_tokens_and_policy(
        graph.to_string_lossy().to_string(),
        vec![(actor.to_string(), token.to_string())],
        Some(policy_path),
    )
    .await
    .unwrap();
    let app = build_app(state);
    let req = BranchMergeRequest {
        source: "feature".to_string(),
        target: Some("main".to_string()),
    };
    let (status, _body) = json_response(
        &app,
        Request::builder()
            .uri("/branches/merge")
            .method(Method::POST)
            .header(AUTHORIZATION, format!("Bearer {token}"))
            .header("content-type", "application/json")
            .body(Body::from(serde_json::to_vec(&req).unwrap()))
            .unwrap(),
    )
    .await;
    match status {
        StatusCode::OK => ParityDecision::Allow,
        StatusCode::FORBIDDEN => ParityDecision::Deny,
        other => panic!("unexpected HTTP status for branch_merge: {other}"),
    }
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_decision_parity_change_admin_on_main_allowed() {
    // (act-ragnor, change, main) — admins-change-anywhere rule applies.
    // Both SDK and HTTP must allow. Each path uses its own fresh graph
    // because allow→side-effects.
    let (_t1, graph1, policy1) = build_parity_graph().await;
    let sdk = sdk_change_decision(&graph1, &policy1, "act-ragnor").await;
    let (_t2, graph2, policy2) = build_parity_graph().await;
    let http = http_change_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await;
    assert!(
        matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow),
        "SDK={sdk:?} HTTP={http:?} — should both Allow",
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_decision_parity_change_team_on_main_denied() {
    // (act-bruno, change, main) — no rule grants bruno change on
    // protected. Both SDK and HTTP must deny. Same graph is reusable
    // because deny→no side-effects.
    let (_temp, graph, policy) = build_parity_graph().await;
    let sdk = sdk_change_decision(&graph, &policy, "act-bruno").await;
    let http = http_change_decision(&graph, &policy, "act-bruno", "bruno-token").await;
    assert!(
        matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny),
        "SDK={sdk:?} HTTP={http:?} — should both Deny",
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_decision_parity_branch_merge_admin_allowed() {
    // (act-ragnor, branch_merge, feature→main) — admins-merge-to-protected
    // rule applies. Both Allow. Each path uses its own fresh graph —
    // a successful merge consumes the feature branch's commit on main.
    let (_t1, graph1, policy1) = build_parity_graph().await;
    let sdk = sdk_merge_decision(&graph1, &policy1, "act-ragnor").await;
    let (_t2, graph2, policy2) = build_parity_graph().await;
    let http = http_merge_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await;
    assert!(
        matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow),
        "SDK={sdk:?} HTTP={http:?} — should both Allow",
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn policy_decision_parity_branch_merge_team_denied() {
    // (act-bruno, branch_merge, feature→main) — no rule grants bruno
    // branch_merge. Both Deny.
    let (_temp, graph, policy) = build_parity_graph().await;
    let sdk = sdk_merge_decision(&graph, &policy, "act-bruno").await;
    let http = http_merge_decision(&graph, &policy, "act-bruno", "bruno-token").await;
    assert!(
        matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny),
        "SDK={sdk:?} HTTP={http:?} — should both Deny",
    );
}

// ─── MR-694 PR B: HTTP soft + hard drop semantics + data preservation ────
//
// SDK-level drop semantics are pinned in `crates/omnigraph/tests/schema_apply.rs`.
// These HTTP-side tests mirror the assertions through POST /schema/apply
// and exercise the new `allow_data_loss` field (closes the gap where
// the schema-lint chassis v1.2 shipped Hard mode on the CLI but the
// HTTP request struct had no equivalent field).

#[tokio::test(flavor = "multi_thread")]
async fn schema_apply_route_soft_drops_property_via_http() {
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;
    // Load a row that has the column we're about to drop.
    let graph = graph_path(temp.path());
    {
        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        db.load(
            "main",
            r#"{"type":"Person","data":{"name":"PreDrop","age":42}}"#,
            LoadMode::Append,
        )
        .await
        .unwrap();
    }
    let pre_version = manifest_dataset_version(&graph).await;

    let (status, payload) = json_response(
        &app,
        Request::builder()
            .method(Method::POST)
            .uri("/schema/apply")
            .header("content-type", "application/json")
            .header("authorization", "Bearer admin-token")
            .body(Body::from(
                serde_json::to_vec(&SchemaApplyRequest {
                    schema_source: schema_without_age(),
                    ..Default::default()
                })
                .unwrap(),
            ))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);

    // Catalog reflects the drop: `age` is gone from the live schema.
    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    assert!(
        !reopened.catalog().node_types["Person"]
            .properties
            .contains_key("age"),
        "catalog should not contain `age` after drop"
    );

    // Soft drop preserves the prior version — `age` is still readable
    // via time travel to the pre-drop manifest version. Mirrors the
    // SDK-side assertion in `apply_schema_drops_a_nullable_property_softly_preserves_prior_version`.
    let pre_drop_snapshot = reopened.snapshot_at_version(pre_version).await.unwrap();
    let pre_drop_ds = pre_drop_snapshot.open("node:Person").await.unwrap();
    let pre_drop_fields = pre_drop_ds
        .schema()
        .fields
        .iter()
        .map(|f| f.name.clone())
        .collect::<Vec<_>>();
    assert!(
        pre_drop_fields.iter().any(|f| f == "age"),
        "soft drop should leave the pre-drop dataset's `age` column \
         time-travel-reachable; got fields {pre_drop_fields:?}"
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_apply_route_soft_drops_node_type_via_http() {
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;
    let graph = graph_path(temp.path());

    let (status, payload) = json_response(
        &app,
        Request::builder()
            .method(Method::POST)
            .uri("/schema/apply")
            .header("content-type", "application/json")
            .header("authorization", "Bearer admin-token")
            .body(Body::from(
                serde_json::to_vec(&SchemaApplyRequest {
                    schema_source: schema_without_company(),
                    ..Default::default()
                })
                .unwrap(),
            ))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);

    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    assert!(
        !reopened.catalog().node_types.contains_key("Company"),
        "catalog should not contain `Company` after drop"
    );
    assert!(
        !reopened.catalog().edge_types.contains_key("WorksAt"),
        "catalog should not contain `WorksAt` after cascade"
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_apply_route_hard_drops_property_with_allow_data_loss() {
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;
    let graph = graph_path(temp.path());
    {
        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        db.load(
            "main",
            r#"{"type":"Person","data":{"name":"PreDropHard","age":50}}"#,
            LoadMode::Append,
        )
        .await
        .unwrap();
    }

    // Apply with allow_data_loss=true → Hard mode promotion.
    let (status, payload) = json_response(
        &app,
        Request::builder()
            .method(Method::POST)
            .uri("/schema/apply")
            .header("content-type", "application/json")
            .header("authorization", "Bearer admin-token")
            .body(Body::from(
                serde_json::to_vec(&SchemaApplyRequest {
                    schema_source: schema_without_age(),
                    allow_data_loss: true,
                })
                .unwrap(),
            ))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);

    // Catalog reflects the drop.
    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    assert!(
        !reopened.catalog().node_types["Person"]
            .properties
            .contains_key("age"),
        "catalog should not contain `age` after Hard drop"
    );
    // Plan steps should show DropMode::Hard for property drops.
    let steps = payload["steps"].as_array().expect("steps array");
    let drop_step = steps
        .iter()
        .find(|s| s["kind"] == "drop_property")
        .expect("plan should include drop_property step");
    let mode = &drop_step["mode"];
    assert_eq!(
        mode, "hard",
        "expected hard mode under allow_data_loss=true"
    );
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_apply_route_keeps_drops_soft_without_flag() {
    // Symmetric to the Hard test: same schema change, but no
    // allow_data_loss flag → drops stay Soft (prior column data
    // remains time-travel-reachable). Pins the default semantics
    // against accidental Hard promotion.
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;
    let graph = graph_path(temp.path());

    let (status, payload) = json_response(
        &app,
        Request::builder()
            .method(Method::POST)
            .uri("/schema/apply")
            .header("content-type", "application/json")
            .header("authorization", "Bearer admin-token")
            .body(Body::from(
                serde_json::to_vec(&SchemaApplyRequest {
                    schema_source: schema_without_age(),
                    allow_data_loss: false,
                })
                .unwrap(),
            ))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);

    let steps = payload["steps"].as_array().expect("steps array");
    let drop_step = steps
        .iter()
        .find(|s| s["kind"] == "drop_property")
        .expect("plan should include drop_property step");
    let mode = &drop_step["mode"];
    assert_eq!(mode, "soft", "expected soft mode without allow_data_loss");
    let _ = graph;
}

#[tokio::test(flavor = "multi_thread")]
async fn schema_apply_route_additive_property_preserves_existing_rows() {
    // SDK suite covers rename and drop data preservation. Additive
    // AddProperty wasn't pinned with a row-count check anywhere.
    // Load N rows, apply schema adding nullable property, verify
    // every row is still readable and the new column is null.
    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
        &fs::read_to_string(fixture("test.pg")).unwrap(),
        &[("act-ragnor", "admin-token")],
        SCHEMA_APPLY_POLICY_YAML,
    )
    .await;
    let graph = graph_path(temp.path());

    // Standard fixture data: 4 Persons + 1 Company. Load it.
    let pre_count = {
        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
        db.load(
            "main",
            &fs::read_to_string(fixture("test.jsonl")).unwrap(),
            LoadMode::Append,
        )
        .await
        .unwrap();
        let snap = db
            .snapshot_of(omnigraph::db::ReadTarget::branch("main"))
            .await
            .unwrap();
        snap.entry("node:Person").expect("Person").row_count
    };
    assert!(pre_count > 0, "fixture should have loaded Person rows");

    let (status, payload) = json_response(
        &app,
        Request::builder()
            .method(Method::POST)
            .uri("/schema/apply")
            .header("content-type", "application/json")
            .header("authorization", "Bearer admin-token")
            .body(Body::from(
                serde_json::to_vec(&SchemaApplyRequest {
                    schema_source: additive_schema_with_nickname(),
                    ..Default::default()
                })
                .unwrap(),
            ))
            .unwrap(),
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(payload["applied"], true);

    // Row count preserved.
    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
    let snap = db
        .snapshot_of(omnigraph::db::ReadTarget::branch("main"))
        .await
        .unwrap();
    let post_count = snap.entry("node:Person").expect("Person").row_count;
    assert_eq!(
        post_count, pre_count,
        "AddProperty should preserve row count",
    );
}

// ─── MR-668: multi-graph startup ──────────────────────────────────────────

mod multi_graph_startup {
    use super::*;
    use omnigraph::storage::normalize_root_uri;
    use omnigraph_server::{
        GraphHandle, GraphId, GraphKey, GraphRegistry, InsertError, ServerConfig, ServerConfigMode,
        load_server_settings,
    };
    use std::sync::Arc;

    async fn build_multi_mode_app(graph_ids: &[&str]) -> (Vec<tempfile::TempDir>, Router) {
        let mut dirs = Vec::with_capacity(graph_ids.len());
        let mut handles = Vec::with_capacity(graph_ids.len());
        for id in graph_ids {
            let dir = tempfile::tempdir().unwrap();
            let graph_uri = dir.path().join(id).to_str().unwrap().to_string();
            let schema = fs::read_to_string(fixture("test.pg")).unwrap();
            let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
            handles.push(Arc::new(GraphHandle {
                key: GraphKey::cluster(GraphId::try_from(*id).unwrap()),
                uri: graph_uri,
                engine: Arc::new(engine),
                policy: None,
            }));
            dirs.push(dir);
        }
        let workload = omnigraph_server::workload::WorkloadController::from_env();
        let state = AppState::new_multi(handles, Vec::new(), None, workload, None).unwrap();
        let app = build_app(state);
        (dirs, app)
    }

    /// Cluster route `/graphs/{graph_id}/snapshot` resolves to the right
    /// engine. Two graphs side by side; assert each responds to its own
    /// id and does NOT respond to the other's URL.
    #[tokio::test(flavor = "multi_thread")]
    async fn cluster_routes_dispatch_per_graph_handle() {
        let (_dirs, app) = build_multi_mode_app(&["alpha", "beta"]).await;
        for id in ["alpha", "beta"] {
            let resp = app
                .clone()
                .oneshot(
                    Request::builder()
                        .method(Method::GET)
                        .uri(format!("/graphs/{id}/snapshot?branch=main"))
                        .body(Body::empty())
                        .unwrap(),
                )
                .await
                .unwrap();
            assert_eq!(
                resp.status(),
                StatusCode::OK,
                "graph '{id}' must respond OK on its cluster snapshot route"
            );
        }
    }

    /// Unknown graph id under the cluster prefix yields 404 (not 500,
    /// not 410 — `Gone` is reserved for the future DELETE flow).
    #[tokio::test(flavor = "multi_thread")]
    async fn cluster_route_for_unknown_graph_returns_404() {
        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;
        let resp = app
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs/nonexistent/snapshot?branch=main")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(resp.status(), StatusCode::NOT_FOUND);
    }

    /// Coverage net for cluster-route regressions across every
    /// protected handler — not just the few that have inner path
    /// params. Bug-1 surfaced because only `/snapshot` was being
    /// exercised in cluster mode, leaving the other six protected
    /// routes implicitly untested. This sweep hits each one and
    /// asserts the response shows the handler was reached: no 404
    /// (router didn't match), no 500 with "Wrong number of path
    /// arguments" (path extractor broke), no 500 with "missing
    /// extension" (routing middleware didn't inject the handle).
    ///
    /// Status codes are negative assertions because each handler's
    /// happy-path inputs differ — what matters is "the request
    /// reached the handler," not "the handler returned 200." The
    /// individual handlers' logic is already tested in single mode.
    #[tokio::test(flavor = "multi_thread")]
    async fn all_protected_cluster_routes_resolve_to_their_handler() {
        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;

        // (method, path, body) — one minimal request per protected
        // cluster route. Bodies are valid enough that the router and
        // extractors succeed; whether the engine ultimately returns
        // 200 or 4xx is per-handler and not what this test pins.
        let cases: &[(Method, &str, Option<&str>)] = &[
            (Method::GET, "/graphs/alpha/snapshot?branch=main", None),
            (Method::GET, "/graphs/alpha/schema", None),
            (Method::GET, "/graphs/alpha/branches", None),
            (Method::GET, "/graphs/alpha/commits", None),
            (
                Method::POST,
                "/graphs/alpha/read",
                Some(r#"{"query_source":"query q() { return {} }"}"#),
            ),
            (
                Method::POST,
                "/graphs/alpha/change",
                Some(r#"{"query_source":"query q() { return {} }"}"#),
            ),
            (
                Method::POST,
                "/graphs/alpha/export",
                Some(r#"{"branch":"main"}"#),
            ),
            (
                Method::POST,
                "/graphs/alpha/schema/apply",
                Some(r#"{"schema_source":"","allow_data_loss":false}"#),
            ),
            (Method::POST, "/graphs/alpha/ingest", Some(r#"{"data":""}"#)),
            (
                Method::POST,
                "/graphs/alpha/branches/merge",
                Some(r#"{"source":"main","target":"main"}"#),
            ),
        ];

        for (method, path, body) in cases {
            let req_body = body
                .map(|s| Body::from(s.to_string()))
                .unwrap_or_else(Body::empty);
            let req = Request::builder()
                .method(method.clone())
                .uri(*path)
                .header("content-type", "application/json")
                .body(req_body)
                .unwrap();
            let resp = app.clone().oneshot(req).await.unwrap();
            let status = resp.status();
            let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
            let body_str = String::from_utf8_lossy(&bytes);

            assert_ne!(
                status,
                StatusCode::NOT_FOUND,
                "{} {} — router didn't match (cluster-route mounting regression). Body: {}",
                method,
                path,
                body_str,
            );
            assert!(
                !(status == StatusCode::INTERNAL_SERVER_ERROR
                    && body_str.contains("Wrong number of path arguments")),
                "{} {} — path extractor broke (Bug-1 class regression). Body: {}",
                method,
                path,
                body_str,
            );
            assert!(
                !(status == StatusCode::INTERNAL_SERVER_ERROR
                    && body_str.to_lowercase().contains("missing extension")),
                "{} {} — routing middleware didn't inject GraphHandle. Body: {}",
                method,
                path,
                body_str,
            );
        }
    }

    /// Regression for the bot-surfaced path-extractor bug: cluster
    /// routes whose inner path also captures a parameter
    /// (`/graphs/{graph_id}/branches/{branch}`,
    /// `/graphs/{graph_id}/commits/{commit_id}`) must extract the
    /// inner param cleanly. Axum 0.8 propagates the outer `{graph_id}`
    /// capture into nested handlers, so a `Path<String>` extractor
    /// would see two values and fail with "Wrong number of path
    /// arguments. Expected 1 but got 2." Today both DELETE branch and
    /// GET commit-by-id break in multi-mode because their handlers
    /// use bare `Path<String>` — this test pins the fix.
    ///
    /// The broader `all_protected_cluster_routes_resolve_to_their_handler`
    /// test sweeps the full route surface; this one stays narrowly
    /// targeted at the inner-path-param shape because that's the
    /// specific regression class.
    #[tokio::test(flavor = "multi_thread")]
    async fn cluster_routes_with_inner_path_params_deserialize_correctly() {
        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;

        // Create a branch we can then delete — DELETE /graphs/alpha/branches/feature
        let create_resp = app
            .clone()
            .oneshot(
                Request::builder()
                    .method(Method::POST)
                    .uri("/graphs/alpha/branches")
                    .header("content-type", "application/json")
                    .body(Body::from(r#"{"name":"feature"}"#))
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(
            create_resp.status(),
            StatusCode::OK,
            "branch create on the cluster route must succeed before delete can be tested"
        );

        // DELETE /graphs/{graph_id}/branches/{branch} — exercises a handler
        // whose only Path extractor (`branch`) is inside a nested route
        // that also captures `graph_id`. The handler must pick `branch`
        // by name, not by position.
        let delete_resp = app
            .clone()
            .oneshot(
                Request::builder()
                    .method(Method::DELETE)
                    .uri("/graphs/alpha/branches/feature")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        let delete_status = delete_resp.status();
        let delete_body = to_bytes(delete_resp.into_body(), usize::MAX).await.unwrap();
        assert_eq!(
            delete_status,
            StatusCode::OK,
            "DELETE /graphs/{{id}}/branches/{{branch}} must extract `branch` cleanly. \
             Body: {}",
            String::from_utf8_lossy(&delete_body),
        );

        // GET /graphs/{graph_id}/commits/{commit_id} — same shape: the
        // handler's only Path extractor is the inner `commit_id`, which
        // must deserialize by name even though `graph_id` is also in scope.
        // We don't know a real commit_id, but the failure mode under test
        // is path extraction, not commit lookup — a 404 from the engine
        // is fine; a 500 with "Wrong number of path arguments" is the bug.
        let commit_resp = app
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs/alpha/commits/0000000000000000")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        let commit_status = commit_resp.status();
        let commit_body = to_bytes(commit_resp.into_body(), usize::MAX).await.unwrap();
        let body_str = String::from_utf8_lossy(&commit_body);
        assert!(
            commit_status != StatusCode::INTERNAL_SERVER_ERROR
                || !body_str.contains("Wrong number of path arguments"),
            "GET /graphs/{{id}}/commits/{{commit_id}} must extract `commit_id` cleanly. \
             Got: {} | {}",
            commit_status,
            body_str,
        );
    }

    /// Flat routes 404 in multi mode — the router only mounts under
    /// `/graphs/{graph_id}/...` so `/snapshot` doesn't resolve.
    #[tokio::test(flavor = "multi_thread")]
    async fn flat_routes_404_in_multi_mode() {
        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;
        let resp = app
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/snapshot?branch=main")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(resp.status(), StatusCode::NOT_FOUND);
    }

    /// `GraphId` validation runs at startup — a reserved name in
    /// `omnigraph.yaml` produces a clear error rather than getting
    /// rejected per-request.
    #[test]
    fn load_server_settings_rejects_reserved_graph_id() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
graphs:
  policies:
    uri: /tmp/g1.omni
"#,
        )
        .unwrap();
        let err = load_server_settings(Some(&config_path), None, None, None, false).unwrap_err();
        assert!(
            err.to_string().contains("invalid graph id 'policies'"),
            "expected reserved-name rejection, got: {err}"
        );
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn registry_rejects_duplicate_normalized_graph_uris() {
        let dir = tempfile::tempdir().unwrap();
        let graph_uri = dir.path().join("same").to_str().unwrap().to_string();
        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
        let engine = Arc::new(Omnigraph::init(&graph_uri, &schema).await.unwrap());

        let alpha = Arc::new(GraphHandle {
            key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
            uri: graph_uri.clone(),
            engine: Arc::clone(&engine),
            policy: None,
        });
        let beta = Arc::new(GraphHandle {
            key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
            uri: format!("file://{graph_uri}/"),
            engine,
            policy: None,
        });

        match GraphRegistry::from_handles(vec![alpha, beta]) {
            Err(InsertError::DuplicateUri(uri)) => {
                assert!(
                    normalize_root_uri(&uri).is_ok(),
                    "duplicate URI should still be parseable, got {uri}"
                );
            }
            Err(err) => panic!("expected DuplicateUri for normalized aliases, got {err:?}"),
            Ok(_) => panic!("expected DuplicateUri for normalized aliases, got Ok"),
        }
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn registry_stores_canonical_graph_uri() {
        let dir = tempfile::tempdir().unwrap();
        let graph_uri = dir.path().join("canonical").to_str().unwrap().to_string();
        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
        let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
        let handle = Arc::new(GraphHandle {
            key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
            uri: format!("file://{graph_uri}/"),
            engine: Arc::new(engine),
            policy: None,
        });

        let registry = GraphRegistry::from_handles(vec![handle]).unwrap();
        let listed = registry.list();
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].uri, graph_uri);
    }

    // ── Four-rule mode inference matrix ───────────────────────────────

    /// Rule 1: CLI positional URI → Single.
    #[test]
    fn mode_inference_cli_uri_is_single() {
        let settings = load_server_settings(
            None,
            Some("/tmp/cli.omni".to_string()),
            None,
            None,
            true, // allow unauth so we get past the runtime-state check
        )
        .unwrap();
        match settings.mode {
            ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/cli.omni"),
            ServerConfigMode::Multi { .. } => panic!("expected Single (rule 1), got Multi"),
        }
    }

    /// Rule 2: --target picks one graph from `graphs:` map → Single.
    #[test]
    fn mode_inference_cli_target_is_single() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
graphs:
  alpha:
    uri: /tmp/alpha.omni
  beta:
    uri: /tmp/beta.omni
"#,
        )
        .unwrap();
        let settings =
            load_server_settings(Some(&config_path), None, Some("alpha".into()), None, true)
                .unwrap();
        match settings.mode {
            ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/alpha.omni"),
            ServerConfigMode::Multi { .. } => panic!("expected Single (rule 2), got Multi"),
        }
    }

    /// Rule 3: `server.graph` set → Single (target picked from config).
    #[test]
    fn mode_inference_server_graph_is_single() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
graphs:
  alpha:
    uri: /tmp/alpha.omni
  beta:
    uri: /tmp/beta.omni
server:
  graph: beta
"#,
        )
        .unwrap();
        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
        match settings.mode {
            ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/beta.omni"),
            ServerConfigMode::Multi { .. } => panic!("expected Single (rule 3), got Multi"),
        }
    }

    /// Rule 4: `--config` + non-empty `graphs:` + no single-mode selector → Multi.
    #[test]
    fn mode_inference_config_plus_graphs_is_multi() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
graphs:
  alpha:
    uri: /tmp/alpha.omni
  beta:
    uri: /tmp/beta.omni
"#,
        )
        .unwrap();
        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
        match settings.mode {
            ServerConfigMode::Multi { graphs, .. } => {
                let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect();
                // BTreeMap iteration order is alphabetical.
                assert_eq!(ids, vec!["alpha", "beta"]);
            }
            ServerConfigMode::Single { .. } => panic!("expected Multi (rule 4), got Single"),
        }
    }

    #[test]
    fn mode_inference_multi_rejects_top_level_policy_file() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
policy:
  file: ./policy.yaml
graphs:
  alpha:
    uri: /tmp/alpha.omni
"#,
        )
        .unwrap();
        let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("top-level `policy.file` is single-graph/CLI-local policy only"),
            "expected single-graph policy guidance, got: {msg}"
        );
        assert!(
            msg.contains("graphs.<graph_id>.policy.file"),
            "expected per-graph migration guidance, got: {msg}"
        );
        assert!(
            msg.contains("server.policy.file"),
            "expected server policy migration guidance, got: {msg}"
        );
    }

    #[test]
    fn mode_inference_normalizes_multi_graph_uris() {
        let temp = tempfile::tempdir().unwrap();
        let graph = temp.path().join("alpha.omni");
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            format!(
                r#"
graphs:
  alpha:
    uri: file://{}/
"#,
                graph.display()
            ),
        )
        .unwrap();
        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
        match settings.mode {
            ServerConfigMode::Multi { graphs, .. } => {
                assert_eq!(graphs[0].uri, graph.to_string_lossy());
            }
            ServerConfigMode::Single { .. } => panic!("expected Multi"),
        }
    }

    /// Rule 5: nothing → error with migration hint.
    #[test]
    fn mode_inference_no_inputs_errors_with_migration_hint() {
        let err = load_server_settings(None, None, None, None, true).unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("no graph to serve"),
            "expected migration-hint error, got: {msg}"
        );
    }

    /// Rule 4 sub-case: `--config` with empty `graphs:` map and no
    /// single-mode selector → rule 5 fires (no graph to serve).
    #[test]
    fn mode_inference_empty_graphs_map_errors() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(&config_path, "server:\n  bind: 127.0.0.1:8080\n").unwrap();
        let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err();
        assert!(err.to_string().contains("no graph to serve"));
    }

    /// `--config` + `<URI>` together: URI wins → Single (the CLI URI
    /// takes precedence over the config's graphs map).
    #[test]
    fn mode_inference_cli_uri_overrides_graphs_map() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
graphs:
  alpha:
    uri: /tmp/alpha.omni
"#,
        )
        .unwrap();
        let settings = load_server_settings(
            Some(&config_path),
            Some("/tmp/cli-override.omni".to_string()),
            None,
            None,
            true,
        )
        .unwrap();
        match settings.mode {
            ServerConfigMode::Single { uri, .. } => {
                assert_eq!(
                    uri, "/tmp/cli-override.omni",
                    "CLI URI must win over graphs: map"
                );
            }
            ServerConfigMode::Multi { .. } => {
                panic!("expected Single (CLI URI wins), got Multi")
            }
        }
    }

    /// Per-graph `policy.file` is resolved relative to the config base_dir.
    #[test]
    fn per_graph_policy_file_is_resolved_relative_to_base_dir() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
graphs:
  alpha:
    uri: /tmp/alpha.omni
    policy:
      file: ./policies/alpha.yaml
  beta:
    uri: /tmp/beta.omni
"#,
        )
        .unwrap();
        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
        let graphs = match settings.mode {
            ServerConfigMode::Multi { graphs, .. } => graphs,
            _ => panic!("expected Multi"),
        };
        // graphs is BTreeMap-iter order (alphabetical).
        let alpha = &graphs[0];
        let beta = &graphs[1];
        assert_eq!(alpha.graph_id, "alpha");
        assert_eq!(
            alpha.policy_file.as_ref().unwrap(),
            &temp.path().join("policies/alpha.yaml")
        );
        assert_eq!(beta.graph_id, "beta");
        assert!(beta.policy_file.is_none());
    }

    /// `server.policy.file` resolves alongside the graphs map.
    #[test]
    fn server_policy_file_is_resolved_relative_to_base_dir() {
        let temp = tempfile::tempdir().unwrap();
        let config_path = temp.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            r#"
server:
  policy:
    file: ./server-policy.yaml
graphs:
  alpha:
    uri: /tmp/alpha.omni
"#,
        )
        .unwrap();
        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
        match settings.mode {
            ServerConfigMode::Multi {
                server_policy_file, ..
            } => {
                assert_eq!(
                    server_policy_file.unwrap(),
                    temp.path().join("server-policy.yaml")
                );
            }
            _ => panic!("expected Multi"),
        }
    }

    /// `GET /graphs` must NOT leak the registry in Open mode without
    /// an explicit server policy. Operators who pass `--unauthenticated`
    /// opted into trusting the network for graph DATA, not for leaking
    /// server topology (graph IDs + URIs, which may contain S3 bucket
    /// paths or internal hostnames). Cedar gating the management
    /// surface is the documented contract for `server_graphs_list`
    /// ("don't leak the registry until the operator explicitly
    /// authorizes it"); enforcing that contract in every runtime
    /// state — not just `PolicyEnabled` — is the correct-by-design
    /// closure of the open-mode hole the bot-review pass surfaced.
    ///
    /// Today (pre-fix) this returns 200 because `authorize_request`'s
    /// no-policy fallback only denies when `actor.is_some()`, so Open
    /// mode (`actor: None`) falls through to `Ok(())`. The fix in the
    /// next commit tightens the fallback so server-scoped actions
    /// always require explicit policy.
    ///
    /// Sort-order coverage previously lived here; it has moved to
    /// `get_graphs_with_server_policy_authorizes_per_cedar` where
    /// the response body is now non-empty and operator-authorized.
    #[tokio::test(flavor = "multi_thread")]
    async fn get_graphs_denied_in_open_mode_without_server_policy() {
        let (_dirs, app) = build_multi_mode_app(&["beta", "alpha"]).await;
        let resp = app
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        let status = resp.status();
        let body = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
        let body_str = String::from_utf8_lossy(&body);
        assert_eq!(
            status,
            StatusCode::FORBIDDEN,
            "GET /graphs must require an explicit server policy in every \
             runtime state; Open-mode bypass would leak server topology. \
             Body: {body_str}",
        );
    }

    /// `GET /graphs` returns 405 in single mode (resource exists in the
    /// API surface, just not operational without a `graphs:` map).
    #[tokio::test(flavor = "multi_thread")]
    async fn get_graphs_returns_405_in_single_mode() {
        let temp = init_loaded_graph().await;
        let graph = graph_path(temp.path());
        let state = AppState::open(graph.to_string_lossy().to_string())
            .await
            .unwrap();
        let app = build_app(state);
        let resp = app
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(resp.status(), StatusCode::METHOD_NOT_ALLOWED);
    }

    /// `GET /graphs` requires bearer auth when tokens are configured.
    #[tokio::test(flavor = "multi_thread")]
    async fn get_graphs_requires_bearer_auth_when_configured() {
        use omnigraph_server::{GraphHandle, GraphId, GraphKey};
        // Build a multi-mode app with bearer tokens configured.
        let dir = tempfile::tempdir().unwrap();
        let graph_uri = dir.path().join("alpha").to_str().unwrap().to_string();
        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
        let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
        let handle = Arc::new(GraphHandle {
            key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
            uri: graph_uri,
            engine: Arc::new(engine),
            policy: None,
        });
        let tokens = vec![("act-andrew".to_string(), "secret-token".to_string())];
        let workload = omnigraph_server::workload::WorkloadController::from_env();
        let state = AppState::new_multi(vec![handle], tokens, None, workload, None).unwrap();
        let app = build_app(state);

        // No Authorization header → 401.
        let resp_no_auth = app
            .clone()
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(resp_no_auth.status(), StatusCode::UNAUTHORIZED);

        // With auth but no server policy → 403 (default-deny, since
        // GraphList is not Read).
        let resp_authed = app
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs")
                    .header("authorization", "Bearer secret-token")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(resp_authed.status(), StatusCode::FORBIDDEN);
    }

    /// `GET /graphs` with a server policy that allows `graph_list` → 200
    /// and returns the registry sorted alphabetically by `graph_id`.
    /// `GET /graphs` with a server policy that does NOT allow
    /// `graph_list` (viewer group) → 403.
    ///
    /// This test owns the alphabetical-sort coverage that previously
    /// lived in `get_graphs_lists_registered_graphs_in_multi_mode`.
    /// That test now asserts denial in Open mode (server-scoped actions
    /// require explicit policy in every runtime state), so the positive
    /// body-shape assertions need a home where the response is
    /// operator-authorized — here.
    #[tokio::test(flavor = "multi_thread")]
    async fn get_graphs_with_server_policy_authorizes_per_cedar() {
        use omnigraph_policy::PolicyEngine;
        use omnigraph_server::{GraphHandle, GraphId, GraphKey};

        let dir = tempfile::tempdir().unwrap();

        // Two graphs deliberately registered in non-alphabetical order
        // so the test would fail if the handler relied on insertion
        // order instead of server-side sorting.
        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
        let mut handles = Vec::new();
        for id in ["beta", "alpha"] {
            let graph_uri = dir.path().join(id).to_str().unwrap().to_string();
            let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
            handles.push(Arc::new(GraphHandle {
                key: GraphKey::cluster(GraphId::try_from(id).unwrap()),
                uri: graph_uri,
                engine: Arc::new(engine),
                policy: None,
            }));
        }

        // Server policy: admins can graph_list, viewers cannot.
        let policy_path = dir.path().join("server-policy.yaml");
        fs::write(
            &policy_path,
            r#"
version: 1
groups:
  admins: [act-andrew]
  viewers: [act-bruno]
rules:
  - id: admins-list-graphs
    allow:
      actors: { group: admins }
      actions: [graph_list]
"#,
        )
        .unwrap();
        let server_policy = PolicyEngine::load_server(&policy_path).unwrap();

        let tokens = vec![
            ("act-andrew".to_string(), "andrew-token".to_string()),
            ("act-bruno".to_string(), "bruno-token".to_string()),
        ];
        let workload = omnigraph_server::workload::WorkloadController::from_env();
        let state =
            AppState::new_multi(handles, tokens, Some(server_policy), workload, None).unwrap();
        let app = build_app(state);

        // Admin → 200, body returns both graphs alphabetically sorted.
        let resp_admin = app
            .clone()
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs")
                    .header("authorization", "Bearer andrew-token")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(
            resp_admin.status(),
            StatusCode::OK,
            "admin must be allowed graph_list"
        );
        let body = to_bytes(resp_admin.into_body(), usize::MAX).await.unwrap();
        let json: Value = serde_json::from_slice(&body).unwrap();
        let graphs = json["graphs"].as_array().unwrap();
        assert_eq!(graphs.len(), 2, "response must list both registered graphs");
        assert_eq!(
            graphs[0]["graph_id"].as_str().unwrap(),
            "alpha",
            "server must sort graphs alphabetically by graph_id (insertion order was 'beta', 'alpha')"
        );
        assert_eq!(graphs[1]["graph_id"].as_str().unwrap(), "beta");

        // Viewer → 403
        let resp_viewer = app
            .oneshot(
                Request::builder()
                    .method(Method::GET)
                    .uri("/graphs")
                    .header("authorization", "Bearer bruno-token")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(
            resp_viewer.status(),
            StatusCode::FORBIDDEN,
            "viewer must be denied graph_list (Cedar gate)"
        );
    }

    /// Loads an `omnigraph.yaml` with two graphs and verifies multi-mode
    /// inference plus graph entry resolution. Cluster-route dispatch is
    /// covered by the route tests above.
    #[tokio::test(flavor = "multi_thread")]
    async fn server_settings_load_multi_graph_config_entries() {
        let cfg_dir = tempfile::tempdir().unwrap();
        // Real graph storage dirs (the URIs in the config must point to
        // a graph init-able location).
        let alpha_dir = cfg_dir.path().join("alpha.omni");
        let beta_dir = cfg_dir.path().join("beta.omni");
        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
        Omnigraph::init(alpha_dir.to_str().unwrap(), &schema)
            .await
            .unwrap();
        Omnigraph::init(beta_dir.to_str().unwrap(), &schema)
            .await
            .unwrap();

        let config_path = cfg_dir.path().join("omnigraph.yaml");
        fs::write(
            &config_path,
            format!(
                r#"
graphs:
  alpha:
    uri: {alpha}
  beta:
    uri: {beta}
"#,
                alpha = alpha_dir.display(),
                beta = beta_dir.display(),
            ),
        )
        .unwrap();

        let settings: ServerConfig =
            load_server_settings(Some(&config_path), None, None, None, true).unwrap();
        assert!(matches!(settings.mode, ServerConfigMode::Multi { .. }));

        match settings.mode {
            ServerConfigMode::Multi { graphs, .. } => {
                assert_eq!(graphs.len(), 2);
                let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect();
                assert_eq!(ids, vec!["alpha", "beta"]);
            }
            _ => unreachable!(),
        }
    }
}
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								use std::env;
 								use std::fs;
 								use std::path::{Path, PathBuf};
-												tests: pin /ingest admission gate + 429 Retry-After (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix. Currently fails on f925ad1 with 8/8 statuses returning
200 because /ingest does not call WorkloadController::try_admit.

The test pins:
- /ingest is gated on per-actor admission control (returns 429 when
  the cap is exceeded).
- 429 responses carry the structured `code: too_many_requests` error
  body so clients can distinguish them from generic conflicts.
- 429 responses include a `Retry-After` header so clients can implement
  bounded backoff. The doc claim at api.rs:343 and lib.rs:344 was that
  this header exists; the IntoResponse impl currently emits no headers.

Two follow-up commits will turn this green:
1. Wire WorkloadController::try_admit on /ingest and the four other
   mutating handlers (Block 2.1).
2. Emit the Retry-After header on 429/503 responses (Block 2.2).

The test uses #[serial] + EnvGuard to override
OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1 without racing parallel tests, then
spawns 8 concurrent /ingest tasks aligned at a tokio::sync::Barrier so
multiple tasks reach try_admit close in time. With cap=1, at least one
must be rejected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:57:01 +02:00
+								use std::sync::Arc;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								use axum::Router;
 								use axum::body::{Body, to_bytes};
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								use axum::http::header::AUTHORIZATION;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								use axum::http::{Method, Request, StatusCode};
-												chore(lance): bump 4.0.0 → 6.0.1 (DataFusion 52→53, Arrow 57→58) (#111)

* tests: add lance_surface_guards pre-flight pins for the v6 bump

Land 8 named guards in a new test file that pin Lance API surfaces
OmniGraph relies on. Each guard turns a silent-break risk (variant
rename, struct restructure, async-flip) into a red CI bar instead of
runtime drift.

Guards (mapped to the silent-break inventory from the v6 migration plan):

  Runtime (#[tokio::test]):
  1. lance_error_too_much_write_contention_variant_exists — pins the
     variant referenced by db/manifest/publisher.rs::map_lance_publish_error.
  2. manifest_location_field_shape — pins .path/.size/.e_tag/.naming_scheme
     types and ManifestLocation accessor returning &Self (the access
     pattern at db/manifest/metadata.rs:84-88).
  6. write_params_default_does_not_set_storage_version — confirms our
     explicit V2_2 pin remains load-bearing (blob v2 requirement).

  Compile-only async fns (#[allow(...)] + unimplemented!() placeholders;
  never run, but cargo build --tests enforces the API shape):
  3. checkout_version + restore chain — pins the recovery rollback hammer
     at db/manifest/recovery.rs:505-522.
  4. DatasetBuilder::from_namespace().with_branch().with_version().load()
     — pins the namespace builder chain at db/manifest/namespace.rs:162-174.
  5. MergeInsertBuilder fluent chain — pins the manifest CAS at
     db/manifest/publisher.rs:370-391, including the return shape
     (Arc<Dataset>, MergeStats).
  7. compact_files(&mut ds, CompactionOptions, None) — pins
     db/omnigraph/optimize.rs:107.
  8. DeleteResult { new_dataset, num_deleted_rows } — pins the inline
     delete result shape (MR-A will repurpose this guard to the staged
     two-phase variant once Lance #6658 migration lands).

This is commit 1 of the chore/lance-6.0.1 migration. Cargo bump
follows in commit 2 (will trigger the guards under v6 if any surface
drifted).

Per the migration plan at ~/.claude/plans/shimmering-percolating-duckling.md
(written this session). Two guards from the plan deferred to follow-up:
  - manifest_cas_returns_row_level_contention_variant (full publisher
    race integration test — needs harness scaffolding)
  - table_version_metadata_byte_compatible_with_v4 (TableVersionMetadata
    is pub(crate); requires test reach extension).

Verified on v4: cargo test -p omnigraph-engine --test lance_surface_guards
passes 3/3 runtime tests; cargo build -p omnigraph-engine --tests
compiles all 5 compile-only guards clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* chore(deps): bump Lance 4.0.0 → 6.0.1, DataFusion 52 → 53, Arrow 57 → 58

The Cargo bump itself. Source is intentionally untouched — this commit
will not compile. The compile errors are the work-list for subsequent
commits on this branch.

Lance updates: lance + 7 sub-crates 4.0.0 → 6.0.1. Transitive churn:
  + lance-tokenizer v6.0.1 (vendored tokenizer per Lance PR #6512)
  + object_store 0.13.x (Lance 6 brings it transitively; our explicit
    pin stays at 0.12.5 for now — revisit in stages if diamond bites)
  - tantivy* crates (replaced by lance-tokenizer)

Compile error landscape on this commit (11 errors):
  • 1× E0432: `lance_index::DatasetIndexExt` import (Lance PR #6280
    moved it to lance::index). Sites: table_store.rs:20,
    db/manifest.rs:37 (the second site was missed by the pre-flight
    inventory).
  • 8× E0599: `create_index_builder` / `load_indices` missing on
    `lance::Dataset` — all downstream of the DatasetIndexExt move.
    Once the import is corrected on table_store.rs and db/manifest.rs,
    these resolve automatically.
  • 2× E0063: missing field `is_only_declared` in `DescribeTableResponse`
    initializer at db/manifest/namespace.rs:221, 364. New Lance
    namespace field per the v5 namespace restructure (PR #6186).

Surface guards (lance_surface_guards.rs, commit d571fa8) all still
compile + the 3 runtime ones pass on v6 — none of the silent-break
surfaces drifted. That's the load-bearing observation: the publisher
CAS chain, ManifestLocation field shape, checkout_version/restore,
DatasetBuilder fluent chain, MergeInsertBuilder return shape,
WriteParams::default, compact_files signature, and DeleteResult
fields are all v6-stable.

Next commits address the 11 errors per the migration plan stages
3-8.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* imports: move DatasetIndexExt to lance::index (Lance PR #6280)

Lance 5.0 (PR #6280) moved `DatasetIndexExt` out of `lance-index` into
`lance::index`. `is_system_index` and `IndexType` stayed in `lance-index`.

Mechanical update of 6 import sites:
  crates/omnigraph/src/table_store.rs:20 — split into two `use` lines
  crates/omnigraph-server/tests/server.rs:10 — was traits::DatasetIndexExt
  crates/omnigraph/tests/search.rs:6
  crates/omnigraph/tests/branching.rs:7
  crates/omnigraph/tests/failpoints.rs:467
  crates/omnigraph-cli/tests/cli.rs:3 — was traits::DatasetIndexExt

All 9 E0599 cascading errors on .create_index_builder / .load_indices
resolve once the trait is back in scope.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* namespace: add is_only_declared field to DescribeTableResponse

Lance namespace 6.0.0 added `is_only_declared: Option<bool>` to
`DescribeTableResponse` (lance-namespace-reqwest-client 0.7+ via the
v5.0 namespace API restructure, Lance PR #6186). Set to `Some(false)`
because every table BranchManifestNamespace returns from describe_table
is materialized — the manifest snapshot only includes entries for
tables we've already opened via Dataset::open.

Two sites in db/manifest/namespace.rs (BranchManifestNamespace +
StagedTableNamespace impls of LanceNamespace::describe_table).

Closes the last two compile errors from the v6 bump in the engine lib.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* cargo: add lance to omnigraph-cli + omnigraph-server dev-deps

Stage 3 moved DatasetIndexExt imports from `lance-index` to `lance::index`
in the cli and server test crates. Both crates only had `lance-index`
in their dev-dependencies; add `lance` alongside so the new path
resolves.

This is the last compile-error fix from the v6 bump — `cargo build
--workspace --tests` is now green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* docs: refresh Lance alignment audit for v6.0.1; bump surveyed version

Per CLAUDE.md maintenance rule 2 (same-PR docs):

- docs/dev/lance.md: replace the v4.0.1 alignment audit stanza with
  the v6.0.1 audit. Captures every v5/v6 finding from this PR (the
  DatasetIndexExt move, DescribeTableResponse.is_only_declared,
  MergeInsertBuilder return shape, ManifestLocation field shape,
  LanceFileVersion::default flip, file-reader async, tokenizer
  vendor, Lance #6658/#6666/#6877 status). Cross-references each
  guard in tests/lance_surface_guards.rs.

- AGENTS.md: bump "Storage substrate: Lance 4.x" → "Lance 6.x".
  Note: surveyed crate version stays at 0.4.2 — substrate version
  bumps are independent of OmniGraph's release version.

- crates/omnigraph/src/storage_layer.rs: update the trait module-level
  doc-comment to reflect that Lance #6658 closed 2026-05-14 and
  delete_where two-phase migration is MR-A (the next follow-up).
  #6666 stays open; create_vector_index inline residual stays.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* tests: silence clippy::diverging_sub_expression on compile-only guards

The five `_compile_*` async fns in lance_surface_guards.rs use
`let ds: Dataset = unimplemented!()` as a placeholder so type inference
can chase the method chain we want to pin, without ever running the
function. Clippy's `diverging_sub_expression` lint flags this pattern
because the RHS diverges; that's the entire point. Added to the
per-fn `#[allow(...)]` list, alongside dead_code / unreachable_code /
unused_variables / unused_mut already there.

No behavior change. cargo test -p omnigraph-engine --test
lance_surface_guards still 3/3 green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* docs: correct #6658 status — closed but API ships in Lance v7.x, not v6.0.1

The audit stanza in docs/dev/lance.md and the storage_layer.rs trait
doc-comment both implied the public DeleteBuilder::execute_uncommitted
API shipped with Lance 6.0.1. It did not. Issue #6658 closed
2026-05-14, but binary search across the release stream confirms:

  v6.0.1            ❌ no pub async fn execute_uncommitted on DeleteBuilder
  v6.1.0-rc.1       ❌
  v7.0.0-beta.5     ❌
  v7.0.0-beta.10    ✅ first appearance
  v7.0.0-rc.1       ✅

So MR-A (delete two-phase migration) is gated on the Lance v7.x bump,
not on this PR. v7.0.0-rc.1 dropped 2026-05-21; GA likely within a
week.

No behavior change. Doc-only correction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* ci(lib): bump recursion_limit to 256 — Lance 6 trait depth on Linux

Lance 6's heavier trait surface around futures/streams in storage_layer.rs's
staged-write API pushes the rustc trait-resolution recursion limit past
the default 128 on Linux builds. CI on PR #111 surfaced this in both
`Test Workspace` and `Test omnigraph-server --features aws`:

  error: queries overflow the depth limit!
    = help: consider increasing the recursion limit by adding a
      `#![recursion_limit = "256"]` attribute to your crate (`omnigraph`)
    = note: query depth increased by 130 when computing layout of
      `{async block@crates/omnigraph/src/storage_layer.rs:697:5: 697:10}`

(The async block is `stage_create_btree_index`'s body — its return type
is several layers of `impl Future<Output=Result<StagedHandle>>` deep on
top of Lance's own builder return types.)

Local macOS builds happened to short-circuit before tripping the limit,
which is why this didn't surface during the v6 bump sequence. The fix
rustc itself suggests is one line at the crate root.

No behavior change. Revisit if a future Lance bump stops needing it.

Verified: `cargo build --locked -p omnigraph-server --features aws`
compiles clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-23 00:42:29 +01:00
+								use lance::index::DatasetIndexExt;
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions};
 								use omnigraph::error::OmniError;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								use omnigraph::loader::{LoadMode, load_jsonl};
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								use omnigraph_policy::{PolicyChecker, PolicyEngine};
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								use omnigraph_server::api::{
 								    BranchCreateRequest, BranchMergeRequest, ChangeRequest, ErrorOutput, ExportRequest,
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								    IngestRequest, QueryRequest, ReadRequest, SchemaApplyRequest, SchemaOutput,
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								};
 								use omnigraph_server::{AppState, build_app};
 								use serde_json::{Value, json};
 								use serial_test::serial;
 								use tower::ServiceExt;
 								const MUTATION_QUERIES: &str = r#"
 								query insert_person($name: String, $age: I32) {
 								    insert Person { name: $name, age: $age }
 								}
 								query set_age($name: String, $age: I32) {
 								    update Person set { age: $age } where name = $name
 								}
 								"#;
 								const POLICY_YAML: &str = r#"
 								version: 1
 								groups:
 								  team: [act-andrew, act-bruno, act-ragnor]
 								  admins: [act-ragnor]
 								protected_branches: [main]
 								rules:
 								  - id: team-read
 								    allow:
 								      actors: { group: team }
 								      actions: [read]
 								      branch_scope: any
 								  - id: admins-export
 								    allow:
 								      actors: { group: admins }
 								      actions: [export]
 								      branch_scope: any
 								  - id: team-write-unprotected
 								    allow:
 								      actors: { group: team }
 								      actions: [change]
 								      branch_scope: unprotected
 								  - id: admins-merge
 								    allow:
 								      actors: { group: admins }
 								      actions: [branch_delete, branch_merge]
 								      target_branch_scope: protected
 								"#;
 								const POLICY_PROTECTED_READ_YAML: &str = r#"
 								version: 1
 								groups:
 								  team: [act-bruno]
 								protected_branches: [main]
 								rules:
 								  - id: protected-read
 								    allow:
 								      actors: { group: team }
 								      actions: [read]
 								      branch_scope: protected
 								"#;
 								const INGEST_CREATE_ONLY_POLICY_YAML: &str = r#"
 								version: 1
 								groups:
 								  team: [act-bruno]
 								protected_branches: [main]
 								rules:
 								  - id: team-branch-create
 								    allow:
 								      actors: { group: team }
 								      actions: [branch_create]
 								      target_branch_scope: unprotected
 								"#;
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								const SCHEMA_APPLY_POLICY_YAML: &str = r#"
 								version: 1
 								groups:
 								  admins: [act-ragnor]
 								protected_branches: [main]
 								rules:
 								  - id: admins-schema-apply
 								    allow:
 								      actors: { group: admins }
 								      actions: [schema_apply]
 								      target_branch_scope: protected
 								"#;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								fn fixture(name: &str) -> PathBuf {
 								    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
 								        .join("../omnigraph/tests/fixtures")
 								        .join(name)
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn init_loaded_graph() -> tempfile::TempDir {
 								    init_graph_with_schema_and_data(
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &fs::read_to_string(fixture("test.jsonl")).unwrap(),
 								    )
 								    .await
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn init_graph_with_schema_and_data(schema: &str, data: &str) -> tempfile::TempDir {
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let temp = tempfile::tempdir().unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
 								    fs::create_dir_all(&graph).unwrap();
 								    Omnigraph::init(graph.to_str().unwrap(), schema)
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        .await
 								        .unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    load_jsonl(&mut db, data, LoadMode::Overwrite)
 								        .await
 								        .unwrap();
 								    temp
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn init_graph_with_schema(schema: &str) -> tempfile::TempDir {
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    let temp = tempfile::tempdir().unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
 								    fs::create_dir_all(&graph).unwrap();
 								    Omnigraph::init(graph.to_str().unwrap(), schema)
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        .await
 								        .unwrap();
 								    temp
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								fn graph_path(root: &Path) -> PathBuf {
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    root.join("server.omni")
 								}
 								fn drifted_test_schema() -> String {
 								    fs::read_to_string(fixture("test.pg"))
 								        .unwrap()
 								        .replace("age: I32?", "age: I64?")
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn manifest_dataset_version(graph: &Path) -> u64 {
 								    Omnigraph::open(graph.to_string_lossy().as_ref())
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        .await
 								        .unwrap()
 								        .snapshot_of(ReadTarget::branch("main"))
 								        .await
 								        .unwrap()
 								        .version()
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								fn s3_test_graph_uri(suite: &str) -> Option<String> {
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
 								    let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX")
 								        .ok()
 								        .filter(|value| !value.trim().is_empty())
 								        .unwrap_or_else(|| "omnigraph-itests".to_string());
 								    let unique = std::time::SystemTime::now()
 								        .duration_since(std::time::UNIX_EPOCH)
 								        .ok()?
 								        .as_nanos();
 								    Some(format!("s3://{}/{}/{}/{}", bucket, prefix, suite, unique))
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) {
 								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let state = AppState::open(graph.to_string_lossy().to_string())
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        .await
 								        .unwrap();
 								    (temp, build_app(state))
 								}
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								/// Build a permit-all policy YAML that grants every action used by the
 								/// HTTP-layer tests to the listed actor names. MR-723 default-deny
 								/// closed the "tokens but no policy" loophole; helpers that used to
 								/// represent "auth without policy" now install this permit-all policy
 								/// so test cases retain their pre-MR-723 semantics ("auth required,
 								/// every action permitted") without conflicting with the new state
 								/// matrix. Tests that specifically need the State-2 deny path use
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								/// `app_for_graph_with_auth_tokens_only` instead.
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								fn permit_all_policy_yaml(actors: &[&str]) -> String {
 								    let members = actors
 								        .iter()
 								        .map(|a| format!("\"{a}\""))
 								        .collect::<Vec<_>>()
 								        .join(", ");
 								    format!(
 								        r#"
 								version: 1
 								groups:
 								  permitted: [{members}]
 								protected_branches: [main]
 								rules:
 								  - id: permit-data
 								    allow:
 								      actors: {{ group: permitted }}
 								      actions: [read, change, export]
 								      branch_scope: any
 								  - id: permit-protected-target-actions
 								    allow:
 								      actors: {{ group: permitted }}
 								      actions: [schema_apply, branch_create, branch_delete, branch_merge]
 								      target_branch_scope: any
 								"#
 								    )
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) {
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    // `AppState::new_with_bearer_token(token)` maps the token to actor "default";
 								    // permit-all policy needs to include that actor.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        vec![("default".to_string(), token.to_string())],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    (temp, build_app(state))
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn app_for_loaded_graph_with_auth_tokens(
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    tokens: &[(&str, &str)],
 								) -> (tempfile::TempDir, Router) {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    let policy_path = temp.path().join("policy.yaml");
 								    let actors: Vec<&str> = tokens.iter().map(|(actor, _)| *actor).collect();
 								    fs::write(&policy_path, permit_all_policy_yaml(&actors)).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        tokens
 								            .iter()
 								            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
 								            .collect(),
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    (temp, build_app(state))
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn app_for_loaded_graph_with_auth_tokens_and_policy(
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    tokens: &[(&str, &str)],
 								    policy: &str,
 								) -> (tempfile::TempDir, Router) {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, policy).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        tokens
 								            .iter()
 								            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
 								            .collect(),
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    (temp, build_app(state))
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    schema: &str,
 								    tokens: &[(&str, &str)],
 								    policy: &str,
 								) -> (tempfile::TempDir, Router) {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_graph_with_schema(schema).await;
 								    let graph = graph_path(temp.path());
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, policy).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        tokens
 								            .iter()
 								            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
 								            .collect(),
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    (temp, build_app(state))
 								}
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								/// MR-723 default-deny mode: bearer tokens configured, no policy file.
 								/// Exercises ServerRuntimeState::DefaultDeny — authenticated requests
 								/// for Read succeed, every other action is rejected with 403 from
 								/// `authorize_request`'s state-2 branch.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn app_for_graph_with_auth_tokens_only(
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    schema: &str,
 								    tokens: &[(&str, &str)],
 								) -> (tempfile::TempDir, Router) {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_graph_with_schema(schema).await;
 								    let graph = graph_path(temp.path());
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        tokens
 								            .iter()
 								            .map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
 								            .collect(),
 								        None,
 								    )
 								    .await
 								    .unwrap();
 								    (temp, build_app(state))
 								}
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								fn additive_schema_with_nickname() -> String {
 								    fs::read_to_string(fixture("test.pg")).unwrap().replace(
 								        "    age: I32?\n}",
 								        "    age: I32?\n    nickname: String?\n}",
 								    )
 								}
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								fn schema_without_age() -> String {
 								    // Drop the nullable `age` column from the test schema. Used by the
 								    // HTTP soft/hard drop tests below.
 								    fs::read_to_string(fixture("test.pg"))
 								        .unwrap()
 								        .replace("    age: I32?\n", "")
 								}
 								fn schema_without_company() -> String {
 								    // Drop the `Company` node type and the edge referencing it. Used
 								    // by the HTTP DropType test below. Hand-crafted (no template
 								    // string replace) because the fixture interleaves the type and
 								    // its edge.
 								    r#"node Person {
 								    name: String @key
 								    age: I32?
 								}
 								edge Knows: Person -> Person {
 								    since: Date?
 								}
 								"#
 								    .to_string()
 								}
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								fn renamed_person_schema() -> String {
 								    fs::read_to_string(fixture("test.pg"))
 								        .unwrap()
 								        .replace("node Person {\n", "node Human @rename_from(\"Person\") {\n")
 								        .replace("edge Knows: Person -> Person", "edge Knows: Human -> Human")
 								        .replace(
 								            "edge WorksAt: Person -> Company",
 								            "edge WorksAt: Human -> Company",
 								        )
 								}
 								fn renamed_age_schema() -> String {
 								    fs::read_to_string(fixture("test.pg"))
 								        .unwrap()
 								        .replace("age: I32?", "years: I32? @rename_from(\"age\")")
 								}
 								fn indexed_name_schema() -> String {
 								    fs::read_to_string(fixture("test.pg"))
 								        .unwrap()
 								        .replace("name: String @key", "name: String @key @index")
 								}
 								fn unsupported_schema_change() -> String {
 								    fs::read_to_string(fixture("test.pg"))
 								        .unwrap()
 								        .replace("age: I32?", "age: I64?")
 								}
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								async fn json_response(app: &Router, request: Request<Body>) -> (StatusCode, Value) {
 								    let response = app.clone().oneshot(request).await.unwrap();
 								    let status = response.status();
 								    let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								    let value = serde_json::from_slice(&body).unwrap();
 								    (status, value)
 								}
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								#[tokio::test]
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn schema_apply_route_updates_graph_for_authorized_admin() {
 								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
 								    let schema = additive_schema_with_nickname();
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .header("authorization", "Bearer admin-token")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: schema,
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
 								    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    assert!(
 								        reopened.catalog().node_types["Person"]
 								            .properties
 								            .contains_key("nickname")
 								    );
 								}
 								#[tokio::test]
 								async fn schema_apply_route_requires_schema_apply_policy_permission() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        POLICY_YAML,
 								    )
 								    .await;
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .header("authorization", "Bearer admin-token")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: additive_schema_with_nickname(),
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        payload["code"],
 								        serde_json::to_value(omnigraph_server::api::ErrorCode::Forbidden).unwrap()
 								    );
 								}
 								#[tokio::test]
 								async fn schema_apply_route_requires_bearer_token_when_policy_enabled() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: additive_schema_with_nickname(),
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::UNAUTHORIZED);
 								    assert_eq!(
 								        payload["code"],
 								        serde_json::to_value(omnigraph_server::api::ErrorCode::Unauthorized).unwrap()
 								    );
 								}
 								#[tokio::test]
 								async fn schema_apply_route_can_rename_type() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .header("authorization", "Bearer admin-token")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: renamed_person_schema(),
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
 								    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    let snapshot = reopened
 								        .snapshot_of(ReadTarget::branch("main"))
 								        .await
 								        .unwrap();
 								    assert!(snapshot.entry("node:Human").is_some());
 								    assert!(snapshot.entry("node:Person").is_none());
 								}
 								#[tokio::test]
 								async fn schema_apply_route_can_rename_property() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .header("authorization", "Bearer admin-token")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: renamed_age_schema(),
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
 								    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    let person = &reopened.catalog().node_types["Person"];
 								    assert!(person.properties.contains_key("years"));
 								    assert!(!person.properties.contains_key("age"));
 								}
 								#[tokio::test]
 								async fn schema_apply_route_can_add_index() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    let before_index_count = {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap();
 								        let dataset = snapshot.open("node:Person").await.unwrap();
 								        dataset.load_indices().await.unwrap().len()
 								    };
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .header("authorization", "Bearer admin-token")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: indexed_name_schema(),
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    let snapshot = reopened
 								        .snapshot_of(ReadTarget::branch("main"))
 								        .await
 								        .unwrap();
 								    let dataset = snapshot.open("node:Person").await.unwrap();
 								    let after_index_count = dataset.load_indices().await.unwrap().len();
 								    assert!(after_index_count > before_index_count);
 								}
 								#[tokio::test]
 								async fn schema_apply_route_rejects_unsupported_plan() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .header("authorization", "Bearer admin-token")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: unsupported_schema_change(),
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::BAD_REQUEST);
 								    assert_eq!(
 								        payload["code"],
 								        serde_json::to_value(omnigraph_server::api::ErrorCode::BadRequest).unwrap()
 								    );
 								}
 								#[tokio::test]
 								async fn schema_apply_route_rejects_when_non_main_branch_exists() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_graph_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await;
 								    let graph = graph_path(temp.path());
 								    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								    db.branch_create("feature").await.unwrap();
 								    drop(db);
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, SCHEMA_APPLY_POLICY_YAML).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								        vec![("act-ragnor".to_string(), "admin-token".to_string())],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let request = Request::builder()
 								        .method(Method::POST)
 								        .uri("/schema/apply")
 								        .header("content-type", "application/json")
 								        .header("authorization", "Bearer admin-token")
 								        .body(Body::from(
 								            serde_json::to_vec(&SchemaApplyRequest {
 								                schema_source: additive_schema_with_nickname(),
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								                ..Default::default()
-												Add schema apply command and policy support

											
										
										
											2026-04-12 04:01:14 +03:00
+								            })
 								            .unwrap(),
 								        ))
 								        .unwrap();
 								    let (status, payload) = json_response(&app, request).await;
 								    assert_eq!(status, StatusCode::CONFLICT);
 								    assert_eq!(
 								        payload["code"],
 								        serde_json::to_value(omnigraph_server::api::ErrorCode::Conflict).unwrap()
 								    );
 								}
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								struct EnvGuard {
 								    saved: Vec<(&'static str, Option<String>)>,
 								}
 								impl EnvGuard {
 								    fn set(vars: &[(&'static str, Option<&str>)]) -> Self {
 								        let saved = vars
 								            .iter()
 								            .map(|(name, _)| (*name, env::var(name).ok()))
 								            .collect::<Vec<_>>();
 								        for (name, value) in vars {
 								            unsafe {
 								                match value {
 								                    Some(value) => env::set_var(name, value),
 								                    None => env::remove_var(name),
 								                }
 								            }
 								        }
 								        Self { saved }
 								    }
 								}
 								impl Drop for EnvGuard {
 								    fn drop(&mut self) {
 								        for (name, value) in self.saved.drain(..) {
 								            unsafe {
 								                match value {
 								                    Some(value) => env::set_var(name, value),
 								                    None => env::remove_var(name),
 								                }
 								            }
 								        }
 								    }
 								}
 								fn format_vector(values: &[f32]) -> String {
 								    values
 								        .iter()
 								        .map(|value| format!("{:.8}", value))
 								        .collect::<Vec<_>>()
 								        .join(", ")
 								}
 								fn normalize_vector(mut values: Vec<f32>) -> Vec<f32> {
 								    let norm = values
 								        .iter()
 								        .map(|value| (*value as f64) * (*value as f64))
 								        .sum::<f64>()
 								        .sqrt() as f32;
 								    if norm > f32::EPSILON {
 								        for value in &mut values {
 								            *value /= norm;
 								        }
 								    }
 								    values
 								}
 								fn fnv1a64(bytes: &[u8]) -> u64 {
 								    let mut hash = 14695981039346656037u64;
 								    for byte in bytes {
 								        hash ^= *byte as u64;
 								        hash = hash.wrapping_mul(1099511628211u64);
 								    }
 								    hash
 								}
 								fn xorshift64(mut x: u64) -> u64 {
 								    x ^= x << 13;
 								    x ^= x >> 7;
 								    x ^= x << 17;
 								    x
 								}
 								fn mock_embedding(input: &str, dim: usize) -> Vec<f32> {
 								    let mut seed = fnv1a64(input.as_bytes());
 								    let mut out = Vec::with_capacity(dim);
 								    for _ in 0..dim {
 								        seed = xorshift64(seed);
 								        let ratio = (seed as f64 / u64::MAX as f64) as f32;
 								        out.push((ratio * 2.0) - 1.0);
 								    }
 								    normalize_vector(out)
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn healthz_succeeds_after_startup() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph().await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/healthz")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["status"], "ok");
 								    assert_eq!(body["version"], env!("CARGO_PKG_VERSION"));
 								    match option_env!("OMNIGRAPH_SOURCE_VERSION") {
 								        Some(source_version) => assert_eq!(body["source_version"], source_version),
 								        None => assert!(body.get("source_version").is_none()),
 								    }
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_drift_returns_conflict_for_snapshot_read_and_change() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    fs::write(graph.join("_schema.pg"), drifted_test_schema()).unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let (snapshot_status, snapshot_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    let snapshot_error: ErrorOutput = serde_json::from_value(snapshot_body).unwrap();
 								    assert_eq!(snapshot_status, StatusCode::CONFLICT);
 								    assert_eq!(
 								        snapshot_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Conflict)
 								    );
 								    assert!(
 								        snapshot_error
 								            .error
 								            .contains("schema evolution is locked down in phase 1")
 								    );
 								    let read = ReadRequest {
 								        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        query_name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Alice" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (read_status, read_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/read")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&read).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let read_error: ErrorOutput = serde_json::from_value(read_body).unwrap();
 								    assert_eq!(read_status, StatusCode::CONFLICT);
 								    assert_eq!(
 								        read_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Conflict)
 								    );
 								    assert!(
 								        read_error
 								            .error
 								            .contains("schema evolution is locked down in phase 1")
 								    );
 								    let change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        params: Some(json!({ "name": "Mina", "age": 28 })),
 								        branch: Some("main".to_string()),
 								    };
 								    let (change_status, change_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let change_error: ErrorOutput = serde_json::from_value(change_body).unwrap();
 								    assert_eq!(change_status, StatusCode::CONFLICT);
 								    assert_eq!(
 								        change_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Conflict)
 								    );
 								    assert!(
 								        change_error
 								            .error
 								            .contains("schema evolution is locked down in phase 1")
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn protected_routes_require_bearer_token() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								            .uri("/branches")
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
 								    assert_eq!(status, StatusCode::UNAUTHORIZED);
 								    assert_eq!(
 								        error.code,
 								        Some(omnigraph_server::api::ErrorCode::Unauthorized)
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn protected_routes_accept_valid_bearer_token_while_healthz_stays_open() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let health = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/healthz")
 								                .method(Method::GET)
 								                .body(Body::empty())
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(health.status(), StatusCode::OK);
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								            .uri("/branches")
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								            .method(Method::GET)
 								            .header("authorization", "Bearer demo-token")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    assert!(body["branches"].is_array());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn export_route_returns_jsonl_for_branch_snapshot() {
 								    let token = "demo-token";
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    db.branch_create_from(ReadTarget::branch("main"), "feature")
 								        .await
 								        .unwrap();
 								    db.load(
 								        "feature",
 								        r#"{"type":"Person","data":{"name":"Eve","age":29}}"#,
 								        LoadMode::Append,
 								    )
 								    .await
 								    .unwrap();
 								    let expected = db
 								        .export_jsonl("feature", &["Person".to_string()], &[])
 								        .await
 								        .unwrap();
 								    drop(db);
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    // MR-723: tokens-without-policy is now default-deny. Install a
 								    // permit-all policy alongside the bearer token so /export
 								    // (action=Export) passes Cedar evaluation. The test is exercising
 								    // export semantics, not policy — the policy is just enough to clear
 								    // the State 3 path.
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        vec![("default".to_string(), token.to_string())],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let app = build_app(state);
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/export")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .header("authorization", format!("Bearer {}", token))
 								                .body(Body::from(
 								                    serde_json::to_vec(&ExportRequest {
 								                        branch: Some("feature".to_string()),
 								                        type_names: vec!["Person".to_string()],
 								                        table_keys: Vec::new(),
 								                    })
 								                    .unwrap(),
 								                ))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::OK);
 								    assert_eq!(
 								        response.headers().get("content-type").unwrap(),
 								        "application/x-ndjson; charset=utf-8"
 								    );
 								    let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								    let text = String::from_utf8(body.to_vec()).unwrap();
 								    assert_eq!(text, expected);
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn protected_routes_accept_any_configured_team_bearer_token() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[
 								        ("team-01", "token-one"),
 								        ("team-02", "token-two"),
 								    ])
 								    .await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								            .uri("/branches")
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								            .method(Method::GET)
 								            .header("authorization", "Bearer token-two")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    assert!(body["branches"].is_array());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								}
-												Harden bearer auth: constant-time compare, hashed at rest, authoritative actor_id

Fixes two live authz bugs in omnigraph-server:

- Bearer-token lookup previously used HashMap::get, which compares keys with
  Eq and short-circuits on the first differing byte — a network-observable
  timing oracle for brute-forcing tokens. Tokens are now stored as SHA-256
  digests and compared with subtle::ConstantTimeEq, iterating every entry
  unconditionally so total work is independent of which slot matches. Raw
  token bytes no longer live in server memory after startup.

- authorize_request now overwrites PolicyRequest.actor_id from the
  authenticated session instead of trusting the handler-supplied field,
  which previously defaulted to "" via unwrap_or_default(). The empty
  string can no longer reach Cedar as a policy subject even if a future
  refactor drops the None check.

External API of AppState constructors is unchanged — tokens still enter as
Vec<(String, String)> and are hashed on the way in.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-17 21:40:51 +03:00
+								/// Verifies the hashed-token lookup correctly resolves each bearer to its
 								/// associated actor, and that the resolved actor — not the handler-supplied
 								/// default — is what the policy engine sees. Two tokens for two distinct
 								/// actors; policy grants read to actor-A only. Swapping tokens must swap
 								/// the policy outcome.
 								#[tokio::test(flavor = "multi_thread")]
 								async fn bearer_token_resolves_to_correct_actor_for_policy_decisions() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												Harden bearer auth: constant-time compare, hashed at rest, authoritative actor_id

Fixes two live authz bugs in omnigraph-server:

- Bearer-token lookup previously used HashMap::get, which compares keys with
  Eq and short-circuits on the first differing byte — a network-observable
  timing oracle for brute-forcing tokens. Tokens are now stored as SHA-256
  digests and compared with subtle::ConstantTimeEq, iterating every entry
  unconditionally so total work is independent of which slot matches. Raw
  token bytes no longer live in server memory after startup.

- authorize_request now overwrites PolicyRequest.actor_id from the
  authenticated session instead of trusting the handler-supplied field,
  which previously defaulted to "" via unwrap_or_default(). The empty
  string can no longer reach Cedar as a policy subject even if a future
  refactor drops the None check.

External API of AppState constructors is unchanged — tokens still enter as
Vec<(String, String)> and are hashed on the way in.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-17 21:40:51 +03:00
+								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(
 								        &policy_path,
 								        r#"
 								version: 1
 								groups:
 								  readers: [act-a]
 								  writers: [act-b]
 								protected_branches: [main]
 								rules:
 								  - id: readers-only
 								    allow:
 								      actors: { group: readers }
 								      actions: [read]
 								      branch_scope: any
 								"#,
 								    )
 								    .unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Harden bearer auth: constant-time compare, hashed at rest, authoritative actor_id

Fixes two live authz bugs in omnigraph-server:

- Bearer-token lookup previously used HashMap::get, which compares keys with
  Eq and short-circuits on the first differing byte — a network-observable
  timing oracle for brute-forcing tokens. Tokens are now stored as SHA-256
  digests and compared with subtle::ConstantTimeEq, iterating every entry
  unconditionally so total work is independent of which slot matches. Raw
  token bytes no longer live in server memory after startup.

- authorize_request now overwrites PolicyRequest.actor_id from the
  authenticated session instead of trusting the handler-supplied field,
  which previously defaulted to "" via unwrap_or_default(). The empty
  string can no longer reach Cedar as a policy subject even if a future
  refactor drops the None check.

External API of AppState constructors is unchanged — tokens still enter as
Vec<(String, String)> and are hashed on the way in.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-17 21:40:51 +03:00
+								        vec![
 								            ("act-a".to_string(), "token-a".to_string()),
 								            ("act-b".to_string(), "token-b".to_string()),
 								        ],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    // act-a is authenticated AND authorized.
 								    let (ok_status, _) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer token-a")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(ok_status, StatusCode::OK);
 								    // act-b is authenticated but policy rejects — proves the resolved actor
 								    // (not some default) was the policy subject.
 								    let (denied_status, denied_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer token-b")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    let denied_error: ErrorOutput = serde_json::from_value(denied_body).unwrap();
 								    assert_eq!(denied_status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        denied_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden)
 								    );
 								    // Unknown token: 401, never reaches the policy engine.
 								    let (bad_status, _) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer wrong-token")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(bad_status, StatusCode::UNAUTHORIZED);
 								}
-												policy: codify signed-token-claim-only actor identity (MR-731) (#101)

Warm-up commit for the policy chassis epic (MR-722). PR #1 of the
chassis series — same role as schema-lint v1's commit #1 baseline.
Zero behavioral change; establishes the regression test, the
load-bearing doc comment, and the user-doc paragraph for an
invariant already true in code.

Server auth already resolves `actor_id` from the matched bearer
token at `omnigraph-server/src/lib.rs:692-694`, overwriting whatever
the handler put in the PolicyRequest. The principle is named in
docs/dev/invariants.md Hard Invariant 11 ("clients cannot set actor
identity directly"). What was missing: a regression test, a
load-bearing doc comment at the resolution site, and a user-facing
documentation paragraph. This commit adds all three.

Why first. The actor-identity invariant is the foundation every
other policy decision stands on. If `actor_id` can be spoofed, every
chassis primitive (per-row scope, audit log, two-person rule)
becomes ungated. Pinning the invariant first means PR #2 (the
chassis core) doesn't have to re-prove this assertion.

Changes:

* crates/omnigraph-server/tests/server.rs — new regression test
  actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers
  with three sub-assertions:
  - spoof-up: bearer for denied actor + X-Actor-Id naming allowed
    actor → 403 (header doesn't promote)
  - spoof-down: bearer for allowed actor + X-Actor-Id naming denied
    actor → 200 (header doesn't demote)
  - empty-string spoof: empty X-Actor-Id doesn't clear resolved actor
  Cross-link to MR-777 (auth boundary cases — actor-id collision +
  malformed bearer) noted in the test docstring.

* crates/omnigraph-server/src/lib.rs — expanded doc comment at
  the actor-resolution site explaining the SECURITY INVARIANT,
  citing Hard Invariant 11, the Supabase RLS history footgun, and
  the regression test that pins the contract. Reader thinking "I
  should let clients override actor_id for impersonation" hits
  this comment first.

* docs/user/policy.md — new "Actor identity (signed-claim-only)"
  section near the existing Server enforcement section. Closes the
  user-facing doc gap MR-731's "Done when" requires.

Architectural decisions for PR #2+ pinned this session (not
implemented here, recorded so future implementers don't re-litigate):
- PolicyEngine moves to new `omnigraph-policy` workspace crate so
  both engine and server can depend on it (Q2).
- `enforce(action, scope, actor)` will take a new `ResourceScope`
  enum, leaving room for MR-725's per-type and per-row variants (Q3).
- `PolicyAction::Admin` is kept and wired (Option A) — meta-action
  for policy-management surfaces (hot reload, audit log query,
  approvals list) as those consumer features land (Q4).

Test results:
- cargo test -p omnigraph-server --test server: 45 pass (44 existing
  + 1 new); no regressions
- scripts/check-agents-md.sh: passes (34 links / 33 docs OK)

Out of scope (PR #2+):
- Omnigraph::with_policy() + enforce() method
- omnigraph-policy crate creation
- ResourceScope enum
- CLI policy injection into Omnigraph
- HTTP-layer redundant-check removal
- MR-724 Admin action wiring (PR #2)
- MR-723 default-deny 3-state (PR #4)
- MR-736 severity warn/deny (PR #5)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-17 02:51:34 +03:00
+								/// Regression test for MR-731: actor identity comes from the matched
 								/// bearer token, never from a client-supplied request header. A future
 								/// "convenience" PR that lets clients override `actor_id` to spoof
 								/// another identity must break this test. The principle is named in
 								/// `docs/dev/invariants.md` Hard Invariant 11 and at the actor-resolution
 								/// site in `omnigraph-server/src/lib.rs::authorize_request`.
 								///
 								/// Two assertions in one fixture:
 								/// 1. Spoof-up: bearer for a *denied* actor + X-Actor-Id naming an
 								///    *allowed* actor — policy still denies (proves the spoof header
 								///    doesn't promote the request).
 								/// 2. Spoof-down: bearer for an *allowed* actor + X-Actor-Id naming a
 								///    *denied* actor — policy still allows (proves the server-resolved
 								///    identity wins; the spoof can't trick the request into a denial
 								///    either, which would otherwise be a confusing UX trap).
 								///
 								/// Cross-reference: MR-777 covers boundary cases like actor-id
 								/// *collision* (two distinct tokens minting the same actor_id) and
 								/// malformed bearer header parsing. See `auth_boundary_case_coverage`
 								/// suite when it lands; the two tests together pin the full bearer-token
 								/// → actor identity contract.
 								#[tokio::test(flavor = "multi_thread")]
 								async fn actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												policy: codify signed-token-claim-only actor identity (MR-731) (#101)

Warm-up commit for the policy chassis epic (MR-722). PR #1 of the
chassis series — same role as schema-lint v1's commit #1 baseline.
Zero behavioral change; establishes the regression test, the
load-bearing doc comment, and the user-doc paragraph for an
invariant already true in code.

Server auth already resolves `actor_id` from the matched bearer
token at `omnigraph-server/src/lib.rs:692-694`, overwriting whatever
the handler put in the PolicyRequest. The principle is named in
docs/dev/invariants.md Hard Invariant 11 ("clients cannot set actor
identity directly"). What was missing: a regression test, a
load-bearing doc comment at the resolution site, and a user-facing
documentation paragraph. This commit adds all three.

Why first. The actor-identity invariant is the foundation every
other policy decision stands on. If `actor_id` can be spoofed, every
chassis primitive (per-row scope, audit log, two-person rule)
becomes ungated. Pinning the invariant first means PR #2 (the
chassis core) doesn't have to re-prove this assertion.

Changes:

* crates/omnigraph-server/tests/server.rs — new regression test
  actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers
  with three sub-assertions:
  - spoof-up: bearer for denied actor + X-Actor-Id naming allowed
    actor → 403 (header doesn't promote)
  - spoof-down: bearer for allowed actor + X-Actor-Id naming denied
    actor → 200 (header doesn't demote)
  - empty-string spoof: empty X-Actor-Id doesn't clear resolved actor
  Cross-link to MR-777 (auth boundary cases — actor-id collision +
  malformed bearer) noted in the test docstring.

* crates/omnigraph-server/src/lib.rs — expanded doc comment at
  the actor-resolution site explaining the SECURITY INVARIANT,
  citing Hard Invariant 11, the Supabase RLS history footgun, and
  the regression test that pins the contract. Reader thinking "I
  should let clients override actor_id for impersonation" hits
  this comment first.

* docs/user/policy.md — new "Actor identity (signed-claim-only)"
  section near the existing Server enforcement section. Closes the
  user-facing doc gap MR-731's "Done when" requires.

Architectural decisions for PR #2+ pinned this session (not
implemented here, recorded so future implementers don't re-litigate):
- PolicyEngine moves to new `omnigraph-policy` workspace crate so
  both engine and server can depend on it (Q2).
- `enforce(action, scope, actor)` will take a new `ResourceScope`
  enum, leaving room for MR-725's per-type and per-row variants (Q3).
- `PolicyAction::Admin` is kept and wired (Option A) — meta-action
  for policy-management surfaces (hot reload, audit log query,
  approvals list) as those consumer features land (Q4).

Test results:
- cargo test -p omnigraph-server --test server: 45 pass (44 existing
  + 1 new); no regressions
- scripts/check-agents-md.sh: passes (34 links / 33 docs OK)

Out of scope (PR #2+):
- Omnigraph::with_policy() + enforce() method
- omnigraph-policy crate creation
- ResourceScope enum
- CLI policy injection into Omnigraph
- HTTP-layer redundant-check removal
- MR-724 Admin action wiring (PR #2)
- MR-723 default-deny 3-state (PR #4)
- MR-736 severity warn/deny (PR #5)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-17 02:51:34 +03:00
+								    let policy_path = temp.path().join("policy.yaml");
 								    // Same readers/writers split as
 								    // `bearer_token_resolves_to_correct_actor_for_policy_decisions` —
 								    // `act-a` can read main, `act-b` cannot. The asymmetry is what
 								    // makes the spoof-up/spoof-down distinction observable.
 								    fs::write(
 								        &policy_path,
 								        r#"
 								version: 1
 								groups:
 								  readers: [act-a]
 								  writers: [act-b]
 								protected_branches: [main]
 								rules:
 								  - id: readers-only
 								    allow:
 								      actors: { group: readers }
 								      actions: [read]
 								      branch_scope: any
 								"#,
 								    )
 								    .unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												policy: codify signed-token-claim-only actor identity (MR-731) (#101)

Warm-up commit for the policy chassis epic (MR-722). PR #1 of the
chassis series — same role as schema-lint v1's commit #1 baseline.
Zero behavioral change; establishes the regression test, the
load-bearing doc comment, and the user-doc paragraph for an
invariant already true in code.

Server auth already resolves `actor_id` from the matched bearer
token at `omnigraph-server/src/lib.rs:692-694`, overwriting whatever
the handler put in the PolicyRequest. The principle is named in
docs/dev/invariants.md Hard Invariant 11 ("clients cannot set actor
identity directly"). What was missing: a regression test, a
load-bearing doc comment at the resolution site, and a user-facing
documentation paragraph. This commit adds all three.

Why first. The actor-identity invariant is the foundation every
other policy decision stands on. If `actor_id` can be spoofed, every
chassis primitive (per-row scope, audit log, two-person rule)
becomes ungated. Pinning the invariant first means PR #2 (the
chassis core) doesn't have to re-prove this assertion.

Changes:

* crates/omnigraph-server/tests/server.rs — new regression test
  actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers
  with three sub-assertions:
  - spoof-up: bearer for denied actor + X-Actor-Id naming allowed
    actor → 403 (header doesn't promote)
  - spoof-down: bearer for allowed actor + X-Actor-Id naming denied
    actor → 200 (header doesn't demote)
  - empty-string spoof: empty X-Actor-Id doesn't clear resolved actor
  Cross-link to MR-777 (auth boundary cases — actor-id collision +
  malformed bearer) noted in the test docstring.

* crates/omnigraph-server/src/lib.rs — expanded doc comment at
  the actor-resolution site explaining the SECURITY INVARIANT,
  citing Hard Invariant 11, the Supabase RLS history footgun, and
  the regression test that pins the contract. Reader thinking "I
  should let clients override actor_id for impersonation" hits
  this comment first.

* docs/user/policy.md — new "Actor identity (signed-claim-only)"
  section near the existing Server enforcement section. Closes the
  user-facing doc gap MR-731's "Done when" requires.

Architectural decisions for PR #2+ pinned this session (not
implemented here, recorded so future implementers don't re-litigate):
- PolicyEngine moves to new `omnigraph-policy` workspace crate so
  both engine and server can depend on it (Q2).
- `enforce(action, scope, actor)` will take a new `ResourceScope`
  enum, leaving room for MR-725's per-type and per-row variants (Q3).
- `PolicyAction::Admin` is kept and wired (Option A) — meta-action
  for policy-management surfaces (hot reload, audit log query,
  approvals list) as those consumer features land (Q4).

Test results:
- cargo test -p omnigraph-server --test server: 45 pass (44 existing
  + 1 new); no regressions
- scripts/check-agents-md.sh: passes (34 links / 33 docs OK)

Out of scope (PR #2+):
- Omnigraph::with_policy() + enforce() method
- omnigraph-policy crate creation
- ResourceScope enum
- CLI policy injection into Omnigraph
- HTTP-layer redundant-check removal
- MR-724 Admin action wiring (PR #2)
- MR-723 default-deny 3-state (PR #4)
- MR-736 severity warn/deny (PR #5)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-17 02:51:34 +03:00
+								        vec![
 								            ("act-a".to_string(), "token-a".to_string()),
 								            ("act-b".to_string(), "token-b".to_string()),
 								        ],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    // (1) Spoof-up: bearer for act-b (denied) + X-Actor-Id: act-a (allowed).
 								    // If the server were trusting the header, this would succeed as
 								    // act-a. The contract is: the bearer wins. Expect 403 because
 								    // act-b can't read.
 								    let (spoof_up_status, spoof_up_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer token-b")
 								            .header("x-actor-id", "act-a")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    let spoof_up_error: ErrorOutput = serde_json::from_value(spoof_up_body).unwrap();
 								    assert_eq!(
 								        spoof_up_status,
 								        StatusCode::FORBIDDEN,
 								        "X-Actor-Id must not promote a denied bearer to an allowed actor",
 								    );
 								    assert_eq!(
 								        spoof_up_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden),
 								    );
 								    // (2) Spoof-down: bearer for act-a (allowed) + X-Actor-Id: act-b (denied).
 								    // If the server were trusting the header, this would fail as act-b.
 								    // The contract is: the bearer wins. Expect 200 because act-a can read.
 								    let (spoof_down_status, _) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer token-a")
 								            .header("x-actor-id", "act-b")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(
 								        spoof_down_status,
 								        StatusCode::OK,
 								        "X-Actor-Id must not demote an allowed bearer to a denied actor",
 								    );
 								    // (3) Empty-string spoof attempt: an X-Actor-Id of "" must not
 								    // leak through as the policy subject. Same expectation as (1):
 								    // bearer for act-b is denied regardless of what the header tries.
 								    let (empty_spoof_status, _) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer token-b")
 								            .header("x-actor-id", "")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(
 								        empty_spoof_status,
 								        StatusCode::FORBIDDEN,
 								        "empty X-Actor-Id must not clear the resolved actor",
 								    );
 								}
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_allows_read_but_distinguishes_401_from_403() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        &[("act-bruno", "team-token"), ("act-ragnor", "admin-token")],
 								        POLICY_YAML,
 								    )
 								    .await;
 								    let (missing_status, missing_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    let missing_error: ErrorOutput = serde_json::from_value(missing_body).unwrap();
 								    assert_eq!(missing_status, StatusCode::UNAUTHORIZED);
 								    assert_eq!(
 								        missing_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Unauthorized)
 								    );
 								    let (snapshot_status, snapshot_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer team-token")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(snapshot_status, StatusCode::OK);
 								    assert_eq!(snapshot_body["branch"], "main");
 								    let export_request = ExportRequest {
 								        branch: Some("main".to_string()),
 								        type_names: Vec::new(),
 								        table_keys: Vec::new(),
 								    };
 								    let (forbidden_status, forbidden_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/export")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&export_request).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let forbidden_error: ErrorOutput = serde_json::from_value(forbidden_body).unwrap();
 								    assert_eq!(forbidden_status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        forbidden_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden)
 								    );
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/export")
 								                .method(Method::POST)
 								                .header("authorization", "Bearer admin-token")
 								                .header("content-type", "application/json")
 								                .body(Body::from(serde_json::to_vec(&export_request).unwrap()))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::OK);
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_uses_resolved_branch_for_snapshot_reads() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let snapshot_id = {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        db.resolve_snapshot("main").await.unwrap().to_string()
 								    };
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, POLICY_PROTECTED_READ_YAML).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        vec![("act-bruno".to_string(), "team-token".to_string())],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let read = ReadRequest {
 								        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        query_name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Alice" })),
 								        branch: None,
 								        snapshot: Some(snapshot_id),
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/read")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&read).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["target"]["branch"], Value::Null);
 								    assert_eq!(
 								        body["target"]["snapshot"].as_str(),
 								        read.snapshot.as_deref()
 								    );
 								    assert_eq!(body["row_count"], 1);
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn snapshot_route_returns_manifest_dataset_version() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let expected_manifest_version = manifest_dataset_version(&graph).await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let (snapshot_status, snapshot_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(snapshot_status, StatusCode::OK);
 								    assert_eq!(snapshot_body["branch"], "main");
 								    assert_eq!(
 								        snapshot_body["manifest_version"].as_u64().unwrap(),
 								        expected_manifest_version
 								    );
 								    assert!(snapshot_body["tables"].is_array());
 								}
-												Polish schema endpoint: rename show, align field name, add tests

Review feedback on #23, applied on top of the original commit:

- Rename the CLI subcommand from `schema get` to `schema show` to match
  the existing `run show` / `commit show` convention. A `#[command(alias
  = "get")]` preserves muscle memory for anyone who already typed `get`.
- Rename `SchemaGetOutput` → `SchemaOutput` and its field `source` →
  `schema_source`, so the get response and the apply request use the
  same field name for the same concept.
- Use `println!` instead of `print!` in the CLI so the shell prompt
  doesn't land on the last line of schema output.
- Add three integration tests on `/schema`: happy path (no auth),
  401 when bearer is required but missing, 403 when the policy grants
  the actor branch_create but not read.

Follow-ups left for a separate PR: include `schema_ir_hash` and
`schema_identity_version` in the response payload so clients can do
drift detection and the server can set an ETag; and a fast-path local
read that skips `Omnigraph::open()` when only the schema source is
needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-18 00:30:46 +03:00
+								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_route_returns_current_source() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph().await;
-												Polish schema endpoint: rename show, align field name, add tests

Review feedback on #23, applied on top of the original commit:

- Rename the CLI subcommand from `schema get` to `schema show` to match
  the existing `run show` / `commit show` convention. A `#[command(alias
  = "get")]` preserves muscle memory for anyone who already typed `get`.
- Rename `SchemaGetOutput` → `SchemaOutput` and its field `source` →
  `schema_source`, so the get response and the apply request use the
  same field name for the same concept.
- Use `println!` instead of `print!` in the CLI so the shell prompt
  doesn't land on the last line of schema output.
- Add three integration tests on `/schema`: happy path (no auth),
  401 when bearer is required but missing, 403 when the policy grants
  the actor branch_create but not read.

Follow-ups left for a separate PR: include `schema_ir_hash` and
`schema_identity_version` in the response payload so clients can do
drift detection and the server can set an ETag; and a fast-path local
read that skips `Omnigraph::open()` when only the schema source is
needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-18 00:30:46 +03:00
+								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/schema")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    let output: SchemaOutput = serde_json::from_value(body).unwrap();
 								    assert!(output.schema_source.contains("node Person"));
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_route_requires_bearer_token_when_auth_configured() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;
-												Polish schema endpoint: rename show, align field name, add tests

Review feedback on #23, applied on top of the original commit:

- Rename the CLI subcommand from `schema get` to `schema show` to match
  the existing `run show` / `commit show` convention. A `#[command(alias
  = "get")]` preserves muscle memory for anyone who already typed `get`.
- Rename `SchemaGetOutput` → `SchemaOutput` and its field `source` →
  `schema_source`, so the get response and the apply request use the
  same field name for the same concept.
- Use `println!` instead of `print!` in the CLI so the shell prompt
  doesn't land on the last line of schema output.
- Add three integration tests on `/schema`: happy path (no auth),
  401 when bearer is required but missing, 403 when the policy grants
  the actor branch_create but not read.

Follow-ups left for a separate PR: include `schema_ir_hash` and
`schema_identity_version` in the response payload so clients can do
drift detection and the server can set an ETag; and a fast-path local
read that skips `Omnigraph::open()` when only the schema source is
needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-18 00:30:46 +03:00
 								    let (missing_status, missing_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/schema")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    let missing_error: ErrorOutput = serde_json::from_value(missing_body).unwrap();
 								    assert_eq!(missing_status, StatusCode::UNAUTHORIZED);
 								    assert_eq!(
 								        missing_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Unauthorized)
 								    );
 								    let (ok_status, ok_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/schema")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer demo-token")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(ok_status, StatusCode::OK);
 								    let output: SchemaOutput = serde_json::from_value(ok_body).unwrap();
 								    assert!(!output.schema_source.is_empty());
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_route_denied_when_actor_lacks_read_permission() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												Polish schema endpoint: rename show, align field name, add tests

Review feedback on #23, applied on top of the original commit:

- Rename the CLI subcommand from `schema get` to `schema show` to match
  the existing `run show` / `commit show` convention. A `#[command(alias
  = "get")]` preserves muscle memory for anyone who already typed `get`.
- Rename `SchemaGetOutput` → `SchemaOutput` and its field `source` →
  `schema_source`, so the get response and the apply request use the
  same field name for the same concept.
- Use `println!` instead of `print!` in the CLI so the shell prompt
  doesn't land on the last line of schema output.
- Add three integration tests on `/schema`: happy path (no auth),
  401 when bearer is required but missing, 403 when the policy grants
  the actor branch_create but not read.

Follow-ups left for a separate PR: include `schema_ir_hash` and
`schema_identity_version` in the response payload so clients can do
drift detection and the server can set an ETag; and a fast-path local
read that skips `Omnigraph::open()` when only the schema source is
needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-18 00:30:46 +03:00
+								    let policy_path = temp.path().join("policy.yaml");
 								    // Policy grants branch_create only — no read action for act-bruno.
 								    fs::write(&policy_path, INGEST_CREATE_ONLY_POLICY_YAML).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Polish schema endpoint: rename show, align field name, add tests

Review feedback on #23, applied on top of the original commit:

- Rename the CLI subcommand from `schema get` to `schema show` to match
  the existing `run show` / `commit show` convention. A `#[command(alias
  = "get")]` preserves muscle memory for anyone who already typed `get`.
- Rename `SchemaGetOutput` → `SchemaOutput` and its field `source` →
  `schema_source`, so the get response and the apply request use the
  same field name for the same concept.
- Use `println!` instead of `print!` in the CLI so the shell prompt
  doesn't land on the last line of schema output.
- Add three integration tests on `/schema`: happy path (no auth),
  401 when bearer is required but missing, 403 when the policy grants
  the actor branch_create but not read.

Follow-ups left for a separate PR: include `schema_ir_hash` and
`schema_identity_version` in the response payload so clients can do
drift detection and the server can set an ETag; and a fast-path local
read that skips `Omnigraph::open()` when only the schema source is
needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-18 00:30:46 +03:00
+								        vec![("act-bruno".to_string(), "team-token".to_string())],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/schema")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer team-token")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
 								    assert_eq!(status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden)
 								    );
 								}
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    db.branch_create_from(ReadTarget::branch("main"), "feature")
 								        .await
 								        .unwrap();
 								    drop(db);
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, POLICY_YAML).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        vec![("act-bruno".to_string(), "team-token".to_string())],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let main_change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        params: Some(json!({ "name": "Mina", "age": 28 })),
 								        branch: Some("main".to_string()),
 								    };
 								    let (main_status, main_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&main_change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let main_error: ErrorOutput = serde_json::from_value(main_body).unwrap();
 								    assert_eq!(main_status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        main_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden)
 								    );
 								    let feature_change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        params: Some(json!({ "name": "Mina", "age": 28 })),
 								        branch: Some("feature".to_string()),
 								    };
 								    let (feature_status, feature_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&feature_change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(feature_status, StatusCode::OK);
 								    assert_eq!(feature_body["branch"], "feature");
 								    assert_eq!(feature_body["affected_nodes"], 1);
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    db.branch_create_from(ReadTarget::branch("main"), "feature")
 								        .await
 								        .unwrap();
 								    db.load(
 								        "feature",
 								        r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#,
 								        LoadMode::Append,
 								    )
 								    .await
 								    .unwrap();
 								    drop(db);
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, POLICY_YAML).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        vec![
 								            ("act-bruno".to_string(), "team-token".to_string()),
 								            ("act-ragnor".to_string(), "admin-token".to_string()),
 								        ],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let merge = BranchMergeRequest {
 								        source: "feature".to_string(),
 								        target: Some("main".to_string()),
 								    };
 								    let (deny_status, deny_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/merge")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let deny_error: ErrorOutput = serde_json::from_value(deny_body).unwrap();
 								    assert_eq!(deny_status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        deny_error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden)
 								    );
 								    let (allow_status, allow_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/merge")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer admin-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(allow_status, StatusCode::OK);
 								    assert_eq!(allow_body["actor_id"], "act-ragnor");
 								}
 								#[tokio::test(flavor = "multi_thread")]
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								async fn authenticated_change_stamps_actor_on_commits() {
-												chore: scrub Linear ticket numbers and review-bot mentions from code comments

OmniGraph is OSS; internal Linear ticket references and code-review-bot
mentions in source-code comments don't help external readers and leak
internal tooling. Replace ticket numbers (MR-XXX) with descriptive
prose, drop linear.app URLs, and remove inline mentions of
Cursor/Bugbot/Cubic/Codex review threads.

Scope is limited to source-code comments (`crates/`). Docs under
`docs/` keep their MR-XXX references — those are part of the
established change-history narrative for in-repo docs and don't
require a Linear account to find context for.

No behavior changes; no public API changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-01 22:45:38 +02:00
+								    // With the Run state machine removed, actor_id is recorded
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    // directly on the commit graph (no intermediate run record).
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        params: Some(json!({ "name": "Mina", "age": 28 })),
 								        branch: Some("main".to_string()),
 								    };
 								    let (change_status, change_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer token-one")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(change_status, StatusCode::OK);
 								    assert_eq!(change_body["actor_id"], "act-andrew");
 								    let (commits_status, commits_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/commits?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer token-one")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(commits_status, StatusCode::OK);
 								    let head = commits_body["commits"]
 								        .as_array()
 								        .unwrap()
 								        .last()
 								        .expect("head commit should exist");
 								    assert_eq!(head["actor_id"], "act-andrew");
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn ingest_creates_branch_returns_metadata_and_stamps_actor() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await;
 								    let graph = graph_path(temp.path());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let ingest = IngestRequest {
 								        branch: Some("feature-ingest".to_string()),
 								        from: Some("main".to_string()),
 								        mode: Some(LoadMode::Merge),
 								        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}
 								{"type":"Person","data":{"name":"Bob","age":26}}"#
 								            .to_string(),
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/ingest")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer token-one")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["branch"], "feature-ingest");
 								    assert_eq!(body["base_branch"], "main");
 								    assert_eq!(body["branch_created"], true);
 								    assert_eq!(body["mode"], "merge");
 								    assert_eq!(body["actor_id"], "act-andrew");
 								    assert_eq!(body["tables"][0]["table_key"], "node:Person");
 								    assert_eq!(body["tables"][0]["rows_loaded"], 2);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let snapshot = db
 								        .snapshot_of(ReadTarget::branch("feature-ingest"))
 								        .await
 								        .unwrap();
 								    let person_ds = snapshot.open("node:Person").await.unwrap();
 								    assert_eq!(person_ds.count_rows(None).await.unwrap(), 5);
 								    let head = db
 								        .list_commits(Some("feature-ingest"))
 								        .await
 								        .unwrap()
 								        .into_iter()
 								        .last()
 								        .unwrap();
 								    assert_eq!(head.actor_id.as_deref(), Some("act-andrew"));
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn ingest_existing_branch_skips_branch_create_policy_check() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        db.branch_create_from(ReadTarget::branch("main"), "feature")
 								            .await
 								            .unwrap();
 								    }
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, POLICY_YAML).unwrap();
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        vec![("act-bruno".to_string(), "team-token".to_string())],
 								        Some(&policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let ingest = IngestRequest {
 								        branch: Some("feature".to_string()),
 								        from: Some("other-base".to_string()),
 								        mode: Some(LoadMode::Merge),
 								        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#.to_string(),
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/ingest")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["branch"], "feature");
 								    assert_eq!(body["branch_created"], false);
 								    assert_eq!(body["base_branch"], "other-base");
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn ingest_denies_missing_branch_without_branch_create_permission() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        &[("act-bruno", "team-token")],
 								        POLICY_YAML,
 								    )
 								    .await;
 								    let ingest = IngestRequest {
 								        branch: Some("feature".to_string()),
 								        from: Some("main".to_string()),
 								        mode: Some(LoadMode::Merge),
 								        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#.to_string(),
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/ingest")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
 								    assert_eq!(status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden)
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn ingest_denies_when_actor_lacks_change_permission() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        &[("act-bruno", "team-token")],
 								        INGEST_CREATE_ONLY_POLICY_YAML,
 								    )
 								    .await;
 								    let ingest = IngestRequest {
 								        branch: Some("feature".to_string()),
 								        from: Some("main".to_string()),
 								        mode: Some(LoadMode::Merge),
 								        data: r#"{"type":"Person","data":{"name":"Zoe","age":33}}"#.to_string(),
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/ingest")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer team-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&ingest).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
 								    assert_eq!(status, StatusCode::FORBIDDEN);
 								    assert_eq!(
 								        error.code,
 								        Some(omnigraph_server::api::ErrorCode::Forbidden)
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn ingest_rejects_payloads_over_32_mib() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph().await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let oversize = IngestRequest {
 								        branch: Some("feature".to_string()),
 								        from: Some("main".to_string()),
 								        mode: Some(LoadMode::Merge),
 								        data: "x".repeat(33 * 1024 * 1024),
 								    };
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/ingest")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(serde_json::to_vec(&oversize).unwrap()))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE);
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        ("act-andrew", "token-one"),
 								        ("act-ragnor", "token-two"),
 								    ])
 								    .await;
 								    let create = BranchCreateRequest {
 								        from: Some("main".to_string()),
 								        name: "feature".to_string(),
 								    };
 								    let (create_status, _) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer token-one")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&create).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(create_status, StatusCode::OK);
 								    let change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        params: Some(json!({ "name": "Zoe", "age": 33 })),
 								        branch: Some("feature".to_string()),
 								    };
 								    let (change_status, _) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer token-one")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(change_status, StatusCode::OK);
 								    let merge = BranchMergeRequest {
 								        source: "feature".to_string(),
 								        target: Some("main".to_string()),
 								    };
 								    let (merge_status, merge_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/merge")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer token-two")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(merge_status, StatusCode::OK);
 								    assert_eq!(merge_body["actor_id"], "act-ragnor");
 								    let (commit_status, commit_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/commits?branch=main")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer token-two")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(commit_status, StatusCode::OK);
 								    let head = commit_body["commits"]
 								        .as_array()
 								        .unwrap()
 								        .last()
 								        .expect("head commit should exist");
 								    assert_eq!(head["actor_id"], "act-ragnor");
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn branch_merge_conflict_response_includes_structured_conflicts() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    db.branch_create_from(ReadTarget::branch("main"), "feature")
 								        .await
 								        .unwrap();
 								    db.mutate(
 								        "main",
 								        MUTATION_QUERIES,
 								        "set_age",
 								        &omnigraph_compiler::json_params_to_param_map(
 								            Some(&json!({"name": "Alice", "age": 31 })),
 								            &omnigraph_compiler::find_named_query(MUTATION_QUERIES, "set_age")
 								                .unwrap()
 								                .params,
 								            omnigraph_compiler::JsonParamMode::Standard,
 								        )
 								        .unwrap(),
 								    )
 								    .await
 								    .unwrap();
 								    db.mutate(
 								        "feature",
 								        MUTATION_QUERIES,
 								        "set_age",
 								        &omnigraph_compiler::json_params_to_param_map(
 								            Some(&json!({"name": "Alice", "age": 32 })),
 								            &omnigraph_compiler::find_named_query(MUTATION_QUERIES, "set_age")
 								                .unwrap()
 								                .params,
 								            omnigraph_compiler::JsonParamMode::Standard,
 								        )
 								        .unwrap(),
 								    )
 								    .await
 								    .unwrap();
 								    drop(db);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let state = AppState::open(graph.to_string_lossy().to_string())
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        .await
 								        .unwrap();
 								    let app = build_app(state);
 								    let merge = BranchMergeRequest {
 								        source: "feature".to_string(),
 								        target: Some("main".to_string()),
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/merge")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
 								    assert_eq!(status, StatusCode::CONFLICT);
 								    assert_eq!(error.code, Some(omnigraph_server::api::ErrorCode::Conflict));
 								    assert!(error.error.contains("merge conflict"));
 								    assert!(error.merge_conflicts.iter().any(|conflict| {
 								        conflict.table_key == "node:Person"
 								            && conflict.row_id.as_deref() == Some("Alice")
 								            && conflict.kind == omnigraph_server::api::MergeConflictKindOutput::DivergentUpdate
 								    }));
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn repeated_read_after_change_sees_updated_state_from_same_app() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph().await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        params: Some(json!({ "name": "Mina", "age": 28 })),
 								        branch: Some("main".to_string()),
 								    };
 								    let (change_status, change_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(change_status, StatusCode::OK);
 								    assert_eq!(change_body["affected_nodes"], 1);
 								    let read = ReadRequest {
 								        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        query_name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Mina" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (read_status, read_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/read")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&read).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(read_status, StatusCode::OK);
 								    assert_eq!(read_body["row_count"], 1);
 								    assert_eq!(read_body["rows"][0]["p.name"], "Mina");
 								}
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								#[tokio::test(flavor = "multi_thread")]
 								async fn query_endpoint_runs_inline_read() {
 								    let (_temp, app) = app_for_loaded_graph().await;
 								    let query = QueryRequest {
 								        query: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Alice" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/query")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&query).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["query_name"], "get_person");
 								    assert_eq!(body["row_count"], 1);
 								    assert_eq!(body["rows"][0]["p.name"], "Alice");
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn query_endpoint_rejects_mutation_with_400() {
 								    let (_temp, app) = app_for_loaded_graph().await;
 								    let query = QueryRequest {
 								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
 								        params: Some(json!({ "name": "Should", "age": 1 })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/query")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&query).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::BAD_REQUEST);
 								    let err = body["error"].as_str().unwrap_or_default();
 								    assert!(
 								        err.contains("contains mutations") && err.contains("POST /mutate"),
 								        "expected mutation-rejection message pointing at canonical /mutate, got: {err}"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn mutate_endpoint_runs_inline_mutation() {
 								    // Canonical mutation endpoint. Pairs with `/query` on the read side.
 								    // Same wire shape as `/change`, no deprecation signal.
 								    let (_temp, app) = app_for_loaded_graph().await;
 								    let request = json!({
 								        "query": MUTATION_QUERIES,
 								        "name": "insert_person",
 								        "params": { "name": "Mutie", "age": 30 },
 								        "branch": "main",
 								    });
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/mutate")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(serde_json::to_vec(&request).unwrap()))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::OK);
 								    // Canonical route is NOT deprecated; no Deprecation header expected.
 								    assert!(
 								        response.headers().get("deprecation").is_none(),
 								        "POST /mutate must not advertise itself as deprecated"
 								    );
 								    let body_bytes = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								    let body: Value = serde_json::from_slice(&body_bytes).unwrap();
 								    assert_eq!(body["affected_nodes"], 1);
 								    assert_eq!(body["query_name"], "insert_person");
 								    assert_eq!(body["branch"], "main");
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn change_endpoint_emits_deprecation_headers() {
 								    // `/change` is kept indefinitely for back-compat but flagged at runtime
 								    // per RFC 9745 (`Deprecation: true`) + RFC 8288 (`Link: </mutate>;
 								    // rel="successor-version"`). The OpenAPI side is covered by
 								    // `openapi_change_is_deprecated` in tests/openapi.rs.
 								    let (_temp, app) = app_for_loaded_graph().await;
 								    let request = json!({
 								        "query": MUTATION_QUERIES,
 								        "name": "insert_person",
 								        "params": { "name": "Legacyer", "age": 33 },
 								        "branch": "main",
 								    });
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/change")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(serde_json::to_vec(&request).unwrap()))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::OK);
 								    assert_eq!(
 								        response
 								            .headers()
 								            .get("deprecation")
 								            .and_then(|v| v.to_str().ok()),
 								        Some("true"),
 								        "POST /change must advertise `Deprecation: true` (RFC 9745)"
 								    );
 								    assert_eq!(
 								        response.headers().get("link").and_then(|v| v.to_str().ok()),
 								        Some("</mutate>; rel=\"successor-version\""),
 								        "POST /change must point at /mutate via `Link` rel=successor-version (RFC 8288)"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn read_endpoint_emits_deprecation_headers() {
 								    // `/read` is kept indefinitely for byte-stable back-compat but flagged
 								    // at runtime per RFC 9745 + RFC 8288. Successor is `/query`.
 								    let (_temp, app) = app_for_loaded_graph().await;
 								    let request = ReadRequest {
 								        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        query_name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Alice" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/read")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(serde_json::to_vec(&request).unwrap()))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::OK);
 								    assert_eq!(
 								        response
 								            .headers()
 								            .get("deprecation")
 								            .and_then(|v| v.to_str().ok()),
 								        Some("true"),
 								        "POST /read must advertise `Deprecation: true` (RFC 9745)"
 								    );
 								    assert_eq!(
 								        response.headers().get("link").and_then(|v| v.to_str().ok()),
 								        Some("</query>; rel=\"successor-version\""),
 								        "POST /read must point at /query via `Link` rel=successor-version (RFC 8288)"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn query_endpoint_does_not_emit_deprecation_headers() {
 								    // Sanity check the inverse: the canonical `/query` endpoint must not
 								    // carry deprecation signaling, so SDK codegens don't propagate a
 								    // bogus `@deprecated` marker.
 								    let (_temp, app) = app_for_loaded_graph().await;
 								    let request = QueryRequest {
 								        query: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Alice" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/query")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(serde_json::to_vec(&request).unwrap()))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::OK);
 								    assert!(
 								        response.headers().get("deprecation").is_none(),
 								        "POST /query is canonical and must not advertise itself as deprecated"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn change_endpoint_accepts_legacy_field_names() {
 								    // The canonical wire field names on /change are `query` and `name`, but
 								    // serde aliases keep the legacy `query_source`/`query_name` payload
 								    // shape working for clients that haven't migrated yet. Pin both shapes.
 								    let (_temp, app) = app_for_loaded_graph().await;
 								    let legacy_body = json!({
 								        "query_source": MUTATION_QUERIES,
 								        "query_name": "insert_person",
 								        "params": { "name": "Legacy", "age": 21 },
 								        "branch": "main",
 								    });
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&legacy_body).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["affected_nodes"], 1);
 								    let canonical_body = json!({
 								        "query": MUTATION_QUERIES,
 								        "name": "insert_person",
 								        "params": { "name": "Canonical", "age": 22 },
 								        "branch": "main",
 								    });
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&canonical_body).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["affected_nodes"], 1);
 								}
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								#[tokio::test(flavor = "multi_thread")]
 								async fn remote_branch_list_create_merge_flow_works() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph().await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let (list_status, list_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(list_status, StatusCode::OK);
 								    assert_eq!(list_body["branches"], json!(["main"]));
 								    let create = BranchCreateRequest {
 								        from: Some("main".to_string()),
 								        name: "feature".to_string(),
 								    };
 								    let (create_status, create_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&create).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(create_status, StatusCode::OK);
 								    assert_eq!(create_body["from"], "main");
 								    assert_eq!(create_body["name"], "feature");
 								    let (list_status, list_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(list_status, StatusCode::OK);
 								    assert_eq!(list_body["branches"], json!(["feature", "main"]));
 								    let change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        params: Some(json!({ "name": "Zoe", "age": 33 })),
 								        branch: Some("feature".to_string()),
 								    };
 								    let (change_status, change_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(change_status, StatusCode::OK);
 								    assert_eq!(change_body["branch"], "feature");
 								    assert_eq!(change_body["affected_nodes"], 1);
 								    let read_main_before = ReadRequest {
 								        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        query_name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Zoe" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (read_status, read_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/read")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&read_main_before).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(read_status, StatusCode::OK);
 								    assert_eq!(read_body["row_count"], 0);
 								    let merge = BranchMergeRequest {
 								        source: "feature".to_string(),
 								        target: Some("main".to_string()),
 								    };
 								    let (merge_status, merge_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/merge")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&merge).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(merge_status, StatusCode::OK);
 								    assert_eq!(merge_body["source"], "feature");
 								    assert_eq!(merge_body["target"], "main");
 								    assert_eq!(merge_body["outcome"], "fast_forward");
 								    let read_main_after = ReadRequest {
 								        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        query_name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Zoe" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (read_status, read_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/read")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&read_main_after).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(read_status, StatusCode::OK);
 								    assert_eq!(read_body["row_count"], 1);
 								    assert_eq!(read_body["rows"][0]["p.name"], "Zoe");
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn remote_branch_delete_flow_works() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph().await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
 								    let create = BranchCreateRequest {
 								        from: Some("main".to_string()),
 								        name: "feature".to_string(),
 								    };
 								    let (create_status, _) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&create).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(create_status, StatusCode::OK);
 								    let (delete_status, delete_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/feature")
 								            .method(Method::DELETE)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(delete_status, StatusCode::OK);
 								    assert_eq!(delete_body["name"], "feature");
 								    let (list_status, list_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(list_status, StatusCode::OK);
 								    assert_eq!(list_body["branches"], json!(["main"]));
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn branch_delete_denies_without_policy_permission() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        &[("act-andrew", "token-admin"), ("act-bruno", "token-team")],
 								        POLICY_YAML,
 								    )
 								    .await;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    db.branch_create_from(ReadTarget::branch("main"), "feature")
 								        .await
 								        .unwrap();
 								    drop(db);
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/feature")
 								            .method(Method::DELETE)
 								            .header("authorization", "Bearer token-team")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::FORBIDDEN);
 								    assert!(
 								        body["error"]
 								            .as_str()
 								            .unwrap()
 								            .contains("policy denied action 'branch_delete'")
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn server_opens_s3_graph_directly_and_serves_snapshot_and_read() {
 								    let Some(uri) = s3_test_graph_uri("server") else {
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        eprintln!("skipping s3 server test: OMNIGRAPH_S3_TEST_BUCKET is not set");
 								        return;
 								    };
 								    Omnigraph::init(&uri, &fs::read_to_string(fixture("test.pg")).unwrap())
 								        .await
 								        .unwrap();
 								    let mut db = Omnigraph::open(&uri).await.unwrap();
 								    load_jsonl(
 								        &mut db,
 								        &fs::read_to_string(fixture("test.jsonl")).unwrap(),
 								        LoadMode::Overwrite,
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(
 								        AppState::open_with_bearer_token(uri.clone(), Some("s3-token".to_string()))
 								            .await
 								            .unwrap(),
 								    );
 								    let (snapshot_status, snapshot_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot")
 								            .method(Method::GET)
 								            .header("authorization", "Bearer s3-token")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(snapshot_status, StatusCode::OK);
 								    assert!(snapshot_body["tables"].is_array());
 								    let read = ReadRequest {
 								        query_source: fs::read_to_string(fixture("test.gq")).unwrap(),
 								        query_name: Some("get_person".to_string()),
 								        params: Some(json!({ "name": "Alice" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (read_status, read_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/read")
 								            .method(Method::POST)
 								            .header("authorization", "Bearer s3-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&read).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(read_status, StatusCode::OK);
 								    assert_eq!(read_body["row_count"], 1);
 								    assert_eq!(read_body["rows"][0]["p.name"], "Alice");
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								#[serial]
 								async fn remote_read_embeds_string_nearest_queries_with_mock_runtime() {
 								    const EMBED_SCHEMA: &str = r#"
 								node Doc {
 								    slug: String @key
 								    title: String @index
 								    embedding: Vector(4) @index
 								}
 								"#;
 								    const EMBED_QUERY: &str = r#"
 								query vector_search_string($q: String) {
 								    match { $d: Doc }
 								    return { $d.slug, $d.title }
 								    order { nearest($d.embedding, $q) }
 								    limit 3
 								}
 								"#;
 								    let alpha = mock_embedding("alpha", 4);
 								    let beta = mock_embedding("beta", 4);
 								    let gamma = mock_embedding("gamma", 4);
 								    let data = format!(
 								        concat!(
 								            r#"{{"type":"Doc","data":{{"slug":"alpha-doc","title":"alpha guide","embedding":[{}]}}}}"#,
 								            "\n",
 								            r#"{{"type":"Doc","data":{{"slug":"beta-doc","title":"beta guide","embedding":[{}]}}}}"#,
 								            "\n",
 								            r#"{{"type":"Doc","data":{{"slug":"gamma-doc","title":"gamma handbook","embedding":[{}]}}}}"#
 								        ),
 								        format_vector(&alpha),
 								        format_vector(&beta),
 								        format_vector(&gamma),
 								    );
 								    let _guard = EnvGuard::set(&[
 								        ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")),
 								        ("GEMINI_API_KEY", None),
 								    ]);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_graph_with_schema_and_data(EMBED_SCHEMA, &data).await;
 								    let graph = graph_path(temp.path());
 								    let state = AppState::open(graph.to_string_lossy().to_string())
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        .await
 								        .unwrap();
 								    let app = build_app(state);
 								    let read = ReadRequest {
 								        query_source: EMBED_QUERY.to_string(),
 								        query_name: Some("vector_search_string".to_string()),
 								        params: Some(json!({ "q": "alpha" })),
 								        branch: Some("main".to_string()),
 								        snapshot: None,
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/read")
 								            .method(Method::POST)
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&read).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(body["row_count"], 3);
 								    assert_eq!(body["rows"][0]["d.slug"], "alpha-doc");
 								}
 								#[tokio::test(flavor = "multi_thread")]
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								async fn change_conflict_returns_manifest_conflict_409() {
-												chore: scrub Linear ticket numbers and review-bot mentions from code comments

OmniGraph is OSS; internal Linear ticket references and code-review-bot
mentions in source-code comments don't help external readers and leak
internal tooling. Replace ticket numbers (MR-XXX) with descriptive
prose, drop linear.app URLs, and remove inline mentions of
Cursor/Bugbot/Cubic/Codex review threads.

Scope is limited to source-code comments (`crates/`). Docs under
`docs/` keep their MR-XXX references — those are part of the
established change-history narrative for in-repo docs and don't
require a Linear account to find context for.

No behavior changes; no public API changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-01 22:45:38 +02:00
+								    // A write that races with another writer surfaces as HTTP 409 with
 								    // a structured `manifest_conflict` body — `table_key`, `expected`,
 								    // and `actual` — so clients can detect-and-retry without parsing
 								    // the message.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    // Build the server first so its handle pins the pre-mutation manifest
 								    // version. Then advance the manifest from outside the server. The
 								    // server's next /change call will capture stale `expected_versions`
 								    // (from its still-pinned snapshot) and the publisher's CAS rejects.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let state = AppState::open(graph.to_string_lossy().to_string())
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        .await
 								        .unwrap();
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    let app = build_app(state);
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								        db.mutate(
 								            "main",
 								            MUTATION_QUERIES,
 								            "set_age",
 								            &omnigraph_compiler::json_params_to_param_map(
 								                Some(&json!({"name": "Alice", "age": 31 })),
 								                &omnigraph_compiler::find_named_query(MUTATION_QUERIES, "set_age")
 								                    .unwrap()
 								                    .params,
 								                omnigraph_compiler::JsonParamMode::Standard,
 								            )
 								            .unwrap(),
 								        )
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								        .await
 								        .unwrap();
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    }
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								            .uri("/change")
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								            .method(Method::POST)
 								            .header("content-type", "application/json")
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								            .body(Body::from(
 								                serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                    query: MUTATION_QUERIES.to_string(),
 								                    name: Some("set_age".to_string()),
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								                    params: Some(json!({ "name": "Alice", "age": 33 })),
 								                    branch: Some("main".to_string()),
 								                })
 								                .unwrap(),
 								            ))
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::CONFLICT);
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    assert_eq!(error.code, Some(omnigraph_server::api::ErrorCode::Conflict));
-												MR-771: demote Run to direct-publish via expected_table_versions CAS

mutate_as and load now write directly to target tables and call the
publisher once at the end with per-table expected versions; the Run
state machine, _graph_runs.lance writers, __run__ staging branches,
and server /runs/* endpoints are removed. Multi-statement mutations
remain atomic at the manifest level via an in-memory MutationStaging
accumulator that gives read-your-writes within a query and a single
publish at the end. Concurrent-writer conflicts surface as
ExpectedVersionMismatch (HTTP 409 manifest_conflict) instead of the
old DivergentUpdate merge shape. Documents one known limitation in
docs/runs.md: a multi-statement mid-query failure where op-N writes
a Lance fragment and op-N+1 fails leaves Lance HEAD ahead of the
manifest until a follow-up introduces per-table Lance branches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-04-30 08:52:50 +02:00
+								    let conflict = error
 								        .manifest_conflict
 								        .expect("publisher CAS rejection must populate manifest_conflict body");
 								    assert_eq!(conflict.table_key, "node:Person");
 								    assert!(
 								        conflict.actual > conflict.expected,
 								        "actual ({}) should be ahead of expected ({})",
 								        conflict.actual,
 								        conflict.expected,
 								    );
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								}
-												mr-686: Phase 2 — op-kind-aware version check + coord Mutex → RwLock

Fix A: op-kind-aware ensure_expected_version. Insert/Merge skip the
strict pre-stage check; Update/Delete/SchemaRewrite keep it. New
MutationOpKind enum threaded through open_for_mutation_on_branch /
open_owned_dataset_for_branch_write / reopen_for_mutation and all
callers (execute_insert/update/delete_node/delete_edge,
branch_merge::publish_rewritten_merge_table, schema_apply,
ensure_indices_for_branch, loader Append/Merge/Overwrite). Closes the
77% rejection rate on same-key concurrent inserts.

Fix B: coordinator Mutex -> RwLock. Reads parallelize via .read();
writes serialize via .write(). Atomic-commit invariant preserved by
the single .write() covering commit_manifest_updates +
record_graph_commit.

Bench-as-test change_concurrent_inserts_same_key_serialize_without_409
(server.rs:2180) spawns 12 concurrent /change inserts on a single
(table, branch); asserts every request returns 200. Was failing
pre-Phase-2; passes post-Phase-2.
change_conflict_returns_manifest_conflict_409 (cross-process drift
sentinel) and branch_merge_conflict_response_includes_structured_conflicts
both still pass.

Bench (after-pr2-phase2):
- single-actor 1x1: 14.9 ops/s, p50 68ms (baseline 12.3, +22%)
- disjoint 8x8:    7.04 ops/s, p50 1023ms (baseline 6.24, +13%)
- same-key 8x1:    2.62 ops/s, 0 errors (after-pr2: 77% errors)

Disjoint stayed at +13% — Fix B's RwLock helped read paths but the
publisher's .write() critical section still serializes graph-wide.
Splitting GraphCoordinator into per-concern primitives (manifest in
ArcSwap, commit_graph in RwLock, atomic-commit serializer) is the
deferred next step.

102 lib + 30 branching + 24 runs + 16 staged_writes + 63 end_to_end
+ 40 server tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 12:42:26 +02:00
+								#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 								async fn change_concurrent_inserts_same_key_serialize_without_409() {
 								    // PR 2 Phase 2 (MR-686): pin the design fix for the same-key
 								    // concurrency hazard. Pre-fix, in-process concurrent inserts on
 								    // the same `(table, branch)` rejected with 409 manifest_conflict
 								    // because `ensure_expected_version` fired before the per-table
 								    // queue was acquired and saw Lance HEAD already advanced by a
 								    // peer writer. Post-fix, Insert/Merge skip the strict pre-stage
 								    // check (see `MutationOpKind::strict_pre_stage_version_check`);
 								    // the queue serializes commit_staged; Lance's natural rebase
 								    // handles the in-flight stage; the publisher's CAS on a fresh
 								    // per-branch snapshot under the queue catches genuine cross-
 								    // process drift.
 								    //
 								    // This test spawns N concurrent /change inserts on a single
 								    // node type and asserts: every request returns 200 (no 409),
-												tests: extend same-key insert test with /snapshot row-count assertion

The existing change_concurrent_inserts_same_key_serialize_without_409
test claimed in its comment "asserts the final row count equals N" but
only checked HTTP status codes. cubic flagged the gap; this commit
adds the actual /snapshot read after the concurrent inserts to verify
all N batches landed (no silent overwrite) by comparing the post-test
node:Person row_count against SEED + N.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:49:38 +02:00
+								    // and the final row count equals the seed count + N (every
 								    // staged batch actually committed).
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let state = AppState::open(graph.to_string_lossy().to_string())
-												mr-686: Phase 2 — op-kind-aware version check + coord Mutex → RwLock

Fix A: op-kind-aware ensure_expected_version. Insert/Merge skip the
strict pre-stage check; Update/Delete/SchemaRewrite keep it. New
MutationOpKind enum threaded through open_for_mutation_on_branch /
open_owned_dataset_for_branch_write / reopen_for_mutation and all
callers (execute_insert/update/delete_node/delete_edge,
branch_merge::publish_rewritten_merge_table, schema_apply,
ensure_indices_for_branch, loader Append/Merge/Overwrite). Closes the
77% rejection rate on same-key concurrent inserts.

Fix B: coordinator Mutex -> RwLock. Reads parallelize via .read();
writes serialize via .write(). Atomic-commit invariant preserved by
the single .write() covering commit_manifest_updates +
record_graph_commit.

Bench-as-test change_concurrent_inserts_same_key_serialize_without_409
(server.rs:2180) spawns 12 concurrent /change inserts on a single
(table, branch); asserts every request returns 200. Was failing
pre-Phase-2; passes post-Phase-2.
change_conflict_returns_manifest_conflict_409 (cross-process drift
sentinel) and branch_merge_conflict_response_includes_structured_conflicts
both still pass.

Bench (after-pr2-phase2):
- single-actor 1x1: 14.9 ops/s, p50 68ms (baseline 12.3, +22%)
- disjoint 8x8:    7.04 ops/s, p50 1023ms (baseline 6.24, +13%)
- same-key 8x1:    2.62 ops/s, 0 errors (after-pr2: 77% errors)

Disjoint stayed at +13% — Fix B's RwLock helped read paths but the
publisher's .write() critical section still serializes graph-wide.
Splitting GraphCoordinator into per-concern primitives (manifest in
ArcSwap, commit_graph in RwLock, atomic-commit serializer) is the
deferred next step.

102 lib + 30 branching + 24 runs + 16 staged_writes + 63 end_to_end
+ 40 server tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 12:42:26 +02:00
+								        .await
 								        .unwrap();
 								    let app = build_app(state);
-												tests: extend same-key insert test with /snapshot row-count assertion

The existing change_concurrent_inserts_same_key_serialize_without_409
test claimed in its comment "asserts the final row count equals N" but
only checked HTTP status codes. cubic flagged the gap; this commit
adds the actual /snapshot read after the concurrent inserts to verify
all N batches landed (no silent overwrite) by comparing the post-test
node:Person row_count against SEED + N.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:49:38 +02:00
+								    // test.jsonl seeds 4 Persons (Alice, Bob, Charlie, Diana).
 								    const SEED_PERSON_ROWS: u64 = 4;
-												mr-686: Phase 2 — op-kind-aware version check + coord Mutex → RwLock

Fix A: op-kind-aware ensure_expected_version. Insert/Merge skip the
strict pre-stage check; Update/Delete/SchemaRewrite keep it. New
MutationOpKind enum threaded through open_for_mutation_on_branch /
open_owned_dataset_for_branch_write / reopen_for_mutation and all
callers (execute_insert/update/delete_node/delete_edge,
branch_merge::publish_rewritten_merge_table, schema_apply,
ensure_indices_for_branch, loader Append/Merge/Overwrite). Closes the
77% rejection rate on same-key concurrent inserts.

Fix B: coordinator Mutex -> RwLock. Reads parallelize via .read();
writes serialize via .write(). Atomic-commit invariant preserved by
the single .write() covering commit_manifest_updates +
record_graph_commit.

Bench-as-test change_concurrent_inserts_same_key_serialize_without_409
(server.rs:2180) spawns 12 concurrent /change inserts on a single
(table, branch); asserts every request returns 200. Was failing
pre-Phase-2; passes post-Phase-2.
change_conflict_returns_manifest_conflict_409 (cross-process drift
sentinel) and branch_merge_conflict_response_includes_structured_conflicts
both still pass.

Bench (after-pr2-phase2):
- single-actor 1x1: 14.9 ops/s, p50 68ms (baseline 12.3, +22%)
- disjoint 8x8:    7.04 ops/s, p50 1023ms (baseline 6.24, +13%)
- same-key 8x1:    2.62 ops/s, 0 errors (after-pr2: 77% errors)

Disjoint stayed at +13% — Fix B's RwLock helped read paths but the
publisher's .write() critical section still serializes graph-wide.
Splitting GraphCoordinator into per-concern primitives (manifest in
ArcSwap, commit_graph in RwLock, atomic-commit serializer) is the
deferred next step.

102 lib + 30 branching + 24 runs + 16 staged_writes + 63 end_to_end
+ 40 server tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 12:42:26 +02:00
+								    const N: usize = 12;
 								    let mut handles = Vec::with_capacity(N);
 								    for i in 0..N {
 								        let app = app.clone();
 								        handles.push(tokio::spawn(async move {
 								            let body = serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                query: MUTATION_QUERIES.to_string(),
 								                name: Some("insert_person".to_string()),
-												mr-686: Phase 2 — op-kind-aware version check + coord Mutex → RwLock

Fix A: op-kind-aware ensure_expected_version. Insert/Merge skip the
strict pre-stage check; Update/Delete/SchemaRewrite keep it. New
MutationOpKind enum threaded through open_for_mutation_on_branch /
open_owned_dataset_for_branch_write / reopen_for_mutation and all
callers (execute_insert/update/delete_node/delete_edge,
branch_merge::publish_rewritten_merge_table, schema_apply,
ensure_indices_for_branch, loader Append/Merge/Overwrite). Closes the
77% rejection rate on same-key concurrent inserts.

Fix B: coordinator Mutex -> RwLock. Reads parallelize via .read();
writes serialize via .write(). Atomic-commit invariant preserved by
the single .write() covering commit_manifest_updates +
record_graph_commit.

Bench-as-test change_concurrent_inserts_same_key_serialize_without_409
(server.rs:2180) spawns 12 concurrent /change inserts on a single
(table, branch); asserts every request returns 200. Was failing
pre-Phase-2; passes post-Phase-2.
change_conflict_returns_manifest_conflict_409 (cross-process drift
sentinel) and branch_merge_conflict_response_includes_structured_conflicts
both still pass.

Bench (after-pr2-phase2):
- single-actor 1x1: 14.9 ops/s, p50 68ms (baseline 12.3, +22%)
- disjoint 8x8:    7.04 ops/s, p50 1023ms (baseline 6.24, +13%)
- same-key 8x1:    2.62 ops/s, 0 errors (after-pr2: 77% errors)

Disjoint stayed at +13% — Fix B's RwLock helped read paths but the
publisher's .write() critical section still serializes graph-wide.
Splitting GraphCoordinator into per-concern primitives (manifest in
ArcSwap, commit_graph in RwLock, atomic-commit serializer) is the
deferred next step.

102 lib + 30 branching + 24 runs + 16 staged_writes + 63 end_to_end
+ 40 server tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 12:42:26 +02:00
+								                params: Some(json!({ "name": format!("racer-{i}"), "age": i as i32 })),
 								                branch: Some("main".to_string()),
 								            })
 								            .unwrap();
 								            let req = Request::builder()
 								                .uri("/change")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(body))
 								                .unwrap();
 								            let response = app.oneshot(req).await.unwrap();
 								            response.status()
 								        }));
 								    }
 								    let mut statuses = Vec::with_capacity(N);
 								    for h in handles {
 								        statuses.push(h.await.unwrap());
 								    }
 								    let bad: Vec<_> = statuses
 								        .iter()
 								        .enumerate()
 								        .filter(|(_, s)| **s != StatusCode::OK)
 								        .collect();
 								    assert!(
 								        bad.is_empty(),
 								        "expected every concurrent insert to return 200, got non-200 for: {:?}",
 								        bad
 								    );
-												tests: extend same-key insert test with /snapshot row-count assertion

The existing change_concurrent_inserts_same_key_serialize_without_409
test claimed in its comment "asserts the final row count equals N" but
only checked HTTP status codes. cubic flagged the gap; this commit
adds the actual /snapshot read after the concurrent inserts to verify
all N batches landed (no silent overwrite) by comparing the post-test
node:Person row_count against SEED + N.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:49:38 +02:00
+								    // Verify the inserts actually landed. The status check above only proves
 								    // the publisher CAS didn't reject; the row count proves none of the
 								    // concurrent commits silently overwrote a peer.
 								    let (snapshot_status, snapshot_body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(snapshot_status, StatusCode::OK);
 								    let person_rows = snapshot_body["tables"]
 								        .as_array()
 								        .and_then(|tables| {
 								            tables
 								                .iter()
 								                .find(|t| t["table_key"].as_str() == Some("node:Person"))
 								        })
 								        .and_then(|t| t["row_count"].as_u64())
 								        .expect("snapshot must include node:Person row_count");
 								    assert_eq!(
 								        person_rows,
 								        SEED_PERSON_ROWS + N as u64,
 								        "expected {} seeded + {} concurrent inserts = {} Person rows; got {}",
 								        SEED_PERSON_ROWS,
 								        N,
 								        SEED_PERSON_ROWS + N as u64,
 								        person_rows,
 								    );
-												mr-686: Phase 2 — op-kind-aware version check + coord Mutex → RwLock

Fix A: op-kind-aware ensure_expected_version. Insert/Merge skip the
strict pre-stage check; Update/Delete/SchemaRewrite keep it. New
MutationOpKind enum threaded through open_for_mutation_on_branch /
open_owned_dataset_for_branch_write / reopen_for_mutation and all
callers (execute_insert/update/delete_node/delete_edge,
branch_merge::publish_rewritten_merge_table, schema_apply,
ensure_indices_for_branch, loader Append/Merge/Overwrite). Closes the
77% rejection rate on same-key concurrent inserts.

Fix B: coordinator Mutex -> RwLock. Reads parallelize via .read();
writes serialize via .write(). Atomic-commit invariant preserved by
the single .write() covering commit_manifest_updates +
record_graph_commit.

Bench-as-test change_concurrent_inserts_same_key_serialize_without_409
(server.rs:2180) spawns 12 concurrent /change inserts on a single
(table, branch); asserts every request returns 200. Was failing
pre-Phase-2; passes post-Phase-2.
change_conflict_returns_manifest_conflict_409 (cross-process drift
sentinel) and branch_merge_conflict_response_includes_structured_conflicts
both still pass.

Bench (after-pr2-phase2):
- single-actor 1x1: 14.9 ops/s, p50 68ms (baseline 12.3, +22%)
- disjoint 8x8:    7.04 ops/s, p50 1023ms (baseline 6.24, +13%)
- same-key 8x1:    2.62 ops/s, 0 errors (after-pr2: 77% errors)

Disjoint stayed at +13% — Fix B's RwLock helped read paths but the
publisher's .write() critical section still serializes graph-wide.
Splitting GraphCoordinator into per-concern primitives (manifest in
ArcSwap, commit_graph in RwLock, atomic-commit serializer) is the
deferred next step.

102 lib + 30 branching + 24 runs + 16 staged_writes + 63 end_to_end
+ 40 server tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 12:42:26 +02:00
+								}
-												tests: pin UPDATE RYW under in-process concurrency (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix so the red → green pair is visible in git log.

The test asserts the RYW invariant for in-process concurrent UPDATEs on
the same row: exactly one writer commits and N-1 receive 409
manifest_conflict. Currently fails on f925ad1 with 1 x 200 + 7 x 500:

> "storage: Retryable commit conflict for version 6: This Update
>  transaction was preempted by concurrent transaction Update at
>  version 6. Please retry."

Lance's transaction conflict resolver correctly detects the Update vs
Update race, but the error wraps as `OmniError::Lance(<string>)` and the
API surfaces it as 500 internal rather than 409 retryable conflict. Users
see "internal server error" for what is documented as a retryable
conflict path.

The fix lands in the next commit: an op-kind-aware drift check at the
commit_all entry that returns 409 ExpectedVersionMismatch for tables
whose first touch was Update / Delete / SchemaRewrite when the staged
dataset version drifts from the manifest pin under the queue.

Closes the bug class "Lance internal conflict surfaces as 500 instead
of 409" rather than mapping the specific Lance error variant — the
right architectural layer (engine boundary, under the queue) catches
the drift before commit_staged ever runs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:33:53 +02:00
+								#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 								async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() {
 								    // Pin Update RYW semantics under in-process concurrency on the same
 								    // `(table, branch)`. With per-table queue serialization and op-kind-aware
 								    // drift detection at commit time, exactly one of N concurrent UPDATEs
 								    // on the same row commits; the rest are rejected as 409 manifest_conflict.
 								    //
 								    // Pre-fix bug class: in `MutationStaging::commit_all`, after queue
 								    // acquisition, the staged Lance transaction is handed straight to
 								    // `commit_staged`. For a writer whose staged dataset is at V0 but
 								    // Lance HEAD has advanced to V1 (because the queue's prior winner
 								    // already published), Lance's transaction conflict resolver fires
 								    // `RetryableCommitConflict` on Update vs Update on the same row.
 								    // That error gets wrapped as `OmniError::Lance(<string>)` and the
 								    // API surfaces it as **500 internal**, not 409. Users see "internal
 								    // server error" instead of a retryable conflict, breaking the
 								    // documented 409 contract for in-process drift.
 								    //
 								    // Post-fix invariant: `commit_all` does an op-kind-aware drift check
 								    // before each `commit_staged`. For tables whose tracked op_kind has
 								    // `strict_pre_stage_version_check() == true` (Update / Delete /
 								    // SchemaRewrite), if the staged dataset's version doesn't match the
 								    // fresh manifest pin, return `OmniError::manifest_expected_version_mismatch`
 								    // → 409 ExpectedVersionMismatch. The N-1 losers see a clean 409
 								    // before Lance's commit_staged ever runs.
 								    //
 								    // Why correct-by-design: closing the class "Lance internal conflict
 								    // surfaces as 500 instead of 409" rather than mapping the specific
 								    // Lance error variant. The drift check fires at the right architectural
 								    // layer (engine boundary, under the queue) and respects the existing
 								    // `MutationOpKind` policy.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let state = AppState::open(graph.to_string_lossy().to_string())
-												tests: pin UPDATE RYW under in-process concurrency (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix so the red → green pair is visible in git log.

The test asserts the RYW invariant for in-process concurrent UPDATEs on
the same row: exactly one writer commits and N-1 receive 409
manifest_conflict. Currently fails on f925ad1 with 1 x 200 + 7 x 500:

> "storage: Retryable commit conflict for version 6: This Update
>  transaction was preempted by concurrent transaction Update at
>  version 6. Please retry."

Lance's transaction conflict resolver correctly detects the Update vs
Update race, but the error wraps as `OmniError::Lance(<string>)` and the
API surfaces it as 500 internal rather than 409 retryable conflict. Users
see "internal server error" for what is documented as a retryable
conflict path.

The fix lands in the next commit: an op-kind-aware drift check at the
commit_all entry that returns 409 ExpectedVersionMismatch for tables
whose first touch was Update / Delete / SchemaRewrite when the staged
dataset version drifts from the manifest pin under the queue.

Closes the bug class "Lance internal conflict surfaces as 500 instead
of 409" rather than mapping the specific Lance error variant — the
right architectural layer (engine boundary, under the queue) catches
the drift before commit_staged ever runs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:33:53 +02:00
+								        .await
 								        .unwrap();
 								    let app = build_app(state);
 								    // Spawn N=8 concurrent UPDATEs on Alice (from test.jsonl, age=30 at V0)
 								    // writing distinct ages.
 								    const N: usize = 8;
 								    let mut handles = Vec::with_capacity(N);
 								    for i in 0..N {
 								        let app = app.clone();
 								        let target_age = 100 + i as i32;
 								        handles.push(tokio::spawn(async move {
 								            let body = serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                query: MUTATION_QUERIES.to_string(),
 								                name: Some("set_age".to_string()),
-												tests: pin UPDATE RYW under in-process concurrency (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix so the red → green pair is visible in git log.

The test asserts the RYW invariant for in-process concurrent UPDATEs on
the same row: exactly one writer commits and N-1 receive 409
manifest_conflict. Currently fails on f925ad1 with 1 x 200 + 7 x 500:

> "storage: Retryable commit conflict for version 6: This Update
>  transaction was preempted by concurrent transaction Update at
>  version 6. Please retry."

Lance's transaction conflict resolver correctly detects the Update vs
Update race, but the error wraps as `OmniError::Lance(<string>)` and the
API surfaces it as 500 internal rather than 409 retryable conflict. Users
see "internal server error" for what is documented as a retryable
conflict path.

The fix lands in the next commit: an op-kind-aware drift check at the
commit_all entry that returns 409 ExpectedVersionMismatch for tables
whose first touch was Update / Delete / SchemaRewrite when the staged
dataset version drifts from the manifest pin under the queue.

Closes the bug class "Lance internal conflict surfaces as 500 instead
of 409" rather than mapping the specific Lance error variant — the
right architectural layer (engine boundary, under the queue) catches
the drift before commit_staged ever runs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:33:53 +02:00
+								                params: Some(json!({ "name": "Alice", "age": target_age })),
 								                branch: Some("main".to_string()),
 								            })
 								            .unwrap();
 								            let req = Request::builder()
 								                .uri("/change")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(body))
 								                .unwrap();
 								            let response = app.oneshot(req).await.unwrap();
 								            let status = response.status();
 								            let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								            (status, body.to_vec())
 								        }));
 								    }
 								    let mut results = Vec::with_capacity(N);
 								    for h in handles {
 								        results.push(h.await.unwrap());
 								    }
 								    let statuses: Vec<StatusCode> = results.iter().map(|(s, _)| *s).collect();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let ok_count = statuses.iter().filter(|s| **s == StatusCode::OK).count();
-												tests: pin UPDATE RYW under in-process concurrency (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix so the red → green pair is visible in git log.

The test asserts the RYW invariant for in-process concurrent UPDATEs on
the same row: exactly one writer commits and N-1 receive 409
manifest_conflict. Currently fails on f925ad1 with 1 x 200 + 7 x 500:

> "storage: Retryable commit conflict for version 6: This Update
>  transaction was preempted by concurrent transaction Update at
>  version 6. Please retry."

Lance's transaction conflict resolver correctly detects the Update vs
Update race, but the error wraps as `OmniError::Lance(<string>)` and the
API surfaces it as 500 internal rather than 409 retryable conflict. Users
see "internal server error" for what is documented as a retryable
conflict path.

The fix lands in the next commit: an op-kind-aware drift check at the
commit_all entry that returns 409 ExpectedVersionMismatch for tables
whose first touch was Update / Delete / SchemaRewrite when the staged
dataset version drifts from the manifest pin under the queue.

Closes the bug class "Lance internal conflict surfaces as 500 instead
of 409" rather than mapping the specific Lance error variant — the
right architectural layer (engine boundary, under the queue) catches
the drift before commit_staged ever runs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:33:53 +02:00
+								    let conflict_count = statuses
 								        .iter()
 								        .filter(|s| **s == StatusCode::CONFLICT)
 								        .count();
 								    let other: Vec<_> = statuses
 								        .iter()
 								        .enumerate()
 								        .filter(|(_, s)| **s != StatusCode::OK && **s != StatusCode::CONFLICT)
 								        .collect();
 								    let other_bodies: Vec<(usize, StatusCode, String)> = other
 								        .iter()
 								        .map(|(i, s)| {
 								            let body_str = String::from_utf8_lossy(&results[*i].1).to_string();
 								            (*i, **s, body_str)
 								        })
 								        .collect();
 								    assert!(
 								        other.is_empty(),
 								        "expected only 200 or 409 statuses, got non-200/409 entries: {:?}",
 								        other_bodies
 								    );
 								    assert_eq!(
 								        ok_count + conflict_count,
 								        N,
 								        "all responses must be 200 or 409 to satisfy the RYW invariant; statuses: {:?}",
 								        statuses
 								    );
 								    assert_eq!(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        ok_count,
 ,
-												tests: pin UPDATE RYW under in-process concurrency (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix so the red → green pair is visible in git log.

The test asserts the RYW invariant for in-process concurrent UPDATEs on
the same row: exactly one writer commits and N-1 receive 409
manifest_conflict. Currently fails on f925ad1 with 1 x 200 + 7 x 500:

> "storage: Retryable commit conflict for version 6: This Update
>  transaction was preempted by concurrent transaction Update at
>  version 6. Please retry."

Lance's transaction conflict resolver correctly detects the Update vs
Update race, but the error wraps as `OmniError::Lance(<string>)` and the
API surfaces it as 500 internal rather than 409 retryable conflict. Users
see "internal server error" for what is documented as a retryable
conflict path.

The fix lands in the next commit: an op-kind-aware drift check at the
commit_all entry that returns 409 ExpectedVersionMismatch for tables
whose first touch was Update / Delete / SchemaRewrite when the staged
dataset version drifts from the manifest pin under the queue.

Closes the bug class "Lance internal conflict surfaces as 500 instead
of 409" rather than mapping the specific Lance error variant — the
right architectural layer (engine boundary, under the queue) catches
the drift before commit_staged ever runs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:33:53 +02:00
+								        "expected exactly one update to commit and N-1 to receive 409 manifest_conflict \
 								         (op-kind-aware drift check rejects stale-V0 staged datasets at commit_all entry). \
 								         Got {} OK + {} 409 + {} other. \
 								         Pre-fix symptom: 1 OK + (N-1) x 500 because Lance's RetryableCommitConflict for \
 								         Update vs Update on the same row bubbles up as `OmniError::Lance(<string>)` and \
 								         the API maps it to 500 internal, not 409. Statuses: {:?}",
 								        ok_count,
 								        conflict_count,
 								        statuses.len() - ok_count - conflict_count,
 								        statuses,
 								    );
 								}
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								// ─────────────────────────────────────────────────────────────────────────
 								// Branch-ops morphological matrix
 								//
 								// Table-driven test covering all interesting (op_a, op_b, target_overlap)
 								// concurrent-pair cells with the C1-C6 invariants asserted uniformly:
 								//
 								//   C1 — both complete (no deadlock, no hang)
 								//   C2 — status: both 200, or exactly one clean conflict (409/429), no 500
 								//   C3 — per-target row count
 								//   C4 — per-target row identity (present + absent named persons)
 								//   C5 — engine state remains coherent (subsequent /snapshot is consistent)
 								//   C6 — post-op /change on main succeeds (engine state isn't poisoned)
 								//
 								// Cell list (a-k) below. Each cell uses a fresh tempdir + AppState so a
 								// failure in one doesn't leak into the next. Within a cell, ops align at
 								// a tokio::sync::Barrier so both reach the engine close in time, and the
 								// pair is wrapped in tokio::time::timeout(15s) so a deadlock surfaces
 								// as a clean panic.
 								//
 								// Replaces the three narrow concurrent_branch_* tests below; their
 								// scenarios are folded into cells f, h, i (branch_create_from race),
 								// cell a (merge race with C4 identity assertions), and cell d
 								// (concurrent change-during-merge).
 								// ─────────────────────────────────────────────────────────────────────────
 								mod matrix {
 								    use super::*;
 								    use std::time::Duration;
 								    use tokio::sync::Barrier;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								    #[derive(Debug)]
 								    pub(super) struct OpStatus {
 								        pub status: StatusCode,
 								        pub body: Vec<u8>,
 								    }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								    pub(super) struct Harness {
 								        pub _temp: tempfile::TempDir,
 								        pub app: Router,
 								    }
 								    impl Harness {
 								        pub async fn new() -> Self {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            let temp = init_loaded_graph().await;
 								            let graph = graph_path(temp.path());
-												tests: matrix harness uses with_defaults() workload, not from_env()

Round 4 CI failure: Test Workspace and server-aws both red on
`concurrent_branch_ops_morphological_matrix` cell b
("merge × merge: same-target-distinct-sources") — second merge
returned 429 instead of 200. The matrix passes locally.

Root cause: cargo test runs tests in parallel by default. The admission
test `ingest_per_actor_admission_cap_returns_429` is wrapped with
`#[serial]` and an EnvGuard that sets
`OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1` for its duration. Process-wide
env vars are visible to concurrently-running tests; the matrix's
`Harness::new()` called `AppState::open()` which delegates to
`WorkloadController::from_env()`, picking up cap=1 if it ran while
the admission test held the EnvGuard. With cap=1 + 2 concurrent
merges in cell b, one merge waits behind merge_exclusive while the
other is admitted; the waiter holds its admission permit, but a
fresh actor permit is needed when admission is per-actor — the
second merge's permit acquisition fails because the first hasn't
released yet, and 429 fires.

Fix (correct by design, AGENTS.md rule 9): the matrix harness builds
the WorkloadController explicitly via
`WorkloadController::with_defaults()` and passes it to
`AppState::new_with_workload`, the constructor added in commit
22d76db. Closes the bug class "tests pick up another concurrent test's
env override at construction time" — the matrix is now insulated from
any env-var manipulation in the rest of the test suite.

Verified locally: with `OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1` set in the
environment, the matrix passes (it ignores env entirely now).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:19:42 +02:00
+								            // Build the WorkloadController explicitly with defaults rather
 								            // than letting `AppState::open` call
 								            // `WorkloadController::from_env()`. The admission-gate test
 								            // (`ingest_per_actor_admission_cap_returns_429`) sets
 								            // OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1 inside an EnvGuard while
 								            // it runs. Process-wide env vars are visible to
 								            // concurrently-running tests; if a matrix cell reads env at
 								            // AppState construction time during that window it picks up
 								            // cap=1 and the second concurrent merge in cell b surfaces
 								            // 429 instead of the expected 200. Constructing the
 								            // controller here with explicit defaults makes cells
 								            // independent of any env mutation other tests perform.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
 								            let workload = omnigraph_server::workload::WorkloadController::with_defaults();
-												tests: matrix harness uses with_defaults() workload, not from_env()

Round 4 CI failure: Test Workspace and server-aws both red on
`concurrent_branch_ops_morphological_matrix` cell b
("merge × merge: same-target-distinct-sources") — second merge
returned 429 instead of 200. The matrix passes locally.

Root cause: cargo test runs tests in parallel by default. The admission
test `ingest_per_actor_admission_cap_returns_429` is wrapped with
`#[serial]` and an EnvGuard that sets
`OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1` for its duration. Process-wide
env vars are visible to concurrently-running tests; the matrix's
`Harness::new()` called `AppState::open()` which delegates to
`WorkloadController::from_env()`, picking up cap=1 if it ran while
the admission test held the EnvGuard. With cap=1 + 2 concurrent
merges in cell b, one merge waits behind merge_exclusive while the
other is admitted; the waiter holds its admission permit, but a
fresh actor permit is needed when admission is per-actor — the
second merge's permit acquisition fails because the first hasn't
released yet, and 429 fires.

Fix (correct by design, AGENTS.md rule 9): the matrix harness builds
the WorkloadController explicitly via
`WorkloadController::with_defaults()` and passes it to
`AppState::new_with_workload`, the constructor added in commit
22d76db. Closes the bug class "tests pick up another concurrent test's
env override at construction time" — the matrix is now insulated from
any env-var manipulation in the rest of the test suite.

Verified locally: with `OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1` set in the
environment, the matrix passes (it ignores env entirely now).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:19:42 +02:00
+								            let state = AppState::new_with_workload(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                graph.to_string_lossy().to_string(),
-												tests: matrix harness uses with_defaults() workload, not from_env()

Round 4 CI failure: Test Workspace and server-aws both red on
`concurrent_branch_ops_morphological_matrix` cell b
("merge × merge: same-target-distinct-sources") — second merge
returned 429 instead of 200. The matrix passes locally.

Root cause: cargo test runs tests in parallel by default. The admission
test `ingest_per_actor_admission_cap_returns_429` is wrapped with
`#[serial]` and an EnvGuard that sets
`OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1` for its duration. Process-wide
env vars are visible to concurrently-running tests; the matrix's
`Harness::new()` called `AppState::open()` which delegates to
`WorkloadController::from_env()`, picking up cap=1 if it ran while
the admission test held the EnvGuard. With cap=1 + 2 concurrent
merges in cell b, one merge waits behind merge_exclusive while the
other is admitted; the waiter holds its admission permit, but a
fresh actor permit is needed when admission is per-actor — the
second merge's permit acquisition fails because the first hasn't
released yet, and 429 fires.

Fix (correct by design, AGENTS.md rule 9): the matrix harness builds
the WorkloadController explicitly via
`WorkloadController::with_defaults()` and passes it to
`AppState::new_with_workload`, the constructor added in commit
22d76db. Closes the bug class "tests pick up another concurrent test's
env override at construction time" — the matrix is now insulated from
any env-var manipulation in the rest of the test suite.

Verified locally: with `OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1` set in the
environment, the matrix passes (it ignores env entirely now).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:19:42 +02:00
+								                db,
 								                Vec::new(),
 								                workload,
 								            );
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            let app = build_app(state);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            Self { _temp: temp, app }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        }
 								        pub async fn create_branch(&self, from: &str, name: &str) {
 								            let body = serde_json::to_vec(&BranchCreateRequest {
 								                from: Some(from.to_string()),
 								                name: name.to_string(),
 								            })
 								            .unwrap();
 								            let r = self
 								                .app
 								                .clone()
 								                .oneshot(
 								                    Request::builder()
 								                        .uri("/branches")
 								                        .method(Method::POST)
 								                        .header("content-type", "application/json")
 								                        .body(Body::from(body))
 								                        .unwrap(),
 								                )
 								                .await
 								                .unwrap();
 								            assert_eq!(
 								                r.status(),
 								                StatusCode::OK,
 								                "setup create_branch {} from {} failed",
 								                name,
 								                from
 								            );
 								        }
 								        pub async fn insert_person(&self, branch: &str, name: &str, age: i32) {
 								            let body = serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                query: MUTATION_QUERIES.to_string(),
 								                name: Some("insert_person".to_string()),
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								                params: Some(json!({ "name": name, "age": age })),
 								                branch: Some(branch.to_string()),
 								            })
 								            .unwrap();
 								            let r = self
 								                .app
 								                .clone()
 								                .oneshot(
 								                    Request::builder()
 								                        .uri("/change")
 								                        .method(Method::POST)
 								                        .header("content-type", "application/json")
 								                        .body(Body::from(body))
 								                        .unwrap(),
 								                )
 								                .await
 								                .unwrap();
 								            assert_eq!(
 								                r.status(),
 								                StatusCode::OK,
 								                "setup insert {} on {} failed",
 								                name,
 								                branch
 								            );
 								        }
 								        /// Run two ops concurrently with barrier alignment + 15s deadlock
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        /// timeout. Returns `(op_a, op_b)`. Panics on timeout.
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        pub async fn run_pair(
 								            &self,
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								            op_a: impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus>,
 								            op_b: impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus>,
 								        ) -> (OpStatus, OpStatus) {
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            let barrier = Arc::new(Barrier::new(2));
 								            let h_a = op_a(self.app.clone(), Arc::clone(&barrier));
 								            let h_b = op_b(self.app.clone(), Arc::clone(&barrier));
 								            let result = tokio::time::timeout(Duration::from_secs(15), async {
 								                let a = h_a.await.unwrap();
 								                let b = h_b.await.unwrap();
 								                (a, b)
 								            })
 								            .await;
 								            result.expect("concurrent op pair deadlocked (>15s)")
 								        }
 								        pub async fn person_count(&self, branch: &str) -> u64 {
 								            let r = self
 								                .app
 								                .clone()
 								                .oneshot(
 								                    Request::builder()
 								                        .uri(format!("/snapshot?branch={}", branch))
 								                        .method(Method::GET)
 								                        .body(Body::empty())
 								                        .unwrap(),
 								                )
 								                .await
 								                .unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            assert_eq!(r.status(), StatusCode::OK, "snapshot {} failed", branch);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
 								            let v: Value = serde_json::from_slice(&body).unwrap();
 								            v["tables"]
 								                .as_array()
 								                .and_then(|tables| {
 								                    tables
 								                        .iter()
 								                        .find(|t| t["table_key"].as_str() == Some("node:Person"))
 								                })
 								                .and_then(|t| t["row_count"].as_u64())
 								                .unwrap_or_else(|| panic!("snapshot {} missing node:Person", branch))
 								        }
 								        /// True iff the named Person exists on `branch`. Uses the
 								        /// `get_person` query from `test.gq` for identity rather than
 								        /// just count.
 								        pub async fn person_exists(&self, branch: &str, name: &str) -> bool {
 								            let body = serde_json::to_vec(&ReadRequest {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                query_source: include_str!("../../omnigraph/tests/fixtures/test.gq").to_string(),
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								                query_name: Some("get_person".to_string()),
 								                params: Some(json!({ "name": name })),
 								                branch: Some(branch.to_string()),
 								                snapshot: None,
 								            })
 								            .unwrap();
 								            let r = self
 								                .app
 								                .clone()
 								                .oneshot(
 								                    Request::builder()
 								                        .uri("/read")
 								                        .method(Method::POST)
 								                        .header("content-type", "application/json")
 								                        .body(Body::from(body))
 								                        .unwrap(),
 								                )
 								                .await
 								                .unwrap();
 								            assert_eq!(
 								                r.status(),
 								                StatusCode::OK,
 								                "person_exists query for {} on {} failed",
 								                name,
 								                branch
 								            );
 								            let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
 								            let v: Value = serde_json::from_slice(&body).unwrap();
 								            v["row_count"].as_u64().unwrap_or(0) > 0
 								        }
 								        /// Asserts each name in `present` exists on `branch` and each in
 								        /// `absent` does not. Identity-grade check that catches symmetric
 								        /// swap races a row-count assertion would miss.
 								        pub async fn assert_persons(
 								            &self,
 								            branch: &str,
 								            cell: &str,
 								            present: &[&str],
 								            absent: &[&str],
 								        ) {
 								            for name in present {
 								                assert!(
 								                    self.person_exists(branch, name).await,
 								                    "[{}] expected {} to be present on {}",
 								                    cell,
 								                    name,
 								                    branch
 								                );
 								            }
 								            for name in absent {
 								                assert!(
 								                    !self.person_exists(branch, name).await,
 								                    "[{}] expected {} to be absent from {}",
 								                    cell,
 								                    name,
 								                    branch
 								                );
 								            }
 								        }
 								        /// C6: insert a uniquely-named sentinel on main and verify it
 								        /// landed. Catches engine-state poisoning where a cell's
 								        /// concurrent ops left the engine half-broken — subsequent
 								        /// /change either deadlocks or returns a non-200.
 								        pub async fn assert_post_op_sentinel(&self, cell: &str, sentinel: &str) {
 								            let body = serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                query: MUTATION_QUERIES.to_string(),
 								                name: Some("insert_person".to_string()),
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								                params: Some(json!({ "name": sentinel, "age": 99 })),
 								                branch: Some("main".to_string()),
 								            })
 								            .unwrap();
 								            let r = self
 								                .app
 								                .clone()
 								                .oneshot(
 								                    Request::builder()
 								                        .uri("/change")
 								                        .method(Method::POST)
 								                        .header("content-type", "application/json")
 								                        .body(Body::from(body))
 								                        .unwrap(),
 								                )
 								                .await
 								                .unwrap();
 								            assert_eq!(
 								                r.status(),
 								                StatusCode::OK,
 								                "[{}] post-op sentinel /change on main failed (engine poisoned?)",
 								                cell
 								            );
 								            assert!(
 								                self.person_exists("main", sentinel).await,
 								                "[{}] sentinel {} did not land on main",
 								                cell,
 								                sentinel
 								            );
 								        }
 								    }
 								    // Helpers that build the closures for `run_pair`. Each takes a
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								    // Router + Barrier and returns a JoinHandle yielding the status/body.
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
 								    pub(super) fn op_merge(
 								        source: String,
 								        target: String,
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        move |app: Router, barrier: Arc<Barrier>| {
 								            tokio::spawn(async move {
 								                barrier.wait().await;
 								                let body = serde_json::to_vec(&BranchMergeRequest {
 								                    source,
 								                    target: Some(target),
 								                })
 								                .unwrap();
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                let response = app
 								                    .oneshot(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                        Request::builder()
 								                            .uri("/branches/merge")
 								                            .method(Method::POST)
 								                            .header("content-type", "application/json")
 								                            .body(Body::from(body))
 								                            .unwrap(),
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                    )
 								                    .await
 								                    .unwrap();
 								                let status = response.status();
 								                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								                OpStatus {
 								                    status,
 								                    body: body.to_vec(),
 								                }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            })
 								        }
 								    }
 								    pub(super) fn op_change_insert(
 								        branch: String,
 								        name: String,
 								        age: i32,
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        move |app: Router, barrier: Arc<Barrier>| {
 								            tokio::spawn(async move {
 								                barrier.wait().await;
 								                let body = serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                    query: MUTATION_QUERIES.to_string(),
 								                    name: Some("insert_person".to_string()),
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								                    params: Some(json!({ "name": name, "age": age })),
 								                    branch: Some(branch),
 								                })
 								                .unwrap();
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                let response = app
 								                    .oneshot(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                        Request::builder()
 								                            .uri("/change")
 								                            .method(Method::POST)
 								                            .header("content-type", "application/json")
 								                            .body(Body::from(body))
 								                            .unwrap(),
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                    )
 								                    .await
 								                    .unwrap();
 								                let status = response.status();
 								                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								                OpStatus {
 								                    status,
 								                    body: body.to_vec(),
 								                }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            })
 								        }
 								    }
 								    pub(super) fn op_branch_create(
 								        from: String,
 								        name: String,
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        move |app: Router, barrier: Arc<Barrier>| {
 								            tokio::spawn(async move {
 								                barrier.wait().await;
 								                let body = serde_json::to_vec(&BranchCreateRequest {
 								                    from: Some(from),
 								                    name,
 								                })
 								                .unwrap();
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                let response = app
 								                    .oneshot(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                        Request::builder()
 								                            .uri("/branches")
 								                            .method(Method::POST)
 								                            .header("content-type", "application/json")
 								                            .body(Body::from(body))
 								                            .unwrap(),
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                    )
 								                    .await
 								                    .unwrap();
 								                let status = response.status();
 								                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								                OpStatus {
 								                    status,
 								                    body: body.to_vec(),
 								                }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            })
 								        }
 								    }
 								    pub(super) fn op_branch_delete(
 								        name: String,
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								    ) -> impl FnOnce(Router, Arc<Barrier>) -> tokio::task::JoinHandle<OpStatus> {
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        move |app: Router, barrier: Arc<Barrier>| {
 								            tokio::spawn(async move {
 								                barrier.wait().await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                let response = app
 								                    .oneshot(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                        Request::builder()
 								                            .uri(format!("/branches/{}", name))
 								                            .method(Method::DELETE)
 								                            .body(Body::empty())
 								                            .unwrap(),
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								                    )
 								                    .await
 								                    .unwrap();
 								                let status = response.status();
 								                let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								                OpStatus {
 								                    status,
 								                    body: body.to_vec(),
 								                }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            })
 								        }
 								    }
 								}
 								#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 								async fn concurrent_branch_ops_morphological_matrix() {
 								    // Cell a: Merge × Merge, distinct targets.
 								    // Pre-fix on b09a097/22d76db: branch_merge_impl's swap-restore race
 								    // landed feature_a's content in target_b instead of target_a (and
 								    // vice versa — symmetric swap). Identity asserts catch both
 								    // asymmetric and symmetric variants.
 								    {
 								        let cell = "a:merge×merge:distinct-targets";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "feature-a-cella").await;
 								        h.insert_person("feature-a-cella", "EveA-cella", 22).await;
 								        h.create_branch("main", "feature-b-cella").await;
 								        h.insert_person("feature-b-cella", "FrankB-cella", 33).await;
 								        h.create_branch("main", "target-a-cella").await;
 								        h.create_branch("main", "target-b-cella").await;
 								        let (sa, sb) = h
 								            .run_pair(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                matrix::op_merge("feature-a-cella".to_string(), "target-a-cella".to_string()),
 								                matrix::op_merge("feature-b-cella".to_string(), "target-b-cella".to_string()),
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] merge a", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] merge b", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_persons("target-a-cella", cell, &["EveA-cella"], &["FrankB-cella"])
 								            .await;
 								        h.assert_persons("target-b-cella", cell, &["FrankB-cella"], &["EveA-cella"])
 								            .await;
 								        h.assert_post_op_sentinel(cell, "sentinel-cella").await;
 								    }
 								    // Cell b: Merge × Merge, same target / distinct sources.
 								    // Both want to land in main. merge_exclusive serializes; both should
 								    // succeed and main should contain BOTH sources' contributions.
 								    {
 								        let cell = "b:merge×merge:same-target-distinct-sources";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "src-x-cellb").await;
 								        h.insert_person("src-x-cellb", "Xavier-cellb", 41).await;
 								        h.create_branch("main", "src-y-cellb").await;
 								        h.insert_person("src-y-cellb", "Yvonne-cellb", 42).await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_merge("src-x-cellb".to_string(), "main".to_string()),
 								                matrix::op_merge("src-y-cellb".to_string(), "main".to_string()),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] merge x", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] merge y", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_persons("main", cell, &["Xavier-cellb", "Yvonne-cellb"], &[])
 								            .await;
 								        h.assert_post_op_sentinel(cell, "sentinel-cellb").await;
 								    }
 								    // Cell c: Merge × Merge, same source / distinct targets (fanout).
 								    // One source merged into two targets simultaneously. merge_exclusive
 								    // serializes; both targets should reflect the source's content.
 								    {
 								        let cell = "c:merge×merge:same-source-distinct-targets";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "src-shared-cellc").await;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        h.insert_person("src-shared-cellc", "Sharon-cellc", 50)
 								            .await;
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.create_branch("main", "tgt-1-cellc").await;
 								        h.create_branch("main", "tgt-2-cellc").await;
 								        let (sa, sb) = h
 								            .run_pair(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                matrix::op_merge("src-shared-cellc".to_string(), "tgt-1-cellc".to_string()),
 								                matrix::op_merge("src-shared-cellc".to_string(), "tgt-2-cellc".to_string()),
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] merge into tgt-1", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] merge into tgt-2", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_persons("tgt-1-cellc", cell, &["Sharon-cellc"], &[])
 								            .await;
 								        h.assert_persons("tgt-2-cellc", cell, &["Sharon-cellc"], &[])
 								            .await;
 								        h.assert_post_op_sentinel(cell, "sentinel-cellc").await;
 								    }
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								    // Cell d: Merge × Change, both touching main. C2 permits both
 								    // succeed, or exactly one clean 409 if the merge detects target
 								    // movement after planning but before acquiring the queue.
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								    {
 								        let cell = "d:merge×change:into-target";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "feature-celld").await;
 								        h.insert_person("feature-celld", "EveD-celld", 22).await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_merge("feature-celld".to_string(), "main".to_string()),
 								                matrix::op_change_insert("main".to_string(), "FrankD-celld".to_string(), 33),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
 								        assert!(
 								            sa.status == StatusCode::OK || sa.status == StatusCode::CONFLICT,
 								            "[{}] merge must be 200 or clean 409, got {}",
 								            cell,
 								            sa.status
 								        );
 								        if sa.status == StatusCode::OK {
 								            h.assert_persons("main", cell, &["EveD-celld", "FrankD-celld"], &[])
 								                .await;
 								        } else {
 								            let error: ErrorOutput = serde_json::from_slice(&sa.body).unwrap();
 								            let conflict = error
 								                .manifest_conflict
 								                .expect("merge 409 must include manifest_conflict");
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            assert_eq!(
 								                conflict.table_key, "node:Person",
 								                "[{}] conflict table",
 								                cell
 								            );
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								            h.assert_persons("main", cell, &["FrankD-celld"], &["EveD-celld"])
 								                .await;
 								        }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_post_op_sentinel(cell, "sentinel-celld").await;
 								    }
 								    // Cell e: Merge × BranchCreateFrom-target. Concurrent fork off the
 								    // merge target while the merge runs. Both should succeed; the new
 								    // branch should have a coherent view (either pre- or post-merge,
 								    // both valid). After both, target = main has the merged content.
 								    {
 								        let cell = "e:merge×branch_create_from:target";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "src-celle").await;
 								        h.insert_person("src-celle", "Eve-celle", 22).await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_merge("src-celle".to_string(), "main".to_string()),
 								                matrix::op_branch_create("main".to_string(), "fork-celle".to_string()),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] merge", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] branch_create_from", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        // Main definitely has Eve.
 								        h.assert_persons("main", cell, &["Eve-celle"], &[]).await;
 								        // fork-celle was forked off main at SOME version; main's current
 								        // count is 5 (4 seeded + Eve). fork-celle has either 4 (pre-merge
 								        // snapshot) or 5 (post-merge snapshot); both are valid timings.
 								        let fork_count = h.person_count("fork-celle").await;
 								        assert!(
 								            fork_count == 4 || fork_count == 5,
 								            "[{}] fork-celle row count must be pre- or post-merge view (4 or 5), got {}",
 								            cell,
 								            fork_count
 								        );
 								        h.assert_post_op_sentinel(cell, "sentinel-celle").await;
 								    }
 								    // Cell f: BranchCreateFrom × BranchCreateFrom, distinct parents.
 								    // Pre-fix on f925ad1: swap-restore race in branch_create_from_impl
 								    // forked the new branch off the wrong parent. Identity asserts pin
 								    // that fork-from-A inherits A's content, fork-from-B inherits B's.
 								    {
 								        let cell = "f:branch_create_from×branch_create_from:distinct-parents";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "alpha-cellf").await;
 								        h.insert_person("alpha-cellf", "Eve-cellf", 22).await;
 								        h.create_branch("main", "beta-cellf").await;
 								        let (sa, sb) = h
 								            .run_pair(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								                matrix::op_branch_create("alpha-cellf".to_string(), "gamma-cellf".to_string()),
 								                matrix::op_branch_create("beta-cellf".to_string(), "delta-cellf".to_string()),
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] gamma create", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] delta create", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        // gamma forks off alpha → must contain Eve.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        h.assert_persons("gamma-cellf", cell, &["Eve-cellf"], &[])
 								            .await;
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        // delta forks off beta → must NOT contain Eve.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        h.assert_persons("delta-cellf", cell, &[], &["Eve-cellf"])
 								            .await;
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_post_op_sentinel(cell, "sentinel-cellf").await;
 								    }
 								    // Cell g: BranchCreateFrom × BranchDelete, unrelated branches.
 								    // Disjoint branches; both should complete cleanly without
 								    // interference.
 								    {
 								        let cell = "g:branch_create_from×branch_delete:unrelated";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "doomed-cellg").await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_branch_create("main".to_string(), "newborn-cellg".to_string()),
 								                matrix::op_branch_delete("doomed-cellg".to_string()),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] create newborn", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] delete doomed", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        // newborn-cellg exists with main's content.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        h.assert_persons("newborn-cellg", cell, &["Alice"], &[])
 								            .await;
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_post_op_sentinel(cell, "sentinel-cellg").await;
 								    }
 								    // Cell h: BranchDelete × BranchDelete, distinct branches. Both call
 								    // refresh() internally; verify no deadlock and both deletes land.
 								    {
 								        let cell = "h:branch_delete×branch_delete:distinct";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "doomed1-cellh").await;
 								        h.create_branch("main", "doomed2-cellh").await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_branch_delete("doomed1-cellh".to_string()),
 								                matrix::op_branch_delete("doomed2-cellh".to_string()),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] delete 1", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] delete 2", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        // Verify both gone via /branches list (snapshot would still work
 								        // for a deleted branch via parent fallback in some paths, so we
 								        // use the explicit list).
 								        let r = h
 								            .app
 								            .clone()
 								            .oneshot(
 								                Request::builder()
 								                    .uri("/branches")
 								                    .method(Method::GET)
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(r.status(), StatusCode::OK);
 								        let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
 								        let list_body: Value = serde_json::from_slice(&body).unwrap();
 								        let branches: Vec<&str> = list_body["branches"]
 								            .as_array()
 								            .unwrap()
 								            .iter()
 								            .filter_map(|v| v.as_str())
 								            .collect();
 								        assert!(
 								            !branches.contains(&"doomed1-cellh"),
 								            "[{}] doomed1 still in branch list: {:?}",
 								            cell,
 								            branches
 								        );
 								        assert!(
 								            !branches.contains(&"doomed2-cellh"),
 								            "[{}] doomed2 still in branch list: {:?}",
 								            cell,
 								            branches
 								        );
 								        h.assert_post_op_sentinel(cell, "sentinel-cellh").await;
 								    }
 								    // Cell i: BranchDelete × Change, on a different branch. Delete one
 								    // branch while a /change runs on main. Both should succeed.
 								    {
 								        let cell = "i:branch_delete×change:distinct-branch";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "doomed-celli").await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_branch_delete("doomed-celli".to_string()),
 								                matrix::op_change_insert("main".to_string(), "Pat-celli".to_string(), 44),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] delete", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_persons("main", cell, &["Pat-celli"], &[]).await;
 								        h.assert_post_op_sentinel(cell, "sentinel-celli").await;
 								    }
 								    // Cell j: BranchCreateFrom × Change, both on main. The fork timing
 								    // determines whether the new branch sees the change (pre or post).
 								    // Both valid. Main must contain the inserted row.
 								    {
 								        let cell = "j:branch_create_from×change:on-source";
 								        let h = matrix::Harness::new().await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_branch_create("main".to_string(), "twin-cellj".to_string()),
 								                matrix::op_change_insert("main".to_string(), "Quincy-cellj".to_string(), 55),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sa.status, StatusCode::OK, "[{}] branch_create", cell);
 								        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        h.assert_persons("main", cell, &["Quincy-cellj"], &[]).await;
 								        // twin-cellj has either pre-change view (no Quincy) or
 								        // post-change view (with Quincy); either is valid.
 								        let twin_has_quincy = h.person_exists("twin-cellj", "Quincy-cellj").await;
 								        let _ = twin_has_quincy; // either valid timing — just ensure no panic
 								        h.assert_post_op_sentinel(cell, "sentinel-cellj").await;
 								    }
 								    // Cell k: reopen consistency. Run a representative concurrent pair,
 								    // drop the engine, reopen on a separate handle, verify state matches.
 								    {
 								        let cell = "k:reopen-after-pair";
 								        let h = matrix::Harness::new().await;
 								        h.create_branch("main", "src-cellk").await;
 								        h.insert_person("src-cellk", "Rita-cellk", 36).await;
 								        let (sa, sb) = h
 								            .run_pair(
 								                matrix::op_merge("src-cellk".to_string(), "main".to_string()),
 								                matrix::op_change_insert("main".to_string(), "Steve-cellk".to_string(), 37),
 								            )
 								            .await;
-												engine: branch-merge revalidates target snapshot under queue

											
										
										
											2026-05-09 20:16:12 +00:00
+								        assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell);
-												tests: matrix cell k asserts post-reopen row count

											
										
										
											2026-05-09 20:16:44 +00:00
+								        assert!(
 								            sa.status == StatusCode::OK || sa.status == StatusCode::CONFLICT,
 								            "[{}] merge must be 200 or clean 409, got {}",
 								            cell,
 								            sa.status
 								        );
 								        if sa.status == StatusCode::OK {
 								            h.assert_persons("main", cell, &["Rita-cellk", "Steve-cellk"], &[])
 								                .await;
 								        } else {
 								            let error: ErrorOutput = serde_json::from_slice(&sa.body).unwrap();
 								            let conflict = error
 								                .manifest_conflict
 								                .expect("merge 409 must include manifest_conflict");
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            assert_eq!(
 								                conflict.table_key, "node:Person",
 								                "[{}] conflict table",
 								                cell
 								            );
-												tests: matrix cell k asserts post-reopen row count

											
										
										
											2026-05-09 20:16:44 +00:00
+								            h.assert_persons("main", cell, &["Steve-cellk"], &["Rita-cellk"])
 								                .await;
 								        }
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        // Reopen via a fresh AppState on the same graph.
 								        let graph_uri = format!("{}/server.omni", h._temp.path().display());
 								        let reopened = AppState::open(graph_uri.clone()).await.unwrap();
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								        let app2 = build_app(reopened);
 								        // Sanity: the same identity check via the new app must see
 								        // Rita and Steve.
 								        let r = app2
 								            .clone()
 								            .oneshot(
 								                Request::builder()
 								                    .uri("/snapshot?branch=main")
 								                    .method(Method::GET)
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(r.status(), StatusCode::OK, "[{}] reopen snapshot", cell);
-												tests: matrix cell k asserts post-reopen row count

											
										
										
											2026-05-09 20:16:44 +00:00
+								        let body = to_bytes(r.into_body(), usize::MAX).await.unwrap();
 								        let v: Value = serde_json::from_slice(&body).unwrap();
 								        let person_rows = v["tables"]
 								            .as_array()
 								            .and_then(|tables| {
 								                tables
 								                    .iter()
 								                    .find(|t| t["table_key"].as_str() == Some("node:Person"))
 								            })
 								            .and_then(|t| t["row_count"].as_u64())
 								            .expect("reopen snapshot must include node:Person row_count");
 								        let expected_rows = if sa.status == StatusCode::OK { 6 } else { 5 };
 								        assert_eq!(
 								            person_rows, expected_rows,
 								            "[{}] reopened main should include seed (4) + committed concurrent writes",
 								            cell,
 								        );
-												tests: branch-ops morphological matrix (T1)

Replaces three narrow concurrent_branch_* tests (folded in below) with
one parameterized matrix test covering 11 representative
(op_a, op_b, target_overlap) cells, asserting C1-C6 uniformly:

  C1 — both complete (no deadlock; tokio::time::timeout(15s))
  C2 — status: both 200 or exactly one clean conflict; never 500
  C3 — per-target row count
  C4 — per-target row identity (named persons present + absent — catches
       the symmetric-swap class that count assertions miss; cubic P2 on
       commit 64f2b99 flagged this gap on the round-3 merge race test)
  C5 — engine state coherent (subsequent /snapshot consistent)
  C6 — post-op /change on main succeeds (engine isn't poisoned)

Cells:

  a. Merge × Merge, distinct targets    — branch_merge_impl race pin
  b. Merge × Merge, same target / distinct sources    — merge_exclusive serialization
  c. Merge × Merge, same source / distinct targets    — fanout
  d. Merge × Change, into target    — per-(table, branch) queue
  e. Merge × BranchCreateFrom, target — interaction with refresh path
  f. BranchCreateFrom × BranchCreateFrom, distinct parents — round-1 race pin
  g. BranchCreateFrom × BranchDelete, unrelated branches — disjoint state
  h. BranchDelete × BranchDelete, distinct branches — concurrent refresh
  i. BranchDelete × Change, distinct branch — refresh-side vs writer
  j. BranchCreateFrom × Change, on source — fork-while-writing
  k. Reopen consistency after concurrent pair — disk-vs-cache drift

Each cell:
- spins up its own tempdir + AppState so failures don't cascade,
- aligns the pair at a tokio::sync::Barrier so both reach the engine
  close in time,
- wraps in a 15s deadlock timeout,
- asserts identity via a /read with the `get_person` fixture query
  (specific names must be present on the right branch and absent from
  the wrong one).

Subsumes:
- concurrent_branch_create_from_distinct_parents_does_not_corrupt_coordinator
  (now cell f, with identity assertions added)
- concurrent_branch_merges_distinct_targets_do_not_swap_into_each_other
  (now cells a + b + c, with identity assertions; the symmetric-swap
  blind spot cubic flagged on commit 64f2b99 is closed)
- concurrent_change_during_branch_merge_preserves_writes
  (now cell d)

Those three narrow tests are removed in the next commit so this lands
green standalone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:07:37 +02:00
+								    }
 								}
-												tests: pin disjoint /change concurrency at HTTP level

Closes the cubic acceptance-criteria gap (❌ "Integration test: two
/change requests targeting different (table_key, branch) execute
concurrently end-to-end"). The bench harness measures the throughput
side; this test is the regression sentinel that catches a future
change which accidentally re-introduces graph-wide serialization on
the disjoint path.

Spawns 4 concurrent /change inserts on node:Person and 4 on
node:Company. All 8 must return 200, and the post-test row counts
on each table must reflect every insert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 17:01:52 +02:00
+								#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 								async fn change_disjoint_table_concurrency_succeeds_at_http_level() {
 								    // HTTP-level pin for MR-686's disjoint-table promise: concurrent /change
 								    // requests touching different node types must coexist without admission
 								    // rejection or publisher-CAS conflict. The bench harness measures
 								    // throughput; this test is the regression sentinel that catches a
 								    // future change which accidentally re-introduces graph-wide
 								    // serialization on the disjoint path.
 								    //
 								    // Setup: test.jsonl seeds 4 Persons + 2 Companies. Spawn N=4 concurrent
 								    // /change inserts on `node:Person` and N=4 concurrent inserts on
 								    // `node:Company`. All 8 must return 200, and the post-test row counts
 								    // must reflect every insert.
 								    const PERSON_QUERY: &str = r#"
 								query insert_p($name: String, $age: I32) {
 								    insert Person { name: $name, age: $age }
 								}
 								"#;
 								    const COMPANY_QUERY: &str = r#"
 								query insert_c($name: String) {
 								    insert Company { name: $name }
 								}
 								"#;
 								    const SEED_PERSONS: u64 = 4;
 								    const SEED_COMPANIES: u64 = 2;
 								    const PER_TYPE: usize = 4;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let state = AppState::open(graph.to_string_lossy().to_string())
-												tests: pin disjoint /change concurrency at HTTP level

Closes the cubic acceptance-criteria gap (❌ "Integration test: two
/change requests targeting different (table_key, branch) execute
concurrently end-to-end"). The bench harness measures the throughput
side; this test is the regression sentinel that catches a future
change which accidentally re-introduces graph-wide serialization on
the disjoint path.

Spawns 4 concurrent /change inserts on node:Person and 4 on
node:Company. All 8 must return 200, and the post-test row counts
on each table must reflect every insert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 17:01:52 +02:00
+								        .await
 								        .unwrap();
 								    let app = build_app(state);
 								    let mut handles = Vec::with_capacity(PER_TYPE * 2);
 								    for i in 0..PER_TYPE {
 								        let app_p = app.clone();
 								        handles.push(tokio::spawn(async move {
 								            let body = serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                query: PERSON_QUERY.to_string(),
 								                name: Some("insert_p".to_string()),
-												tests: pin disjoint /change concurrency at HTTP level

Closes the cubic acceptance-criteria gap (❌ "Integration test: two
/change requests targeting different (table_key, branch) execute
concurrently end-to-end"). The bench harness measures the throughput
side; this test is the regression sentinel that catches a future
change which accidentally re-introduces graph-wide serialization on
the disjoint path.

Spawns 4 concurrent /change inserts on node:Person and 4 on
node:Company. All 8 must return 200, and the post-test row counts
on each table must reflect every insert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 17:01:52 +02:00
+								                params: Some(json!({ "name": format!("p-{i}"), "age": i as i32 })),
 								                branch: Some("main".to_string()),
 								            })
 								            .unwrap();
 								            let req = Request::builder()
 								                .uri("/change")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(body))
 								                .unwrap();
 								            app_p.oneshot(req).await.unwrap().status()
 								        }));
 								        let app_c = app.clone();
 								        handles.push(tokio::spawn(async move {
 								            let body = serde_json::to_vec(&ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								                query: COMPANY_QUERY.to_string(),
 								                name: Some("insert_c".to_string()),
-												tests: pin disjoint /change concurrency at HTTP level

Closes the cubic acceptance-criteria gap (❌ "Integration test: two
/change requests targeting different (table_key, branch) execute
concurrently end-to-end"). The bench harness measures the throughput
side; this test is the regression sentinel that catches a future
change which accidentally re-introduces graph-wide serialization on
the disjoint path.

Spawns 4 concurrent /change inserts on node:Person and 4 on
node:Company. All 8 must return 200, and the post-test row counts
on each table must reflect every insert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 17:01:52 +02:00
+								                params: Some(json!({ "name": format!("c-{i}") })),
 								                branch: Some("main".to_string()),
 								            })
 								            .unwrap();
 								            let req = Request::builder()
 								                .uri("/change")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(body))
 								                .unwrap();
 								            app_c.oneshot(req).await.unwrap().status()
 								        }));
 								    }
 								    let mut statuses = Vec::with_capacity(PER_TYPE * 2);
 								    for h in handles {
 								        statuses.push(h.await.unwrap());
 								    }
 								    let bad: Vec<_> = statuses
 								        .iter()
 								        .enumerate()
 								        .filter(|(_, s)| **s != StatusCode::OK)
 								        .collect();
 								    assert!(
 								        bad.is_empty(),
 								        "expected every disjoint /change insert to return 200, got non-200 for: {:?}",
 								        bad,
 								    );
 								    // Verify both tables landed every insert.
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot?branch=main")
 								            .method(Method::GET)
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    let lookup_count = |table_key: &str| -> u64 {
 								        body["tables"]
 								            .as_array()
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            .and_then(|tables| {
 								                tables
 								                    .iter()
 								                    .find(|t| t["table_key"].as_str() == Some(table_key))
 								            })
-												tests: pin disjoint /change concurrency at HTTP level

Closes the cubic acceptance-criteria gap (❌ "Integration test: two
/change requests targeting different (table_key, branch) execute
concurrently end-to-end"). The bench harness measures the throughput
side; this test is the regression sentinel that catches a future
change which accidentally re-introduces graph-wide serialization on
the disjoint path.

Spawns 4 concurrent /change inserts on node:Person and 4 on
node:Company. All 8 must return 200, and the post-test row counts
on each table must reflect every insert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 17:01:52 +02:00
+								            .and_then(|t| t["row_count"].as_u64())
 								            .unwrap_or_else(|| panic!("snapshot missing {}", table_key))
 								    };
 								    assert_eq!(
 								        lookup_count("node:Person"),
 								        SEED_PERSONS + PER_TYPE as u64,
 								        "Person row count after concurrent inserts",
 								    );
 								    assert_eq!(
 								        lookup_count("node:Company"),
 								        SEED_COMPANIES + PER_TYPE as u64,
 								        "Company row count after concurrent inserts",
 								    );
 								}
-												tests: pin /ingest admission gate + 429 Retry-After (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix. Currently fails on f925ad1 with 8/8 statuses returning
200 because /ingest does not call WorkloadController::try_admit.

The test pins:
- /ingest is gated on per-actor admission control (returns 429 when
  the cap is exceeded).
- 429 responses carry the structured `code: too_many_requests` error
  body so clients can distinguish them from generic conflicts.
- 429 responses include a `Retry-After` header so clients can implement
  bounded backoff. The doc claim at api.rs:343 and lib.rs:344 was that
  this header exists; the IntoResponse impl currently emits no headers.

Two follow-up commits will turn this green:
1. Wire WorkloadController::try_admit on /ingest and the four other
   mutating handlers (Block 2.1).
2. Emit the Retry-After header on 429/503 responses (Block 2.2).

The test uses #[serial] + EnvGuard to override
OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1 without racing parallel tests, then
spawns 8 concurrent /ingest tasks aligned at a tokio::sync::Barrier so
multiple tasks reach try_admit close in time. With cap=1, at least one
must be rejected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:57:01 +02:00
+								#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 								async fn ingest_per_actor_admission_cap_returns_429() {
 								    // Pin the admission gate on `/ingest`. With per-actor in-flight cap of 1
 								    // and 8 concurrent requests from the same actor, at least one request
 								    // must be rejected with HTTP 429 and `code: too_many_requests`.
 								    //
 								    // Pre-fix bug class: the admission pattern at `server_change`
 								    // (`crates/omnigraph-server/src/lib.rs:932`) was the only handler
 								    // that called `WorkloadController::try_admit`. A heavy actor sending
 								    // bulk-ingest traffic would exhaust shared engine capacity (Lance I/O
 								    // threads, manifest churn) without ever hitting an admission cap.
 								    // Pinned at the HTTP boundary so future refactors that drop the
 								    // try_admit call from a mutating handler turn this red.
 								    //
 								    // Post-fix invariant: `/ingest`, `/branches/create`, `/branches/delete`,
 								    // `/branches/merge`, and `/schema/apply` all gate on
 								    // `state.workload.try_admit(&actor_arc, est_bytes)` after Cedar
 								    // authorization and before the engine call. Cap exhaustion surfaces as
 								    // 429 with `code: too_many_requests`.
-												tests: admission test uses new_with_workload, drops env mutation + #[serial]

Migrates `ingest_per_actor_admission_cap_returns_429` from env-var
override to direct `WorkloadController::new(1, ...)` construction via
`AppState::new_with_workload`. Removes the `EnvGuard` and the
`#[serial]` annotation that paired with it.

Why correct by design (AGENTS.md rule 9): the previous round's matrix
fix (commit 8bd9a5f) shielded the matrix from this test's env
mutation, but the broader bug class — "test A's process-wide env
mutation can leak into any test B that calls
`AppState::open` / `WorkloadController::from_env()`" — was still
reachable by any future test that didn't think to opt out. Closing
the class at the source: this test no longer mutates global state at
all, so no other test needs to defend against it.

Net effect:
- This test no longer needs `#[serial]` (was the only reason it was
  marked) — runs in parallel with the rest of the suite.
- The matrix's defensive `with_defaults()` construction (commit
  8bd9a5f) remains correct but is no longer required for correctness;
  it's now a "belt and suspenders" guard against any FUTURE
  env-mutating test.

Verified locally: both tests pass when run together; full server
suite (44 tests) green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:35:41 +02:00
+								    //
 								    // Construct the WorkloadController directly with cap=1 instead of
 								    // mutating `OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX` via EnvGuard. Process-wide
 								    // env vars are visible to concurrently-running tests; the previous
 								    // `EnvGuard + #[serial]` pair leaked the override into any other test
 								    // that called `AppState::open` during the guard's window
 								    // (matrix CI failure on commit 99b0941). Using the explicit
 								    // `AppState::new_with_workload` constructor closes that bug class —
 								    // this test no longer mutates global state and no longer needs
 								    // `#[serial]`.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												tests: admission test uses new_with_workload, drops env mutation + #[serial]

Migrates `ingest_per_actor_admission_cap_returns_429` from env-var
override to direct `WorkloadController::new(1, ...)` construction via
`AppState::new_with_workload`. Removes the `EnvGuard` and the
`#[serial]` annotation that paired with it.

Why correct by design (AGENTS.md rule 9): the previous round's matrix
fix (commit 8bd9a5f) shielded the matrix from this test's env
mutation, but the broader bug class — "test A's process-wide env
mutation can leak into any test B that calls
`AppState::open` / `WorkloadController::from_env()`" — was still
reachable by any future test that didn't think to opt out. Closing
the class at the source: this test no longer mutates global state at
all, so no other test needs to defend against it.

Net effect:
- This test no longer needs `#[serial]` (was the only reason it was
  marked) — runs in parallel with the rest of the suite.
- The matrix's defensive `with_defaults()` construction (commit
  8bd9a5f) remains correct but is no longer required for correctness;
  it's now a "belt and suspenders" guard against any FUTURE
  env-mutating test.

Verified locally: both tests pass when run together; full server
suite (44 tests) green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:35:41 +02:00
+								    let workload = omnigraph_server::workload::WorkloadController::new(
 ,             // per-actor in-flight cap (the fixture under test)
 _000_000_000, // per-actor byte budget — large so it never bottlenecks
 								    );
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    // MR-723: install a permit-all policy alongside the bearer token so
 								    // /ingest (action=Change) passes Cedar evaluation. The test is
 								    // exercising the admission cap, not policy — the policy is just
 								    // enough to clear the State 3 path so the test reaches workload.
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, permit_all_policy_yaml(&["act-flooder"])).unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let policy_engine =
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
+								        omnigraph_server::PolicyEngine::load_graph(&policy_path, graph.to_string_lossy().as_ref())
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								            .unwrap();
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
+								    let state = AppState::new_single(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												tests: admission test uses new_with_workload, drops env mutation + #[serial]

Migrates `ingest_per_actor_admission_cap_returns_429` from env-var
override to direct `WorkloadController::new(1, ...)` construction via
`AppState::new_with_workload`. Removes the `EnvGuard` and the
`#[serial]` annotation that paired with it.

Why correct by design (AGENTS.md rule 9): the previous round's matrix
fix (commit 8bd9a5f) shielded the matrix from this test's env
mutation, but the broader bug class — "test A's process-wide env
mutation can leak into any test B that calls
`AppState::open` / `WorkloadController::from_env()`" — was still
reachable by any future test that didn't think to opt out. Closing
the class at the source: this test no longer mutates global state at
all, so no other test needs to defend against it.

Net effect:
- This test no longer needs `#[serial]` (was the only reason it was
  marked) — runs in parallel with the rest of the suite.
- The matrix's defensive `with_defaults()` construction (commit
  8bd9a5f) remains correct but is no longer required for correctness;
  it's now a "belt and suspenders" guard against any FUTURE
  env-mutating test.

Verified locally: both tests pass when run together; full server
suite (44 tests) green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:35:41 +02:00
+								        db,
 								        vec![("act-flooder".to_string(), "flooder-token".to_string())],
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
+								        Some(policy_engine),
-												tests: admission test uses new_with_workload, drops env mutation + #[serial]

Migrates `ingest_per_actor_admission_cap_returns_429` from env-var
override to direct `WorkloadController::new(1, ...)` construction via
`AppState::new_with_workload`. Removes the `EnvGuard` and the
`#[serial]` annotation that paired with it.

Why correct by design (AGENTS.md rule 9): the previous round's matrix
fix (commit 8bd9a5f) shielded the matrix from this test's env
mutation, but the broader bug class — "test A's process-wide env
mutation can leak into any test B that calls
`AppState::open` / `WorkloadController::from_env()`" — was still
reachable by any future test that didn't think to opt out. Closing
the class at the source: this test no longer mutates global state at
all, so no other test needs to defend against it.

Net effect:
- This test no longer needs `#[serial]` (was the only reason it was
  marked) — runs in parallel with the rest of the suite.
- The matrix's defensive `with_defaults()` construction (commit
  8bd9a5f) remains correct but is no longer required for correctness;
  it's now a "belt and suspenders" guard against any FUTURE
  env-mutating test.

Verified locally: both tests pass when run together; full server
suite (44 tests) green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:35:41 +02:00
+								        workload,
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
+								    );
-												tests: admission test uses new_with_workload, drops env mutation + #[serial]

Migrates `ingest_per_actor_admission_cap_returns_429` from env-var
override to direct `WorkloadController::new(1, ...)` construction via
`AppState::new_with_workload`. Removes the `EnvGuard` and the
`#[serial]` annotation that paired with it.

Why correct by design (AGENTS.md rule 9): the previous round's matrix
fix (commit 8bd9a5f) shielded the matrix from this test's env
mutation, but the broader bug class — "test A's process-wide env
mutation can leak into any test B that calls
`AppState::open` / `WorkloadController::from_env()`" — was still
reachable by any future test that didn't think to opt out. Closing
the class at the source: this test no longer mutates global state at
all, so no other test needs to defend against it.

Net effect:
- This test no longer needs `#[serial]` (was the only reason it was
  marked) — runs in parallel with the rest of the suite.
- The matrix's defensive `with_defaults()` construction (commit
  8bd9a5f) remains correct but is no longer required for correctness;
  it's now a "belt and suspenders" guard against any FUTURE
  env-mutating test.

Verified locally: both tests pass when run together; full server
suite (44 tests) green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 20:35:41 +02:00
+								    let app = build_app(state);
 								    let _temp = temp;
-												tests: pin /ingest admission gate + 429 Retry-After (red)

Per AGENTS.md rule 8, this commit lands the failing regression test
ahead of the fix. Currently fails on f925ad1 with 8/8 statuses returning
200 because /ingest does not call WorkloadController::try_admit.

The test pins:
- /ingest is gated on per-actor admission control (returns 429 when
  the cap is exceeded).
- 429 responses carry the structured `code: too_many_requests` error
  body so clients can distinguish them from generic conflicts.
- 429 responses include a `Retry-After` header so clients can implement
  bounded backoff. The doc claim at api.rs:343 and lib.rs:344 was that
  this header exists; the IntoResponse impl currently emits no headers.

Two follow-up commits will turn this green:
1. Wire WorkloadController::try_admit on /ingest and the four other
   mutating handlers (Block 2.1).
2. Emit the Retry-After header on 429/503 responses (Block 2.2).

The test uses #[serial] + EnvGuard to override
OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX=1 without racing parallel tests, then
spawns 8 concurrent /ingest tasks aligned at a tokio::sync::Barrier so
multiple tasks reach try_admit close in time. With cap=1, at least one
must be rejected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-08 16:57:01 +02:00
 								    // Eight concurrent ingests, all from act-flooder. Only one fits in a
 								    // cap=1 in-flight semaphore; the others must 429.
 								    const N: usize = 8;
 								    let barrier = Arc::new(tokio::sync::Barrier::new(N));
 								    let mut handles = Vec::with_capacity(N);
 								    for i in 0..N {
 								        let app = app.clone();
 								        let barrier = Arc::clone(&barrier);
 								        handles.push(tokio::spawn(async move {
 								            // Align the 8 tasks at the barrier so they all attempt
 								            // try_admit close in time.
 								            barrier.wait().await;
 								            let body = serde_json::to_vec(&IngestRequest {
 								                data: format!(
 								                    "{{\"type\":\"Person\",\"data\":{{\"name\":\"flooder-{i}\",\"age\":{i}}}}}\n"
 								                ),
 								                branch: Some("main".to_string()),
 								                from: Some("main".to_string()),
 								                mode: Some(omnigraph::loader::LoadMode::Merge),
 								            })
 								            .unwrap();
 								            let req = Request::builder()
 								                .uri("/ingest")
 								                .method(Method::POST)
 								                .header("authorization", "Bearer flooder-token")
 								                .header("content-type", "application/json")
 								                .body(Body::from(body))
 								                .unwrap();
 								            let response = app.oneshot(req).await.unwrap();
 								            let status = response.status();
 								            let headers = response.headers().clone();
 								            let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
 								            (status, headers, body.to_vec())
 								        }));
 								    }
 								    let mut results = Vec::with_capacity(N);
 								    for h in handles {
 								        results.push(h.await.unwrap());
 								    }
 								    let statuses: Vec<StatusCode> = results.iter().map(|(s, _, _)| *s).collect();
 								    let too_many: Vec<usize> = statuses
 								        .iter()
 								        .enumerate()
 								        .filter(|(_, s)| **s == StatusCode::TOO_MANY_REQUESTS)
 								        .map(|(i, _)| i)
 								        .collect();
 								    assert!(
 								        !too_many.is_empty(),
 								        "expected at least one /ingest under cap=1 to return 429; got statuses: {:?}",
 								        statuses,
 								    );
 								    // Validate the structured error body for each 429 (body must carry
 								    // the `too_many_requests` code so clients can distinguish it from
 								    // generic conflicts).
 								    for i in &too_many {
 								        let body_value: Value = serde_json::from_slice(&results[*i].2).unwrap();
 								        let error: ErrorOutput = serde_json::from_value(body_value).unwrap();
 								        assert_eq!(
 								            error.code,
 								            Some(omnigraph_server::api::ErrorCode::TooManyRequests),
 								            "429 body must carry code=too_many_requests; idx {} got {:?}",
 								            i,
 								            error.code,
 								        );
 								    }
 								    // Validate the `Retry-After` header is set on every 429. Pinned by
 								    // the same test so a future refactor that drops the header from
 								    // `IntoResponse for ApiError` turns this red. The constant
 								    // matches `crates/omnigraph-server/src/lib.rs::ApiError::into_response`.
 								    for i in &too_many {
 								        let retry_after = results[*i]
 								            .1
 								            .get(axum::http::header::RETRY_AFTER)
 								            .and_then(|v| v.to_str().ok())
 								            .map(str::to_string);
 								        assert!(
 								            retry_after.is_some(),
 								            "429 response must include a Retry-After header; idx {} headers were: {:?}",
 								            i,
 								            results[*i].1,
 								        );
 								    }
 								}
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
+								/// Regression for B2 (MR-668): when an `AppState` is built with a
 								/// per-graph policy and a custom workload, the engine inside the
 								/// routing's `GraphHandle` MUST have the same policy applied via
 								/// `Omnigraph::with_policy`. Pre-fix, `new_with_workload(...).with_policy_engine(p)`
 								/// installed the policy only on the HTTP-layer `handle.policy`; the
 								/// underlying `Arc<Omnigraph>` was reused without `with_policy`, so any
 								/// caller reaching through `state.routing()` could bypass Cedar.
 								///
 								/// This test reaches the engine the same way an embedded SDK consumer
 								/// or future routing code path would, and asserts the policy still
 								/// fires. The deny path is "act-blocked has a valid bearer but isn't in
 								/// the policy's allowed group" — i.e., authenticated-but-unauthorised.
 								#[tokio::test(flavor = "multi_thread")]
 								async fn engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single() {
 								    use omnigraph_server::GraphRouting;
 								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
 								    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
 								    // Permit `act-allowed` for change actions; `act-blocked` is not in
 								    // any allowed group — every change request from them must deny.
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, permit_all_policy_yaml(&["act-allowed"])).unwrap();
 								    let policy_engine =
 								        omnigraph_server::PolicyEngine::load_graph(&policy_path, graph.to_string_lossy().as_ref())
 								            .unwrap();
 								    let workload = omnigraph_server::workload::WorkloadController::new(100, 1_000_000_000);
 								    let state = AppState::new_single(
 								        graph.to_string_lossy().to_string(),
 								        db,
 								        vec![("act-blocked".to_string(), "block-token".to_string())],
 								        Some(policy_engine),
 								        workload,
 								    );
 								    // Reach into the routing and pull the engine the same way an
 								    // embedded consumer holding `Arc<Omnigraph>` would. If `new_single`
 								    // failed to apply `with_policy` to the engine, this `mutate_as`
 								    // would succeed — the HTTP-layer is bypassed entirely.
 								    let handle = match state.routing() {
 								        GraphRouting::Single { handle } => Arc::clone(handle),
 								        GraphRouting::Multi { .. } => panic!("expected single-mode routing"),
 								    };
 								    let engine = Arc::clone(&handle.engine);
 								    let mut params: omnigraph_compiler::ParamMap = Default::default();
 								    params.insert(
 								        "name".to_string(),
 								        omnigraph_compiler::Literal::String("EngineLayerBlocked".to_string()),
 								    );
 								    params.insert("age".to_string(), omnigraph_compiler::Literal::Integer(30));
 								    let result = engine
 								        .mutate_as(
 								            "main",
 								            MUTATION_QUERIES,
 								            "insert_person",
 								            &params,
 								            Some("act-blocked"),
 								        )
 								        .await;
 								    match result {
 								        Err(OmniError::Policy(_)) => { /* expected — engine-layer gate fired */ }
 								        Ok(_) => panic!(
 								            "engine-layer policy did NOT fire — act-blocked successfully ran mutate_as via \
 								             the engine pulled from the registry handle. AppState::new_single failed to apply \
 								             with_policy to the underlying Omnigraph engine. This is the B2 footgun the \
 								             with_policy_engine deletion was supposed to close."
 								        ),
 								        Err(other) => panic!("expected OmniError::Policy, got: {other:?}"),
 								    }
 								}
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								#[tokio::test(flavor = "multi_thread")]
 								async fn oversized_request_body_returns_payload_too_large() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_loaded_graph().await;
-												Initial public Omnigraph repository

											
										
										
											2026-04-10 20:49:41 +03:00
+								    let oversized = "x".repeat(1_100_000);
 								    let response = app
 								        .clone()
 								        .oneshot(
 								            Request::builder()
 								                .uri("/read")
 								                .method(Method::POST)
 								                .header("content-type", "application/json")
 								                .body(Body::from(oversized))
 								                .unwrap(),
 								        )
 								        .await
 								        .unwrap();
 								    assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE);
 								}
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
 								// ─── MR-723 default-deny mode (State 2: tokens without policy) ──────────
 								//
 								// `authorize_request` returns 403 for every action except `Read` when a
 								// PolicyEngine is not installed but bearer tokens are configured. Pinned
 								// by the three tests below — Read allowed, Change/SchemaApply denied —
 								// to prevent regressing back to the pre-MR-723 "tokens configured but
 								// no policy = fully open" trap.
 								#[tokio::test(flavor = "multi_thread")]
 								async fn default_deny_mode_allows_read_for_authenticated_actor() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_graph_with_auth_tokens_only(
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-andrew", "demo-token")],
 								    )
 								    .await;
 								    let (status, _body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/snapshot")
 								            .method(Method::GET)
 								            .header(AUTHORIZATION, "Bearer demo-token")
 								            .body(Body::empty())
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn default_deny_mode_rejects_change_with_forbidden() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_graph_with_auth_tokens_only(
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-andrew", "demo-token")],
 								    )
 								    .await;
 								    let change = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        params: Some(json!({ "name": "DefaultDeny", "age": 1 })),
 								        branch: Some("main".to_string()),
 								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header(AUTHORIZATION, "Bearer demo-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&change).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::FORBIDDEN);
 								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
 								    assert!(
 								        error.error.contains("default-deny"),
 								        "expected default-deny in error message, got: {}",
 								        error.error
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn default_deny_mode_rejects_schema_apply_with_forbidden() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, app) = app_for_graph_with_auth_tokens_only(
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-andrew", "demo-token")],
 								    )
 								    .await;
 								    let req = SchemaApplyRequest {
 								        schema_source: additive_schema_with_nickname(),
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        ..Default::default()
-												policy: server 3-state default-deny matrix (MR-723) (#105)

Closes the "tokens but no policy" trap. Pre-MR-723, an operator who
configured bearer tokens and forgot to set policy.file got a server
that required auth and then permitted every action — the illusion of
protection. After MR-723, that configuration is default-deny: only
`read` actions succeed; every other action returns HTTP 403.

Three startup states, classified deterministically:

- **Open** — no tokens, no policy. Requires explicit
  `--unauthenticated` flag or `OMNIGRAPH_UNAUTHENTICATED=1`; otherwise
  `serve()` refuses to start. Forces the operator to opt in to
  "fully open dev mode" so it can't happen accidentally.
- **DefaultDeny** — tokens configured, no policy. `authorize_request`
  rejects every action except `Read` with 403. The warn-log on
  startup names the misconfiguration explicitly.
- **PolicyEnabled** — policy file configured. Cedar evaluates every
  request, unchanged from pre-MR-723.

What landed:

- `ServerConfig.allow_unauthenticated: bool` + `--unauthenticated` flag
  on the `omnigraph-server` bin + `OMNIGRAPH_UNAUTHENTICATED` env var
  (`load_server_settings` honors both).
- New `classify_server_runtime_state(has_tokens, has_policy,
  allow_unauthenticated) -> Result<ServerRuntimeState>` pure function.
  `serve()` calls it before opening the engine and bails with a clear
  error when the operator hits the no-tokens-no-policy-no-flag cell.
- `authorize_request` state-2 branch: when `policy_engine()` is None
  but the bearer-auth middleware delivered an authenticated actor, any
  action other than `Read` returns 403 with a message that names the
  misconfiguration.
- `AppState::with_policy_engine(self, engine)` builder method so
  integration tests that need a custom workload (`new_with_workload`)
  can still install a permit-all policy without a new constructor.
- `app_for_loaded_repo_with_auth(token)` and
  `app_for_loaded_repo_with_auth_tokens(tokens)` test helpers now
  install a permit-all policy alongside tokens — they previously
  represented the "tokens but no policy" state that MR-723 makes
  default-deny, and tests that don't care about policy were
  inadvertently coupled to the loophole.

Tests:

- `classify_*` unit tests (3) — every cell of the matrix.
- `default_deny_mode_allows_read_for_authenticated_actor` — GET
  /snapshot succeeds with bearer token + no policy.
- `default_deny_mode_rejects_change_with_forbidden` — POST /change
  rejected with 403 + "default-deny" message.
- `default_deny_mode_rejects_schema_apply_with_forbidden` — POST
  /schema/apply rejected with 403 + "default-deny" message.
- New `app_for_repo_with_auth_tokens_only(schema, tokens)` helper
  builds the State-2 fixture without policy. The pre-MR-723 helpers
  `app_for_loaded_repo_with_auth*` shift semantics to "tokens +
  permit-all" so existing tests retain their original intent.

docs/user/policy.md: new "Server runtime states (MR-723)" section
documents the matrix and the explicit `--unauthenticated` opt-in.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 17:02:26 +03:00
+								    };
 								    let (status, body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/schema/apply")
 								            .method(Method::POST)
 								            .header(AUTHORIZATION, "Bearer demo-token")
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&req).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::FORBIDDEN);
 								    let error: ErrorOutput = serde_json::from_value(body).unwrap();
 								    assert!(
 								        error.error.contains("default-deny"),
 								        "expected default-deny in error message, got: {}",
 								        error.error
 								    );
 								}
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
 								// ─── SDK ↔ HTTP decision parity (MR-722 PR A) ─────────────────────────────
 								//
 								// Engine and HTTP both consult Cedar via `PolicyChecker::check()`; by
 								// construction they cannot disagree on a decision. These tests pin that
 								// property explicitly so a future refactor that introduces a separate
 								// auth path (or copy-pastes Cedar evaluation logic) turns red.
 								//
 								// Four cases cover the per-action scope shapes:
 								// * Change on a protected branch via `mutate_as` / POST /change
 								// * Change with an actor that has no permit
 								// * BranchMerge to a protected target via `branch_merge_as` / POST /branches/merge
 								// * BranchMerge with an actor that has no permit
 								const PARITY_POLICY_YAML: &str = r#"
 								version: 1
 								groups:
 								  team: [act-bruno]
 								  admins: [act-ragnor]
 								protected_branches: [main]
 								rules:
 								  - id: admins-change-anywhere
 								    allow:
 								      actors: { group: admins }
 								      actions: [change]
 								      branch_scope: any
 								  - id: admins-merge-to-protected
 								    allow:
 								      actors: { group: admins }
 								      actions: [branch_merge]
 								      target_branch_scope: protected
 								"#;
 								#[derive(Clone, Copy, Debug)]
 								enum ParityDecision {
 								    Allow,
 								    Deny,
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn build_parity_graph() -> (tempfile::TempDir, PathBuf, PathBuf) {
 								    // Build a graph with `main` loaded and a `feature` branch ready for
 								    // merge. Returns the graph path and a written policy.yaml path.
 								    let temp = init_loaded_graph().await;
 								    let graph = graph_path(temp.path());
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								        db.branch_create_from(ReadTarget::branch("main"), "feature")
 								            .await
 								            .unwrap();
 								        db.load_as(
 								            "feature",
 								            r#"{"type":"Person","data":{"name":"ParityEve","age":29}}"#,
 								            LoadMode::Append,
 								            None,
 								        )
 								        .await
 								        .unwrap();
 								    }
 								    let policy_path = temp.path().join("policy.yaml");
 								    fs::write(&policy_path, PARITY_POLICY_YAML).unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    (temp, graph, policy_path)
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn sdk_change_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision {
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
+								    let policy = PolicyEngine::load_graph(policy_path, graph.to_string_lossy().as_ref()).unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let db = Omnigraph::open(graph.to_str().unwrap())
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								        .await
 								        .unwrap()
 								        .with_policy(Arc::new(policy) as Arc<dyn PolicyChecker>);
 								    let mut params: omnigraph_compiler::ParamMap = Default::default();
 								    // Parameter keys are bare names (no `$` prefix); the runtime resolves
 								    // `$name` references in the query body to `params["name"]`.
 								    params.insert(
 								        "name".to_string(),
 								        omnigraph_compiler::Literal::String("ParityCharlie".to_string()),
 								    );
 								    params.insert("age".to_string(), omnigraph_compiler::Literal::Integer(30));
 								    let result = db
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        .mutate_as(
 								            "main",
 								            MUTATION_QUERIES,
 								            "insert_person",
 								            &params,
 								            Some(actor),
 								        )
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								        .await;
 								    match result {
 								        Ok(_) => ParityDecision::Allow,
 								        Err(OmniError::Policy(_)) => ParityDecision::Deny,
 								        Err(other) => panic!("unexpected SDK error for change: {other:?}"),
 								    }
 								}
 								async fn http_change_decision(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    graph: &Path,
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    policy_path: &PathBuf,
 								    actor: &str,
 								    token: &str,
 								) -> ParityDecision {
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								        vec![(actor.to_string(), token.to_string())],
 								        Some(policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let req = ChangeRequest {
-												feat: inline query strings in CLI and HTTP server (#110)

* feat(MR-656): inline query strings in CLI and HTTP server

CLI:
- Add -e / --query-string <STRING> to omnigraph read and omnigraph change
- Exactly one of --query, --query-string, --alias is required (3-way XOR)
- Empty --query-string is rejected with a clear error

HTTP:
- New POST /query (read-only, clean field names: query/name/params/branch/snapshot)
- Mutations on /query are rejected with 400 -- use POST /change instead
- ChangeRequest fields polished: query (alias query_source), name (alias query_name)
- POST /read and POST /change remain byte-compatible for existing clients

Tests:
- cli.rs: -e happy-path on read/change, mutex error vs --query, empty -e rejected
- system_local.rs: inline -e read and -e change exercise the local flow
- system_remote.rs: inline -e read/change over HTTP plus direct /query 200/400
- server.rs: /query 200, /query 400 on mutation, /change legacy field alias
- openapi.rs: new /query path, QueryRequest schema, ChangeRequest field-name polish

Docs: cli.md (-e examples), cli-reference.md (read/change rows), server.md (/query)
Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* feat(MR-656): rename read/change to query/mutate with deprecation signals

HTTP server:
- Add POST /mutate as canonical write endpoint (pairs with POST /query).
- Mark POST /read and POST /change as deprecated. Three-channel signal:
  * OpenAPI: `deprecated: true` on the operation (every codegen flags
    the generated SDK method).
  * RFC 9745: response `Deprecation: true` header on every response.
  * RFC 8288: response `Link: </successor>; rel="successor-version"`
    pointing at /query and /mutate respectively.
- Share business logic across /mutate and /change via run_mutate(); the
  /change wrapper is the only place that adds the deprecation headers.
- ChangeRequest field aliases (query_source/query_name) preserved.
- AliasCommand serde now accepts `query`/`mutate` alongside `read`/`change`.

CLI:
- Promote `omnigraph query` / `omnigraph mutate` to top-level canonical
  subcommands (clap visible_alias keeps `omnigraph read` / `omnigraph
  change` working forever).
- Promote `omnigraph lint` / `omnigraph check` to top-level (was nested
  under `omnigraph query lint`, which is now a deprecated argv shim that
  rewrites to the canonical form).
- Argv-level preprocessing prints a one-line deprecation warning to
  stderr when any legacy spelling is used. Canonical names are silent.

Tests:
- Server: /mutate works, /change emits Deprecation+Link headers, /read
  emits Deprecation+Link headers, /query carries no deprecation signal.
- OpenAPI: /read and /change flagged deprecated; /query and /mutate not.
- CLI: canonical `lint` matches deprecated `query lint` / `query check`
  output; `read` / `change` print deprecation warnings.

Docs:
- cli.md: new canonical examples; "Deprecated names" migration table.
- cli-reference.md: top-level table updated; aliases.<name>.command
  accepts both legacy and canonical spellings.
- server.md: endpoint inventory shows /query and /mutate as canonical
  and /read and /change as deprecated; dedicated section explains the
  three-channel deprecation signal.
- og-cheet-sheet.md: use new `omnigraph lint` / `omnigraph check`.
- openapi.json regenerated.

Migration is purely cosmetic — every deprecated form continues to work
indefinitely; only the spelling changes.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* fix(MR-656): address Devin Review findings on /query and /change

Two issues raised by Devin Review on PR #110:

1. `POST /query` mutation-rejection error pointed at the deprecated
   `/change` endpoint instead of the canonical `/mutate`. Fixed in
   three places: the runtime error message in `server_query`, the
   utoipa 400-response description, and the handler doc comment. The
   `QueryRequest` schema docstrings in `api.rs` got the same update so
   the openapi.json bodies match. Server and openapi tests updated.

2. `execute_change_remote` serialized `ChangeRequest` directly, which
   emits the new canonical field names `query` / `name` on the wire.
   `#[serde(alias = "query_source")]` only affects deserialization, so
   a newer CLI talking to an older server would have its `/change`
   POST body fail with "missing field: query_source". Fixed by
   extracting a `legacy_change_request_body` helper that hand-rolls
   the JSON with the legacy keys (`query_source` / `query_name`), the
   same byte-stable contract `execute_read_remote` already uses
   against `/read`. Added two unit tests on the helper to lock the
   wire shape in.

Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>

* docs(dev): RFC 001 — inline + stored queries, envelope, MCP

Tracked artifact consolidating the design across MR-656 (this branch),
MR-976 (Phase 1 envelope hardening parent, with MR-977/978/979/980
sub-issues), and MR-969 (stored queries + MCP).

Sections:

* Two paths, one engine — inline `/query` + `/mutate` (this PR) coexist
  with stored `/queries/{name}` (MR-969). Same `run_query` / `run_mutate`
  backend (the fold-in landed in the previous commit).
* Request envelope ("before") — Idempotency-Key, If-Match, X-Deadline,
  X-Trace-Id, expect, dry_run, fields. Phase 1 ships the load-bearing
  subset on `/mutate`.
* Response envelope ("after") — audit_id, snapshot_id, commit_id, stats,
  warnings. Closes the provenance loop today's `ChangeOutput` leaves
  open.
* `.gq` pragmas — `@description`, `@returns`, `@mcp`. Source-of-truth
  for the stored-query agent contract; no separate YAML registry.
* Multi-graph MCP — per-graph `/graphs/{id}/mcp/tools` + `/mcp/invoke`.
  Token binds to one graph by default; cross-graph agents loop.
* Cedar split — `read`/`change` for inline, `invoke_query` for stored.
  Operators deny ad-hoc for agent groups while keeping curated tool
  list open.
* Rejected alternatives — per-env override files, compiled bundles,
  tool-name prefixing across graphs, body-field graph dispatch.

Index entry added under "Active Implementation Plans" so future agents
land on the RFC before touching queries / mutations / envelope code.
`scripts/check-agents-md.sh` clean (35 links, 34 docs).

* docs(server): clarify why run_query lacks AppState parameter

run_mutate takes state for workload admission; run_query doesn't because
reads aren't admission-gated today. Mark the asymmetry as intentional and
flag the two future events that would grow the signature: Phase 1's
`expect: { max_rows_scanned: N }` budget (MR-976) or per-actor admission
extending to stored-read invocations (MR-969). Prevents the natural
"make these symmetrical" follow-up.

* refactor(server): run_query / run_mutate take &ResolvedActor

Replace `Option<Extension<ResolvedActor>>` in the helpers with
`Option<&ResolvedActor>`. Saves MR-969's stored-query handler from
wrapping a bare actor in axum's `Extension(...)` before calling.
Handler signatures (`server_query`, `server_read`, `server_mutate`,
`server_change`) keep `Option<Extension<ResolvedActor>>` because that
is what axum injects, and unwrap at the call site with
`actor.as_ref().map(|Extension(actor)| actor)`.

Net: -13/+10 LOC, 89/0 server tests pass.

* docs(releases): v0.6.0 — describe inline + canonical-named queries (MR-656)

Extend the v0.6.0 release notes to cover the third piece of work landing
alongside the graph terminology rename and multi-graph server mode:
canonical-named `POST /query` and `POST /mutate` endpoints, the CLI's
new `-e/--query-string` flag, the top-level promotion of `lint` /
`check`, and the three-channel deprecation signal on `/read` and
`/change` (OpenAPI `deprecated: true` + RFC 9745 + RFC 8288).

Additions:

* Top blurb: "Two pieces" -> "Three pieces" with a bullet describing
  the rename + inline flow.
* Breaking Changes: new "Query / mutation rename" subsection covering
  the `ChangeRequest` field rename (with the back-compat serde aliases
  and the CLI's `legacy_change_request_body` byte-stable wire helper)
  and the `omnigraph query lint` -> `omnigraph lint` move.
* New: 5 bullets — the two endpoints, the CLI subcommands, the `-e`
  flag, the deprecation signal channels, the widened `aliases.<name>.command`
  vocabulary.
* User Impact: one bullet making explicit that the rename is cosmetic
  on the client side and migration is voluntary.
* Documentation: pointers to the updated `server.md` / `cli.md` /
  `cli-reference.md` and the new `docs/dev/rfc-001-queries-envelope-mcp.md`.

+15/-1 lines. `./scripts/check-agents-md.sh` clean.

* refactor(cli): demote `check` from visible_alias to deprecation shim

`omnigraph check` was a clap `visible_alias` on `lint`, advertised in
`--help` as an equivalent canonical name. Per MR-981 §6 (long-form
flags as canonical, short forms as visible aliases), visible aliases
on subcommand names hurt agent CX: agents emit either spelling
depending on training-data drift, and there's no length signal
pointing at the canonical name.

Changes:

* Remove `#[command(visible_alias = "check")]` from the `Lint` variant.
  `omnigraph --help` now shows only `lint`.
* Add bare `check` to `rewrite_deprecated_argv` so `omnigraph check
  <args>` still works — it rewrites to `omnigraph lint <args>` and
  emits a one-line stderr deprecation warning, matching the existing
  pattern for `read` / `change` / `query lint` / `query check`.
* Fix the nested `query check` shim to substitute `check` -> `lint` in
  the rewritten argv (previously it relied on `check` being a
  visible_alias to reach the `Lint` variant).
* New test `deprecated_check_top_level_rewrites_to_lint` covers: bare
  `check` produces identical stdout to `lint`, emits the deprecation
  warning, and `check` does NOT appear as an alias in `omnigraph
  --help`.
* Release notes updated to reflect the deprecation-shim treatment and
  cross-reference MR-981 §6 reasoning.

Cargo / Go users typing `check` still work indefinitely; one stderr
nudge per invocation teaches the canonical name. Agents see only
`lint` in `--help --json` so they emit one canonical form.

67/0 omnigraph-cli tests pass; 39 workspace test suites green.

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Ragnor Comerford <ragnor.comerford@gmail.com>
Co-authored-by: Ragnor Comerford <hello@ragnor.co>
											
										
										
											2026-05-29 13:41:54 +02:00
+								        query: MUTATION_QUERIES.to_string(),
 								        name: Some("insert_person".to_string()),
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								        params: Some(json!({ "name": "ParityCharlie", "age": 30 })),
 								        branch: Some("main".to_string()),
 								    };
 								    let (status, _body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/change")
 								            .method(Method::POST)
 								            .header(AUTHORIZATION, format!("Bearer {token}"))
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&req).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    match status {
 								        StatusCode::OK => ParityDecision::Allow,
 								        StatusCode::FORBIDDEN => ParityDecision::Deny,
 								        other => panic!("unexpected HTTP status for change: {other}"),
 								    }
 								}
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								async fn sdk_merge_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision {
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
+								    let policy = PolicyEngine::load_graph(policy_path, graph.to_string_lossy().as_ref()).unwrap();
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let db = Omnigraph::open(graph.to_str().unwrap())
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								        .await
 								        .unwrap()
 								        .with_policy(Arc::new(policy) as Arc<dyn PolicyChecker>);
 								    let result = db.branch_merge_as("feature", "main", Some(actor)).await;
 								    match result {
 								        Ok(_) => ParityDecision::Allow,
 								        Err(OmniError::Policy(_)) => ParityDecision::Deny,
 								        Err(other) => panic!("unexpected SDK error for branch_merge: {other:?}"),
 								    }
 								}
 								async fn http_merge_decision(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    graph: &Path,
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    policy_path: &PathBuf,
 								    actor: &str,
 								    token: &str,
 								) -> ParityDecision {
 								    let state = AppState::open_with_bearer_tokens_and_policy(
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        graph.to_string_lossy().to_string(),
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								        vec![(actor.to_string(), token.to_string())],
 								        Some(policy_path),
 								    )
 								    .await
 								    .unwrap();
 								    let app = build_app(state);
 								    let req = BranchMergeRequest {
 								        source: "feature".to_string(),
 								        target: Some("main".to_string()),
 								    };
 								    let (status, _body) = json_response(
 								        &app,
 								        Request::builder()
 								            .uri("/branches/merge")
 								            .method(Method::POST)
 								            .header(AUTHORIZATION, format!("Bearer {token}"))
 								            .header("content-type", "application/json")
 								            .body(Body::from(serde_json::to_vec(&req).unwrap()))
 								            .unwrap(),
 								    )
 								    .await;
 								    match status {
 								        StatusCode::OK => ParityDecision::Allow,
 								        StatusCode::FORBIDDEN => ParityDecision::Deny,
 								        other => panic!("unexpected HTTP status for branch_merge: {other}"),
 								    }
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_decision_parity_change_admin_on_main_allowed() {
 								    // (act-ragnor, change, main) — admins-change-anywhere rule applies.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    // Both SDK and HTTP must allow. Each path uses its own fresh graph
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    // because allow→side-effects.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_t1, graph1, policy1) = build_parity_graph().await;
 								    let sdk = sdk_change_decision(&graph1, &policy1, "act-ragnor").await;
 								    let (_t2, graph2, policy2) = build_parity_graph().await;
 								    let http = http_change_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await;
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    assert!(
 								        matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow),
 								        "SDK={sdk:?} HTTP={http:?} — should both Allow",
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_decision_parity_change_team_on_main_denied() {
 								    // (act-bruno, change, main) — no rule grants bruno change on
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    // protected. Both SDK and HTTP must deny. Same graph is reusable
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    // because deny→no side-effects.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, graph, policy) = build_parity_graph().await;
 								    let sdk = sdk_change_decision(&graph, &policy, "act-bruno").await;
 								    let http = http_change_decision(&graph, &policy, "act-bruno", "bruno-token").await;
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    assert!(
 								        matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny),
 								        "SDK={sdk:?} HTTP={http:?} — should both Deny",
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_decision_parity_branch_merge_admin_allowed() {
 								    // (act-ragnor, branch_merge, feature→main) — admins-merge-to-protected
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    // rule applies. Both Allow. Each path uses its own fresh graph —
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    // a successful merge consumes the feature branch's commit on main.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_t1, graph1, policy1) = build_parity_graph().await;
 								    let sdk = sdk_merge_decision(&graph1, &policy1, "act-ragnor").await;
 								    let (_t2, graph2, policy2) = build_parity_graph().await;
 								    let http = http_merge_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await;
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    assert!(
 								        matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow),
 								        "SDK={sdk:?} HTTP={http:?} — should both Allow",
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn policy_decision_parity_branch_merge_team_denied() {
 								    // (act-bruno, branch_merge, feature→main) — no rule grants bruno
 								    // branch_merge. Both Deny.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (_temp, graph, policy) = build_parity_graph().await;
 								    let sdk = sdk_merge_decision(&graph, &policy, "act-bruno").await;
 								    let http = http_merge_decision(&graph, &policy, "act-bruno", "bruno-token").await;
-												tests: policy chassis e2e gap-fills (MR-722 follow-up) (#106)

* tests: policy chassis e2e gap-fills (MR-722 follow-up)

Audit after PRs #101-105 surfaced real e2e gaps in the policy chassis
that could let regressions ride through silently. Coverage was strong
at the SDK level (18 chassis tests) and reasonable at HTTP (12+ policy
tests), but the CLI×writer matrix was asymmetric (only `change` tested
end-to-end), the `cli.actor` config-only precedence path was untested,
the `OMNIGRAPH_UNAUTHENTICATED` env-var read path was unexercised,
`serve()`'s startup-refusal propagation was structural-review only,
and engine↔HTTP decision parity was a structural property without a
test pinning it. This commit closes those gaps.

Added (15 new tests, all test-only):

* `policy_engine_chassis.rs` (+2): `load_file_as` allow + deny pair —
  PR #104 added the actor-aware mirror of `load_file` but it was only
  exercised via CLI integration; this is direct-SDK coverage.

* `omnigraph-server/src/lib.rs` mod tests (+2):
  - `unauthenticated_env_var_classification` — consolidated single
    test (process-global env var; running parallel would race) that
    pins truthy values, falsy values, unset, and CLI-flag-overrides-
    env behavior of the `OMNIGRAPH_UNAUTHENTICATED` read path inside
    `load_server_settings`.
  - `serve_refuses_to_start_in_state_1_without_unauthenticated` —
    `#[serial]` integration test. Clears all bearer-token env vars,
    builds a `ServerConfig` with no policy file and no flag, calls
    `serve(config).await`, asserts Err before any side-effecting
    work (Lance dataset open, TcpListener::bind). Guards the
    classifier→serve propagation path so a future refactor that
    drops the call turns red.

* `omnigraph-server/tests/server.rs` (+4): `policy_decision_parity_*`
  — four cases (Change×allowed+denied, BranchMerge×allowed+denied).
  Each case runs the same Cedar decision via both SDK
  (`Omnigraph::with_policy().mutate_as` / `branch_merge_as`) and HTTP
  (`POST /change` / `POST /branches/merge`) and asserts both either
  Allow or Deny. The structural property (both paths call
  `PolicyChecker::check`) is now test-asserted.

* `omnigraph-cli/tests/system_local.rs` (+8): the CLI×writer matrix
  fan-out:
  - `local_cli_load_enforces_engine_layer_policy`
  - `local_cli_ingest_enforces_engine_layer_policy`
  - `local_cli_schema_apply_enforces_engine_layer_policy`
  - `local_cli_branch_create_enforces_engine_layer_policy`
  - `local_cli_branch_delete_enforces_engine_layer_policy`
  - `local_cli_branch_merge_enforces_engine_layer_policy`
  Each: one denied case (`--as act-bruno` against protected main) +
  one allowed case (`--as act-ragnor` via existing/extended admins-*
  rules).
  Plus:
  - `local_cli_actor_from_config_used_when_no_flag` — proves the
    config-only precedence path works.
  - `local_cli_actor_flag_overrides_config_actor` — proves the
    `--as` flag wins over `cli.actor` in the config.
  Adds `local_policy_config_with_actor` helper. Extends
  `POLICY_E2E_YAML` with `admins-branch-ops` (BranchCreate +
  BranchDelete) and `admins-schema-apply` rules so the CLI×writer
  matrix has positive-case rule coverage.

Verification: all new tests pass; full `cargo test --workspace
--locked` is green; `scripts/check-agents-md.sh` passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* tests: serialize env-touching server lib tests to fix CI flake

CI flake on PR #106's Test Workspace job: two of the new tests
(`serve_refuses_to_start_in_state_1_without_unauthenticated` and
`unauthenticated_env_var_classification`) raced against
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`,
which sets `OMNIGRAPH_SERVER_BEARER_TOKEN` via `EnvGuard`.

While `serve_refuses` was mid-execution with its EnvGuard cleared,
the bearer-token test's EnvGuard had `OMNIGRAPH_SERVER_BEARER_TOKEN`
set; `resolve_token_source()` saw it and classified the runtime
state as `DefaultDeny` rather than refusing — so the test panicked
with "Dataset at path X not found" instead of the expected refusal
message. The unauthenticated test had the symmetric failure: its
`OMNIGRAPH_UNAUTHENTICATED="anything"` got overwritten by a peer
`EnvGuard` drop.

Fix: mark every test that uses `EnvGuard` with `#[serial]` so they
serialize against each other (default key). Already on
`serve_refuses_to_start_in_state_1_without_unauthenticated`; added
to `unauthenticated_env_var_classification` and
`server_bearer_tokens_from_env_reads_legacy_token_and_token_file`.
The `parse_bearer_tokens_json_*` tests don't touch env vars and
stay parallel.

Locally green (36 tests pass on my workstation); the parallelism
issue is CI-runner-specific (more aggressive thread interleaving)
but the fix is universal.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-18 22:25:04 +03:00
+								    assert!(
 								        matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny),
 								        "SDK={sdk:?} HTTP={http:?} — should both Deny",
 								    );
 								}
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
 								// ─── MR-694 PR B: HTTP soft + hard drop semantics + data preservation ────
 								//
 								// SDK-level drop semantics are pinned in `crates/omnigraph/tests/schema_apply.rs`.
 								// These HTTP-side tests mirror the assertions through POST /schema/apply
 								// and exercise the new `allow_data_loss` field (closes the gap where
 								// the schema-lint chassis v1.2 shipped Hard mode on the CLI but the
 								// HTTP request struct had no equivalent field).
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_apply_route_soft_drops_property_via_http() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
 								    // Load a row that has the column we're about to drop.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								    {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        db.load(
 								            "main",
 								            r#"{"type":"Person","data":{"name":"PreDrop","age":42}}"#,
 								            LoadMode::Append,
 								        )
 								        .await
 								        .unwrap();
 								    }
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let pre_version = manifest_dataset_version(&graph).await;
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
 								    let (status, payload) = json_response(
 								        &app,
 								        Request::builder()
 								            .method(Method::POST)
 								            .uri("/schema/apply")
 								            .header("content-type", "application/json")
 								            .header("authorization", "Bearer admin-token")
 								            .body(Body::from(
 								                serde_json::to_vec(&SchemaApplyRequest {
 								                    schema_source: schema_without_age(),
 								                    ..Default::default()
 								                })
 								                .unwrap(),
 								            ))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
 								    // Catalog reflects the drop: `age` is gone from the live schema.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								    assert!(
 								        !reopened.catalog().node_types["Person"]
 								            .properties
 								            .contains_key("age"),
 								        "catalog should not contain `age` after drop"
 								    );
 								    // Soft drop preserves the prior version — `age` is still readable
 								    // via time travel to the pre-drop manifest version. Mirrors the
 								    // SDK-side assertion in `apply_schema_drops_a_nullable_property_softly_preserves_prior_version`.
 								    let pre_drop_snapshot = reopened.snapshot_at_version(pre_version).await.unwrap();
 								    let pre_drop_ds = pre_drop_snapshot.open("node:Person").await.unwrap();
 								    let pre_drop_fields = pre_drop_ds
 								        .schema()
 								        .fields
 								        .iter()
 								        .map(|f| f.name.clone())
 								        .collect::<Vec<_>>();
 								    assert!(
 								        pre_drop_fields.iter().any(|f| f == "age"),
 								        "soft drop should leave the pre-drop dataset's `age` column \
 								         time-travel-reachable; got fields {pre_drop_fields:?}"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_apply_route_soft_drops_node_type_via_http() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
 								    let (status, payload) = json_response(
 								        &app,
 								        Request::builder()
 								            .method(Method::POST)
 								            .uri("/schema/apply")
 								            .header("content-type", "application/json")
 								            .header("authorization", "Bearer admin-token")
 								            .body(Body::from(
 								                serde_json::to_vec(&SchemaApplyRequest {
 								                    schema_source: schema_without_company(),
 								                    ..Default::default()
 								                })
 								                .unwrap(),
 								            ))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								    assert!(
 								        !reopened.catalog().node_types.contains_key("Company"),
 								        "catalog should not contain `Company` after drop"
 								    );
 								    assert!(
 								        !reopened.catalog().edge_types.contains_key("WorksAt"),
 								        "catalog should not contain `WorksAt` after cascade"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_apply_route_hard_drops_property_with_allow_data_loss() {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								    {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        db.load(
 								            "main",
 								            r#"{"type":"Person","data":{"name":"PreDropHard","age":50}}"#,
 								            LoadMode::Append,
 								        )
 								        .await
 								        .unwrap();
 								    }
 								    // Apply with allow_data_loss=true → Hard mode promotion.
 								    let (status, payload) = json_response(
 								        &app,
 								        Request::builder()
 								            .method(Method::POST)
 								            .uri("/schema/apply")
 								            .header("content-type", "application/json")
 								            .header("authorization", "Bearer admin-token")
 								            .body(Body::from(
 								                serde_json::to_vec(&SchemaApplyRequest {
 								                    schema_source: schema_without_age(),
 								                    allow_data_loss: true,
 								                })
 								                .unwrap(),
 								            ))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
 								    // Catalog reflects the drop.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								    assert!(
 								        !reopened.catalog().node_types["Person"]
 								            .properties
 								            .contains_key("age"),
 								        "catalog should not contain `age` after Hard drop"
 								    );
 								    // Plan steps should show DropMode::Hard for property drops.
 								    let steps = payload["steps"].as_array().expect("steps array");
 								    let drop_step = steps
 								        .iter()
 								        .find(|s| s["kind"] == "drop_property")
 								        .expect("plan should include drop_property step");
 								    let mode = &drop_step["mode"];
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    assert_eq!(
 								        mode, "hard",
 								        "expected hard mode under allow_data_loss=true"
 								    );
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_apply_route_keeps_drops_soft_without_flag() {
 								    // Symmetric to the Hard test: same schema change, but no
 								    // allow_data_loss flag → drops stay Soft (prior column data
 								    // remains time-travel-reachable). Pins the default semantics
 								    // against accidental Hard promotion.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
 								    let (status, payload) = json_response(
 								        &app,
 								        Request::builder()
 								            .method(Method::POST)
 								            .uri("/schema/apply")
 								            .header("content-type", "application/json")
 								            .header("authorization", "Bearer admin-token")
 								            .body(Body::from(
 								                serde_json::to_vec(&SchemaApplyRequest {
 								                    schema_source: schema_without_age(),
 								                    allow_data_loss: false,
 								                })
 								                .unwrap(),
 								            ))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
 								    let steps = payload["steps"].as_array().expect("steps array");
 								    let drop_step = steps
 								        .iter()
 								        .find(|s| s["kind"] == "drop_property")
 								        .expect("plan should include drop_property step");
 								    let mode = &drop_step["mode"];
 								    assert_eq!(mode, "soft", "expected soft mode without allow_data_loss");
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let _ = graph;
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								}
 								#[tokio::test(flavor = "multi_thread")]
 								async fn schema_apply_route_additive_property_preserves_existing_rows() {
 								    // SDK suite covers rename and drop data preservation. Additive
 								    // AddProperty wasn't pinned with a row-count check anywhere.
 								    // Load N rows, apply schema adding nullable property, verify
 								    // every row is still readable and the new column is null.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        &fs::read_to_string(fixture("test.pg")).unwrap(),
 								        &[("act-ragnor", "admin-token")],
 								        SCHEMA_APPLY_POLICY_YAML,
 								    )
 								    .await;
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let graph = graph_path(temp.path());
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
 								    // Standard fixture data: 4 Persons + 1 Company. Load it.
 								    let pre_count = {
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								        let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        db.load(
 								            "main",
 								            &fs::read_to_string(fixture("test.jsonl")).unwrap(),
 								            LoadMode::Append,
 								        )
 								        .await
 								        .unwrap();
 								        let snap = db
 								            .snapshot_of(omnigraph::db::ReadTarget::branch("main"))
 								            .await
 								            .unwrap();
 								        snap.entry("node:Person").expect("Person").row_count
 								    };
 								    assert!(pre_count > 0, "fixture should have loaded Person rows");
 								    let (status, payload) = json_response(
 								        &app,
 								        Request::builder()
 								            .method(Method::POST)
 								            .uri("/schema/apply")
 								            .header("content-type", "application/json")
 								            .header("authorization", "Bearer admin-token")
 								            .body(Body::from(
 								                serde_json::to_vec(&SchemaApplyRequest {
 								                    schema_source: additive_schema_with_nickname(),
 								                    ..Default::default()
 								                })
 								                .unwrap(),
 								            ))
 								            .unwrap(),
 								    )
 								    .await;
 								    assert_eq!(status, StatusCode::OK);
 								    assert_eq!(payload["applied"], true);
 								    // Row count preserved.
-												Rename repo terminology to graph (#118)
											
										
										
											2026-05-24 16:46:00 +01:00
+								    let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
 								    let snap = db
 								        .snapshot_of(omnigraph::db::ReadTarget::branch("main"))
-												schema: HTTP allow_data_loss exposure + e2e drop coverage (MR-694 follow-up) (#107)

The schema-lint chassis v1.2 (PR #100) shipped `--allow-data-loss` on
the CLI, but `SchemaApplyRequest` had no equivalent field — Hard-mode
drops were CLI-only. This commit closes that feature gap and adds e2e
test coverage for drop modes across HTTP + CLI, plus data preservation
on additive apply, plus a CLI↔SDK plan-parity assertion.

Feature gap closed:

- `crates/omnigraph-server/src/api.rs` — added `allow_data_loss: bool`
  (default false via `#[serde(default)]`) to `SchemaApplyRequest`.
  Added `Default` derive so test usages can use `..Default::default()`.
- `crates/omnigraph-server/src/lib.rs` — `server_schema_apply` now
  constructs `SchemaApplyOptions { allow_data_loss: request.allow_data_loss }`
  and threads through to `apply_schema_as`.
- `crates/omnigraph-cli/src/main.rs` — remote-URI schema-apply path
  used to bail with "--allow-data-loss not yet supported on remote";
  now forwards the flag into the JSON payload so the CLI behaves
  identically against local and remote URIs.
- `openapi.json` — regenerated; only diff is the new field on
  `SchemaApplyRequest`.

Tests added (8 new):

* `crates/omnigraph-server/tests/server.rs` (+5):
  - `schema_apply_route_soft_drops_property_via_http` — POST schema
    removing nullable property, verify catalog reflects the drop AND
    `snapshot_at_version(pre)` still has `age` in the field list
    (time-travel reachability is the Soft contract).
  - `schema_apply_route_soft_drops_node_type_via_http` — POST schema
    removing `Company` node + cascading `WorksAt` edge.
  - `schema_apply_route_hard_drops_property_with_allow_data_loss` —
    POST with `allow_data_loss: true`, verify plan step reports
    `mode: hard`.
  - `schema_apply_route_keeps_drops_soft_without_flag` — same schema
    without flag, verify `mode: soft`. Pins default semantics against
    accidental Hard promotion.
  - `schema_apply_route_additive_property_preserves_existing_rows` —
    load fixture, POST adding nullable property, verify row count
    preserved (SDK suite covers data preservation on drops + renames;
    additive AddProperty wasn't pinned).
  Plus helpers `schema_without_age` and `schema_without_company`.

* `crates/omnigraph-cli/tests/cli.rs` (+3):
  - `schema_apply_allow_data_loss_flag_promotes_drops_to_hard` — CLI
    `omnigraph schema apply --allow-data-loss --schema X.pg --json`,
    verify plan step has `mode: hard`.
  - `schema_apply_without_allow_data_loss_keeps_soft_drops` — without
    flag, verify Soft.
  - `schema_plan_parity_cli_and_sdk` — same `.pg` source through
    `Omnigraph::plan_schema` (SDK) and `omnigraph schema plan --json`
    (CLI), assert the steps array is byte-identical post-JSON. HTTP
    has no `/schema/plan` endpoint; apply-side parity is implicitly
    covered by the HTTP drop tests + CLI drop tests using identical
    fixtures.

Docs:

- `docs/user/schema-language.md` — new "Destructive drops" section
  documenting Soft vs Hard semantics and that `allow_data_loss` is
  now honored uniformly across CLI / HTTP / SDK.

Verification: every new test passes; full `cargo test --workspace --locked`
green; `scripts/check-agents-md.sh` passes.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
											
										
										
											2026-05-19 01:56:46 +03:00
+								        .await
 								        .unwrap();
 								    let post_count = snap.entry("node:Person").expect("Person").row_count;
 								    assert_eq!(
 								        post_count, pre_count,
 								        "AddProperty should preserve row count",
 								    );
 								}
-												(feat): multi-graph server mode (#119)

* mr-668: add GraphId newtype + Cloud-mode forward identity stubs (PR 1/10)

PR 1 of the MR-668 multi-graph server work. Pure types, no runtime
behavior changes yet.

Ships the validated identity vocabulary that the rest of the implementation
will consume:

- `GraphId(String)` — `^[a-zA-Z0-9-]{1,64}$`, leading underscore rejected
  (engine reserves every `_*` filename), reserved route names rejected
  (`policies`, `healthz`, `openapi`, `openapi.json`, `graphs`). Validation
  lives in `try_from` only; serde `Deserialize` re-runs it so JSON payloads
  cannot bypass.
- `TenantId(String)` — same regex shape as GraphId. `None` in Cluster
  mode; reserved for Cloud mode (RFC 0003) where it carries the OAuth
  `org_id` claim.
- `GraphKey { tenant_id: Option<TenantId>, graph_id }` — the registry
  HashMap key. `cluster()` constructor for the Cluster-mode default.
- `Scope` enum with `Full` variant — Cluster mode default; RFC 0004 will
  extend with OAuth scopes (`graph:read`/`write`/`admin`/`*`).
- `AuthSource` enum with `Static` variant — Cluster mode default; RFC
  0001 step 1 will add `Oidc`.
- `ResolvedActor { actor_id, tenant_id, scopes, source }` — replaces the
  upcoming refactor of `AuthenticatedActor(Arc<str>)` in PR 4a.

Per MR-668 design decision 13: ship the Cloud-mode forward type shapes
now (no `TokenVerifier` trait yet — that's RFC 0001 step 1) so handler
signatures stay stable across the Cluster → Cloud trajectory. `Scope`
and `AuthSource` use `#[non_exhaustive]` so future variants don't break
caller matches.

Tests: 26 new (15 graph_id + 11 identity), all passing. No regression
in the existing 36 server library tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Omnigraph::init error-path cleanup + three failpoints (PR 2a/10)

PR 2a of the MR-668 multi-graph server work. Bug fix: a partially-failed
`Omnigraph::init` previously left orphan schema files at the graph URI,
making the URI unusable for a retry (the next `init` would refuse because
`_schema.pg` already exists).

Changes:

1. `init_with_storage` now wraps the I/O phase. On any error from
   `init_storage_phase`, calls `best_effort_cleanup_init_artifacts` to
   remove the three schema files before returning the original error:
   - `_schema.pg`
   - `_schema.ir.json`
   - `__schema_state.json`
   Cleanup is best-effort: a failure to delete is logged via
   `tracing::warn` but does NOT mask the init error.

2. Three failpoints added at the init phase boundaries:
   - `init.after_schema_pg_written`
   - `init.after_schema_contract_written`
   - `init.after_coordinator_init`

3. Four new failpoint tests in `tests/failpoints.rs` pin the cleanup
   behavior at each boundary plus the "original error wins over cleanup
   error" contract. All 23 failpoint tests pass.

Coverage gap (documented in code comments):
Lance per-type datasets and `__manifest/` directory created by
`GraphCoordinator::init` are NOT cleaned up after a coordinator-init-phase
failure. Recursive directory deletion requires `StorageAdapter::delete_prefix`,
which was deferred along with `DELETE /graphs/{id}` (originally PR 2b). When
that primitive lands, the third failpoint test can be tightened to assert
the graph root is fully empty.

Tests: 4 new (init_failpoint_*), all 23 failpoint tests green. No
regression in the 105 engine library tests or 64 end_to_end tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: add GraphHandle + GraphRegistry data structure (PR 3/10)

PR 3 of the MR-668 multi-graph server work. Pure data structure — no
routing changes yet (that's PR 4a).

New file: `crates/omnigraph-server/src/registry.rs`

- `GraphHandle { key: GraphKey, uri: String, engine: Arc<Omnigraph>,
  policy: Option<Arc<PolicyEngine>> }` — the per-graph state that the
  routing middleware (PR 4a) will inject as a request extension.
- `RegistrySnapshot { graphs: HashMap<GraphKey, Arc<GraphHandle>> }` —
  immutable snapshot; replaced atomically via `ArcSwap`.
- `GraphRegistry { snapshot: ArcSwap<_>, mutate: Mutex<()> }` — lock-free
  reads, mutex-serialized mutations.
- `RegistryLookup { Ready(Arc<GraphHandle>) | Gone }` — two-valued, no
  `Tombstoned` variant since DELETE is deferred in v0.7.0 scope.
- `InsertError { DuplicateKey | DuplicateUri }` — both rejection cases
  for create-graph (maps to HTTP 409 in PR 7).
- Methods: `new`, `from_handles` (bulk startup-time init), `get`, `list`,
  `len`, `insert`.

Race semantics pinned by three multi-thread tests:
- `concurrent_insert_same_key_exactly_one_succeeds` — N=8 spawned
  inserts with the same key; exactly 1 returns Ok, 7 return DuplicateKey.
- `concurrent_insert_distinct_keys_all_succeed` — N=8 spawned inserts
  with distinct keys; all succeed.
- `concurrent_reads_during_inserts_see_consistent_snapshots` — reader
  loop concurrent with sequential writes; every listed handle's key
  resolves via `get()` (no torn state).

Why no tombstones field: `DELETE /graphs/{id}` is deferred to bound
the scope of v0.7.0. Without a delete endpoint, there's no use for
tombstones — every key in the registry is `Ready`, and every key
not in the registry is `Gone`. When DELETE lands later, the
`Tombstoned` variant + `tombstones: HashSet<GraphKey>` slot in
additively without breaking caller signatures (the `Gone` variant
remains the "not currently active" case).

Why `tokio::sync::Mutex`: insert is async because PR 7's flow holds
this mutex across the atomic YAML rewrite step (file I/O). std::Mutex
would footgun across .await.

Dependency additions: `arc-swap = { workspace = true }`,
`thiserror = { workspace = true }` (used by InsertError).

Tests: 12 new (12 passing). 74 server lib tests total green
(62 from PR 1 + 12 new). Clippy clean on server crate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: router restructure + handler refactor for multi-graph (PR 4a/10)

PR 4a of the MR-668 multi-graph server work. The heaviest single PR —
rewires every handler to extract `Arc<GraphHandle>` from a routing
middleware, replaces `AuthenticatedActor(Arc<str>)` with `ResolvedActor`
everywhere, and adds the `ServerMode` discriminator.

Behavior changes:
- **Single mode** (legacy `omnigraph-server <URI>`): flat routes
  (`/snapshot`, `/read`, `/branches`, …) continue to work exactly as
  v0.6.0. Internally, the registry holds a single handle keyed by the
  sentinel `SINGLE_GRAPH_KEY_ID = "default"`; routing middleware injects
  that handle on every request. No HTTP-visible change.
- **Multi mode** (new): routes nest under `/graphs/{graph_id}/...`.
  Routing middleware extracts the graph id from the path, looks it up
  in the registry, and injects the handle. 404 if not found.
  (Multi-mode startup itself lands in PR 5; this PR provides the
  router-side wiring.)

AppState refactor:
- `engine: Arc<Omnigraph>` and `policy_engine: Option<Arc<PolicyEngine>>`
  fields removed — both now live inside `GraphHandle` in the registry.
- `mode: ServerMode { Single { uri } | Multi { config_path } }` added.
- `registry: Arc<GraphRegistry>` added.
- `server_policy: Option<Arc<PolicyEngine>>` added (placeholder for
  management endpoints in PR 6b; unused today).
- Existing constructors (`new`, `new_with_bearer_token{s,_and_policy}`,
  `new_with_workload`, `open*`) build a single-mode AppState
  internally and remain source-compatible. Tests that constructed
  AppState via these constructors continue to work.
- `with_policy_engine` post-construction setter — rebuilds the
  single-mode handle with the policy attached. Engine-layer
  enforcement is NOT reinstalled (matches the old single-field
  semantics; `open_with_bearer_tokens_and_policy` is the path that
  installs both layers).
- `new_multi` constructor added for PR 5's startup loop.
- `uri()` now returns `Option<&str>` (Some in single, None in multi).

Routing middleware:
- `resolve_graph_handle` injects `Arc<GraphHandle>` as a request
  extension. Mode-aware: single returns the only handle; multi parses
  `/graphs/{graph_id}/...` from the URI. Returns 404 in multi mode
  when the graph id is unregistered. Records `graph_id` on the
  current tracing span.
- `require_bearer_auth` updated to insert `ResolvedActor` (was
  `AuthenticatedActor`).

Handler refactor — every protected handler:
- Gains `Extension(handle): Extension<Arc<GraphHandle>>` param.
- Replaces `state.engine` → `handle.engine`.
- Replaces `state.policy_engine()` → `handle.policy.as_deref()`.
- Replaces `state.uri()` → `handle.uri.as_str()` (or `.clone()`
  where String is needed).
- Replaces `Arc::clone(&state.engine)` → `Arc::clone(&handle.engine)`
  (the spawn-and-clone pattern in `server_export` — proof that a
  long-running export survives the registry being mutated later).

authorize_request signature:
- Was: `(state: &AppState, actor: Option<&AuthenticatedActor>, request: PolicyRequest)`.
- Now: `(actor: Option<&ResolvedActor>, policy: Option<&PolicyEngine>, request: PolicyRequest)`.
- Per-graph callers pass `handle.policy.as_deref()`. The (future PR 6b)
  management endpoints will pass `state.server_policy.as_deref()`.

MR-731 invariant preserved:
- The single chokepoint `request.actor_id = actor.actor_id.as_ref().to_string()`
  inside `authorize_request` still overwrites any client-supplied
  actor identity. Regression test
  `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
  at `tests/server.rs:1114-1216` passes unchanged.

Tests: 0 new (the registry race tests in PR 3 already cover the
data structure; this PR exercises them indirectly via the existing
test suite). 74 lib + 57 server integration + 60 openapi = 191 tests
green. Clippy clean.

LOC: +397 insertions, -153 deletions in `crates/omnigraph-server/src/lib.rs`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: OpenAPI multi-mode cluster filter (PR 4b/10)

PR 4b of the MR-668 multi-graph server work. In multi mode, the served
`/openapi.json` reports cluster routes (`/graphs/{graph_id}/...`) instead
of the legacy flat protected paths — matching what `build_app` actually
mounts (PR 4a's `Router::nest`). Single mode is unchanged.

Implementation:
- New `server_openapi` branch: when `state.mode()` is `Multi`, call
  `nest_paths_under_cluster_prefix(&mut doc)` after `ApiDoc::openapi()`.
- The rewrite consumes `doc.paths.paths`, then for every path-item:
  - If the path is in `ALWAYS_FLAT_PATHS` (`/healthz` for now), keep
    it flat.
  - Otherwise, prefix every operation_id with `cluster_` and reinsert
    the item at `/graphs/{graph_id}<original_path>`.
- Single mode hits no extra work — the path map is untouched.
- The static `ApiDoc::openapi()` still emits the flat surface, so
  in-process callers (the existing `openapi_json()` helper in tests)
  see the unmodified spec.

Why cluster_ prefix on operation IDs: OpenAPI specs require unique
operation_ids across the document. With both flat (single-mode) and
cluster (multi-mode) surfaces ever co-existing in a generated SDK,
the prefix prevents collision. The current served doc only carries
one surface, so the prefix is forward-compat with potential future
dual-surface generation.

Tests: 6 new in `tests/openapi.rs`, all via the `/openapi.json` route
(not the static `ApiDoc::openapi()` helper):
- `multi_mode_openapi_lists_cluster_paths` — every protected path
  appears as a cluster variant.
- `multi_mode_openapi_drops_flat_protected_paths` — flat protected
  paths are absent.
- `multi_mode_openapi_keeps_healthz_flat` — `/healthz` survives.
- `multi_mode_openapi_prefixes_operation_ids_with_cluster` — every
  cluster operation_id starts with `cluster_`.
- `multi_mode_operation_ids_are_unique` — no operation_id collisions.
- `single_mode_openapi_unchanged_by_cluster_filter` — single mode
  still emits the legacy flat surface (regression).

New test helper `app_for_multi_mode(graph_ids)` exercises the new
`AppState::new_multi` constructor from PR 4a — first user of multi-mode
construction outside of unit tests.

Result: 66 openapi tests + 57 server integration tests + 74 lib tests
= 197 green. No regression in the existing OpenAPI drift check
(`openapi_spec_is_up_to_date` still validates the static flat surface
matches the committed openapi.json).

LOC: +67 in lib.rs (rewrite logic), +219 in tests/openapi.rs (test
suite + helper).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: multi-graph startup + mode inference (PR 5/10)

PR 5 of the MR-668 multi-graph server work. This is the first PR that
makes multi mode actually usable end-to-end: operators invoking
`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:`
map and no single-mode selector now get a running multi-graph server.

Mode inference (MR-668 decision 2, four-rule matrix in
`load_server_settings`):
  1. CLI `<URI>` positional        → Single
  2. CLI `--target <name>`         → Single (URI from graphs.<name>)
  3. `server.graph` in config      → Single (URI from graphs.<name>)
  4. `--config` + non-empty `graphs:` + no single-mode selector
                                   → Multi (all entries in `graphs:`)
  5. otherwise                     → error with migration hint

Rule 5's error message names every escape hatch so operators can fix
their invocation without grepping docs.

Config schema extensions:
  - `TargetConfig.policy: PolicySettings` (per-graph Cedar policy file).
    `#[serde(default)]` so existing single-graph YAMLs keep parsing.
  - `ServerDefaults.policy: PolicySettings` (server-level Cedar policy
    for management endpoints — loaded in PR 5, wired into `GET /graphs`
    in PR 6b).
  - `OmnigraphConfig::resolve_target_policy_file(name)` and
    `resolve_server_policy_file()` helpers — both resolve relative to
    the config file's `base_dir`.

Public types added to `omnigraph-server`:
  - `ServerConfigMode { Single { uri, policy_file } | Multi { graphs,
    config_path, server_policy_file } }`.
  - `GraphStartupConfig { graph_id, uri, policy_file }` — one entry
    per graph in multi mode.

`ServerConfig` shape change:
  - WAS: `{ uri: String, bind, policy_file, allow_unauthenticated }`.
  - NOW: `{ mode: ServerConfigMode, bind, allow_unauthenticated }`.
  - Breaking for any code that constructs `ServerConfig` directly.
    `main.rs` is unaffected (uses `load_server_settings`).

`serve()` now forks on `ServerConfig.mode`:
  - Single: existing flow via `AppState::open_with_bearer_tokens_and_policy`.
  - Multi: parallel open via `futures::stream::iter(graphs)
    .map(open_single_graph).buffer_unordered(4).collect()`. Bound 4 is
    a rule-of-thumb for I/O-bound work — at N≤10 this trades startup
    latency for a small amount of concurrent S3/Lance open pressure.
    Fail-fast: first open error aborts startup; in-flight opens drop
    their engine via Arc (Lance datasets close cleanly).

New helper `open_single_graph(GraphStartupConfig)`:
  - Validates `GraphId` per the regex in PR 1.
  - `Omnigraph::open(uri).await` with descriptive error context.
  - Loads per-graph policy file and re-applies it via
    `Omnigraph::with_policy` (engine-layer enforcement, MR-722).
  - Returns `Arc<GraphHandle>` ready for the registry.

Routing middleware bug fix:
  - `Router::nest("/graphs/{graph_id}", inner)` rewrites
    `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
    The previous middleware tried to parse `{graph_id}` from
    `request.uri().path()` and got 400 instead of 200. Fixed by reading
    from `axum::extract::OriginalUri` request extension, which preserves
    the pre-rewrite URI.
  - Caught by the two new tests
    `cluster_routes_dispatch_per_graph_handle` and
    `cluster_route_for_unknown_graph_returns_404`.

Tests (14 new, all passing):
  - Four-rule matrix: one test per branch + the joint case
    `mode_inference_cli_uri_overrides_graphs_map` + the empty-graphs-map
    error case.
  - Per-graph + server-level policy file path resolution.
  - Reserved `GraphId` rejection at startup.
  - End-to-end multi-graph routing: two graphs side by side, each
    cluster route hits the right engine.
  - Unknown graph id under cluster prefix → 404.
  - Flat routes 404 in multi mode.

Inline `ServerConfig` test (`serve_refuses_to_start_in_state_1_without_unauthenticated`)
and three `server_settings_*` tests updated to the new `mode` shape.

Result: 211 server tests green (74 lib + 71 integration + 66 openapi),
MR-731 regression test still pinned and passing.

LOC: +45 config.rs, +281 lib.rs (net), +395 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: Cedar resource-model refactor (PR 6a/10)

PR 6a of the MR-668 multi-graph server work. Policy-crate-only refactor —
no HTTP handler changes, no operator-supplied policy.yaml changes. Sets
up the chassis that PR 6b's `GET /graphs` consumes.

Two new `PolicyAction` variants:
  - `GraphCreate` — gates `POST /graphs` (deferred behavioral PR).
  - `GraphList`   — gates `GET /graphs` (lands in PR 6b).

Note: `GraphDelete` is intentionally NOT added in this PR. `DELETE
/graphs/{id}` is deferred from MR-668's v0.7.0 scope to bound complexity
(no `delete_prefix`, no tombstone, no `RegistryLookup::Tombstoned`).
Adding the Cedar action without a consumer would be the same kind of
"dead vocabulary" trap the `Admin` variant already documents.

New `PolicyResourceKind { Graph, Server }` enum, plus a
`PolicyAction::resource_kind()` method that classifies every action.
Per-graph actions (Read, Change, BranchCreate, …) bind to
`Omnigraph::Graph::"<graph_label>"`; server-scoped actions
(GraphCreate, GraphList) bind to the singleton
`Omnigraph::Server::"root"`. `Admin` stays classified as per-graph for
now — MR-724 will pick the final shape when the first consumer surface
ships.

Cedar schema string additions:
  - `entity Server;`
  - `action "graph_create" appliesTo { principal: Actor, resource: Server, ... }`
  - `action "graph_list"   appliesTo { principal: Actor, resource: Server, ... }`

Compiler updates:
  - `compile_policy_source` picks the resource literal based on the
    action's `resource_kind`. Existing graph-only policies generate
    the same Cedar source as before — pinned by
    `per_graph_rules_continue_to_work_alongside_server_rules`.
  - `compile_entities` includes the `Server::"root"` entity only when
    a rule references a server-scoped action. Keeps test assertions
    for graph-only policies tight.
  - `PolicyEngine::authorize` builds the right resource UID at
    request time based on `request.action.resource_kind()`.

Validation rules added to `PolicyConfig::validate`:
  - A rule may not mix server-scoped and per-graph actions (different
    resource kinds need different `permit` clauses).
  - Server-scoped actions cannot have `branch_scope` or
    `target_branch_scope` — there's no branch context at the server
    level.

Operator impact: zero. The Cedar schema `Omnigraph::Server` entity is
internally referenced by `compile_policy_source`; operator policy.yaml
files only declare actions in `rules[].allow.actions` and never
reference the resource entity directly. Decision 6's "internal rename
only; operator policies unaffected" contract is preserved and pinned
by `per_graph_rules_continue_to_work_alongside_server_rules`.

Tests: 5 new (11 policy tests total, up from 6):
  - `graph_list_action_authorizes_against_server_resource`
  - `graph_create_action_authorizes_against_server_resource`
  - `server_scoped_rule_cannot_use_branch_scope`
  - `rule_mixing_server_and_per_graph_actions_is_rejected`
  - `per_graph_rules_continue_to_work_alongside_server_rules`

No regression: 145 server tests (74 lib + 71 integration) still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: GET /graphs endpoint + per-graph policy wire-up (PR 6b/10)

PR 6b of the MR-668 multi-graph server work. First management endpoint —
`GET /graphs` lists every graph registered with the server, gated by the
server-level Cedar policy from PR 6a.

New API shapes (in `omnigraph-server::api`):
  - `GraphInfo { graph_id, uri }` — one entry per registered graph.
  - `GraphListResponse { graphs: Vec<GraphInfo> }` — sorted alphabetically
    by `graph_id` for deterministic output.

Handler `server_graphs_list`:
  - Mounted at `GET /graphs` in both modes.
  - Single mode: returns 405 (resource exists in the API surface, just
    not operational without a `graphs:` map). 405 chosen over 404 so
    clients see "resource exists, wrong context" rather than "no such
    resource".
  - Multi mode: requires bearer auth (when configured); Cedar-gated by
    `PolicyAction::GraphList` against `Omnigraph::Server::"root"`
    (PR 6a's chassis). Returns the sorted registry list.

Cedar gate composition:
  - When no `server.policy.file` is configured, the MR-723 default-deny
    falls through: `GraphList` is not `Read`, so an authenticated actor
    without a server policy gets 403. This is the right default — don't
    expose the registry until the operator explicitly authorizes it.
  - When a server policy is configured, Cedar evaluates the rule. The
    test `get_graphs_with_server_policy_authorizes_per_cedar` pins the
    admin-allow / viewer-deny split.

Routing:
  - New `management` sub-router holding `/graphs` (auth-required, no
    `resolve_graph_handle` middleware — operates on the registry, not
    a single graph).
  - Single mode merges flat protected routes + management.
  - Multi mode merges nested `/graphs/{graph_id}/...` + management.

OpenAPI:
  - `server_graphs_list` registered in `ApiDoc::paths(...)`.
  - `EXPECTED_PATHS` in `tests/openapi.rs` gains `/graphs`.
  - `openapi.json` regenerated (auto-tracked by
    `openapi_spec_is_up_to_date` in CI).

Tests: 4 new in `tests/server.rs::multi_graph_startup`:
  - `get_graphs_lists_registered_graphs_in_multi_mode`
  - `get_graphs_returns_405_in_single_mode`
  - `get_graphs_requires_bearer_auth_when_configured`
  - `get_graphs_with_server_policy_authorizes_per_cedar`

What's NOT in this PR (deferred):
  - Per-graph policy enforcement is wired through `handle.policy`
    (PR 4a already did this); PR 6b doesn't add new per-graph
    behavior beyond making sure the server policy lookup composes
    cleanly alongside it.
  - `POST /graphs` (PR 7) and `DELETE /graphs/{id}` (out of scope
    for v0.7.0).
  - CLI `omnigraph graphs list` (PR 8 will add).

Result: 215 server tests green (74 lib + 66 openapi + 75 integration),
11 policy tests green. MR-731 spoof regression preserved across all
this work.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: POST /graphs runtime create endpoint (PR 7/10)

PR 7 of the MR-668 multi-graph server work. Operators can now add a
graph to a running multi-graph server without restarting:

  curl -X POST http://server/graphs \
    -H "Content-Type: application/json" \
    -d '{
          "graph_id": "beta",
          "uri": "/data/beta.omni",
          "schema": { "source": "node Person { name: String @key }\n" },
          "policy": { "file": "./policies/beta.yaml" }
        }'

DELETE remains deferred (out of v0.7.0 scope per the trimmed plan —
no `delete_prefix`, no tombstones).

Body shape (decision 7):
  - Nested `schema: { source: "..." }` (mirrors the `policy: { file }`
    pattern; leaves room for future fields without breakage).
  - Optional nested `policy: { file: "..." }` for per-graph Cedar.
  - 32 MiB body limit (reuses `INGEST_REQUEST_BODY_LIMIT_BYTES`).
  - Asymmetric with `SchemaApplyRequest` which keeps flat
    `schema_source: String` — documented in api.rs.

Atomic YAML rewrite + drift detection:
  - New `config::rewrite_atomic(path, new_config, expected_hash)`:
    flock → re-read + hash check → serialize → write `.tmp` → fsync
    → rename → fsync parent dir. Returns the new hash for the caller
    to update its in-memory baseline.
  - New `config::hash_config_file(path)` — SHA-256 of the on-disk
    bytes, used at startup and after each rewrite.
  - New `RewriteAtomicError { Drift | Io | Serialize }` enum.
  - `AppState.config_hash: Option<Arc<Mutex<[u8;32]>>>` carries the
    in-memory baseline. Updated after every successful rewrite so
    subsequent POSTs don't false-trigger drift.
  - The mutex is `std::sync::Mutex` (brief critical section, no .await
    inside). The flock itself serializes file access process-wide
    AND across multiple server instances (defense in depth).
  - All sync I/O runs inside `tokio::task::spawn_blocking` — flock
    is sync.

Handler ordering (the load-bearing sequence):
  1. Mode check: 405 in single mode.
  2. Cedar authorize: `GraphCreate` against `Omnigraph::Server::"root"`.
  3. Validate body: `GraphId::try_from` (regex + reserved-name), empty
     schema/uri checks, per-graph policy file parse.
  4. Pre-check registry for duplicate graph_id / duplicate uri (409).
  5. `Omnigraph::init` the new engine.
  6. Atomic YAML rewrite (drift detection inside).
  7. Publish in registry (atomic re-check via `GraphRegistry::insert`).

Failure modes (documented in handler rustdoc):
  - Init fails → orphan storage at `req.uri` (PR 2a cleans up schema
    files; Lance datasets remain orphans until `delete_prefix` lands).
  - YAML rewrite fails (drift, IO) → orphan storage; YAML unchanged.
  - Registry insert fails (race) → YAML has entry but registry doesn't;
    next restart opens it cleanly.

New dependency: `fs2 = "0.4"` (workspace + omnigraph-server). POSIX-only
file locking. Linux/macOS deployment supported; Windows out of scope.

Tests (10 new in `tests/server.rs::multi_graph_startup`):
  - `post_graphs_creates_a_new_graph_end_to_end` — happy path, includes
    YAML inspection to confirm the rewrite landed.
  - `post_graphs_baseline_hash_updates_between_rewrites` — two POSTs in
    a row both succeed (drift baseline updates correctly).
  - `post_graphs_duplicate_graph_id_returns_409`
  - `post_graphs_duplicate_uri_returns_409`
  - `post_graphs_invalid_graph_id_returns_400` (reserved name)
  - `post_graphs_empty_schema_source_returns_400`
  - `post_graphs_returns_405_in_single_mode`
  - `post_graphs_yaml_drift_detection_returns_503` — operator hand-edits
    omnigraph.yaml; server refuses to clobber.
  - `hash_config_file_is_deterministic_and_detects_changes`
  - `rewrite_atomic_refuses_when_hash_drifts`

OpenAPI: `server_graphs_create` registered in `ApiDoc::paths(...)`;
openapi.json regenerated.

Result: 225 server tests green (74 lib + 66 openapi + 85 integration),
all MR-731 regressions still pinned.

LOC: ~580 lib.rs net (handler + helpers), ~120 config.rs (rewrite
machinery), +71 api.rs (request/response shapes), +332 tests/server.rs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: CLI omnigraph graphs list/create (PR 8/10)

PR 8 of the MR-668 multi-graph server work. CLI parity for the
v0.7.0 management surface: operators can now manage graphs from
the command line against a running multi-graph server.

  omnigraph graphs list --target dev --json
  omnigraph graphs create \
    --target dev \
    --graph-id beta \
    --graph-uri /data/beta.omni \
    --schema schema.pg

DELETE is intentionally absent — server-side DELETE was deferred from
v0.7.0 scope, and shipping a client subcommand for a server endpoint
that doesn't exist would be dead vocabulary. The help output, the
subcommand enum, and the test that pins it (`graphs_subcommand_help_
lists_list_and_create`) all agree.

CLI architecture (modeled on `BranchCommand`):
  - New `Command::Graphs { command: GraphsCommand }` top-level variant.
  - `GraphsCommand { List, Create }` enum.
  - List: GET `<base>/graphs`. Stdout is `<graph_id>\t<uri>` per line,
    or JSON via `--json`.
  - Create: reads `--schema <path>` from local disk, inlines as
    `schema: { source: <file> }` in the POST body (nested per
    MR-668 decision 7). Optional `--policy-file <path>` becomes
    `policy: { file: <path> }`. Returns 201 → "created graph X at Y"
    or JSON via `--json`.
  - Both subcommands reject local URI targets with a clear
    "remote multi-graph server URL" error.

New API type imports in the CLI: `GraphCreateRequest`,
`GraphCreateResponse`, `GraphListResponse`, `GraphSchemaSpec`,
`GraphPolicySpec` — all from `omnigraph-server::api`.

Tests:
  - cli.rs (4 new, non-network):
      * `graphs_subcommand_help_lists_list_and_create` — pins the
        deferral of `delete` (catches scope creep).
      * `graphs_list_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_against_local_uri_errors_with_remote_only_message`
      * `graphs_create_with_missing_schema_file_errors` — pins the
        IO context in the schema-read error path.
  - system_remote.rs (1 new, `#[ignore]` like its peers):
      * `graphs_list_and_create_against_multi_graph_server` — spawns a
        multi-mode server, calls `graphs list` (sees `alpha`),
        `graphs create` (adds `beta`), `graphs list` again (sees both),
        and confirms the new graph is reachable via its cluster route.

CLI suite: 62 tests green (58 existing + 4 new). The new ignored
end-to-end test runs locally with `cargo test --ignored`.

LOC: +159 main.rs (enum + handlers), +88 cli.rs (unit tests),
+131 system_remote.rs (integration test).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: composite e2e tests, race fix, v0.7.0 release (PR 9/10)

PR 9 — the final integration PR for MR-668 multi-graph server work.
Closes the v0.7.0 release.

Composite lifecycle tests (closes gaps flagged in PR 7's coverage
review):
  - `multi_graph_lifecycle_post_query_restart_persistence` — POST a
    graph, query it via cluster route, reload the config from disk
    and confirm `load_server_settings` sees the rewritten YAML.
    Validates the "restart resolves orphans" failure-mode story.
  - `per_graph_policy_enforced_on_post_created_graph` — POST a graph
    with a per-graph policy attached, then send authenticated read
    and change requests. Per-graph Cedar enforcement fires correctly
    on a POST-created graph (engine-layer policy reinstalled via
    `Omnigraph::with_policy` inside the create flow).
  - `concurrent_post_graphs_distinct_ids_all_succeed` — 4 concurrent
    POSTs with distinct graph_ids all return 201. Caught a real
    race in `rewrite_atomic` (see below).

Race fix — `rewrite_atomic_with_modify`:

The first composite test surfaced a real bug. The old
`rewrite_atomic(path, new_config, expected_hash)` captured the
baseline hash OUTSIDE the flock, then called rewrite_atomic which
re-acquired it inside. Under concurrent writers:

  - POST A: captures baseline H0, calls rewrite_atomic.
  - POST B: captures baseline H0 too (before A's update lands).
  - A: acquires flock, on-disk == H0, writes H1, releases.
  - A: updates baseline H0 → H1.
  - B: tries to acquire flock — waits.
  - B: acquires flock. On-disk is now H1. Expected (captured
       before A finished) is H0. MISMATCH → spurious Drift error.

Worse: even if the timing happens to align, B's `updated` config
was constructed from BYTES read before the flock. B writes a config
that doesn't include A's new graph — silent data loss.

The fix: new `config::rewrite_atomic_with_modify(path, baseline,
modify)` takes a closure. Inside the flock + baseline mutex:
  1. Read on-disk bytes, hash, compare to baseline.
  2. Parse on-disk YAML.
  3. Call `modify(parsed)` to produce the new config — receives
     fresh on-disk state, returns the modification.
  4. Serialize + write + fsync + rename + update baseline.

Everything is read-modify-write under the same critical section.
Concurrent writers serialize cleanly. Test confirmed this is no
longer a race.

The old `rewrite_atomic(path, new_config, expected_hash)` API stays
for tests that don't need the read-modify-write shape; the POST
handler switches to the new shape.

Version bump v0.6.0 → v0.7.0:
  - All 5 `crates/*/Cargo.toml` (compiler, engine, policy, cli, server)
    plus their inter-crate `path` dep version constraints.
  - `Cargo.lock` regenerated by `cargo build --workspace`.
  - `AGENTS.md` "Version surveyed" line, capability matrix HTTP-server
    row updated to mention multi-graph + cluster routes + atomic YAML
    rewrite.
  - `openapi.json` regenerated.

Docs:
  - `docs/releases/v0.7.0.md` (new) — release notes with breaking
    changes, new features, deferred items (DELETE, `delete_prefix`,
    actor forwarding), and the single→multi migration recipe.
  - `docs/user/server.md` — substantial section additions for the
    two modes, mode inference, cluster endpoint table, management
    endpoints, `omnigraph.yaml` ownership contract, `POST /graphs`
    body shape + status codes.
  - `docs/user/cli.md` — `omnigraph graphs list/create` section,
    deferred-DELETE note.
  - `docs/user/policy.md` — server-scoped Cedar actions
    (`graph_create`, `graph_list`), per-graph vs server-level policy
    composition, example server-level policy.

Workspace test pass: 573 tests green across all crates. Zero
failures. MR-731 spoof regression still pinned and passing across
the entire 10-PR series.

This commit closes MR-668. v0.7.0 is ready for tagging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove POST /graphs and CLI graphs create (defer runtime graph mgmt)

The POST /graphs runtime-create endpoint shipped in PR 7/10 has three
unresolved high-severity bugs:

  - flock-on-renamed-inode race: the YAML flock is taken on
    omnigraph.yaml itself, then a temp file is renamed over it.
    Cross-process writers end up locking different inodes — both
    believing they hold exclusive access.
  - duplicate-check outside the file lock: precheck runs against
    the in-memory registry only; the locked closure does
    config.graphs.insert(...) unconditionally. Concurrent same-id
    POSTs can persist the loser in YAML while the in-memory registry
    keeps the winner — they disagree after restart.
  - best_effort_cleanup_init_artifacts deletes _schema.pg /
    _schema.ir.json / __schema_state.json on any init failure. An
    accidental re-init against an existing graph's URI destroys its
    schema; subsequent open() fails at read_text(_schema.pg).

The correct fix is a Lance-style cluster catalog (reserve → init →
publish with recovery sidecars), parallel to the engine's existing
__manifest discipline. That work is out of scope for v0.7.0.

For now, disable runtime add/remove from the network and CLI surface.
Operators add graphs by editing omnigraph.yaml and restarting. The
GET /graphs read-only enumeration stays.

Removed:
- POST /graphs handler + router fragment + utoipa registration
- 13 post_graphs_* server tests + 3 composite POST tests +
  multi_mode_app_with_real_config / post_graph helpers
- CLI omnigraph graphs create subcommand + its handler + cli.rs tests
- system_remote.rs combined list+create test trimmed to list-only
- YAML rewrite infra: rewrite_atomic[_with_modify], RewriteAtomicError,
  staging_path, hash_config_file, AppState::config_hash field +
  threading through new_multi and open_multi_graph_state
- fs2 dependency (verified absent from cargo tree)
- sha2/fs2 imports in config.rs (only the rewrite path used them)
- Cedar PolicyAction::GraphCreate variant + "graph_create" match arms
  + action def in Cedar schema + graph_create_action_authorizes_against_server_resource test
- GraphCreateRequest / GraphCreateResponse / GraphSchemaSpec /
  GraphPolicySpec API types (only the POST handler / CLI imported them)

Kept:
- GET /graphs (read-only enumeration) and graph_list Cedar action
- omnigraph graphs list CLI subcommand
- All multi-graph startup, mode inference, cluster routes,
  per-graph + server-level Cedar policies
- server_settings_drive_multi_graph_startup_end_to_end (the test
  that covers operator-authored YAML + restart — the path that
  survives)
- best_effort_cleanup_init_artifacts and the three init failpoints
  (still reachable from CLI `omnigraph init`; preflight fix deferred
  as a follow-up)
- GraphRegistry::insert and its concurrency tests — production
  callers gone, but the method is the natural seam for the future
  cluster-catalog work

Also fixed (transcript issue 4):
- ALWAYS_FLAT_PATHS now includes /graphs so multi-mode OpenAPI
  advertises the management route correctly (was previously rewritten
  to /graphs/{graph_id}/graphs)
- multi_mode_openapi_keeps_healthz_flat → renamed to
  multi_mode_openapi_keeps_management_paths_flat, asserts both
  /healthz and /graphs stay flat
- multi_mode_openapi_prefixes_operation_ids_with_cluster skips
  /graphs in addition to /healthz

Doc fixes:
- docs/user/cli.md: graphs list example was --target http://...,
  but --target is a config-graph-name lookup; corrected to --uri.
  Removed the graphs create example.
- docs/user/server.md: dropped POST /graphs row, "omnigraph.yaml
  ownership", and "POST /graphs body shape" sections. Added a
  paragraph stating runtime add/remove is not exposed in v0.7.0.
- docs/user/policy.md: dropped graph_create action; reworded the
  "Configuration" line to clarify that server-scoped rules (graph_list)
  take neither branch_scope nor target_branch_scope.
- docs/releases/v0.7.0.md: rewrote release narrative — multi-graph
  mode ships; runtime add/remove deferred.
- AGENTS.md: HTTP server bullet and capability matrix row updated to
  reflect read-only GET /graphs and the operator-edit workflow.
- openapi.json regenerated; /graphs has only .get, no .post.

Diff: 17 files, +123 −1525 LOC.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: comment cleanup and policy format style

Strip "PR Na/Nb" sub-PR references throughout MR-668 surfaces — they
were useful during the 10-PR delivery sequence but rot now that the
work is in the tree. Keep the MR-668 umbrella references.

Also:
- Add explicit `when = when` and `resource_literal = resource_literal`
  named args in `compile_policy_source`'s outer `format!` to match the
  surrounding crate style (already explicit for `group` and `action`).
- Rename the best-effort cleanup tracing target from
  "omnigraph::init" to "omnigraph::init::cleanup" so operators can
  filter init-failure cleanup events separately from init's other
  log lines.

No behavior change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop actor_id from PolicyRequest; pass actor as separate arg

The MR-731 "server-authoritative actor identity" invariant was enforced
by an in-function chokepoint (`request.actor_id = actor.actor_id...`
overwrite inside `authorize_request`). That worked but relied on every
caller passing in a `PolicyRequest` and trusting the overwrite — a
comment-enforced invariant.

Move the invariant into the type system:

* `PolicyRequest` no longer carries `actor_id`. The struct now models
  what a caller wants to do, not who they are.
* `PolicyEngine::authorize(actor_id: &str, request: &PolicyRequest)`
  and `validate_request(actor_id, request)` take identity as a
  separate argument. The same shape `PolicyChecker::check` already had
  for the engine layer.
* `authorize_request` in the HTTP layer extracts `actor_id` from the
  bearer-resolved `ResolvedActor` and passes it positionally — no
  overwrite step that could be skipped.
* CLI `omnigraph policy explain` updated (the only other consumer
  that built a `PolicyRequest`).

Public API break for the `omnigraph-policy` crate. Worth it: handlers
can no longer accidentally populate `actor_id` from a request body
field, and external consumers are forced by the compiler to source
actor identity from a trusted path.

The MR-731 chokepoint test
`actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
still passes — the bearer-resolved actor is what reaches the engine.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: consolidate AppState single-mode constructors; delete with_policy_engine

The prior `with_policy_engine` constructor reused the engine `Arc`
from the existing handle (`engine: Arc::clone(&existing.engine)`)
without re-applying `Omnigraph::with_policy`. Combined with
`new_with_workload`, the documented composition pattern was
`AppState::new_with_workload(...).with_policy_engine(p)` — which
produced an `AppState` whose HTTP layer enforced Cedar but whose
underlying engine had no `PolicyChecker` installed. Any caller
reaching the engine via `state.registry().list()[i].engine` could
bypass policy entirely. The doc comment named this gap; the type
system didn't.

Make composition impossible to get wrong:

* Add `AppState::new_single(uri, db, tokens, Option<PolicyEngine>,
  WorkloadController)` — canonical single-mode constructor that
  takes every option together and routes through `build_single_mode`
  (which applies `db.with_policy(checker)` to the engine itself).
* `new`, `new_with_bearer_token`, `new_with_bearer_tokens`,
  `new_with_bearer_tokens_and_policy`, `new_with_workload` all become
  thin wrappers around `new_single`.
* Delete `with_policy_engine`. There is no post-construction policy
  install path any more; the single linear construction forces
  HTTP-layer and engine-layer policy to install together or not at all.

Regression test `engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single`
constructs an `AppState::new_single` with a deny-all policy, pulls
the `Arc<Omnigraph>` from the registry handle (the same path an
embedded SDK consumer would take), and asserts a direct `mutate_as`
call returns `OmniError::Policy`. Pre-fix this test would have
succeeded the mutation.

Test caller in `ingest_per_actor_admission_cap_returns_429`
migrates from `.with_policy_engine(...)` to `new_single(...,
Some(policy_engine), workload)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: derive any_per_graph_policy on RegistrySnapshot; simplify dup check

`AppState::requires_bearer_auth` walked the entire registry per
request (cloning Arcs into a `Vec`, then `.iter().any(|h| h.policy
.is_some())`) to decide whether the auth middleware should challenge.
The walk is unnecessary — the answer only changes when the registry
mutates, which is exactly the moment a new snapshot is constructed.

Move the flag onto the snapshot itself:

* `RegistrySnapshot { graphs, any_per_graph_policy: bool }`.
* `RegistrySnapshot::new(graphs)` is the only construction path —
  it derives the flag from `graphs.values().any(|h| h.policy
  .is_some())` so the cached value can't drift from the source data.
* `Default` delegates to `new(HashMap::new())`.
* `GraphRegistry::from_handles` and `insert` build snapshots via
  `RegistrySnapshot::new(...)`.
* `GraphRegistry::snapshot_ref()` exposes the current snapshot
  through an `arc_swap::Guard`; callers that need cached derived
  state go through this accessor (callers that only want `graphs`
  still use `list` / `get`).

`requires_bearer_auth` becomes one `ArcSwap::load` + bool read.

Also (drive-by, same file, same hunk): replace the dead
`if let Some(other) = seen_uris.get(...)` + `let _ = other;` pattern
in `from_handles` with a plain `seen_uris.contains_key(...)`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fail-fast multi-graph startup with try_collect

The `open_multi_graph_state` doc comment claims "Fail-fast — the
first open error aborts startup; other in-flight opens are dropped"
but the code did

    .buffer_unordered(4)
    .collect::<Vec<_>>()
    .await
    .into_iter()
    .collect::<Result<Vec<_>>>()?;

which drains every future in the stream before propagating the first
`Err`. With N S3-backed graphs and graph #2 failing fast, the caller
still waits for #1, #3, #4, … to either succeed or fail before
seeing the error.

Replace the four-line dance with `futures::TryStreamExt::try_collect`,
which short-circuits on the first `Err` and drops the rest. The
doc comment now matches behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused State extractor from 7 read-only handlers

After the routing-middleware refactor moved the engine into the per-graph
`GraphHandle` (extracted via `Extension<Arc<GraphHandle>>`), seven
read-only handlers — `server_snapshot`, `server_read`, `server_export`,
`server_schema_get`, `server_branch_list`, `server_commit_list`,
`server_commit_show` — kept an unused `State(_state): State<AppState>`
extractor. Drop it. Each request avoids one `FromRequestParts` clone
of `AppState`'s Arcs.

Handlers that actually use state (workload admission for write paths,
`server_policy` for management endpoints) keep theirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: emit info! for graph routing decision

`tracing::Span::current().record("graph_id", ...)` in the routing
middleware silently no-ops here: no upstream `#[tracing::instrument]`
on the handlers declares a `graph_id` field, and `TraceLayer::new_for_http`
doesn't either. The recorded value never lands anywhere visible.

Replace with an explicit `info!(graph_id = %handle.key.graph_id,
"graph routed")` event so operators can grep logs and correlate
requests with the active graph. In single mode the value is the
sentinel `"default"`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: align GET /graphs 405 body code with HTTP status

The single-mode `GET /graphs` handler returned an `ApiError` built
via struct literal with `status: METHOD_NOT_ALLOWED, code: BadRequest`.
The body code disagreed with the HTTP status — clients deserializing
on `code` saw `bad_request`, clients deserializing on `status` saw
405. Same bug class as the earlier 503+Conflict mismatch on the
removed YAML drift path.

Close the class for this one remaining instance:

* Add `ErrorCode::MethodNotAllowed` to the API enum.
* Add `ApiError::method_not_allowed(msg)` — pairs the 405 status
  with the matching code.
* Replace the struct literal in `server_graphs_list` with the
  constructor.
* Regenerate `openapi.json` (adds `method_not_allowed` to the
  ErrorCode schema enum).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused axum::handler::Handler import

The import landed in earlier work but no current call site uses it.
Emitted an `unused_imports` warning on every server build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop unused fs2 workspace dependency

`fs2 = "0.4"` lingered in [workspace.dependencies] after the
POST /graphs flock-on-rename design was pulled. `cargo tree -i fs2`
reports no consumers in the workspace and the dep is not in
Cargo.lock. Removing the declaration closes the "phantom dep" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: AGENTS.md Cedar row no longer hardcodes action count

The "8 actions" claim drifted as soon as MR-668 added `graph_list`.
Bumping the count would just push the drift one PR forward; the
correct-by-design fix is to defer to the canonical list in
docs/user/policy.md and stop maintaining a duplicate count.

Closes the "doc hardcodes a count that drifts from the enum" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: cfg(test)-gate GraphRegistry::insert and its mutex

`insert` and the `mutate: Mutex<()>` that serializes it had no
runtime consumer in v0.7.0 — the only insertion path at startup
is `from_handles`, and runtime add/remove is deferred until a
managed cluster catalog ships. Leaving both `pub` and live made
them a "looks like API, isn't" footgun: a future change could
build on `insert` without re-establishing the concurrency contract
with an actual consumer in scope.

Gate both together (`#[cfg(test)]` on the method, the field, and
the `tokio::sync::Mutex` import) so the race-pinning tests still
compile but production cannot reach them. When a real consumer
ships, ungate both — they're a unit. Closes the "public API with
no runtime consumer drifts toward incorrect" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: drop vestigial PolicyEngine surface

* `validate_request` had zero callsites — pure surface for nothing.
* `deny`'s `_actor_id` and `_request` parameters were both unused
  (the underscore prefix gave it away); the message is built by the
  caller before `deny` ever sees the request. Trim both.

Closes the "public API that the type system can't justify" class
for the policy engine. No behavior change; every existing test
stays green because the deletions never had a runtime effect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for init re-init footgun (red)

A second `Omnigraph::init` against an existing graph URI today
destroys the existing graph's schema artifacts. `init_storage_phase`
overwrites `_schema.pg` before any preflight, and on the inner
`GraphCoordinator::init` failure that follows,
`best_effort_cleanup_init_artifacts` deletes all three schema files.
The existing Lance datasets and `__manifest/` survive but the
schema metadata is gone — unrecoverable without operator surgery.

This test exercises that path and currently fails with
"_schema.pg must not be deleted by a failed re-init", confirming
the destructive cleanup branch fires. The fix in the next commit
makes the test pass by preflighting with `storage.exists()` and
returning a typed error before any write touches disk.

Per AGENTS.md rule 12, the test commit lands just before the fix
commit so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close init re-init footgun via InitOptions preflight (green)

`Omnigraph::init` is "create a new graph"; existing graphs need
an explicit overwrite. Today's behavior — silently overwrite
schema files, then on inner failure delete them via best-effort
cleanup — is destructive against an existing graph regardless of
which branch fires.

Correct-by-design fix:

* New `InitOptions { force: bool }` struct (default `force: false`).
* New `Omnigraph::init_with_options(uri, schema, options)`. The
  old `Omnigraph::init(uri, schema)` is a thin shortcut that
  passes `InitOptions::default()`.
* `init_with_storage` runs a `storage.exists()` preflight on the
  three schema URIs BEFORE any parse, write, or coordinator call.
  Any hit → typed `OmniError::AlreadyInitialized { uri }`. The
  destructive code paths (the `write_text` overwrite and the
  best-effort cleanup) are now unreachable in strict mode against
  an existing graph.
* `force: true` skips the preflight; existing operators who
  actually mean to overwrite opt in explicitly.
* CLI: `omnigraph init --force` maps to `InitOptions { force: true }`.
* HTTP: `OmniError::AlreadyInitialized` maps to 409 via
  `ApiError::from_omni`. Not currently HTTP-reachable (POST /graphs
  was pulled), but the wiring lands here so a future runtime
  create endpoint has one canonical translation.

Closes the "init is destructive against existing state" class.
The regression test added in the previous commit
(`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
turns green: the original schema files now survive a second
init attempt byte-for-byte, and the call errors cleanly with
`AlreadyInitialized`. The four existing
`init_failpoint_after_*_cleans_up_*` tests stay green — strict
mode's preflight passes on a fresh tempdir, and cleanup still
runs as before when a failpoint fires mid-write.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: split PolicyEngine::load into kind-typed loaders

Pre-fix, every caller of `PolicyEngine::load(path, graph_id)`
passed *some* `graph_id` argument — even when the policy was
server-scoped and Cedar's resolution would never touch a Graph
entity. The server-level loader at lib.rs passed the meaningless
sentinel `"server"`. A graph policy file containing a `graph_list`
rule compiled fine; a server policy file containing a `read` rule
compiled fine. Both silently no-op'd at request time because the
engine kind and the rule's resource kind disagreed.

Correct-by-design fix: replace `load` with two kind-typed loaders.

* `PolicyEngine::load_graph(path, graph_id)` — for per-graph
  policy files. Rejects any rule whose action `resource_kind()`
  is `Server`.
* `PolicyEngine::load_server(path)` — for server-level policy
  files. Takes no `graph_id`: server-scoped actions resolve against
  the singleton `Omnigraph::Server::"root"` entity, never a Graph.
  Rejects any rule whose action `resource_kind()` is `Graph`.

The old `load` is hard-deleted in the same commit because every
in-tree consumer migrates here (no semver promise on the workspace
crate, no external pinners). New `PolicyEngineKind` enum types
the loader's intent; `validate_kind_alignment` is the load-time
check that closes the "wrong action, wrong file, silent no-op"
class — operators get a load-time error instead of confused-and-
silent behavior at request time.

Callsites migrated:
* server lib.rs:374 (single-mode per-graph)   → load_graph
* server lib.rs:1065 (multi-mode server)      → load_server
* server lib.rs:1103 (multi-mode per-graph)   → load_graph
* CLI main.rs:732 (resolve_policy_engine)     → load_graph
* tests/server.rs ×5 (4 graph, 1 server)      → load_graph/load_server
* policy_engine_chassis.rs                    → load_graph

Four new in-source tests pin the contract: both rejection paths
and both positive paths.

Closes the "operator puts an action in the wrong file and the
rule silently never matches" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: introduce GraphRouting, retire single_mode_handle

Pre-fix, `AppState` always carried `Arc<GraphRegistry>` even when
serving one graph. Single mode populated the registry with one
handle keyed by the `SINGLE_GRAPH_KEY_ID = "default"` sentinel;
`single_mode_handle` walked the registry, asserted `len == 1`,
and returned the single element with a 500-class "programmer
error" branch on mismatch. Three smells in a row — magic key,
walk-and-assert, programmer-error guard — all because the
single-mode runtime was forced through a multi-mode abstraction.

Correct-by-design fix: type the routing.

* New `pub enum GraphRouting { Single { handle }, Multi { registry,
  config_path } }` on `AppState`. The `Single` arm carries the handle
  directly — no registry, no key, no walk.
* `resolve_graph_handle` middleware matches on `routing`. Single mode
  returns the handle in O(1); multi mode does the same path-extract +
  registry lookup as before. The 500-class programmer-error branch
  is gone — the type system now makes the violated invariant
  ("single mode has exactly one handle") unrepresentable.
* `requires_bearer_auth` reads `handle.policy.is_some()` directly
  in the Single arm; Multi arm still uses the cached
  `any_per_graph_policy` flag.

`ServerMode` and the legacy `registry` field on `AppState` are still
populated for now — C-3 removes both once every reader is migrated.
The `SINGLE_GRAPH_KEY_ID` sentinel and `ServerMode` will also go
away in C-3.

Closes the "single mode forced through a multi-mode abstraction"
class. All 76 server integration tests stay green: handlers still
extract `Extension<Arc<GraphHandle>>` from the request, so the
middleware's internal change is invisible to them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: remove ServerMode, registry field, and the SINGLE_GRAPH sentinel

C-1/C-2 introduced `GraphRouting` and pointed the middleware at it.
This commit removes the legacy shape that's now dead:

* `ServerMode` enum — deleted. Single mode's `uri` lives on
  `handle.uri`; multi mode's `config_path` lives on the
  `GraphRouting::Multi` arm.
* `AppState.mode: ServerMode` field — deleted.
* `AppState.registry: Arc<GraphRegistry>` field — deleted. Multi
  mode's registry is on `GraphRouting::Multi { registry, .. }`;
  single mode has no registry at all.
* `AppState::mode()`, `AppState::uri()`, `AppState::registry()`
  accessors — deleted. New `AppState::routing() -> &GraphRouting`
  is the single public entry point.
* `SINGLE_GRAPH_KEY_ID` constant — deleted. `GraphHandle.key` is
  still required by the struct, but in single mode the key is now
  only a tracing label (`"default"`, inlined with a comment naming
  its sole remaining purpose). Single-mode flat routes never carry
  a `{graph_id}` parameter, so the key is never compared against
  user input, and there is no registry where it could be a map
  key. C-1/C-2 already removed the registry walk that the sentinel
  was named for.

Callers migrated:
* `build_app` (lib.rs:944) — matches on `state.routing()` instead
  of `state.mode()`.
* `server_graphs_list` (lib.rs:1162) — destructures the Multi arm
  to get the registry; Single arm short-circuits to 405.
* `server_openapi` (lib.rs:1217) — matches the Multi arm for the
  cluster-prefix rewrite.
* `tests/server.rs:3735` — the B2 footgun regression test now
  matches on `state.routing()` to extract the single-mode handle
  (the test's earlier `state.registry().list().next()` shape was
  the closest pre-fix analog to "embedded consumer reaches the
  engine"; the new shape is more direct).

Closes the entire "single mode forced through a multi-mode
abstraction" class. After this commit:
* No magic sentinel as a routing key.
* No `single_mode_handle` walk-and-assert helper.
* No 500-class "programmer error" branch in the middleware.
* No two-field discriminant on `AppState` where one would do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for nested-route path extraction (red)

`server_branch_delete` and `server_commit_show` use bare
`Path<String>` extractors. In single-mode flat routes
(`/branches/{branch}`, `/commits/{commit_id}`) this works — one
capture, one value. In multi-graph cluster routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) axum 0.8 propagates the
outer `{graph_id}` capture into the inner handler, so the
extractor sees two captures and 500s with
"Wrong number of path arguments. Expected 1 but got 2."

`cluster_routes_dispatch_per_graph_handle` only exercises
`/snapshot` (no Path extractor), so the regression slipped through.
This test closes that gap structurally: every cluster route with
an inner path param gets exercised here.

Currently fails with the exact symptom above. Fix in the next
commit makes it pass.

Per AGENTS.md rule 12, the red test commit lands just before the
fix so the pair is visible in `git log` and a reviewer can check
out this commit alone to reproduce.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: named-field path-param structs for nested cluster routes (green)

`Path<String>` deserializes one path-param value positionally.
Single-mode flat routes (`/branches/{branch}`,
`/commits/{commit_id}`) have one capture; multi-mode nested routes
(`/graphs/{graph_id}/branches/{branch}`,
`/graphs/{graph_id}/commits/{commit_id}`) have two — axum 0.8
propagates the outer capture into nested handlers. Same handler,
two different shapes; the multi-mode shape 500s with
"Wrong number of path arguments. Expected 1 but got 2."

Symptomatic fix: change to `Path<(String, String)>` and ignore the
first element. Breaks again the moment we add another nest layer
(e.g. tenant in Cloud mode).

Correct-by-design fix: named-field structs deserialized by name
from axum's path-param map. Each handler picks only the fields it
needs. Stable across single / multi / future-cloud nest depths
because deserialization is by field name, not position.

* New `BranchPath { branch: String }` (file-local to lib.rs)
* New `CommitPath { commit_id: String }`
* `server_branch_delete` extractor → `Path<BranchPath>`
* `server_commit_show` extractor → `Path<CommitPath>`

Closes the "handler path-extractor type is positional and breaks
when route nesting changes" class. Red test from the previous
commit turns green. All 77 server tests pass (single-mode branch
delete + commit show, plus new multi-mode coverage).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: centralize policy-requires-tokens check in the runtime classifier

Single-mode `open_with_bearer_tokens_and_policy` bailed at lib.rs:380
when policy was installed and no tokens. Multi-mode
`open_multi_graph_state` had no equivalent: the server started, every
request 401'd because no token could ever match, and the operator
spent time debugging a misconfiguration the single-mode path would
have caught at startup.

The doc/code contradiction made the gap easy to miss: the
`ServerRuntimeState::PolicyEnabled` docstring said tokens-or-not
was "unusual but valid — every request fails 401 without a bearer,
which is effectively 'locked'." The single-mode bail contradicted
that. In practice, silent-401-on-every-request is bug-shaped, not
feature-shaped (operators wanting deny-all should configure tokens
plus a deny-all Cedar rule to get meaningful 403s with
policy-decision logging).

Symptomatic fix: add a copy of the bail to multi-mode. Two copies
that can drift again the next time a startup path is added.

Correct-by-design fix: hoist the check into
`classify_server_runtime_state` so both modes get the same
enforcement from one source of truth. The classifier becomes the
single source of truth for "should we start?" and adding a startup
invariant there is now the natural extension point for any future
mode.

Classifier matrix is now complete:

| has_tokens | has_policy | allow_unauthenticated | Result |
|---|---|---|---|
| F | F | F | bail (existing) |
| F | F | T | Open (existing) |
| T | F | * | DefaultDeny (existing) |
| F | T | * | bail (NEW — closes the gap) |
| T | T | * | PolicyEnabled (existing) |

Changes:

* `classify_server_runtime_state` (lib.rs:870-890) gains the
  `(false, true, _) => bail!(…)` arm with a clear message naming
  the failure mode and the two valid resolutions.
* `open_with_bearer_tokens_and_policy` (lib.rs:369+) drops its
  redundant local bail — the classifier rejected the invalid case
  before construction was reached.
* `ServerRuntimeState::PolicyEnabled` docstring is rewritten:
  drops the "(unusual but valid)" carve-out and states plainly
  that PolicyEnabled requires tokens. Names the explicit
  alternative (tokens + deny-all Cedar rule) for operators who
  want the all-requests-denied behavior.
* `classify_policy_enabled_always_wins` test is renamed to
  `classify_policy_enabled_requires_tokens` and the now-invalid
  `(false, true, _)` assertion is removed (covered by the new
  rejection test).
* New `classify_policy_without_tokens_is_rejected` test covers the
  new arm.
* New `serve_refuses_to_start_with_policy_but_no_tokens_multi_mode`
  integration test pins the multi-mode propagation path —
  symmetric with the existing single-mode
  `serve_refuses_to_start_in_state_1_without_unauthenticated`.

Closes the "single mode and multi mode startup branches can drift
on safety invariants" class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: close coverage gaps surfaced by the test-coverage audit

The bot-review pass and the subsequent coverage audit surfaced two
material gaps in PR #119's test surface — both easy to close, both
worth closing before merge.

* **Gap 1 — cluster-route sweep.** The Bug-1 path-extractor
  regression slipped through because
  `cluster_routes_dispatch_per_graph_handle` only exercised
  `/snapshot`. The other six protected cluster routes (`/read`,
  `/change`, `/export`, `/schema`, `/schema/apply`, `/ingest`,
  `/branches/merge`) were implicitly trusted to work without any
  multi-mode integration test.

  Add `all_protected_cluster_routes_resolve_to_their_handler`
  (`tests/server.rs`) that hits each protected cluster route with
  a minimal request and asserts the response is consistent with
  the handler being reached — no 404 (router didn't match), no 500
  with "Wrong number of path arguments" (Bug-1 class), no 500 with
  "missing extension" (routing middleware didn't inject the handle).
  Status code is a negative assertion because each handler's
  happy-path inputs differ; what matters is "the request reached
  the handler," not "the handler returned 200" — that's already
  pinned by the single-mode tests.

* **Gap 2 — `--force` happy path.** The strict re-init regression
  test (`init_on_existing_graph_uri_does_not_destroy_existing_schema`)
  pins the error path; nothing pinned the `force: true` escape
  hatch actually doing what its docstring claims.

  Add `init_with_force_recovers_from_orphan_schema_files`
  (`tests/lifecycle.rs`). Writes a bare `_schema.pg` to simulate
  orphan files from a failed prior init, confirms strict mode
  bails as expected, then confirms `init_with_options(force: true)`
  succeeds and produces a functional graph.

  Note: the test follows the documented semantics — force skips
  the preflight only, it does NOT purge existing Lance state. An
  earlier draft of the test (against full overwrite of an existing
  populated graph) failed because `GraphCoordinator::init` errored
  on the existing `__manifest`, which is exactly the limitation
  the `InitOptions::force` docstring already calls out. Recursive
  purge needs `StorageAdapter::delete_prefix` (tracked separately).

Coverage is now fully aligned with the PR's claims.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: regression test for GraphList open-mode bypass (red)

Cursor bot's review at commit 4120448 surfaced that
`server_graphs_list` returns 200 in Open mode (`--unauthenticated`,
no tokens, no policy), exposing the full graph registry — graph
IDs and URIs that may contain S3 bucket paths or internal
hostnames — to any unauthenticated caller.

Root cause: `authorize_request`'s no-policy fallback only denies
when `actor.is_some()`. In Open mode `actor: None`, so the
denial branch never fires and the call returns `Ok(())`. The
docstring on `server_graphs_list` claims the endpoint is
"Cedar-gated" and that we "don't leak the registry until the
operator explicitly authorizes it" — but Open mode has no Cedar
at all, so the docstring intent and the code disagree.

This commit renames the existing
`get_graphs_lists_registered_graphs_in_multi_mode` test to
`get_graphs_denied_in_open_mode_without_server_policy` and flips
the assertion from 200 → 403. Today this fails (server returns
200) — exactly the symptom the bot named. The fix in the next
commit tightens the no-policy fallback to deny server-scoped
actions unconditionally, regardless of mode.

Per AGENTS.md rule 12, the red test commit lands just before
the fix so the red → green pair is visible in `git log` and a
reviewer can check out this commit alone to reproduce.

Sort-order coverage that previously lived in the renamed test
moves to `get_graphs_with_server_policy_authorizes_per_cedar`
in the next commit, where the admin-200 response is operator-
authorized and a non-empty body is asserted.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: server-scoped actions always require explicit policy (green)

`server_graphs_list` returned 200 in Open mode (`--unauthenticated`,
no tokens, no policy) because `authorize_request`'s no-policy
fallback only denied when `actor.is_some()` AND action != Read.
In Open mode `actor: None`, so the denial branch never fired and
the call returned `Ok(())` — leaking the registry (graph IDs +
URIs that may contain S3 bucket paths or internal hostnames) to
any unauthenticated caller. The docstring on `server_graphs_list`
claimed it was "Cedar-gated" and that the server should "not leak
the registry until the operator explicitly authorizes it" —
docstring intent and code disagreed.

Symptomatic fix: special-case GraphList. Breaks the moment
another server-scoped action (`graph_create`, `graph_delete`) is
added.

Correct-by-design fix: tie authorization to the action's
`resource_kind()`. Server-scoped actions
(`PolicyResourceKind::Server`) always require explicit policy
authorization — there is no runtime state where they're served
by default. Per-graph actions keep the existing default-deny
logic (DefaultDeny denies non-Read for authenticated actors;
Open mode allows everything per the operator's `--unauthenticated`
opt-in for graph DATA, but not for server topology).

The fix uses the existing `PolicyResourceKind` enum that #119
already added — no new abstraction. Future server-scoped actions
(runtime `graph_create`/`graph_delete` when the cluster catalog
ships) automatically pick up the same enforcement without any
per-action handler change.

Changes:

* `crates/omnigraph-server/src/lib.rs:51` — re-export
  `PolicyResourceKind` (the kind discriminator was already public
  on the omnigraph-policy crate; needed in scope here).
* `crates/omnigraph-server/src/lib.rs:1457` — `authorize_request`'s
  no-policy fallback gains a server-scoped-action check that fires
  before the actor-based default-deny logic. Error message names
  the failure mode and points at `server.policy.file`.
* `crates/omnigraph-server/tests/server.rs:5037` —
  `get_graphs_with_server_policy_authorizes_per_cedar` extended
  to register two graphs in non-alphabetical order and assert
  the admin-200 response is sorted alphabetically. Restores the
  sort-order coverage that lived in
  `get_graphs_lists_registered_graphs_in_multi_mode` before the
  red commit renamed it to assert denial.

Also bundles a small adjacent cleanup that the bot-review flagged:

* `crates/omnigraph-server/src/graph_id.rs:124` — drop the
  unreachable `"openapi.json"` entry from `is_reserved`. The
  regex `^[a-zA-Z0-9-]{1,64}$` rejects every dot-containing name
  before `is_reserved` can run, so dotted entries in this list
  were dead code that misled readers into thinking the list
  needed to cover them. Comment now names the structural
  exclusion. The `rejects_reserved_route_names` test loses its
  `openapi.json` row (covered by `rejects_dots` via the regex).

Closes the "server-scoped management actions silently leak in
Open mode" class. Red test from the previous commit
(`get_graphs_denied_in_open_mode_without_server_policy`) turns
green; all 78 server integration tests + 76 lib tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: fold multi-graph work into v0.6.0 (no separate v0.7.0 release)

The branch had bumped workspace versions to 0.7.0 and added a
dedicated `docs/releases/v0.7.0.md` for the multi-graph work.
Per scope decision: ship the graph-rename and the multi-graph
mode in one v0.6.0 release.

Changes:

* Workspace versions bumped 0.7.0 → 0.6.0 in every crate manifest
  (`omnigraph`, `omnigraph-compiler`, `omnigraph-policy`,
  `omnigraph-server`, `omnigraph-cli`) and their internal
  `path = ..., version = "..."` dependency constraints.
* `docs/releases/v0.7.0.md` content merged into
  `docs/releases/v0.6.0.md`, retargeted to a single coherent
  v0.6.0 release note covering both the graph terminology rename
  and the multi-graph server mode. The original v0.7.0.md is
  deleted.
* All `v0.7.0` / `0.7.0` doc and comment references throughout
  `crates/`, `docs/`, `AGENTS.md`, and `openapi.json` retargeted
  to `v0.6.0` / `0.6.0`. `Cargo.lock` regenerated to match.
* OpenAPI spec regenerated via `OMNIGRAPH_UPDATE_OPENAPI=1
  cargo test -p omnigraph-server --test openapi
  openapi_spec_is_up_to_date` — `"version": "0.6.0"` now.

Verification:

* `cargo build --workspace` — clean (6 pre-existing engine
  warnings only).
* `cargo test --workspace --locked` — zero failures across all
  39 test result groups.
* `bash scripts/check-agents-md.sh` — passes (34 links / 33 docs).
* `grep -rn "0\.7\.0\|v0\.7\.0" --include='*.rs' --include='*.md'
  --include='*.json' --include='*.toml' .` returns no workspace
  hits. The three remaining `0.7.0` strings in `Cargo.lock`
  belong to unrelated 3rd-party crates (`pem-rfc7468`, `radium`,
  `rand_xoshiro`).

The git tag and crates.io publish happen later — this commit
just consolidates the surface so the eventual release is one
coherent v0.6.0 covering all the work since v0.5.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* mr-668: sanitize internal refs from v0.6.0 release notes

cubic-dev-ai P2 comments flagged that the release notes carried
internal Linear ticket and RFC references (MR-668, MR-731,
MR-723, RFC 0003, RFC 0004). Per AGENTS.md maintenance rule 5,
"Release docs are public project history. Describe capabilities,
behavior changes, breaking changes, upgrade notes, and user
impact; do not reference private ticket systems, internal
codenames, or planning shorthand that an outside contributor
cannot inspect." The bot's comments are correct against our own
published contract — they were a docs-quality regression
introduced when I drafted these notes.

Replaced each internal reference with the public-facing concept
it stood for. The substantive content (capabilities, behavior,
guarantees) was already present alongside the refs; sanitization
just trimmed the bracketed ticket labels:

* Line 6: dropped `(MR-668)` from the multi-graph mode summary —
  the descriptive name was already self-sufficient.
* Line 24: `MR-731 spoof defense` → `the bearer-derived-actor-
  identity guarantee`; `Forward-compat for Cloud mode (RFC 0003)
  and OAuth provider (RFC 0004)` → "forward-compat seams for
  future multi-tenant and OAuth deployments; they're inert in
  this release" — describes what the operator sees instead of
  pointing at planning docs.
* Line 26: `MR-731's server-authoritative-actor invariant` →
  "the server-authoritative-actor invariant: actor identity is
  always sourced from the bearer-token match resolved at the
  auth boundary" — the public-facing statement of the guarantee.
* Line 36: `(MR-723 default-deny otherwise rejects …)` →
  "without a server policy the default-deny posture rejects …"
  — same content, no ticket label.
* Line 121: `MR-731 spoof regression test` → "The bearer-auth-
  derived-actor-identity regression test (client-supplied
  identity headers are ignored; the server-resolved actor is the
  only identity Cedar sees)" — describes what the test guards
  instead of naming the originating ticket.

Verified: `grep -E 'MR-\d+|RFC[ -]?\d+' docs/releases/v0.6.0.md`
returns no matches; the rest of `docs/releases/` is also clean.
`scripts/check-agents-md.sh` passes.

Note: cubic-dev-ai also flagged `crates/omnigraph-cli/src/main.rs:276`
("doc comment incorrectly references v0.6.0 for a command that
only exists in v0.7.0"). That comment is based on a stale model
of the release surface — after folding v0.7.0 into v0.6.0 in
the previous commit, the multi-graph CLI surface IS in v0.6.0
and the comment is correct as written. No change needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: close validated init and multi-graph gaps

* chore: address review cleanup comments

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-28 16:19:31 +02:00
 								// ─── MR-668: multi-graph startup ──────────────────────────────────────────
 								mod multi_graph_startup {
 								    use super::*;
 								    use omnigraph::storage::normalize_root_uri;
 								    use omnigraph_server::{
 								        GraphHandle, GraphId, GraphKey, GraphRegistry, InsertError, ServerConfig, ServerConfigMode,
 								        load_server_settings,
 								    };
 								    use std::sync::Arc;
 								    async fn build_multi_mode_app(graph_ids: &[&str]) -> (Vec<tempfile::TempDir>, Router) {
 								        let mut dirs = Vec::with_capacity(graph_ids.len());
 								        let mut handles = Vec::with_capacity(graph_ids.len());
 								        for id in graph_ids {
 								            let dir = tempfile::tempdir().unwrap();
 								            let graph_uri = dir.path().join(id).to_str().unwrap().to_string();
 								            let schema = fs::read_to_string(fixture("test.pg")).unwrap();
 								            let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
 								            handles.push(Arc::new(GraphHandle {
 								                key: GraphKey::cluster(GraphId::try_from(*id).unwrap()),
 								                uri: graph_uri,
 								                engine: Arc::new(engine),
 								                policy: None,
 								            }));
 								            dirs.push(dir);
 								        }
 								        let workload = omnigraph_server::workload::WorkloadController::from_env();
 								        let state = AppState::new_multi(handles, Vec::new(), None, workload, None).unwrap();
 								        let app = build_app(state);
 								        (dirs, app)
 								    }
 								    /// Cluster route `/graphs/{graph_id}/snapshot` resolves to the right
 								    /// engine. Two graphs side by side; assert each responds to its own
 								    /// id and does NOT respond to the other's URL.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn cluster_routes_dispatch_per_graph_handle() {
 								        let (_dirs, app) = build_multi_mode_app(&["alpha", "beta"]).await;
 								        for id in ["alpha", "beta"] {
 								            let resp = app
 								                .clone()
 								                .oneshot(
 								                    Request::builder()
 								                        .method(Method::GET)
 								                        .uri(format!("/graphs/{id}/snapshot?branch=main"))
 								                        .body(Body::empty())
 								                        .unwrap(),
 								                )
 								                .await
 								                .unwrap();
 								            assert_eq!(
 								                resp.status(),
 								                StatusCode::OK,
 								                "graph '{id}' must respond OK on its cluster snapshot route"
 								            );
 								        }
 								    }
 								    /// Unknown graph id under the cluster prefix yields 404 (not 500,
 								    /// not 410 — `Gone` is reserved for the future DELETE flow).
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn cluster_route_for_unknown_graph_returns_404() {
 								        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;
 								        let resp = app
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs/nonexistent/snapshot?branch=main")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(resp.status(), StatusCode::NOT_FOUND);
 								    }
 								    /// Coverage net for cluster-route regressions across every
 								    /// protected handler — not just the few that have inner path
 								    /// params. Bug-1 surfaced because only `/snapshot` was being
 								    /// exercised in cluster mode, leaving the other six protected
 								    /// routes implicitly untested. This sweep hits each one and
 								    /// asserts the response shows the handler was reached: no 404
 								    /// (router didn't match), no 500 with "Wrong number of path
 								    /// arguments" (path extractor broke), no 500 with "missing
 								    /// extension" (routing middleware didn't inject the handle).
 								    ///
 								    /// Status codes are negative assertions because each handler's
 								    /// happy-path inputs differ — what matters is "the request
 								    /// reached the handler," not "the handler returned 200." The
 								    /// individual handlers' logic is already tested in single mode.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn all_protected_cluster_routes_resolve_to_their_handler() {
 								        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;
 								        // (method, path, body) — one minimal request per protected
 								        // cluster route. Bodies are valid enough that the router and
 								        // extractors succeed; whether the engine ultimately returns
 								        // 200 or 4xx is per-handler and not what this test pins.
 								        let cases: &[(Method, &str, Option<&str>)] = &[
 								            (Method::GET, "/graphs/alpha/snapshot?branch=main", None),
 								            (Method::GET, "/graphs/alpha/schema", None),
 								            (Method::GET, "/graphs/alpha/branches", None),
 								            (Method::GET, "/graphs/alpha/commits", None),
 								            (
 								                Method::POST,
 								                "/graphs/alpha/read",
 								                Some(r#"{"query_source":"query q() { return {} }"}"#),
 								            ),
 								            (
 								                Method::POST,
 								                "/graphs/alpha/change",
 								                Some(r#"{"query_source":"query q() { return {} }"}"#),
 								            ),
 								            (
 								                Method::POST,
 								                "/graphs/alpha/export",
 								                Some(r#"{"branch":"main"}"#),
 								            ),
 								            (
 								                Method::POST,
 								                "/graphs/alpha/schema/apply",
 								                Some(r#"{"schema_source":"","allow_data_loss":false}"#),
 								            ),
 								            (Method::POST, "/graphs/alpha/ingest", Some(r#"{"data":""}"#)),
 								            (
 								                Method::POST,
 								                "/graphs/alpha/branches/merge",
 								                Some(r#"{"source":"main","target":"main"}"#),
 								            ),
 								        ];
 								        for (method, path, body) in cases {
 								            let req_body = body
 								                .map(|s| Body::from(s.to_string()))
 								                .unwrap_or_else(Body::empty);
 								            let req = Request::builder()
 								                .method(method.clone())
 								                .uri(*path)
 								                .header("content-type", "application/json")
 								                .body(req_body)
 								                .unwrap();
 								            let resp = app.clone().oneshot(req).await.unwrap();
 								            let status = resp.status();
 								            let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
 								            let body_str = String::from_utf8_lossy(&bytes);
 								            assert_ne!(
 								                status,
 								                StatusCode::NOT_FOUND,
 								                "{} {} — router didn't match (cluster-route mounting regression). Body: {}",
 								                method,
 								                path,
 								                body_str,
 								            );
 								            assert!(
 								                !(status == StatusCode::INTERNAL_SERVER_ERROR
 								                    && body_str.contains("Wrong number of path arguments")),
 								                "{} {} — path extractor broke (Bug-1 class regression). Body: {}",
 								                method,
 								                path,
 								                body_str,
 								            );
 								            assert!(
 								                !(status == StatusCode::INTERNAL_SERVER_ERROR
 								                    && body_str.to_lowercase().contains("missing extension")),
 								                "{} {} — routing middleware didn't inject GraphHandle. Body: {}",
 								                method,
 								                path,
 								                body_str,
 								            );
 								        }
 								    }
 								    /// Regression for the bot-surfaced path-extractor bug: cluster
 								    /// routes whose inner path also captures a parameter
 								    /// (`/graphs/{graph_id}/branches/{branch}`,
 								    /// `/graphs/{graph_id}/commits/{commit_id}`) must extract the
 								    /// inner param cleanly. Axum 0.8 propagates the outer `{graph_id}`
 								    /// capture into nested handlers, so a `Path<String>` extractor
 								    /// would see two values and fail with "Wrong number of path
 								    /// arguments. Expected 1 but got 2." Today both DELETE branch and
 								    /// GET commit-by-id break in multi-mode because their handlers
 								    /// use bare `Path<String>` — this test pins the fix.
 								    ///
 								    /// The broader `all_protected_cluster_routes_resolve_to_their_handler`
 								    /// test sweeps the full route surface; this one stays narrowly
 								    /// targeted at the inner-path-param shape because that's the
 								    /// specific regression class.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn cluster_routes_with_inner_path_params_deserialize_correctly() {
 								        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;
 								        // Create a branch we can then delete — DELETE /graphs/alpha/branches/feature
 								        let create_resp = app
 								            .clone()
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::POST)
 								                    .uri("/graphs/alpha/branches")
 								                    .header("content-type", "application/json")
 								                    .body(Body::from(r#"{"name":"feature"}"#))
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(
 								            create_resp.status(),
 								            StatusCode::OK,
 								            "branch create on the cluster route must succeed before delete can be tested"
 								        );
 								        // DELETE /graphs/{graph_id}/branches/{branch} — exercises a handler
 								        // whose only Path extractor (`branch`) is inside a nested route
 								        // that also captures `graph_id`. The handler must pick `branch`
 								        // by name, not by position.
 								        let delete_resp = app
 								            .clone()
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::DELETE)
 								                    .uri("/graphs/alpha/branches/feature")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        let delete_status = delete_resp.status();
 								        let delete_body = to_bytes(delete_resp.into_body(), usize::MAX).await.unwrap();
 								        assert_eq!(
 								            delete_status,
 								            StatusCode::OK,
 								            "DELETE /graphs/{{id}}/branches/{{branch}} must extract `branch` cleanly. \
 								             Body: {}",
 								            String::from_utf8_lossy(&delete_body),
 								        );
 								        // GET /graphs/{graph_id}/commits/{commit_id} — same shape: the
 								        // handler's only Path extractor is the inner `commit_id`, which
 								        // must deserialize by name even though `graph_id` is also in scope.
 								        // We don't know a real commit_id, but the failure mode under test
 								        // is path extraction, not commit lookup — a 404 from the engine
 								        // is fine; a 500 with "Wrong number of path arguments" is the bug.
 								        let commit_resp = app
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs/alpha/commits/0000000000000000")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        let commit_status = commit_resp.status();
 								        let commit_body = to_bytes(commit_resp.into_body(), usize::MAX).await.unwrap();
 								        let body_str = String::from_utf8_lossy(&commit_body);
 								        assert!(
 								            commit_status != StatusCode::INTERNAL_SERVER_ERROR
 								                || !body_str.contains("Wrong number of path arguments"),
 								            "GET /graphs/{{id}}/commits/{{commit_id}} must extract `commit_id` cleanly. \
 								             Got: {} | {}",
 								            commit_status,
 								            body_str,
 								        );
 								    }
 								    /// Flat routes 404 in multi mode — the router only mounts under
 								    /// `/graphs/{graph_id}/...` so `/snapshot` doesn't resolve.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn flat_routes_404_in_multi_mode() {
 								        let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;
 								        let resp = app
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/snapshot?branch=main")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(resp.status(), StatusCode::NOT_FOUND);
 								    }
 								    /// `GraphId` validation runs at startup — a reserved name in
 								    /// `omnigraph.yaml` produces a clear error rather than getting
 								    /// rejected per-request.
 								    #[test]
 								    fn load_server_settings_rejects_reserved_graph_id() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								graphs:
 								  policies:
 								    uri: /tmp/g1.omni
 								"#,
 								        )
 								        .unwrap();
 								        let err = load_server_settings(Some(&config_path), None, None, None, false).unwrap_err();
 								        assert!(
 								            err.to_string().contains("invalid graph id 'policies'"),
 								            "expected reserved-name rejection, got: {err}"
 								        );
 								    }
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn registry_rejects_duplicate_normalized_graph_uris() {
 								        let dir = tempfile::tempdir().unwrap();
 								        let graph_uri = dir.path().join("same").to_str().unwrap().to_string();
 								        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
 								        let engine = Arc::new(Omnigraph::init(&graph_uri, &schema).await.unwrap());
 								        let alpha = Arc::new(GraphHandle {
 								            key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
 								            uri: graph_uri.clone(),
 								            engine: Arc::clone(&engine),
 								            policy: None,
 								        });
 								        let beta = Arc::new(GraphHandle {
 								            key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
 								            uri: format!("file://{graph_uri}/"),
 								            engine,
 								            policy: None,
 								        });
 								        match GraphRegistry::from_handles(vec![alpha, beta]) {
 								            Err(InsertError::DuplicateUri(uri)) => {
 								                assert!(
 								                    normalize_root_uri(&uri).is_ok(),
 								                    "duplicate URI should still be parseable, got {uri}"
 								                );
 								            }
 								            Err(err) => panic!("expected DuplicateUri for normalized aliases, got {err:?}"),
 								            Ok(_) => panic!("expected DuplicateUri for normalized aliases, got Ok"),
 								        }
 								    }
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn registry_stores_canonical_graph_uri() {
 								        let dir = tempfile::tempdir().unwrap();
 								        let graph_uri = dir.path().join("canonical").to_str().unwrap().to_string();
 								        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
 								        let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
 								        let handle = Arc::new(GraphHandle {
 								            key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
 								            uri: format!("file://{graph_uri}/"),
 								            engine: Arc::new(engine),
 								            policy: None,
 								        });
 								        let registry = GraphRegistry::from_handles(vec![handle]).unwrap();
 								        let listed = registry.list();
 								        assert_eq!(listed.len(), 1);
 								        assert_eq!(listed[0].uri, graph_uri);
 								    }
 								    // ── Four-rule mode inference matrix ───────────────────────────────
 								    /// Rule 1: CLI positional URI → Single.
 								    #[test]
 								    fn mode_inference_cli_uri_is_single() {
 								        let settings = load_server_settings(
 								            None,
 								            Some("/tmp/cli.omni".to_string()),
 								            None,
 								            None,
 								            true, // allow unauth so we get past the runtime-state check
 								        )
 								        .unwrap();
 								        match settings.mode {
 								            ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/cli.omni"),
 								            ServerConfigMode::Multi { .. } => panic!("expected Single (rule 1), got Multi"),
 								        }
 								    }
 								    /// Rule 2: --target picks one graph from `graphs:` map → Single.
 								    #[test]
 								    fn mode_inference_cli_target_is_single() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								graphs:
 								  alpha:
 								    uri: /tmp/alpha.omni
 								  beta:
 								    uri: /tmp/beta.omni
 								"#,
 								        )
 								        .unwrap();
 								        let settings =
 								            load_server_settings(Some(&config_path), None, Some("alpha".into()), None, true)
 								                .unwrap();
 								        match settings.mode {
 								            ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/alpha.omni"),
 								            ServerConfigMode::Multi { .. } => panic!("expected Single (rule 2), got Multi"),
 								        }
 								    }
 								    /// Rule 3: `server.graph` set → Single (target picked from config).
 								    #[test]
 								    fn mode_inference_server_graph_is_single() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								graphs:
 								  alpha:
 								    uri: /tmp/alpha.omni
 								  beta:
 								    uri: /tmp/beta.omni
 								server:
 								  graph: beta
 								"#,
 								        )
 								        .unwrap();
 								        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
 								        match settings.mode {
 								            ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/beta.omni"),
 								            ServerConfigMode::Multi { .. } => panic!("expected Single (rule 3), got Multi"),
 								        }
 								    }
 								    /// Rule 4: `--config` + non-empty `graphs:` + no single-mode selector → Multi.
 								    #[test]
 								    fn mode_inference_config_plus_graphs_is_multi() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								graphs:
 								  alpha:
 								    uri: /tmp/alpha.omni
 								  beta:
 								    uri: /tmp/beta.omni
 								"#,
 								        )
 								        .unwrap();
 								        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
 								        match settings.mode {
 								            ServerConfigMode::Multi { graphs, .. } => {
 								                let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect();
 								                // BTreeMap iteration order is alphabetical.
 								                assert_eq!(ids, vec!["alpha", "beta"]);
 								            }
 								            ServerConfigMode::Single { .. } => panic!("expected Multi (rule 4), got Single"),
 								        }
 								    }
 								    #[test]
 								    fn mode_inference_multi_rejects_top_level_policy_file() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								policy:
 								  file: ./policy.yaml
 								graphs:
 								  alpha:
 								    uri: /tmp/alpha.omni
 								"#,
 								        )
 								        .unwrap();
 								        let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err();
 								        let msg = err.to_string();
 								        assert!(
 								            msg.contains("top-level `policy.file` is single-graph/CLI-local policy only"),
 								            "expected single-graph policy guidance, got: {msg}"
 								        );
 								        assert!(
 								            msg.contains("graphs.<graph_id>.policy.file"),
 								            "expected per-graph migration guidance, got: {msg}"
 								        );
 								        assert!(
 								            msg.contains("server.policy.file"),
 								            "expected server policy migration guidance, got: {msg}"
 								        );
 								    }
 								    #[test]
 								    fn mode_inference_normalizes_multi_graph_uris() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let graph = temp.path().join("alpha.omni");
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            format!(
 								                r#"
 								graphs:
 								  alpha:
 								    uri: file://{}/
 								"#,
 								                graph.display()
 								            ),
 								        )
 								        .unwrap();
 								        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
 								        match settings.mode {
 								            ServerConfigMode::Multi { graphs, .. } => {
 								                assert_eq!(graphs[0].uri, graph.to_string_lossy());
 								            }
 								            ServerConfigMode::Single { .. } => panic!("expected Multi"),
 								        }
 								    }
 								    /// Rule 5: nothing → error with migration hint.
 								    #[test]
 								    fn mode_inference_no_inputs_errors_with_migration_hint() {
 								        let err = load_server_settings(None, None, None, None, true).unwrap_err();
 								        let msg = err.to_string();
 								        assert!(
 								            msg.contains("no graph to serve"),
 								            "expected migration-hint error, got: {msg}"
 								        );
 								    }
 								    /// Rule 4 sub-case: `--config` with empty `graphs:` map and no
 								    /// single-mode selector → rule 5 fires (no graph to serve).
 								    #[test]
 								    fn mode_inference_empty_graphs_map_errors() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(&config_path, "server:\n  bind: 127.0.0.1:8080\n").unwrap();
 								        let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err();
 								        assert!(err.to_string().contains("no graph to serve"));
 								    }
 								    /// `--config` + `<URI>` together: URI wins → Single (the CLI URI
 								    /// takes precedence over the config's graphs map).
 								    #[test]
 								    fn mode_inference_cli_uri_overrides_graphs_map() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								graphs:
 								  alpha:
 								    uri: /tmp/alpha.omni
 								"#,
 								        )
 								        .unwrap();
 								        let settings = load_server_settings(
 								            Some(&config_path),
 								            Some("/tmp/cli-override.omni".to_string()),
 								            None,
 								            None,
 								            true,
 								        )
 								        .unwrap();
 								        match settings.mode {
 								            ServerConfigMode::Single { uri, .. } => {
 								                assert_eq!(
 								                    uri, "/tmp/cli-override.omni",
 								                    "CLI URI must win over graphs: map"
 								                );
 								            }
 								            ServerConfigMode::Multi { .. } => {
 								                panic!("expected Single (CLI URI wins), got Multi")
 								            }
 								        }
 								    }
 								    /// Per-graph `policy.file` is resolved relative to the config base_dir.
 								    #[test]
 								    fn per_graph_policy_file_is_resolved_relative_to_base_dir() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								graphs:
 								  alpha:
 								    uri: /tmp/alpha.omni
 								    policy:
 								      file: ./policies/alpha.yaml
 								  beta:
 								    uri: /tmp/beta.omni
 								"#,
 								        )
 								        .unwrap();
 								        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
 								        let graphs = match settings.mode {
 								            ServerConfigMode::Multi { graphs, .. } => graphs,
 								            _ => panic!("expected Multi"),
 								        };
 								        // graphs is BTreeMap-iter order (alphabetical).
 								        let alpha = &graphs[0];
 								        let beta = &graphs[1];
 								        assert_eq!(alpha.graph_id, "alpha");
 								        assert_eq!(
 								            alpha.policy_file.as_ref().unwrap(),
 								            &temp.path().join("policies/alpha.yaml")
 								        );
 								        assert_eq!(beta.graph_id, "beta");
 								        assert!(beta.policy_file.is_none());
 								    }
 								    /// `server.policy.file` resolves alongside the graphs map.
 								    #[test]
 								    fn server_policy_file_is_resolved_relative_to_base_dir() {
 								        let temp = tempfile::tempdir().unwrap();
 								        let config_path = temp.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            r#"
 								server:
 								  policy:
 								    file: ./server-policy.yaml
 								graphs:
 								  alpha:
 								    uri: /tmp/alpha.omni
 								"#,
 								        )
 								        .unwrap();
 								        let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap();
 								        match settings.mode {
 								            ServerConfigMode::Multi {
 								                server_policy_file, ..
 								            } => {
 								                assert_eq!(
 								                    server_policy_file.unwrap(),
 								                    temp.path().join("server-policy.yaml")
 								                );
 								            }
 								            _ => panic!("expected Multi"),
 								        }
 								    }
 								    /// `GET /graphs` must NOT leak the registry in Open mode without
 								    /// an explicit server policy. Operators who pass `--unauthenticated`
 								    /// opted into trusting the network for graph DATA, not for leaking
 								    /// server topology (graph IDs + URIs, which may contain S3 bucket
 								    /// paths or internal hostnames). Cedar gating the management
 								    /// surface is the documented contract for `server_graphs_list`
 								    /// ("don't leak the registry until the operator explicitly
 								    /// authorizes it"); enforcing that contract in every runtime
 								    /// state — not just `PolicyEnabled` — is the correct-by-design
 								    /// closure of the open-mode hole the bot-review pass surfaced.
 								    ///
 								    /// Today (pre-fix) this returns 200 because `authorize_request`'s
 								    /// no-policy fallback only denies when `actor.is_some()`, so Open
 								    /// mode (`actor: None`) falls through to `Ok(())`. The fix in the
 								    /// next commit tightens the fallback so server-scoped actions
 								    /// always require explicit policy.
 								    ///
 								    /// Sort-order coverage previously lived here; it has moved to
 								    /// `get_graphs_with_server_policy_authorizes_per_cedar` where
 								    /// the response body is now non-empty and operator-authorized.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn get_graphs_denied_in_open_mode_without_server_policy() {
 								        let (_dirs, app) = build_multi_mode_app(&["beta", "alpha"]).await;
 								        let resp = app
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        let status = resp.status();
 								        let body = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
 								        let body_str = String::from_utf8_lossy(&body);
 								        assert_eq!(
 								            status,
 								            StatusCode::FORBIDDEN,
 								            "GET /graphs must require an explicit server policy in every \
 								             runtime state; Open-mode bypass would leak server topology. \
 								             Body: {body_str}",
 								        );
 								    }
 								    /// `GET /graphs` returns 405 in single mode (resource exists in the
 								    /// API surface, just not operational without a `graphs:` map).
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn get_graphs_returns_405_in_single_mode() {
 								        let temp = init_loaded_graph().await;
 								        let graph = graph_path(temp.path());
 								        let state = AppState::open(graph.to_string_lossy().to_string())
 								            .await
 								            .unwrap();
 								        let app = build_app(state);
 								        let resp = app
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(resp.status(), StatusCode::METHOD_NOT_ALLOWED);
 								    }
 								    /// `GET /graphs` requires bearer auth when tokens are configured.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn get_graphs_requires_bearer_auth_when_configured() {
 								        use omnigraph_server::{GraphHandle, GraphId, GraphKey};
 								        // Build a multi-mode app with bearer tokens configured.
 								        let dir = tempfile::tempdir().unwrap();
 								        let graph_uri = dir.path().join("alpha").to_str().unwrap().to_string();
 								        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
 								        let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
 								        let handle = Arc::new(GraphHandle {
 								            key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
 								            uri: graph_uri,
 								            engine: Arc::new(engine),
 								            policy: None,
 								        });
 								        let tokens = vec![("act-andrew".to_string(), "secret-token".to_string())];
 								        let workload = omnigraph_server::workload::WorkloadController::from_env();
 								        let state = AppState::new_multi(vec![handle], tokens, None, workload, None).unwrap();
 								        let app = build_app(state);
 								        // No Authorization header → 401.
 								        let resp_no_auth = app
 								            .clone()
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(resp_no_auth.status(), StatusCode::UNAUTHORIZED);
 								        // With auth but no server policy → 403 (default-deny, since
 								        // GraphList is not Read).
 								        let resp_authed = app
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs")
 								                    .header("authorization", "Bearer secret-token")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(resp_authed.status(), StatusCode::FORBIDDEN);
 								    }
 								    /// `GET /graphs` with a server policy that allows `graph_list` → 200
 								    /// and returns the registry sorted alphabetically by `graph_id`.
 								    /// `GET /graphs` with a server policy that does NOT allow
 								    /// `graph_list` (viewer group) → 403.
 								    ///
 								    /// This test owns the alphabetical-sort coverage that previously
 								    /// lived in `get_graphs_lists_registered_graphs_in_multi_mode`.
 								    /// That test now asserts denial in Open mode (server-scoped actions
 								    /// require explicit policy in every runtime state), so the positive
 								    /// body-shape assertions need a home where the response is
 								    /// operator-authorized — here.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn get_graphs_with_server_policy_authorizes_per_cedar() {
 								        use omnigraph_policy::PolicyEngine;
 								        use omnigraph_server::{GraphHandle, GraphId, GraphKey};
 								        let dir = tempfile::tempdir().unwrap();
 								        // Two graphs deliberately registered in non-alphabetical order
 								        // so the test would fail if the handler relied on insertion
 								        // order instead of server-side sorting.
 								        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
 								        let mut handles = Vec::new();
 								        for id in ["beta", "alpha"] {
 								            let graph_uri = dir.path().join(id).to_str().unwrap().to_string();
 								            let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();
 								            handles.push(Arc::new(GraphHandle {
 								                key: GraphKey::cluster(GraphId::try_from(id).unwrap()),
 								                uri: graph_uri,
 								                engine: Arc::new(engine),
 								                policy: None,
 								            }));
 								        }
 								        // Server policy: admins can graph_list, viewers cannot.
 								        let policy_path = dir.path().join("server-policy.yaml");
 								        fs::write(
 								            &policy_path,
 								            r#"
 								version: 1
 								groups:
 								  admins: [act-andrew]
 								  viewers: [act-bruno]
 								rules:
 								  - id: admins-list-graphs
 								    allow:
 								      actors: { group: admins }
 								      actions: [graph_list]
 								"#,
 								        )
 								        .unwrap();
 								        let server_policy = PolicyEngine::load_server(&policy_path).unwrap();
 								        let tokens = vec![
 								            ("act-andrew".to_string(), "andrew-token".to_string()),
 								            ("act-bruno".to_string(), "bruno-token".to_string()),
 								        ];
 								        let workload = omnigraph_server::workload::WorkloadController::from_env();
 								        let state =
 								            AppState::new_multi(handles, tokens, Some(server_policy), workload, None).unwrap();
 								        let app = build_app(state);
 								        // Admin → 200, body returns both graphs alphabetically sorted.
 								        let resp_admin = app
 								            .clone()
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs")
 								                    .header("authorization", "Bearer andrew-token")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(
 								            resp_admin.status(),
 								            StatusCode::OK,
 								            "admin must be allowed graph_list"
 								        );
 								        let body = to_bytes(resp_admin.into_body(), usize::MAX).await.unwrap();
 								        let json: Value = serde_json::from_slice(&body).unwrap();
 								        let graphs = json["graphs"].as_array().unwrap();
 								        assert_eq!(graphs.len(), 2, "response must list both registered graphs");
 								        assert_eq!(
 								            graphs[0]["graph_id"].as_str().unwrap(),
 								            "alpha",
 								            "server must sort graphs alphabetically by graph_id (insertion order was 'beta', 'alpha')"
 								        );
 								        assert_eq!(graphs[1]["graph_id"].as_str().unwrap(), "beta");
 								        // Viewer → 403
 								        let resp_viewer = app
 								            .oneshot(
 								                Request::builder()
 								                    .method(Method::GET)
 								                    .uri("/graphs")
 								                    .header("authorization", "Bearer bruno-token")
 								                    .body(Body::empty())
 								                    .unwrap(),
 								            )
 								            .await
 								            .unwrap();
 								        assert_eq!(
 								            resp_viewer.status(),
 								            StatusCode::FORBIDDEN,
 								            "viewer must be denied graph_list (Cedar gate)"
 								        );
 								    }
 								    /// Loads an `omnigraph.yaml` with two graphs and verifies multi-mode
 								    /// inference plus graph entry resolution. Cluster-route dispatch is
 								    /// covered by the route tests above.
 								    #[tokio::test(flavor = "multi_thread")]
 								    async fn server_settings_load_multi_graph_config_entries() {
 								        let cfg_dir = tempfile::tempdir().unwrap();
 								        // Real graph storage dirs (the URIs in the config must point to
 								        // a graph init-able location).
 								        let alpha_dir = cfg_dir.path().join("alpha.omni");
 								        let beta_dir = cfg_dir.path().join("beta.omni");
 								        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
 								        Omnigraph::init(alpha_dir.to_str().unwrap(), &schema)
 								            .await
 								            .unwrap();
 								        Omnigraph::init(beta_dir.to_str().unwrap(), &schema)
 								            .await
 								            .unwrap();
 								        let config_path = cfg_dir.path().join("omnigraph.yaml");
 								        fs::write(
 								            &config_path,
 								            format!(
 								                r#"
 								graphs:
 								  alpha:
 								    uri: {alpha}
 								  beta:
 								    uri: {beta}
 								"#,
 								                alpha = alpha_dir.display(),
 								                beta = beta_dir.display(),
 								            ),
 								        )
 								        .unwrap();
 								        let settings: ServerConfig =
 								            load_server_settings(Some(&config_path), None, None, None, true).unwrap();
 								        assert!(matches!(settings.mode, ServerConfigMode::Multi { .. }));
 								        match settings.mode {
 								            ServerConfigMode::Multi { graphs, .. } => {
 								                assert_eq!(graphs.len(), 2);
 								                let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect();
 								                assert_eq!(ids, vec!["alpha", "beta"]);
 								            }
 								            _ => unreachable!(),
 								        }
 								    }
 								}