//! Cluster-mode boot and the concurrent branch-ops matrix. //! Moved verbatim from tests/server.rs in the modularization. use std::fs; use axum::body::{Body, to_bytes}; use axum::http::{Method, Request, StatusCode}; use omnigraph_server::api::ErrorOutput; use omnigraph_server::{AppState, build_app}; use serde_json::Value; use tower::ServiceExt; mod support; use support::*; #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn concurrent_branch_ops_morphological_matrix() { // Cell a: Merge × Merge, distinct targets. // Pre-fix on b09a097/22d76db: branch_merge_impl's swap-restore race // landed feature_a's content in target_b instead of target_a (and // vice versa — symmetric swap). Identity asserts catch both // asymmetric and symmetric variants. { let cell = "a:merge×merge:distinct-targets"; let h = matrix::Harness::new().await; h.create_branch("main", "feature-a-cella").await; h.insert_person("feature-a-cella", "EveA-cella", 22).await; h.create_branch("main", "feature-b-cella").await; h.insert_person("feature-b-cella", "FrankB-cella", 33).await; h.create_branch("main", "target-a-cella").await; h.create_branch("main", "target-b-cella").await; let (sa, sb) = h .run_pair( matrix::op_merge("feature-a-cella".to_string(), "target-a-cella".to_string()), matrix::op_merge("feature-b-cella".to_string(), "target-b-cella".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge a", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] merge b", cell); h.assert_persons("target-a-cella", cell, &["EveA-cella"], &["FrankB-cella"]) .await; h.assert_persons("target-b-cella", cell, &["FrankB-cella"], &["EveA-cella"]) .await; h.assert_post_op_sentinel(cell, "sentinel-cella").await; } // Cell b: Merge × Merge, same target / distinct sources. // Both want to land in main. merge_exclusive serializes; both should // succeed and main should contain BOTH sources' contributions. { let cell = "b:merge×merge:same-target-distinct-sources"; let h = matrix::Harness::new().await; h.create_branch("main", "src-x-cellb").await; h.insert_person("src-x-cellb", "Xavier-cellb", 41).await; h.create_branch("main", "src-y-cellb").await; h.insert_person("src-y-cellb", "Yvonne-cellb", 42).await; let (sa, sb) = h .run_pair( matrix::op_merge("src-x-cellb".to_string(), "main".to_string()), matrix::op_merge("src-y-cellb".to_string(), "main".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge x", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] merge y", cell); h.assert_persons("main", cell, &["Xavier-cellb", "Yvonne-cellb"], &[]) .await; h.assert_post_op_sentinel(cell, "sentinel-cellb").await; } // Cell c: Merge × Merge, same source / distinct targets (fanout). // One source merged into two targets simultaneously. merge_exclusive // serializes; both targets should reflect the source's content. { let cell = "c:merge×merge:same-source-distinct-targets"; let h = matrix::Harness::new().await; h.create_branch("main", "src-shared-cellc").await; h.insert_person("src-shared-cellc", "Sharon-cellc", 50) .await; h.create_branch("main", "tgt-1-cellc").await; h.create_branch("main", "tgt-2-cellc").await; let (sa, sb) = h .run_pair( matrix::op_merge("src-shared-cellc".to_string(), "tgt-1-cellc".to_string()), matrix::op_merge("src-shared-cellc".to_string(), "tgt-2-cellc".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge into tgt-1", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] merge into tgt-2", cell); h.assert_persons("tgt-1-cellc", cell, &["Sharon-cellc"], &[]) .await; h.assert_persons("tgt-2-cellc", cell, &["Sharon-cellc"], &[]) .await; h.assert_post_op_sentinel(cell, "sentinel-cellc").await; } // Cell d: Merge × Change, both touching main. C2 permits both // succeed, or exactly one clean 409 if the merge detects target // movement after planning but before acquiring the queue. { let cell = "d:merge×change:into-target"; let h = matrix::Harness::new().await; h.create_branch("main", "feature-celld").await; h.insert_person("feature-celld", "EveD-celld", 22).await; let (sa, sb) = h .run_pair( matrix::op_merge("feature-celld".to_string(), "main".to_string()), matrix::op_change_insert("main".to_string(), "FrankD-celld".to_string(), 33), ) .await; assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell); assert!( sa.status == StatusCode::OK || sa.status == StatusCode::CONFLICT, "[{}] merge must be 200 or clean 409, got {}", cell, sa.status ); if sa.status == StatusCode::OK { h.assert_persons("main", cell, &["EveD-celld", "FrankD-celld"], &[]) .await; } else { let error: ErrorOutput = serde_json::from_slice(&sa.body).unwrap(); let conflict = error .manifest_conflict .expect("merge 409 must include manifest_conflict"); assert_eq!( conflict.table_key, "node:Person", "[{}] conflict table", cell ); h.assert_persons("main", cell, &["FrankD-celld"], &["EveD-celld"]) .await; } h.assert_post_op_sentinel(cell, "sentinel-celld").await; } // Cell e: Merge × BranchCreateFrom-target. Concurrent fork off the // merge target while the merge runs. Both should succeed; the new // branch should have a coherent view (either pre- or post-merge, // both valid). After both, target = main has the merged content. { let cell = "e:merge×branch_create_from:target"; let h = matrix::Harness::new().await; h.create_branch("main", "src-celle").await; h.insert_person("src-celle", "Eve-celle", 22).await; let (sa, sb) = h .run_pair( matrix::op_merge("src-celle".to_string(), "main".to_string()), matrix::op_branch_create("main".to_string(), "fork-celle".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] branch_create_from", cell); // Main definitely has Eve. h.assert_persons("main", cell, &["Eve-celle"], &[]).await; // fork-celle was forked off main at SOME version; main's current // count is 5 (4 seeded + Eve). fork-celle has either 4 (pre-merge // snapshot) or 5 (post-merge snapshot); both are valid timings. let fork_count = h.person_count("fork-celle").await; assert!( fork_count == 4 || fork_count == 5, "[{}] fork-celle row count must be pre- or post-merge view (4 or 5), got {}", cell, fork_count ); h.assert_post_op_sentinel(cell, "sentinel-celle").await; } // Cell f: BranchCreateFrom × BranchCreateFrom, distinct parents. // Pre-fix on f925ad1: swap-restore race in branch_create_from_impl // forked the new branch off the wrong parent. Identity asserts pin // that fork-from-A inherits A's content, fork-from-B inherits B's. { let cell = "f:branch_create_from×branch_create_from:distinct-parents"; let h = matrix::Harness::new().await; h.create_branch("main", "alpha-cellf").await; h.insert_person("alpha-cellf", "Eve-cellf", 22).await; h.create_branch("main", "beta-cellf").await; let (sa, sb) = h .run_pair( matrix::op_branch_create("alpha-cellf".to_string(), "gamma-cellf".to_string()), matrix::op_branch_create("beta-cellf".to_string(), "delta-cellf".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] gamma create", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] delta create", cell); // gamma forks off alpha → must contain Eve. h.assert_persons("gamma-cellf", cell, &["Eve-cellf"], &[]) .await; // delta forks off beta → must NOT contain Eve. h.assert_persons("delta-cellf", cell, &[], &["Eve-cellf"]) .await; h.assert_post_op_sentinel(cell, "sentinel-cellf").await; } // Cell g: BranchCreateFrom × BranchDelete, unrelated branches. // Disjoint branches; both should complete cleanly without // interference. { let cell = "g:branch_create_from×branch_delete:unrelated"; let h = matrix::Harness::new().await; h.create_branch("main", "doomed-cellg").await; let (sa, sb) = h .run_pair( matrix::op_branch_create("main".to_string(), "newborn-cellg".to_string()), matrix::op_branch_delete("doomed-cellg".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] create newborn", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] delete doomed", cell); // newborn-cellg exists with main's content. h.assert_persons("newborn-cellg", cell, &["Alice"], &[]) .await; h.assert_post_op_sentinel(cell, "sentinel-cellg").await; } // Cell h: BranchDelete × BranchDelete, distinct branches. Both call // refresh() internally; verify no deadlock and both deletes land. { let cell = "h:branch_delete×branch_delete:distinct"; let h = matrix::Harness::new().await; h.create_branch("main", "doomed1-cellh").await; h.create_branch("main", "doomed2-cellh").await; let (sa, sb) = h .run_pair( matrix::op_branch_delete("doomed1-cellh".to_string()), matrix::op_branch_delete("doomed2-cellh".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] delete 1", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] delete 2", cell); // Verify both gone via /branches list (snapshot would still work // for a deleted branch via parent fallback in some paths, so we // use the explicit list). let r = h .app .clone() .oneshot( Request::builder() .uri("/branches") .method(Method::GET) .body(Body::empty()) .unwrap(), ) .await .unwrap(); assert_eq!(r.status(), StatusCode::OK); let body = to_bytes(r.into_body(), usize::MAX).await.unwrap(); let list_body: Value = serde_json::from_slice(&body).unwrap(); let branches: Vec<&str> = list_body["branches"] .as_array() .unwrap() .iter() .filter_map(|v| v.as_str()) .collect(); assert!( !branches.contains(&"doomed1-cellh"), "[{}] doomed1 still in branch list: {:?}", cell, branches ); assert!( !branches.contains(&"doomed2-cellh"), "[{}] doomed2 still in branch list: {:?}", cell, branches ); h.assert_post_op_sentinel(cell, "sentinel-cellh").await; } // Cell i: BranchDelete × Change, on a different branch. Delete one // branch while a /change runs on main. Both should succeed. { let cell = "i:branch_delete×change:distinct-branch"; let h = matrix::Harness::new().await; h.create_branch("main", "doomed-celli").await; let (sa, sb) = h .run_pair( matrix::op_branch_delete("doomed-celli".to_string()), matrix::op_change_insert("main".to_string(), "Pat-celli".to_string(), 44), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] delete", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell); h.assert_persons("main", cell, &["Pat-celli"], &[]).await; h.assert_post_op_sentinel(cell, "sentinel-celli").await; } // Cell j: BranchCreateFrom × Change, both on main. The fork timing // determines whether the new branch sees the change (pre or post). // Both valid. Main must contain the inserted row. { let cell = "j:branch_create_from×change:on-source"; let h = matrix::Harness::new().await; let (sa, sb) = h .run_pair( matrix::op_branch_create("main".to_string(), "twin-cellj".to_string()), matrix::op_change_insert("main".to_string(), "Quincy-cellj".to_string(), 55), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] branch_create", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell); h.assert_persons("main", cell, &["Quincy-cellj"], &[]).await; // twin-cellj has either pre-change view (no Quincy) or // post-change view (with Quincy); either is valid. let twin_has_quincy = h.person_exists("twin-cellj", "Quincy-cellj").await; let _ = twin_has_quincy; // either valid timing — just ensure no panic h.assert_post_op_sentinel(cell, "sentinel-cellj").await; } // Cell k: reopen consistency. Run a representative concurrent pair, // drop the engine, reopen on a separate handle, verify state matches. { let cell = "k:reopen-after-pair"; let h = matrix::Harness::new().await; h.create_branch("main", "src-cellk").await; h.insert_person("src-cellk", "Rita-cellk", 36).await; let (sa, sb) = h .run_pair( matrix::op_merge("src-cellk".to_string(), "main".to_string()), matrix::op_change_insert("main".to_string(), "Steve-cellk".to_string(), 37), ) .await; assert_eq!(sb.status, StatusCode::OK, "[{}] change", cell); assert!( sa.status == StatusCode::OK || sa.status == StatusCode::CONFLICT, "[{}] merge must be 200 or clean 409, got {}", cell, sa.status ); if sa.status == StatusCode::OK { h.assert_persons("main", cell, &["Rita-cellk", "Steve-cellk"], &[]) .await; } else { let error: ErrorOutput = serde_json::from_slice(&sa.body).unwrap(); let conflict = error .manifest_conflict .expect("merge 409 must include manifest_conflict"); assert_eq!( conflict.table_key, "node:Person", "[{}] conflict table", cell ); h.assert_persons("main", cell, &["Steve-cellk"], &["Rita-cellk"]) .await; } // Reopen via a fresh AppState on the same graph. let graph_uri = format!("{}/server.omni", h._temp.path().display()); let reopened = AppState::open(graph_uri.clone()).await.unwrap(); let app2 = build_app(reopened); // Sanity: the same identity check via the new app must see // Rita and Steve. let r = app2 .clone() .oneshot( Request::builder() .uri("/snapshot?branch=main") .method(Method::GET) .body(Body::empty()) .unwrap(), ) .await .unwrap(); assert_eq!(r.status(), StatusCode::OK, "[{}] reopen snapshot", cell); let body = to_bytes(r.into_body(), usize::MAX).await.unwrap(); let v: Value = serde_json::from_slice(&body).unwrap(); let person_rows = v["tables"] .as_array() .and_then(|tables| { tables .iter() .find(|t| t["table_key"].as_str() == Some("node:Person")) }) .and_then(|t| t["row_count"].as_u64()) .expect("reopen snapshot must include node:Person row_count"); let expected_rows = if sa.status == StatusCode::OK { 6 } else { 5 }; assert_eq!( person_rows, expected_rows, "[{}] reopened main should include seed (4) + committed concurrent writes", cell, ); } } #[tokio::test] async fn cluster_boot_serves_applied_state() { let temp = converged_cluster_dir("").await; let settings = cluster_settings(temp.path()).await.unwrap(); let omnigraph_server::ServerConfigMode::Multi { graphs, config_path, server_policy, } = settings.mode else { panic!("cluster boot must select multi-graph routing"); }; assert_eq!(graphs.len(), 1); assert_eq!(graphs[0].graph_id, "knowledge"); assert!(server_policy.is_none()); let state = omnigraph_server::open_multi_graph_state(graphs, Vec::new(), None, config_path) .await .unwrap(); let app = build_app(state); // The management surface keeps its closed-by-default contract: without a // cluster-scoped policy bundle there is no server-level Cedar engine, so // GET /graphs refuses even in cluster mode. let (status, body) = json_response( &app, Request::builder().uri("/graphs").body(Body::empty()).unwrap(), ) .await; assert_eq!(status, StatusCode::FORBIDDEN, "{body}"); let (status, body) = json_response( &app, Request::builder() .uri("/graphs/knowledge/queries") .body(Body::empty()) .unwrap(), ) .await; assert_eq!(status, StatusCode::OK, "{body}"); assert!( body["queries"] .as_array() .unwrap() .iter() .any(|q| q["name"] == "find_person"), "{body}" ); let (status, body) = json_response( &app, Request::builder() .method(Method::POST) .uri("/graphs/knowledge/queries/find_person") .header("content-type", "application/json") .body(Body::from(r#"{"params":{"name":"nobody"}}"#)) .unwrap(), ) .await; assert_eq!(status, StatusCode::OK, "{body}"); } #[tokio::test] async fn cluster_boot_wires_policy_bindings_into_cedar_slots() { let temp = tempfile::tempdir().unwrap(); drop(temp); let policy_block = r#"policies: graph_rules: file: ./graph.policy.yaml applies_to: [knowledge] cluster_rules: file: ./cluster.policy.yaml applies_to: [cluster] "#; let temp = { let temp = tempfile::tempdir().unwrap(); fs::write( temp.path().join("people.pg"), "\nnode Person {\n name: String @key\n}\n", ) .unwrap(); fs::write( temp.path().join("people.gq"), "\nquery find_person($name: String) {\n match { $p: Person { name: $name } }\n return { $p.name }\n}\n", ) .unwrap(); fs::write( temp.path().join("graph.policy.yaml"), permit_all_policy_yaml(&["default"]), ) .unwrap(); fs::write( temp.path().join("cluster.policy.yaml"), permit_all_policy_yaml(&["default"]).replace("protected_branches: [main]\n", "protected_branches: [main]\nkind: server\n"), ) .unwrap(); fs::write( temp.path().join("cluster.yaml"), format!( r#" version: 1 graphs: knowledge: schema: ./people.pg queries: find_person: file: ./people.gq {policy_block}"# ), ) .unwrap(); let import = omnigraph_cluster::import_config_dir(temp.path()).await; assert!(import.ok, "{:?}", import.diagnostics); let apply = omnigraph_cluster::apply_config_dir(temp.path()).await; assert!(apply.ok && apply.converged, "{:?}", apply.diagnostics); temp }; let settings = cluster_settings(temp.path()).await.unwrap(); let omnigraph_server::ServerConfigMode::Multi { graphs, server_policy, .. } = settings.mode else { panic!("cluster boot must select multi-graph routing"); }; // Cluster boots carry policy CONTENT (digest-verified catalog blobs), // not paths — the catalog may live on object storage. let omnigraph_server::PolicySource::Inline(graph_policy) = graphs[0].policy.as_ref().expect("graph-bound bundle") else { panic!("cluster-mode graph policy must be inline content"); }; assert!(graph_policy.contains("actors:"), "{graph_policy:?}"); let omnigraph_server::PolicySource::Inline(server_policy) = server_policy.expect("cluster-bound bundle") else { panic!("cluster-mode server policy must be inline content"); }; assert!(server_policy.contains("kind: server"), "{server_policy:?}"); } #[tokio::test] async fn cluster_boot_refusals() { // Mutual exclusion with --config / URI. let temp = converged_cluster_dir("").await; let dir = temp.path().to_path_buf(); let err = omnigraph_server::load_server_settings( Some(&dir.join("omnigraph.yaml")), Some(&dir), None, None, None, true, ) .await .unwrap_err(); assert!(err.to_string().contains("exclusive boot source"), "{err}"); let err = omnigraph_server::load_server_settings( None, Some(&dir), Some("file:///tmp/x.omni".to_string()), None, None, true, ) .await .unwrap_err(); assert!(err.to_string().contains("exclusive boot source"), "{err}"); // Tampered catalog blob refuses boot with the remedy. let blob_dir = dir.join("__cluster/resources/query/knowledge/find_person"); let blob = fs::read_dir(&blob_dir).unwrap().next().unwrap().unwrap().path(); fs::write(&blob, "tampered").unwrap(); let err = cluster_settings(&dir).await.unwrap_err(); assert!( err.to_string().contains("catalog_payload_digest_mismatch"), "{err}" ); assert!(err.to_string().contains("cluster refresh"), "{err}"); // Missing state refuses with the import/apply remedy. let empty = tempfile::tempdir().unwrap(); let err = cluster_settings(empty.path()).await.unwrap_err(); assert!(err.to_string().contains("cluster_state_missing"), "{err}"); }