Merge pull request #170 from ModernRelay/feat/cluster-graph-create-4a

feat(cluster): Stage 4A — graph create in cluster apply
This commit is contained in:
Andrew Altshuler 2026-06-10 05:19:17 +03:00 committed by GitHub
commit e6921157cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 1297 additions and 178 deletions

View file

@ -3558,7 +3558,7 @@ async fn main() -> Result<()> {
finish_cluster_plan(&output, json)?;
}
ClusterCommand::Apply { config, json } => {
let output = apply_config_dir(config);
let output = apply_config_dir(config).await;
finish_cluster_apply(&output, json)?;
}
ClusterCommand::Status { config, json } => {

View file

@ -1285,7 +1285,7 @@ node Person {
/// Disaster input fails closed: a destroyed graph root drifts the ledger,
/// the plan proposes deferred creates, and apply moves nothing.
#[test]
fn cluster_e2e_graph_root_destruction_drifts_and_apply_moves_nothing() {
fn cluster_e2e_graph_root_destruction_drifts_then_apply_recreates_empty_graph() {
let temp = tempdir().unwrap();
write_cluster_config_fixture(temp.path());
init_cluster_derived_graph(temp.path());
@ -1327,15 +1327,20 @@ fn cluster_e2e_graph_root_destruction_drifts_and_apply_moves_nothing() {
let plan = cluster_json(temp.path(), "plan");
assert_eq!(change_for(&plan, "graph.knowledge")["operation"], "create");
assert_eq!(change_for(&plan, "graph.knowledge")["disposition"], "deferred");
assert_eq!(change_for(&plan, "schema.knowledge")["disposition"], "deferred");
// Stage 4A: the re-create is executable and the plan says so — nothing
// hidden about converging a destroyed root back to an EMPTY graph (the
// data was already lost; this is declarative convergence, RFC-004 §D1).
assert_eq!(change_for(&plan, "graph.knowledge")["disposition"], "applied");
assert_eq!(change_for(&plan, "schema.knowledge")["disposition"], "applied");
// Converged-then-destroyed: query/policy are already in state at the
// desired digests, so they are not changes at all.
assert_eq!(plan["changes"].as_array().unwrap().len(), 2, "{plan}");
let disaster_apply = cluster_json(temp.path(), "apply");
assert_eq!(disaster_apply["applied_count"], 0, "{disaster_apply}");
assert_eq!(disaster_apply["converged"], false, "{disaster_apply}");
let recreate = cluster_json(temp.path(), "apply");
assert_eq!(recreate["ok"], true, "{recreate}");
assert_eq!(recreate["converged"], true, "{recreate}");
// The empty graph is back on disk; catalog state survived throughout.
assert!(temp.path().join("graphs/knowledge.omni").exists());
let state: serde_json::Value = serde_json::from_str(
&fs::read_to_string(temp.path().join("__cluster/state.json")).unwrap(),
)
@ -1352,59 +1357,84 @@ fn cluster_e2e_graph_root_destruction_drifts_and_apply_moves_nothing() {
);
}
/// The disposition matrix as a system: one apply over two graphs (one live,
/// one not yet created) plus graph-spanning and cluster-scoped policies must
/// produce all four dispositions at once — then converge after the second
/// graph appears.
/// The disposition matrix as a system under Stage 4A: a fresh multi-graph
/// config converges in ONE apply (both graphs created, spanning and
/// cluster-scoped policies applied), and a later mixed run — schema update
/// (deferred), its dependent query (blocked), an independent query update
/// (applied), its composite (derived) — shows all four dispositions at once
/// before the graph-plane schema apply closes the loop.
#[test]
fn cluster_e2e_multi_graph_mixed_dispositions_then_converge() {
let temp = tempdir().unwrap();
write_multi_graph_cluster_fixture(temp.path());
init_cluster_derived_graph(temp.path()); // knowledge only
// No manual init: Stage 4A creates both graphs.
let import = cluster_json(temp.path(), "import");
assert_eq!(import["ok"], true, "{import}");
let apply = cluster_json(temp.path(), "apply");
assert_eq!(apply["ok"], true, "{apply}");
assert_eq!(apply["converged"], false, "{apply}");
assert_eq!(apply["applied_count"], 2, "{apply}");
assert_eq!(apply["converged"], true, "{apply}");
assert_eq!(change_for(&apply, "graph.knowledge")["disposition"], "applied");
assert_eq!(
change_for(&apply, "query.knowledge.find_person")["disposition"],
"applied"
);
assert_eq!(
change_for(&apply, "policy.cluster_wide")["disposition"],
change_for(&apply, "graph.engineering")["disposition"],
"applied"
);
assert_eq!(
change_for(&apply, "query.engineering.find_service")["disposition"],
"applied"
);
// The graph-spanning and cluster-scoped policies ride the same run.
assert_eq!(change_for(&apply, "policy.shared")["disposition"], "applied");
assert_eq!(
change_for(&apply, "policy.cluster_wide")["disposition"],
"applied"
);
assert!(temp.path().join("graphs/knowledge.omni").exists());
assert!(temp.path().join("graphs/engineering.omni").exists());
// Mixed run: a knowledge schema update (4B territory — deferred) gates
// its query update (blocked), while an engineering query update is
// independent (applied) and re-derives its composite.
fs::write(
temp.path().join("people.pg"),
"\nnode Person {\n name: String @key\n age: I32?\n bio: String?\n}\n",
)
.unwrap();
fs::write(
temp.path().join("people.gq"),
"\nquery find_person($name: String) {\n match { $p: Person { name: $name } }\n return { $p.name }\n}\n",
)
.unwrap();
fs::write(
temp.path().join("services.gq"),
"\nquery find_service($name: String) {\n match { $s: Service { name: $name } }\n return { $s.name, $s.name }\n}\n",
)
.unwrap();
let mixed = cluster_json(temp.path(), "apply");
assert_eq!(mixed["ok"], true, "{mixed}");
assert_eq!(mixed["converged"], false, "{mixed}");
assert_eq!(change_for(&mixed, "schema.knowledge")["disposition"], "deferred");
assert_eq!(change_for(&mixed, "graph.knowledge")["disposition"], "deferred");
assert_eq!(
change_for(&mixed, "query.knowledge.find_person")["disposition"],
"blocked"
);
assert_eq!(
change_for(&apply, "query.engineering.find_service")["reason"],
"dependency_missing"
);
// One missing dependency graph blocks the whole spanning policy.
assert_eq!(change_for(&apply, "policy.shared")["disposition"], "blocked");
assert_eq!(
change_for(&apply, "graph.engineering")["disposition"],
"deferred"
change_for(&mixed, "query.knowledge.find_person")["reason"],
"dependency_not_applied"
);
assert_eq!(
change_for(&apply, "schema.engineering")["disposition"],
"deferred"
change_for(&mixed, "query.engineering.find_service")["disposition"],
"applied"
);
assert_eq!(
change_for(&apply, "graph.knowledge")["disposition"],
change_for(&mixed, "graph.engineering")["disposition"],
"derived"
);
assert_eq!(
apply["resource_statuses"]["policy.shared"]["status"],
"blocked"
);
// Deterministic ordering: changes sorted by resource address.
let order: Vec<&str> = apply["changes"]
let order: Vec<&str> = mixed["changes"]
.as_array()
.unwrap()
.iter()
@ -1412,21 +1442,22 @@ fn cluster_e2e_multi_graph_mixed_dispositions_then_converge() {
.collect();
let mut sorted = order.clone();
sorted.sort_unstable();
assert_eq!(order, sorted, "{apply}");
assert_eq!(order, sorted, "{mixed}");
// The second graph appears; refresh observes it; apply converges.
init_named_cluster_graph(temp.path(), "engineering", "services.pg");
// The graph-plane tool applies the schema; refresh observes; converge.
output_success(
cli()
.arg("schema")
.arg("apply")
.arg(temp.path().join("graphs/knowledge.omni"))
.arg("--schema")
.arg(temp.path().join("people.pg"))
.arg("--json"),
);
let refresh = cluster_json(temp.path(), "refresh");
assert_eq!(refresh["ok"], true, "{refresh}");
let converge = cluster_json(temp.path(), "apply");
assert_eq!(converge["ok"], true, "{converge}");
assert_eq!(converge["converged"], true, "{converge}");
assert_eq!(
change_for(&converge, "query.engineering.find_service")["disposition"],
"applied"
);
assert_eq!(change_for(&converge, "policy.shared")["disposition"], "applied");
let final_plan = cluster_json(temp.path(), "plan");
assert!(
@ -1435,6 +1466,39 @@ fn cluster_e2e_multi_graph_mixed_dispositions_then_converge() {
);
}
/// Stage 4A headline: `cluster apply` alone creates a declared graph — the
/// flow below never runs a manual `omnigraph init`.
///
/// Sequence: write the config fixture, `import`, `apply` (must be ok and
/// converged with `graph.knowledge` applied), confirm the graph root exists
/// and the per-graph `snapshot` command produces output, then check that a
/// follow-up `plan` is empty and `status` reports the graph as applied.
#[test]
fn cluster_e2e_declared_graph_created_by_apply() {
    let temp = tempdir().unwrap();
    let config_dir = temp.path();
    // Derived root the test expects apply to create for the `knowledge` graph.
    let graph_root = config_dir.join("graphs/knowledge.omni");
    write_cluster_config_fixture(config_dir);

    let import = cluster_json(config_dir, "import");
    assert_eq!(import["ok"], true, "{import}");

    let apply = cluster_json(config_dir, "apply");
    assert_eq!(apply["ok"], true, "{apply}");
    assert_eq!(apply["converged"], true, "{apply}");
    let graph_change = change_for(&apply, "graph.knowledge");
    assert_eq!(graph_change["disposition"], "applied");
    assert!(graph_root.exists());

    // The per-graph CLI must be able to open what apply created.
    let snapshot_output = output_success(cli().arg("snapshot").arg(&graph_root));
    let snapshot_text = stdout_string(&snapshot_output);
    assert!(!snapshot_text.is_empty());

    // Nothing left to do after the create.
    let plan = cluster_json(config_dir, "plan");
    assert!(plan["changes"].as_array().unwrap().is_empty(), "{plan}");

    let status_report = cluster_json(config_dir, "status");
    let graph_status = &status_report["resource_statuses"]["graph.knowledge"];
    assert_eq!(graph_status["status"], "applied");
}
/// Catalog payload drift self-heals across the command surface: status warns
/// read-only, refresh persists the drift and drops the digest, apply
/// republishes the blob, status comes back clean.

File diff suppressed because it is too large Load diff

View file

@ -99,14 +99,14 @@ fn query_blob(config_dir: &Path, digests: &BTreeMap<String, String>) -> PathBuf
.join(format!("{}.gq", digests["query.knowledge.find_person"]))
}
#[test]
fn failpoint_wiring_returns_injected_diagnostic() {
#[tokio::test]
async fn failpoint_wiring_returns_injected_diagnostic() {
let scenario = FailScenario::setup();
let dir = fixture();
seed_applyable_state(dir.path());
let _failpoint = ScopedFailPoint::new("cluster_apply.after_payload_phase", "return");
let out = apply_config_dir(dir.path());
let out = apply_config_dir(dir.path()).await;
assert!(!out.ok);
assert!(out.diagnostics.iter().any(|diagnostic| {
diagnostic.code == "injected_failpoint"
@ -121,8 +121,8 @@ fn failpoint_wiring_returns_injected_diagnostic() {
/// Crash between the payload phase and the state write: blobs are on disk,
/// state.json is byte-identical, nothing is acknowledged — and a plain re-run
/// repairs by trusting the existing content-addressed blobs.
#[test]
fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
#[tokio::test]
async fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
let scenario = FailScenario::setup();
let dir = fixture();
let digests = seed_applyable_state(dir.path());
@ -130,7 +130,7 @@ fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
{
let _failpoint = ScopedFailPoint::new("cluster_apply.after_payload_phase", "return");
let out = apply_config_dir(dir.path());
let out = apply_config_dir(dir.path()).await;
assert!(!out.ok);
assert!(!out.state_written);
assert!(!out.converged);
@ -149,7 +149,7 @@ fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
}
// The repair is a plain re-run: existing blobs are trusted by digest.
let recovered = apply_config_dir(dir.path());
let recovered = apply_config_dir(dir.path()).await;
assert!(recovered.ok, "{:?}", recovered.diagnostics);
assert!(recovered.converged);
assert!(recovered.state_written);
@ -163,8 +163,8 @@ fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
/// A concurrent writer mutating state.json between apply's read and its write
/// (possible under `state.lock: false`) must surface `state_cas_mismatch`,
/// acknowledge nothing, and leave the concurrent writer's state on disk.
#[test]
fn apply_cas_race_surfaces_state_cas_mismatch() {
#[tokio::test]
async fn apply_cas_race_surfaces_state_cas_mismatch() {
let scenario = FailScenario::setup();
let dir = fixture();
let digests = seed_applyable_state(dir.path());
@ -182,7 +182,7 @@ fn apply_cas_race_surfaces_state_cas_mismatch() {
fs::write(&race_path, serde_json::to_string_pretty(&state).unwrap()).unwrap();
});
let out = apply_config_dir(dir.path());
let out = apply_config_dir(dir.path()).await;
drop(failpoint);
assert!(!out.ok);
@ -212,8 +212,136 @@ fn apply_cas_race_surfaces_state_cas_mismatch() {
assert!(query_blob(dir.path(), &digests).exists());
// Recovery is a plain re-run against the rewritten state.
let recovered = apply_config_dir(dir.path());
let recovered = apply_config_dir(dir.path()).await;
assert!(recovered.ok, "{:?}", recovered.diagnostics);
assert!(recovered.converged);
scenario.teardown();
}
/// Seeds `<config_dir>/__cluster/state.json` with a ledger whose
/// `applied_revision.resources` map is empty, creating the `__cluster`
/// directory first if it does not exist.
///
/// Panics if the directory or the file cannot be written.
fn seed_empty_state(config_dir: &Path) {
    // Kept flush-left on purpose: these bytes are the file contents.
    const EMPTY_STATE: &str = r#"{
"version": 1,
"state_revision": 1,
"applied_revision": { "resources": {} }
}
"#;

    let cluster_dir = config_dir.join("__cluster");
    fs::create_dir_all(&cluster_dir).unwrap();

    let state_file = cluster_dir.join("state.json");
    fs::write(state_file, EMPTY_STATE).unwrap();
}
/// Lists the entries under `<config_dir>/__cluster/recoveries` whose
/// extension is `json`, sorted by path.
///
/// Returns an empty list when the directory cannot be read; individual
/// entries that fail to enumerate are skipped.
fn recovery_sidecars(config_dir: &Path) -> Vec<PathBuf> {
    let Ok(entries) = fs::read_dir(config_dir.join("__cluster/recoveries")) else {
        return Vec::new();
    };

    let mut sidecars: Vec<PathBuf> = Vec::new();
    for entry in entries.flatten() {
        let candidate = entry.path();
        if candidate.extension().is_some_and(|ext| ext == "json") {
            sidecars.push(candidate);
        }
    }
    // `read_dir` order is platform-dependent; sort for stable assertions.
    sidecars.sort();
    sidecars
}
/// Crash window before the graph init. With the
/// `cluster_apply.before_graph_create` failpoint armed, apply must fail with
/// an `injected_failpoint` diagnostic naming that failpoint, leave exactly one
/// recovery sidecar behind, create no graph root, and record no resources in
/// `state.json`.
///
/// A plain re-run with the failpoint released must then succeed and converge:
/// the sweep removes the stale intent (row 1), the graph root exists, and no
/// sidecar remains.
#[tokio::test]
async fn create_crash_before_init_recovers_via_sweep() {
    let scenario = FailScenario::setup();
    let dir = fixture();
    seed_empty_state(dir.path());

    let graph_root = dir.path().join("graphs/knowledge.omni");
    let state_path = dir.path().join("__cluster/state.json");

    {
        // Armed only for this scope; dropped before the recovery run below.
        let _failpoint = ScopedFailPoint::new("cluster_apply.before_graph_create", "return");
        let crashed = apply_config_dir(dir.path()).await;
        assert!(!crashed.ok);

        let saw_injected = crashed.diagnostics.iter().any(|diag| {
            diag.code == "injected_failpoint"
                && diag.message.contains("cluster_apply.before_graph_create")
        });
        assert!(saw_injected);

        // The create intent survives; the graph itself was never initialized.
        let pending = recovery_sidecars(dir.path());
        assert_eq!(pending.len(), 1);
        assert!(!graph_root.exists());

        // No resource digest moved.
        let raw_state = fs::read_to_string(&state_path).unwrap();
        let state: serde_json::Value = serde_json::from_str(&raw_state).unwrap();
        let resources = state["applied_revision"]["resources"].as_object().unwrap();
        assert!(resources.is_empty());
    }

    let recovered = apply_config_dir(dir.path()).await;
    assert!(recovered.ok, "{:?}", recovered.diagnostics);
    assert!(recovered.converged);
    assert!(graph_root.exists());
    assert!(recovery_sidecars(dir.path()).is_empty());

    scenario.teardown();
}
/// Crash window after the graph init but before the state CAS. With the
/// `cluster_apply.after_graph_create` failpoint armed, apply must fail without
/// writing state: the graph root exists, `state.json` is byte-identical to
/// its pre-run contents, and the single recovery sidecar carries a numeric
/// `expected_manifest_version`.
///
/// The re-run must succeed and converge, emit a
/// `cluster_recovery_rolled_forward` diagnostic, clear the sidecar, and leave
/// a `recovery_records` entry with outcome `rolled_forward` in state (sweep
/// row 4).
#[tokio::test]
async fn create_crash_after_init_rolls_state_forward() {
    let scenario = FailScenario::setup();
    let dir = fixture();
    seed_empty_state(dir.path());

    let graph_root = dir.path().join("graphs/knowledge.omni");
    let state_path = dir.path().join("__cluster/state.json");
    let state_before = fs::read(&state_path).unwrap();

    {
        // Armed only for this scope; dropped before the recovery run below.
        let _failpoint = ScopedFailPoint::new("cluster_apply.after_graph_create", "return");
        let crashed = apply_config_dir(dir.path()).await;
        assert!(!crashed.ok);
        assert!(!crashed.state_written);

        // The graph exists; the cluster state is byte-identical (no ack).
        assert!(graph_root.exists());
        let state_after_crash = fs::read(&state_path).unwrap();
        assert_eq!(state_after_crash, state_before);

        // The sidecar carries the post-init manifest pin.
        let sidecars = recovery_sidecars(dir.path());
        assert_eq!(sidecars.len(), 1);
        let sidecar_text = fs::read_to_string(&sidecars[0]).unwrap();
        let sidecar: serde_json::Value = serde_json::from_str(&sidecar_text).unwrap();
        assert!(
            sidecar["expected_manifest_version"].is_number(),
            "{sidecar}"
        );
    }

    let recovered = apply_config_dir(dir.path()).await;
    assert!(recovered.ok, "{:?}", recovered.diagnostics);
    let rolled_forward_reported = recovered
        .diagnostics
        .iter()
        .any(|diag| diag.code == "cluster_recovery_rolled_forward");
    assert!(rolled_forward_reported);
    assert!(recovered.converged);
    assert!(recovery_sidecars(dir.path()).is_empty());

    // The roll-forward is audited in the state ledger itself.
    let raw_state = fs::read_to_string(&state_path).unwrap();
    let state: serde_json::Value = serde_json::from_str(&raw_state).unwrap();
    let records = state["recovery_records"].as_object().unwrap();
    let has_rolled_forward = records
        .values()
        .any(|record| record["outcome"] == "rolled_forward");
    assert!(has_rolled_forward);

    scenario.teardown();
}

View file

@ -8,7 +8,7 @@ This file is the always-on map of the test surface. **Consult it before every ta
|---|---|---|
| `omnigraph` (engine) | `crates/omnigraph/tests/` | Integration tests (21 files), fixture-driven, share `tests/helpers/mod.rs` |
| `omnigraph-cli` | `crates/omnigraph-cli/tests/` | `cli.rs` (unit-ish; includes the `cluster_e2e_*` lifecycle compositions over the spawned binary — lost-state re-import recovery, out-of-band drift, graph-root destruction, multi-graph mixed-disposition convergence), `system_local.rs`, `system_remote.rs`, share `tests/support/mod.rs` |
| `omnigraph-cluster` | mostly in-source `#[cfg(test)] mod tests`; `tests/failpoints.rs` (feature-gated) | Cluster config parser, local JSON state diff, state CAS/lock handling/recovery, read-only validate/plan/status plus explicit refresh/import graph observations, config-only apply (content-addressed payload publish, disposition gating, composite-digest convergence, idempotent re-apply), catalog payload verification (status read-only, refresh drift + self-heal), and failpoint crash-mid-apply / CAS-race coverage |
| `omnigraph-cluster` | mostly in-source `#[cfg(test)] mod tests`; `tests/failpoints.rs` (feature-gated) | Cluster config parser, local JSON state diff, state CAS/lock handling/recovery, read-only validate/plan/status plus explicit refresh/import graph observations, config-only apply (content-addressed payload publish, disposition gating, composite-digest convergence, idempotent re-apply), catalog payload verification (status read-only, refresh drift + self-heal), failpoint crash-mid-apply / CAS-race coverage, and Stage 4A graph creation (create executor, recovery sidecars + sweep rows, create crash windows) |
| `omnigraph-server` | `crates/omnigraph-server/tests/` | `server.rs` (HTTP-level), `openapi.rs` (OpenAPI drift / regeneration) |
| `omnigraph-compiler` | mostly in-source `#[cfg(test)] mod tests` | Parser, type-checker, IR lowering, lint |

View file

@ -1,15 +1,17 @@
# Cluster Config
**Status:** Stage 3A config-only apply preview.
**Status:** Stage 4A graph-create apply preview.
Cluster config is the future control-plane configuration surface for a whole
OmniGraph deployment. In this stage, OmniGraph can validate a local
`cluster.yaml` folder, produce a deterministic read-only plan, inspect the
local JSON state ledger, explicitly refresh/import graph observations into
that ledger, manually remove a held local state lock by exact lock id, and
**apply the config-only subset of the plan** — stored-query and policy-bundle
catalog writes. It does not move graph manifests, change schemas, start
servers, or serve anything it applies: the server still boots from
**apply the executable subset of the plan** — stored-query and policy-bundle
catalog writes, and **graph creation**: a declared graph that does not exist
yet is initialized by apply itself at the derived root. It does not change
existing schemas (deferred to a later stage), move existing graph manifests,
start servers, or serve anything it applies: the server still boots from
`omnigraph.yaml`.
## Commands
@ -153,8 +155,8 @@ condition in `reason`).
## Apply
`cluster apply` executes the config-only subset of the plan — stored-query and
policy-bundle changes. There is no confirm flag: `cluster plan` is the preview,
`cluster apply` runs the executable subset of the plan — stored-query and
policy-bundle changes, plus graph creates. There is no confirm flag: `cluster plan` is the preview,
and apply recomputes the same diff under the state lock before executing, so a
stale preview can never be applied. Apply requires an existing `state.json`
(`state_missing` directs you to `cluster import` first).
@ -180,9 +182,39 @@ still boots from `omnigraph.yaml`; no query or policy applied here serves
traffic until the server-boot stage ships, as an explicit per-deployment mode
switch.
Graph and schema changes are never executed by this stage. They are reported
as `deferred` (warning `apply_unsupported_change`), and query/policy changes
that depend on them are `blocked` (warning `apply_dependency_blocked`, status
### Graph creation
A `graph.<id>` create (the graph is declared but no root exists) is executed
by apply: the graph is initialized at the derived root
```text
<config-dir>/graphs/<graph-id>.omni
```
with the declared schema, before any catalog writes, so queries and policies
that depend on the new graph apply **in the same run**. Each create is fenced
by a recovery sidecar under `__cluster/recoveries/{ulid}.json`, written before
the init and removed only after the state update lands. If apply crashes in
between, the next state-mutating command (`apply`, `refresh`, `import`) runs a
**recovery sweep** that classifies the survivor by observation: an absent root
removes the stale intent; a completed create rolls the cluster state forward
(recorded in the state's `recovery_records`); a partial root reports
`graph_create_incomplete` (status `error` — remove the root and re-run apply;
nothing is auto-deleted); unexpected graph content reports
`actual_applied_state_pending` (status `drifted` — run `cluster refresh` and
re-plan). While a kept sidecar is pending, that graph's create and its
dependents are blocked with `cluster_recovery_pending`. Read-only commands
(`status`, `plan`) warn about pending sidecars without acting on them.
**Re-creation is convergence.** If a graph root disappears out-of-band,
`refresh` records the drift and the next `plan` proposes a create — and apply
will execute it, producing an **empty** graph at the root. The data was
already lost when the root vanished; the create is visible in the plan
(disposition `applied`) before anything runs.
Schema changes to existing graphs are never executed by this stage. They are
reported as `deferred` (warning `apply_unsupported_change`), and query/policy
changes that depend on them are `blocked` (warning `apply_dependency_blocked`, status
`blocked` in state). A partially-applicable plan still exits 0 with warnings;
the JSON `converged` field is the automation signal for "state now matches the
desired revision". The applied `config_digest` is only recorded when apply