mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-12 01:45:14 +02:00
Merge pull request #170 from ModernRelay/feat/cluster-graph-create-4a
feat(cluster): Stage 4A — graph create in cluster apply
This commit is contained in:
commit
e6921157cc
6 changed files with 1297 additions and 178 deletions
|
|
@ -3558,7 +3558,7 @@ async fn main() -> Result<()> {
|
|||
finish_cluster_plan(&output, json)?;
|
||||
}
|
||||
ClusterCommand::Apply { config, json } => {
|
||||
let output = apply_config_dir(config);
|
||||
let output = apply_config_dir(config).await;
|
||||
finish_cluster_apply(&output, json)?;
|
||||
}
|
||||
ClusterCommand::Status { config, json } => {
|
||||
|
|
|
|||
|
|
@ -1285,7 +1285,7 @@ node Person {
|
|||
/// Disaster input converges declaratively: a destroyed graph root drifts the
|
||||
/// ledger, the plan proposes an executable create, and apply re-creates an EMPTY graph.
|
||||
#[test]
|
||||
fn cluster_e2e_graph_root_destruction_drifts_and_apply_moves_nothing() {
|
||||
fn cluster_e2e_graph_root_destruction_drifts_then_apply_recreates_empty_graph() {
|
||||
let temp = tempdir().unwrap();
|
||||
write_cluster_config_fixture(temp.path());
|
||||
init_cluster_derived_graph(temp.path());
|
||||
|
|
@ -1327,15 +1327,20 @@ fn cluster_e2e_graph_root_destruction_drifts_and_apply_moves_nothing() {
|
|||
|
||||
let plan = cluster_json(temp.path(), "plan");
|
||||
assert_eq!(change_for(&plan, "graph.knowledge")["operation"], "create");
|
||||
assert_eq!(change_for(&plan, "graph.knowledge")["disposition"], "deferred");
|
||||
assert_eq!(change_for(&plan, "schema.knowledge")["disposition"], "deferred");
|
||||
// Stage 4A: the re-create is executable and the plan says so — nothing
|
||||
// hidden about converging a destroyed root back to an EMPTY graph (the
|
||||
// data was already lost; this is declarative convergence, RFC-004 §D1).
|
||||
assert_eq!(change_for(&plan, "graph.knowledge")["disposition"], "applied");
|
||||
assert_eq!(change_for(&plan, "schema.knowledge")["disposition"], "applied");
|
||||
// Converged-then-destroyed: query/policy are already in state at the
|
||||
// desired digests, so they are not changes at all.
|
||||
assert_eq!(plan["changes"].as_array().unwrap().len(), 2, "{plan}");
|
||||
|
||||
let disaster_apply = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(disaster_apply["applied_count"], 0, "{disaster_apply}");
|
||||
assert_eq!(disaster_apply["converged"], false, "{disaster_apply}");
|
||||
let recreate = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(recreate["ok"], true, "{recreate}");
|
||||
assert_eq!(recreate["converged"], true, "{recreate}");
|
||||
// The empty graph is back on disk; catalog state survived throughout.
|
||||
assert!(temp.path().join("graphs/knowledge.omni").exists());
|
||||
let state: serde_json::Value = serde_json::from_str(
|
||||
&fs::read_to_string(temp.path().join("__cluster/state.json")).unwrap(),
|
||||
)
|
||||
|
|
@ -1352,59 +1357,84 @@ fn cluster_e2e_graph_root_destruction_drifts_and_apply_moves_nothing() {
|
|||
);
|
||||
}
|
||||
|
||||
/// The disposition matrix as a system: one apply over two graphs (one live,
|
||||
/// one not yet created) plus graph-spanning and cluster-scoped policies must
|
||||
/// produce all four dispositions at once — then converge after the second
|
||||
/// graph appears.
|
||||
/// The disposition matrix as a system under Stage 4A: a fresh multi-graph
|
||||
/// config converges in ONE apply (both graphs created, spanning and
|
||||
/// cluster-scoped policies applied), and a later mixed run — schema update
|
||||
/// (deferred), its dependent query (blocked), an independent query update
|
||||
/// (applied), its composite (derived) — shows all four dispositions at once
|
||||
/// before the graph-plane schema apply closes the loop.
|
||||
#[test]
|
||||
fn cluster_e2e_multi_graph_mixed_dispositions_then_converge() {
|
||||
let temp = tempdir().unwrap();
|
||||
write_multi_graph_cluster_fixture(temp.path());
|
||||
init_cluster_derived_graph(temp.path()); // knowledge only
|
||||
// No manual init: Stage 4A creates both graphs.
|
||||
|
||||
let import = cluster_json(temp.path(), "import");
|
||||
assert_eq!(import["ok"], true, "{import}");
|
||||
|
||||
let apply = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(apply["ok"], true, "{apply}");
|
||||
assert_eq!(apply["converged"], false, "{apply}");
|
||||
assert_eq!(apply["applied_count"], 2, "{apply}");
|
||||
assert_eq!(apply["converged"], true, "{apply}");
|
||||
assert_eq!(change_for(&apply, "graph.knowledge")["disposition"], "applied");
|
||||
assert_eq!(
|
||||
change_for(&apply, "query.knowledge.find_person")["disposition"],
|
||||
"applied"
|
||||
);
|
||||
assert_eq!(
|
||||
change_for(&apply, "policy.cluster_wide")["disposition"],
|
||||
change_for(&apply, "graph.engineering")["disposition"],
|
||||
"applied"
|
||||
);
|
||||
assert_eq!(
|
||||
change_for(&apply, "query.engineering.find_service")["disposition"],
|
||||
"applied"
|
||||
);
|
||||
// The graph-spanning and cluster-scoped policies ride the same run.
|
||||
assert_eq!(change_for(&apply, "policy.shared")["disposition"], "applied");
|
||||
assert_eq!(
|
||||
change_for(&apply, "policy.cluster_wide")["disposition"],
|
||||
"applied"
|
||||
);
|
||||
assert!(temp.path().join("graphs/knowledge.omni").exists());
|
||||
assert!(temp.path().join("graphs/engineering.omni").exists());
|
||||
|
||||
// Mixed run: a knowledge schema update (4B territory — deferred) gates
|
||||
// its query update (blocked), while an engineering query update is
|
||||
// independent (applied) and re-derives its composite.
|
||||
fs::write(
|
||||
temp.path().join("people.pg"),
|
||||
"\nnode Person {\n name: String @key\n age: I32?\n bio: String?\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
temp.path().join("people.gq"),
|
||||
"\nquery find_person($name: String) {\n match { $p: Person { name: $name } }\n return { $p.name }\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
temp.path().join("services.gq"),
|
||||
"\nquery find_service($name: String) {\n match { $s: Service { name: $name } }\n return { $s.name, $s.name }\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mixed = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(mixed["ok"], true, "{mixed}");
|
||||
assert_eq!(mixed["converged"], false, "{mixed}");
|
||||
assert_eq!(change_for(&mixed, "schema.knowledge")["disposition"], "deferred");
|
||||
assert_eq!(change_for(&mixed, "graph.knowledge")["disposition"], "deferred");
|
||||
assert_eq!(
|
||||
change_for(&mixed, "query.knowledge.find_person")["disposition"],
|
||||
"blocked"
|
||||
);
|
||||
assert_eq!(
|
||||
change_for(&apply, "query.engineering.find_service")["reason"],
|
||||
"dependency_missing"
|
||||
);
|
||||
// One missing dependency graph blocks the whole spanning policy.
|
||||
assert_eq!(change_for(&apply, "policy.shared")["disposition"], "blocked");
|
||||
assert_eq!(
|
||||
change_for(&apply, "graph.engineering")["disposition"],
|
||||
"deferred"
|
||||
change_for(&mixed, "query.knowledge.find_person")["reason"],
|
||||
"dependency_not_applied"
|
||||
);
|
||||
assert_eq!(
|
||||
change_for(&apply, "schema.engineering")["disposition"],
|
||||
"deferred"
|
||||
change_for(&mixed, "query.engineering.find_service")["disposition"],
|
||||
"applied"
|
||||
);
|
||||
assert_eq!(
|
||||
change_for(&apply, "graph.knowledge")["disposition"],
|
||||
change_for(&mixed, "graph.engineering")["disposition"],
|
||||
"derived"
|
||||
);
|
||||
assert_eq!(
|
||||
apply["resource_statuses"]["policy.shared"]["status"],
|
||||
"blocked"
|
||||
);
|
||||
// Deterministic ordering: changes sorted by resource address.
|
||||
let order: Vec<&str> = apply["changes"]
|
||||
let order: Vec<&str> = mixed["changes"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
|
|
@ -1412,21 +1442,22 @@ fn cluster_e2e_multi_graph_mixed_dispositions_then_converge() {
|
|||
.collect();
|
||||
let mut sorted = order.clone();
|
||||
sorted.sort_unstable();
|
||||
assert_eq!(order, sorted, "{apply}");
|
||||
assert_eq!(order, sorted, "{mixed}");
|
||||
|
||||
// The second graph appears; refresh observes it; apply converges.
|
||||
init_named_cluster_graph(temp.path(), "engineering", "services.pg");
|
||||
// The graph-plane tool applies the schema; refresh observes; converge.
|
||||
output_success(
|
||||
cli()
|
||||
.arg("schema")
|
||||
.arg("apply")
|
||||
.arg(temp.path().join("graphs/knowledge.omni"))
|
||||
.arg("--schema")
|
||||
.arg(temp.path().join("people.pg"))
|
||||
.arg("--json"),
|
||||
);
|
||||
let refresh = cluster_json(temp.path(), "refresh");
|
||||
assert_eq!(refresh["ok"], true, "{refresh}");
|
||||
|
||||
let converge = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(converge["ok"], true, "{converge}");
|
||||
assert_eq!(converge["converged"], true, "{converge}");
|
||||
assert_eq!(
|
||||
change_for(&converge, "query.engineering.find_service")["disposition"],
|
||||
"applied"
|
||||
);
|
||||
assert_eq!(change_for(&converge, "policy.shared")["disposition"], "applied");
|
||||
|
||||
let final_plan = cluster_json(temp.path(), "plan");
|
||||
assert!(
|
||||
|
|
@ -1435,6 +1466,39 @@ fn cluster_e2e_multi_graph_mixed_dispositions_then_converge() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Stage 4A headline scenario: `cluster apply` on its own brings a declared
/// graph into existence — at no point does the flow run a manual
/// `omnigraph init`.
#[test]
fn cluster_e2e_declared_graph_created_by_apply() {
    let temp = tempdir().unwrap();
    let config_dir = temp.path();
    let graph_root = config_dir.join("graphs/knowledge.omni");
    write_cluster_config_fixture(config_dir);

    // Import first: the test seeds the ledger before applying.
    let import = cluster_json(config_dir, "import");
    assert_eq!(import["ok"], true, "{import}");

    // A single apply must both create the graph and converge.
    let apply = cluster_json(config_dir, "apply");
    assert_eq!(apply["ok"], true, "{apply}");
    assert_eq!(apply["converged"], true, "{apply}");
    assert_eq!(change_for(&apply, "graph.knowledge")["disposition"], "applied");
    assert!(graph_root.exists());

    // The created graph is a real graph: the per-graph CLI can open it and
    // prints a non-empty snapshot.
    let snapshot = output_success(cli().arg("snapshot").arg(&graph_root));
    let snapshot_text = stdout_string(&snapshot);
    assert!(!snapshot_text.is_empty());

    // Nothing is left to plan, and status reports the graph as applied.
    let plan = cluster_json(config_dir, "plan");
    let pending_changes = plan["changes"].as_array().unwrap();
    assert!(pending_changes.is_empty(), "{plan}");

    let status = cluster_json(config_dir, "status");
    let graph_status = &status["resource_statuses"]["graph.knowledge"]["status"];
    assert_eq!(*graph_status, "applied");
}
|
||||
|
||||
/// Catalog payload drift self-heals across the command surface: status warns
|
||||
/// read-only, refresh persists the drift and drops the digest, apply
|
||||
/// republishes the blob, status comes back clean.
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -99,14 +99,14 @@ fn query_blob(config_dir: &Path, digests: &BTreeMap<String, String>) -> PathBuf
|
|||
.join(format!("{}.gq", digests["query.knowledge.find_person"]))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn failpoint_wiring_returns_injected_diagnostic() {
|
||||
#[tokio::test]
|
||||
async fn failpoint_wiring_returns_injected_diagnostic() {
|
||||
let scenario = FailScenario::setup();
|
||||
let dir = fixture();
|
||||
seed_applyable_state(dir.path());
|
||||
|
||||
let _failpoint = ScopedFailPoint::new("cluster_apply.after_payload_phase", "return");
|
||||
let out = apply_config_dir(dir.path());
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(!out.ok);
|
||||
assert!(out.diagnostics.iter().any(|diagnostic| {
|
||||
diagnostic.code == "injected_failpoint"
|
||||
|
|
@ -121,8 +121,8 @@ fn failpoint_wiring_returns_injected_diagnostic() {
|
|||
/// Crash between the payload phase and the state write: blobs are on disk,
|
||||
/// state.json is byte-identical, nothing is acknowledged — and a plain re-run
|
||||
/// repairs by trusting the existing content-addressed blobs.
|
||||
#[test]
|
||||
fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
|
||||
#[tokio::test]
|
||||
async fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
|
||||
let scenario = FailScenario::setup();
|
||||
let dir = fixture();
|
||||
let digests = seed_applyable_state(dir.path());
|
||||
|
|
@ -130,7 +130,7 @@ fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
|
|||
|
||||
{
|
||||
let _failpoint = ScopedFailPoint::new("cluster_apply.after_payload_phase", "return");
|
||||
let out = apply_config_dir(dir.path());
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(!out.ok);
|
||||
assert!(!out.state_written);
|
||||
assert!(!out.converged);
|
||||
|
|
@ -149,7 +149,7 @@ fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
|
|||
}
|
||||
|
||||
// The repair is a plain re-run: existing blobs are trusted by digest.
|
||||
let recovered = apply_config_dir(dir.path());
|
||||
let recovered = apply_config_dir(dir.path()).await;
|
||||
assert!(recovered.ok, "{:?}", recovered.diagnostics);
|
||||
assert!(recovered.converged);
|
||||
assert!(recovered.state_written);
|
||||
|
|
@ -163,8 +163,8 @@ fn apply_crash_after_payload_phase_leaves_state_unmoved_then_recovers() {
|
|||
/// A concurrent writer mutating state.json between apply's read and its write
|
||||
/// (possible under `state.lock: false`) must surface `state_cas_mismatch`,
|
||||
/// acknowledge nothing, and leave the concurrent writer's state on disk.
|
||||
#[test]
|
||||
fn apply_cas_race_surfaces_state_cas_mismatch() {
|
||||
#[tokio::test]
|
||||
async fn apply_cas_race_surfaces_state_cas_mismatch() {
|
||||
let scenario = FailScenario::setup();
|
||||
let dir = fixture();
|
||||
let digests = seed_applyable_state(dir.path());
|
||||
|
|
@ -182,7 +182,7 @@ fn apply_cas_race_surfaces_state_cas_mismatch() {
|
|||
fs::write(&race_path, serde_json::to_string_pretty(&state).unwrap()).unwrap();
|
||||
});
|
||||
|
||||
let out = apply_config_dir(dir.path());
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
drop(failpoint);
|
||||
|
||||
assert!(!out.ok);
|
||||
|
|
@ -212,8 +212,136 @@ fn apply_cas_race_surfaces_state_cas_mismatch() {
|
|||
assert!(query_blob(dir.path(), &digests).exists());
|
||||
|
||||
// Recovery is a plain re-run against the rewritten state.
|
||||
let recovered = apply_config_dir(dir.path());
|
||||
let recovered = apply_config_dir(dir.path()).await;
|
||||
assert!(recovered.ok, "{:?}", recovered.diagnostics);
|
||||
assert!(recovered.converged);
|
||||
scenario.teardown();
|
||||
}
|
||||
|
||||
/// Seeds `<config_dir>/__cluster/state.json` with a minimal ledger: version 1,
/// state revision 1, and an applied revision that holds no resources.
///
/// Creates the `__cluster` directory if needed and panics on any I/O failure.
fn seed_empty_state(config_dir: &Path) {
    const EMPTY_STATE: &str = r#"{
  "version": 1,
  "state_revision": 1,
  "applied_revision": { "resources": {} }
}
"#;

    let ledger_dir = config_dir.join("__cluster");
    fs::create_dir_all(&ledger_dir).unwrap();

    let ledger_path = ledger_dir.join("state.json");
    fs::write(ledger_path, EMPTY_STATE).unwrap();
}
|
||||
|
||||
/// Lists the `*.json` entries under `<config_dir>/__cluster/recoveries`,
/// sorted by path.
///
/// Returns an empty vector when the recoveries directory does not exist.
///
/// # Panics
///
/// Panics on any other I/O failure (an unreadable directory or a failed
/// entry). Swallowing those errors would let "no sidecars remain" assertions
/// pass vacuously when the directory simply could not be listed.
fn recovery_sidecars(config_dir: &Path) -> Vec<PathBuf> {
    let recoveries_dir = config_dir.join("__cluster/recoveries");
    let entries = match fs::read_dir(&recoveries_dir) {
        Ok(entries) => entries,
        // A missing directory is the only case treated as "no sidecars".
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Vec::new(),
        Err(err) => panic!("failed to list {}: {err}", recoveries_dir.display()),
    };

    let mut paths: Vec<PathBuf> = entries
        .map(|entry| {
            entry
                .unwrap_or_else(|err| {
                    panic!("failed to read entry in {}: {err}", recoveries_dir.display())
                })
                .path()
        })
        .filter(|path| path.extension().is_some_and(|ext| ext == "json"))
        .collect();
    // `read_dir` order is platform-dependent; sort for deterministic output.
    paths.sort();
    paths
}
|
||||
|
||||
/// Crash before the init: the create-intent sidecar survives, nothing moved.
|
||||
/// The next run's sweep removes the intent (row 1) and the same run creates
|
||||
/// the graph and converges.
|
||||
#[tokio::test]
|
||||
async fn create_crash_before_init_recovers_via_sweep() {
|
||||
let scenario = FailScenario::setup();
|
||||
let dir = fixture();
|
||||
seed_empty_state(dir.path());
|
||||
|
||||
{
|
||||
let _failpoint = ScopedFailPoint::new("cluster_apply.before_graph_create", "return");
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(!out.ok);
|
||||
assert!(out.diagnostics.iter().any(|diagnostic| {
|
||||
diagnostic.code == "injected_failpoint"
|
||||
&& diagnostic
|
||||
.message
|
||||
.contains("cluster_apply.before_graph_create")
|
||||
}));
|
||||
assert_eq!(recovery_sidecars(dir.path()).len(), 1);
|
||||
assert!(!dir.path().join("graphs/knowledge.omni").exists());
|
||||
// No resource digest moved.
|
||||
let state: serde_json::Value = serde_json::from_str(
|
||||
&fs::read_to_string(dir.path().join("__cluster/state.json")).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
assert!(
|
||||
state["applied_revision"]["resources"]
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.is_empty()
|
||||
);
|
||||
}
|
||||
|
||||
let recovered = apply_config_dir(dir.path()).await;
|
||||
assert!(recovered.ok, "{:?}", recovered.diagnostics);
|
||||
assert!(recovered.converged);
|
||||
assert!(dir.path().join("graphs/knowledge.omni").exists());
|
||||
assert!(recovery_sidecars(dir.path()).is_empty());
|
||||
scenario.teardown();
|
||||
}
|
||||
|
||||
/// Crash after the init but before the state CAS: the graph exists, the
|
||||
/// ledger is stale, nothing was acknowledged. The next run's sweep rolls the
|
||||
/// ledger forward (row 4) with an audit entry, and the run converges.
|
||||
#[tokio::test]
|
||||
async fn create_crash_after_init_rolls_state_forward() {
|
||||
let scenario = FailScenario::setup();
|
||||
let dir = fixture();
|
||||
seed_empty_state(dir.path());
|
||||
let state_before = fs::read(dir.path().join("__cluster/state.json")).unwrap();
|
||||
|
||||
{
|
||||
let _failpoint = ScopedFailPoint::new("cluster_apply.after_graph_create", "return");
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(!out.ok);
|
||||
assert!(!out.state_written);
|
||||
// The graph exists; the cluster state is byte-identical (no ack).
|
||||
assert!(dir.path().join("graphs/knowledge.omni").exists());
|
||||
assert_eq!(
|
||||
fs::read(dir.path().join("__cluster/state.json")).unwrap(),
|
||||
state_before
|
||||
);
|
||||
// The sidecar carries the post-init manifest pin.
|
||||
let sidecars = recovery_sidecars(dir.path());
|
||||
assert_eq!(sidecars.len(), 1);
|
||||
let sidecar: serde_json::Value =
|
||||
serde_json::from_str(&fs::read_to_string(&sidecars[0]).unwrap()).unwrap();
|
||||
assert!(
|
||||
sidecar["expected_manifest_version"].is_number(),
|
||||
"{sidecar}"
|
||||
);
|
||||
}
|
||||
|
||||
let recovered = apply_config_dir(dir.path()).await;
|
||||
assert!(recovered.ok, "{:?}", recovered.diagnostics);
|
||||
assert!(
|
||||
recovered
|
||||
.diagnostics
|
||||
.iter()
|
||||
.any(|diagnostic| diagnostic.code == "cluster_recovery_rolled_forward")
|
||||
);
|
||||
assert!(recovered.converged);
|
||||
assert!(recovery_sidecars(dir.path()).is_empty());
|
||||
let state: serde_json::Value = serde_json::from_str(
|
||||
&fs::read_to_string(dir.path().join("__cluster/state.json")).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
assert!(
|
||||
state["recovery_records"]
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.values()
|
||||
.any(|record| record["outcome"] == "rolled_forward")
|
||||
);
|
||||
scenario.teardown();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ This file is the always-on map of the test surface. **Consult it before every ta
|
|||
|---|---|---|
|
||||
| `omnigraph` (engine) | `crates/omnigraph/tests/` | Integration tests (21 files), fixture-driven, share `tests/helpers/mod.rs` |
|
||||
| `omnigraph-cli` | `crates/omnigraph-cli/tests/` | `cli.rs` (unit-ish; includes the `cluster_e2e_*` lifecycle compositions over the spawned binary — lost-state re-import recovery, out-of-band drift, graph-root destruction and re-creation, declared-graph creation by `cluster apply`, multi-graph mixed-disposition convergence), `system_local.rs`, `system_remote.rs`, share `tests/support/mod.rs` |
|
||||
| `omnigraph-cluster` | mostly in-source `#[cfg(test)] mod tests`; `tests/failpoints.rs` (feature-gated) | Cluster config parser, local JSON state diff, state CAS/lock handling/recovery, read-only validate/plan/status plus explicit refresh/import graph observations, config-only apply (content-addressed payload publish, disposition gating, composite-digest convergence, idempotent re-apply), catalog payload verification (status read-only, refresh drift + self-heal), and failpoint crash-mid-apply / CAS-race coverage |
|
||||
| `omnigraph-cluster` | mostly in-source `#[cfg(test)] mod tests`; `tests/failpoints.rs` (feature-gated) | Cluster config parser, local JSON state diff, state CAS/lock handling/recovery, read-only validate/plan/status plus explicit refresh/import graph observations, config-only apply (content-addressed payload publish, disposition gating, composite-digest convergence, idempotent re-apply), catalog payload verification (status read-only, refresh drift + self-heal), failpoint crash-mid-apply / CAS-race coverage, and Stage 4A graph creation (create executor, recovery sidecars + sweep rows, create crash windows) |
|
||||
| `omnigraph-server` | `crates/omnigraph-server/tests/` | `server.rs` (HTTP-level), `openapi.rs` (OpenAPI drift / regeneration) |
|
||||
| `omnigraph-compiler` | mostly in-source `#[cfg(test)] mod tests` | Parser, type-checker, IR lowering, lint |
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,17 @@
|
|||
# Cluster Config
|
||||
|
||||
**Status:** Stage 3A config-only apply preview.
|
||||
**Status:** Stage 4A graph-create apply preview.
|
||||
|
||||
Cluster config is the future control-plane configuration surface for a whole
|
||||
OmniGraph deployment. In this stage, OmniGraph can validate a local
|
||||
`cluster.yaml` folder, produce a deterministic read-only plan, inspect the
|
||||
local JSON state ledger, explicitly refresh/import graph observations into
|
||||
that ledger, manually remove a held local state lock by exact lock id, and
|
||||
**apply the config-only subset of the plan** — stored-query and policy-bundle
|
||||
catalog writes. It does not move graph manifests, change schemas, start
|
||||
servers, or serve anything it applies: the server still boots from
|
||||
**apply the executable subset of the plan** — stored-query and policy-bundle
|
||||
catalog writes, and **graph creation**: a declared graph that does not exist
|
||||
yet is initialized by apply itself at the derived root. It does not change
|
||||
existing schemas (deferred to a later stage), move existing graph manifests,
|
||||
start servers, or serve anything it applies: the server still boots from
|
||||
`omnigraph.yaml`.
|
||||
|
||||
## Commands
|
||||
|
|
@ -153,8 +155,8 @@ condition in `reason`).
|
|||
|
||||
## Apply
|
||||
|
||||
`cluster apply` executes the config-only subset of the plan — stored-query and
|
||||
policy-bundle changes. There is no confirm flag: `cluster plan` is the preview,
|
||||
`cluster apply` executes the executable subset of the plan — stored-query and
|
||||
policy-bundle changes, and graph creates. There is no confirm flag: `cluster plan` is the preview,
|
||||
and apply recomputes the same diff under the state lock before executing, so a
|
||||
stale preview can never be applied. Apply requires an existing `state.json`
|
||||
(`state_missing` directs you to `cluster import` first).
|
||||
|
|
@ -180,9 +182,39 @@ still boots from `omnigraph.yaml`; no query or policy applied here serves
|
|||
traffic until the server-boot stage ships, as an explicit per-deployment mode
|
||||
switch.
|
||||
|
||||
Graph and schema changes are never executed by this stage. They are reported
|
||||
as `deferred` (warning `apply_unsupported_change`), and query/policy changes
|
||||
that depend on them are `blocked` (warning `apply_dependency_blocked`, status
|
||||
### Graph creation
|
||||
|
||||
A `graph.<id>` create (the graph is declared but no root exists) is executed
|
||||
by apply: the graph is initialized at the derived root
|
||||
|
||||
```text
|
||||
<config-dir>/graphs/<graph-id>.omni
|
||||
```
|
||||
|
||||
with the declared schema, before any catalog writes, so queries and policies
|
||||
that depend on the new graph apply **in the same run**. Each create is fenced
|
||||
by a recovery sidecar under `__cluster/recoveries/{ulid}.json`, written before
|
||||
the init and removed only after the state update lands. If apply crashes in
|
||||
between, the next state-mutating command (`apply`, `refresh`, `import`) runs a
|
||||
**recovery sweep** that classifies the survivor by observation: an absent root
|
||||
removes the stale intent; a completed create rolls the cluster state forward
|
||||
(recorded in the state's `recovery_records`); a partial root reports
|
||||
`graph_create_incomplete` (status `error` — remove the root and re-run apply;
|
||||
nothing is auto-deleted); unexpected graph content reports
|
||||
`actual_applied_state_pending` (status `drifted` — run `cluster refresh` and
|
||||
re-plan). While a kept sidecar is pending, that graph's create and its
|
||||
dependents are blocked with `cluster_recovery_pending`. Read-only commands
|
||||
(`status`, `plan`) warn about pending sidecars without acting on them.
|
||||
|
||||
**Re-creation is convergence.** If a graph root disappears out-of-band,
|
||||
`refresh` records the drift and the next `plan` proposes a create — and apply
|
||||
will execute it, producing an **empty** graph at the root. The data was
|
||||
already lost when the root vanished; the create is visible in the plan
|
||||
(disposition `applied`) before anything runs.
|
||||
|
||||
Schema changes to existing graphs are never executed by this stage. They are
|
||||
reported as `deferred` (warning `apply_unsupported_change`), and query/policy
|
||||
changes that depend on them are `blocked` (warning `apply_dependency_blocked`, status
|
||||
`blocked` in state). A partially-applicable plan still exits 0 with warnings;
|
||||
the JSON `converged` field is the automation signal for "state now matches the
|
||||
desired revision". The applied `config_digest` is only recorded when apply
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue