Implement cluster refresh and import

This commit is contained in:
aaltshuler 2026-06-08 23:18:44 +03:00
parent a7956ea5a9
commit cb1e7bb5ea
9 changed files with 1208 additions and 29 deletions

2
Cargo.lock generated
View file

@ -4569,6 +4569,7 @@ name = "omnigraph-cluster"
version = "0.6.1"
dependencies = [
"omnigraph-compiler",
"omnigraph-engine",
"serde",
"serde_json",
"serde_yaml",
@ -4576,6 +4577,7 @@ dependencies = [
"tempfile",
"thiserror",
"time",
"tokio",
"ulid",
]

View file

@ -11,8 +11,8 @@ use omnigraph::db::{Omnigraph, ReadTarget, SnapshotId};
use omnigraph::loader::LoadMode;
use omnigraph::storage::normalize_root_uri;
use omnigraph_cluster::{
DiagnosticSeverity, PlanOutput, StatusOutput, ValidateOutput, plan_config_dir,
status_config_dir, validate_config_dir,
DiagnosticSeverity, PlanOutput, StateSyncOutput, StatusOutput, ValidateOutput,
import_config_dir, plan_config_dir, refresh_config_dir, status_config_dir, validate_config_dir,
};
use omnigraph_compiler::query::parser::parse_query;
use omnigraph_compiler::schema::parser::parse_schema;
@ -350,6 +350,24 @@ enum ClusterCommand {
#[arg(long)]
json: bool,
},
/// Refresh existing local JSON state from declared graph observations.
Refresh {
/// Cluster config directory containing cluster.yaml.
#[arg(long, default_value = ".")]
config: PathBuf,
/// Emit JSON instead of human text.
#[arg(long)]
json: bool,
},
/// Import initial local JSON state from declared graph observations.
Import {
/// Cluster config directory containing cluster.yaml.
#[arg(long, default_value = ".")]
config: PathBuf,
/// Emit JSON instead of human text.
#[arg(long)]
json: bool,
},
}
/// Operations on the graph registry of a multi-graph server (MR-668).
@ -783,6 +801,34 @@ fn print_cluster_status_human(output: &StatusOutput) {
print_cluster_diagnostics(&output.diagnostics);
}
fn print_cluster_state_sync_human(output: &StateSyncOutput) {
let operation = match output.operation {
omnigraph_cluster::StateSyncOperation::Refresh => "refresh",
omnigraph_cluster::StateSyncOperation::Import => "import",
};
if output.ok {
let state = &output.state_observations;
println!(
"cluster {operation}: revision {}, {} resource(s)",
state.state_revision, state.resource_count
);
if let Some(cas) = state.state_cas.as_deref() {
println!(" state_cas: {cas}");
}
if state.locked {
match state.lock_id.as_deref() {
Some(lock_id) => println!(" lock: acquired ({lock_id})"),
None => println!(" lock: acquired"),
}
} else {
println!(" lock: not acquired");
}
} else {
println!("cluster {operation} failed");
}
print_cluster_diagnostics(&output.diagnostics);
}
fn print_cluster_diagnostics(diagnostics: &[omnigraph_cluster::Diagnostic]) {
for diagnostic in diagnostics {
let label = match diagnostic.severity {
@ -835,6 +881,19 @@ fn finish_cluster_status(output: &StatusOutput, json: bool) -> Result<()> {
Ok(())
}
/// Emits the refresh/import result (JSON or human text) and turns a failed
/// run into process exit code 1.
///
/// Returns `Ok(())` when `output.ok` is true. Otherwise stdout is flushed and
/// the process exits with status 1, so this function does not return in that
/// case.
fn finish_cluster_state_sync(output: &StateSyncOutput, json: bool) -> Result<()> {
    if json {
        print_json(output)?;
    } else {
        print_cluster_state_sync_human(output);
    }

    if output.ok {
        return Ok(());
    }

    // Flush explicitly: `process::exit` does not run destructors, so buffered
    // stdout would not be flushed on its own.
    io::stdout().flush()?;
    std::process::exit(1)
}
/// Returns `true` when `uri` begins with an `http://` or `https://` prefix.
///
/// The comparison is a case-sensitive prefix match; no further URI validation
/// is performed.
fn is_remote_uri(uri: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 2] = ["http://", "https://"];
    REMOTE_PREFIXES
        .iter()
        .any(|prefix| uri.starts_with(*prefix))
}
@ -3272,6 +3331,14 @@ async fn main() -> Result<()> {
let output = status_config_dir(config);
finish_cluster_status(&output, json)?;
}
ClusterCommand::Refresh { config, json } => {
let output = refresh_config_dir(config).await;
finish_cluster_state_sync(&output, json)?;
}
ClusterCommand::Import { config, json } => {
let output = import_config_dir(config).await;
finish_cluster_state_sync(&output, json)?;
}
},
Command::Graphs { command } => match command {
GraphsCommand::List {

View file

@ -124,6 +124,18 @@ policies:
.unwrap();
}
/// Initializes the graph the cluster fixture expects at
/// `<root>/graphs/knowledge.omni` by running `init --schema <root>/people.pg`
/// through the CLI under test.
///
/// Panics if the `graphs` directory cannot be created.
fn init_cluster_derived_graph(root: &std::path::Path) {
    let graphs_root = root.join("graphs");
    fs::create_dir_all(&graphs_root).unwrap();

    let schema_path = root.join("people.pg");
    let graph_path = graphs_root.join("knowledge.omni");
    output_success(
        cli()
            .arg("init")
            .arg("--schema")
            .arg(schema_path)
            .arg(graph_path),
    );
}
#[test]
fn version_command_prints_current_cli_version() {
let output = output_success(cli().arg("version"));
@ -376,6 +388,196 @@ fn cluster_plan_locked_state_exits_nonzero() {
);
}
/// `cluster import --json` on a config directory without a state ledger must
/// succeed, report revision 1 with a `sha256:` CAS, record the observed graph,
/// write `__cluster/state.json`, and leave no lock file behind.
#[test]
fn cluster_import_json_bootstraps_missing_state() {
    let temp = tempdir().unwrap();
    let config_dir = temp.path();
    write_cluster_config_fixture(config_dir);
    init_cluster_derived_graph(config_dir);

    let output = output_success(
        cli()
            .arg("cluster")
            .arg("import")
            .arg("--config")
            .arg(config_dir)
            .arg("--json"),
    );
    let json = parse_stdout_json(&output);

    assert_eq!(json["ok"], true);
    assert_eq!(json["operation"], "import");

    // State ledger summary.
    let state = &json["state_observations"];
    assert_eq!(state["state_revision"], 1);
    let cas = state["state_cas"].as_str().unwrap();
    assert!(cas.starts_with("sha256:"));

    // Per-graph observation and resource status.
    let graph_key = "graph.knowledge";
    assert!(json["observations"][graph_key]["manifest_version"].is_number());
    assert_eq!(json["resource_statuses"][graph_key]["status"], "applied");

    // On-disk effects: state written, lock released.
    let cluster_dir = config_dir.join("__cluster");
    assert!(cluster_dir.join("state.json").exists());
    assert!(!cluster_dir.join("lock.json").exists());
}
/// `cluster refresh --json` against an existing ledger at revision 2 must
/// succeed, report revision 3 with a `sha256:` CAS, and leave no lock file
/// behind.
#[test]
fn cluster_refresh_json_updates_revision_cas_and_removes_lock() {
    let temp = tempdir().unwrap();
    let config_dir = temp.path();
    write_cluster_config_fixture(config_dir);
    init_cluster_derived_graph(config_dir);

    // Seed a ledger at revision 2 with no applied resources.
    let seeded_state = r#"
{
"version": 1,
"state_revision": 2,
"applied_revision": { "resources": {} }
}
"#;
    let state_dir = config_dir.join("__cluster");
    fs::create_dir_all(&state_dir).unwrap();
    fs::write(state_dir.join("state.json"), seeded_state).unwrap();

    let output = output_success(
        cli()
            .arg("cluster")
            .arg("refresh")
            .arg("--config")
            .arg(config_dir)
            .arg("--json"),
    );
    let json = parse_stdout_json(&output);

    assert_eq!(json["ok"], true);
    assert_eq!(json["operation"], "refresh");

    let state = &json["state_observations"];
    assert_eq!(state["state_revision"], 3);
    let cas = state["state_cas"].as_str().unwrap();
    assert!(cas.starts_with("sha256:"));

    assert!(!state_dir.join("lock.json").exists());
}
/// `cluster refresh` without an existing `state.json` must exit non-zero and
/// report a `state_missing` diagnostic in its JSON output.
#[test]
fn cluster_refresh_missing_state_exits_nonzero() {
    let temp = tempdir().unwrap();
    let config_dir = temp.path();
    write_cluster_config_fixture(config_dir);

    let json = parse_stdout_json(&output_failure(
        cli()
            .arg("cluster")
            .arg("refresh")
            .arg("--config")
            .arg(config_dir)
            .arg("--json"),
    ));

    assert_eq!(json["ok"], false);
    let reports_missing_state = json["diagnostics"]
        .as_array()
        .unwrap()
        .iter()
        .any(|entry| entry["code"] == "state_missing");
    assert!(
        reports_missing_state,
        "missing state should produce a useful diagnostic: {json}"
    );
}
/// `cluster import` must refuse to run when `state.json` already exists:
/// non-zero exit plus a `state_already_exists` diagnostic in the JSON output.
#[test]
fn cluster_import_existing_state_exits_nonzero() {
    let temp = tempdir().unwrap();
    let config_dir = temp.path();
    write_cluster_config_fixture(config_dir);

    // Pre-existing ledger that import must not overwrite.
    let existing_state = r#"{"version":1,"applied_revision":{"resources":{}}}"#;
    let state_dir = config_dir.join("__cluster");
    fs::create_dir_all(&state_dir).unwrap();
    fs::write(state_dir.join("state.json"), existing_state).unwrap();

    let json = parse_stdout_json(&output_failure(
        cli()
            .arg("cluster")
            .arg("import")
            .arg("--config")
            .arg(config_dir)
            .arg("--json"),
    ));

    assert_eq!(json["ok"], false);
    let reports_existing_state = json["diagnostics"]
        .as_array()
        .unwrap()
        .iter()
        .any(|entry| entry["code"] == "state_already_exists");
    assert!(
        reports_existing_state,
        "existing state should produce a useful diagnostic: {json}"
    );
}
/// A pre-existing `__cluster/lock.json` must make both `cluster refresh` and
/// `cluster import` exit non-zero, report `ok: false`, echo the held lock id
/// under `state_observations.lock_id`, and emit a `state_lock_held` diagnostic.
///
/// Each operation runs in its own temporary config directory. `refresh` is
/// given an existing `state.json`; `import` is not, matching the state each
/// command expects to find.
#[test]
fn cluster_refresh_and_import_locked_state_exit_nonzero() {
    // (operation, whether to seed state.json before running it)
    for (operation, seed_state) in [("refresh", true), ("import", false)] {
        let temp = tempdir().unwrap();
        write_cluster_config_fixture(temp.path());
        let state_dir = temp.path().join("__cluster");
        fs::create_dir_all(&state_dir).unwrap();

        if seed_state {
            fs::write(
                state_dir.join("state.json"),
                r#"{"version":1,"applied_revision":{"resources":{}}}"#,
            )
            .unwrap();
        }
        // Simulate a lock held by another invocation of the same operation.
        fs::write(
            state_dir.join("lock.json"),
            format!(
                r#"{{"version":1,"lock_id":"held-lock","operation":"{operation}","created_at":"2026-06-08T00:00:00Z","pid":123}}"#
            ),
        )
        .unwrap();

        let json = parse_stdout_json(&output_failure(
            cli()
                .arg("cluster")
                .arg(operation)
                .arg("--config")
                .arg(temp.path())
                .arg("--json"),
        ));

        assert_eq!(
            json["ok"], false,
            "cluster {operation} must report failure while the lock is held: {json}"
        );
        assert_eq!(
            json["state_observations"]["lock_id"], "held-lock",
            "cluster {operation} should report the id of the held lock: {json}"
        );
        assert!(
            json["diagnostics"]
                .as_array()
                .unwrap()
                .iter()
                .any(|diagnostic| diagnostic["code"] == "state_lock_held"),
            "cluster {operation} should emit a state_lock_held diagnostic: {json}"
        );
    }
}
#[test]
fn cluster_validate_invalid_config_exits_nonzero() {
let temp = tempdir().unwrap();

View file

@ -9,6 +9,7 @@ homepage = "https://github.com/ModernRelay/omnigraph"
documentation = "https://docs.rs/omnigraph-cluster"
[dependencies]
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
serde = { workspace = true }
serde_json = { workspace = true }
@ -20,3 +21,4 @@ ulid = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
tokio = { workspace = true }

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,13 @@
**Date:** 2026-06-07
**Relationship:** generalizes today's `omnigraph.yaml` graph/query/policy configuration surface ([CLI reference](../user/cli-reference.md), [server docs](../user/server.md)) into a future cluster control plane. The distilled rules are in [cluster-axioms.md](cluster-axioms.md); detailed downstream implementation spec and blast-radius assessment in [cluster-config-implementation-spec.md](cluster-config-implementation-spec.md). This is a proposed architecture, not an implemented RFC.
> **Implementation status.** The examples below describe the full target schema.
> Stage 2B only accepts the read-only subset documented in
> [cluster-config.md](../user/cluster-config.md). Future-phase fields such as
> `env_file`, `apply`, `providers`, `pipelines`, `embeddings`, `ui`, `aliases`,
> and `bindings` are intentionally rejected with typed diagnostics until their
> reconciler semantics are implemented.
> **Revision 2026-06-07 — full commitment to the Terraform paradigm.** Three changes from the earlier draft: (1) **state is an authoritative, locked ledger in a backend** (server-hosted *or* a separate cloud store), not "a mostly-rebuildable projection"; (2) `plan` is framed as the **CLI diff between local config and state**; (3) **ETL pipelines** (external data sources) are a first-class config asset — a second seam, alongside schema, where a definition triggers a data-plane effect. The full set of config assets (incl. **aliases**, **embeddings**) is enumerated below.
---

View file

@ -8,7 +8,7 @@ This file is the always-on map of the test surface. **Consult it before every ta
|---|---|---|
| `omnigraph` (engine) | `crates/omnigraph/tests/` | Integration tests (21 files), fixture-driven, share `tests/helpers/mod.rs` |
| `omnigraph-cli` | `crates/omnigraph-cli/tests/` | `cli.rs` (unit-ish), `system_local.rs`, `system_remote.rs`, share `tests/support/mod.rs` |
| `omnigraph-cluster` | mostly in-source `#[cfg(test)] mod tests` | Cluster config parser, local JSON state diff, state CAS/lock handling, read-only validate/plan/status |
| `omnigraph-cluster` | mostly in-source `#[cfg(test)] mod tests` | Cluster config parser, local JSON state diff, state CAS/lock handling, read-only validate/plan/status plus explicit refresh/import graph observations |
| `omnigraph-server` | `crates/omnigraph-server/tests/` | `server.rs` (HTTP-level), `openapi.rs` (OpenAPI drift / regeneration) |
| `omnigraph-compiler` | mostly in-source `#[cfg(test)] mod tests` | Parser, type-checker, IR lowering, lint |

View file

@ -21,7 +21,7 @@ A reference for the `omnigraph` binary's command surface and `omnigraph.yaml` sc
| `schema plan \| apply \| show (alias: get)` | migrations |
| `lint` (alias: `check`) | offline / graph-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` |
| `queries validate \| list` | operate on the server-side stored-query registry (the `queries:` block). `validate` type-checks every stored query against the live schema offline (opens the selected graph; exits non-zero on any breakage), catching schema drift without restarting the server; `list` prints the selected registry's query names, MCP exposure, and typed params. For per-graph registries, pass `--target <graph>` or set `cli.graph`; with no graph selection, `list` shows only top-level `queries:`. Distinct from `lint`, which validates a single `.gq` file |
| `cluster validate \| plan \| status` | read-only cluster-control preview. `validate` checks a local `cluster.yaml` folder and referenced schema/query/policy files; `plan` diffs it against local JSON state at `__cluster/state.json` while briefly holding `__cluster/lock.json`; `status` reads the state ledger. No apply, graph open, live drift scan, server change, or `state.json` mutation occurs in Stage 2A |
| `cluster validate \| plan \| status \| refresh \| import` | cluster-control preview. `validate` checks a local `cluster.yaml` folder and referenced schema/query/policy files; `plan` diffs it against local JSON state at `__cluster/state.json`; `status` reads the state ledger; `refresh`/`import` explicitly update local JSON state from read-only graph observations. No apply, graph-resource mutation, server change, or `plan --refresh` occurs in Stage 2B |
| `optimize` | non-destructive Lance compaction (skips tables with `Blob` columns; `--json` reports a `skipped` field) |
| `cleanup --keep N --older-than 7d --confirm` | destructive version GC |
| `embed` | offline JSONL embedding pipeline |
@ -80,16 +80,21 @@ policy:
omnigraph cluster validate --config ./company-brain
omnigraph cluster plan --config ./company-brain --json
omnigraph cluster status --config ./company-brain --json
omnigraph cluster refresh --config ./company-brain --json
omnigraph cluster import --config ./company-brain --json
```
`--config` is a directory containing `cluster.yaml`; it defaults to `.`.
Stage 2A accepts graphs, schemas, stored queries, and policy bundle file
Stage 2B accepts graphs, schemas, stored queries, and policy bundle file
references. `cluster plan` reads local JSON state from
`<config-dir>/__cluster/state.json`; a missing file means empty state. Plan
acquires `__cluster/lock.json` by default and releases it before returning.
`cluster status` reads state only and reports any existing lock. External state
backends, apply, refresh/import, pipelines, UI specs, embeddings, aliases, and
bindings are reserved for later stages. See [cluster-config.md](cluster-config.md).
`<config-dir>/__cluster/state.json`; a missing file means empty state. Plan,
refresh, and import acquire `__cluster/lock.json` by default and release it
before returning. `cluster status` reads state only and reports any existing
lock. `refresh` requires an existing `state.json`; `import` creates one only
when it is missing. Both observe declared graphs read-only at
`<config-dir>/graphs/<graph-id>.omni`. External state backends, apply,
`plan --refresh`, pipelines, UI specs, embeddings, aliases, and bindings are
reserved for later stages. See [cluster-config.md](cluster-config.md).
## Output formats (`query` command, alias: `read`)

View file

@ -1,12 +1,13 @@
# Cluster Config
**Status:** Stage 2A read-only preview.
**Status:** Stage 2B state-observation preview.
Cluster config is the future control-plane configuration surface for a whole
OmniGraph deployment. In this stage, OmniGraph can validate a local
`cluster.yaml` folder, produce a deterministic read-only plan, and inspect the
local JSON state ledger. It does not apply changes, open graph roots, scan live
cluster state, start servers, or write graph resources.
`cluster.yaml` folder, produce a deterministic read-only plan, inspect the
local JSON state ledger, and explicitly refresh/import graph observations into
that ledger. It does not apply desired changes, start servers, or write graph
resources.
## Commands
@ -14,6 +15,8 @@ cluster state, start servers, or write graph resources.
omnigraph cluster validate --config ./company-brain
omnigraph cluster plan --config ./company-brain --json
omnigraph cluster status --config ./company-brain --json
omnigraph cluster refresh --config ./company-brain --json
omnigraph cluster import --config ./company-brain --json
```
`--config` points at a directory, not a file. The directory must contain
@ -21,7 +24,7 @@ omnigraph cluster status --config ./company-brain --json
## Supported `cluster.yaml`
Stage 2A accepts only the read-only resource subset:
Stage 2B accepts only the read-only resource subset:
```yaml
version: 1
@ -47,10 +50,10 @@ policies:
`metadata.name` is a display label. `state.backend` may be omitted or set to
`cluster`; external state backends are reserved for a later stage. `state.lock`
defaults to `true`. When enabled, `cluster plan` briefly acquires
`<config-dir>/__cluster/lock.json` while it reads state, then removes it before
returning. `cluster status` never acquires the lock; it only reports whether one
is present.
defaults to `true`. When enabled, `cluster plan`, `cluster refresh`, and
`cluster import` briefly acquire `<config-dir>/__cluster/lock.json`, then remove
it before returning. `cluster status` never acquires the lock; it only reports
whether one is present.
## Validation
@ -113,8 +116,10 @@ Missing `state_revision` is treated as `0`. Resource status values are
Plan output compares desired resource digests against state resource digests
and reports `create`, `update`, and `delete` changes. It also reports the state
CAS (`sha256:<digest>`), state revision, and lock id used for the read. The
command never writes `state.json`; apply, refresh, import, and live drift scans
are later-stage work.
command never writes `state.json` and does not scan live graphs. Use explicit
`cluster refresh` / `cluster import` when the state ledger should be updated
from live observations. Apply and live drift scans during plan are later-stage
work.
## Status
@ -122,3 +127,24 @@ are later-stage work.
ledger says is deployed. It does not validate referenced schema/query/policy
files and does not inspect live graphs. Missing `state.json` succeeds with a
warning; invalid state JSON or an unsupported state version fails.
## Refresh And Import
`cluster refresh` updates an existing `state.json` from actual observations.
`cluster import` creates the first `state.json` when the ledger is missing.
Both commands open declared graphs read-only at:
```text
<config-dir>/graphs/<graph-id>.omni
```
They observe only branch `main`, recording graph existence, manifest version,
live schema digest, desired schema digest, and schema-match status under
`observations["graph.<id>"]`. Missing graph roots are recorded as drift, and
their graph/schema digests are removed from state so that a later `plan`
proposes creates.
Invalid graph roots are recorded as errors; `refresh` persists the error
observation and exits non-zero, while `import` exits non-zero without creating
initial state.
Refresh/import do not observe query or policy resources yet. Existing query and
policy state digests are preserved on refresh and are not invented on import.