mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-27 02:39:38 +02:00
feat(cli): cluster-managed maintenance addressing + init signpost (RFC-010 Slice 3) (#221)
* feat(cluster): cluster_root_for_graph_uri detection helper (RFC-010 Slice 3)

  Public helper the CLI uses to refuse `init` into a cluster-managed location: given a graph storage URI of the cluster layout (`<root>/graphs/<id>.omni`), return the cluster root if `<root>` holds `__cluster/state.json`, else None.

  Cheap by construction — a URI that doesn't match the `<root>/graphs/<id>.omni` shape returns None with zero I/O, so ordinary `init` targets never probe storage. Works for file:// and s3:// via the storage adapter.

  Adds two ClusterStore accessors (`display_root`, `has_state`).

  Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* feat(cli): cluster-managed maintenance addressing + init signpost (RFC-010 Slice 3)

  Two cluster-graph-aware CLI behaviors, sharing the cluster-resolution path.

  Maintenance addressing. `optimize`/`repair`/`cleanup` gain `--cluster <dir|s3://…> --cluster-graph <id>`, which resolves the graph's storage URI from the served cluster snapshot (the same truth a `--cluster` server boots from — `read_serving_snapshot*`) and opens it embedded. The operator no longer hand-types `<storage>/graphs/<id>.omni`. A distinct flag is required because the global `--graph` is `requires = server` and means a remote multi-graph id. clap enforces both-or-neither and exclusion with the positional URI / `--target`; an unserved graph errors loudly, pointing at `cluster apply`.

  init signpost. `init` refuses a cluster-managed positional path (the `<root>/graphs/<id>.omni` layout where `<root>` holds `__cluster/state.json`, detected by `cluster_root_for_graph_uri`) and points at `cluster apply` — graphs in an established cluster are created with ledger/recovery/approvals, not by hand. The check is gated on the path shape, so ordinary `init` does no extra I/O and existing pre-apply cluster-graph inits are unaffected.

  planes guard remediation now also mentions `--cluster … --cluster-graph …` (the two Slice-1 guard-string tests track it).

  Docs updated (cli-reference Command planes, maintenance.md, cluster.md §7); the stale "no S3-hosted cluster directories" limitation is dropped (RFC-006 landed it).

  Tests (cli_cluster.rs, reusing the apply-a-cluster fixture): resolve by id, unknown-id error, `--cluster` requires `--cluster-graph`, init refusal + signpost, and ordinary init still works.

  Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix(cli): resolve cluster graphs from the state ledger, not the serving snapshot

  Addresses the Greptile review on #221. `read_serving_snapshot*` does all-or-nothing serving validation — recovery-sidecar checks plus a digest verify of every catalog payload (query .gq, policy blobs). Using it to resolve a maintenance target coupled `optimize`/`repair`/`cleanup` to the readiness of unrelated resources: a single corrupt policy blob, or a pending recovery sweep, would block the command before it could touch the graph — worst for `repair`, the tool you reach for *when the cluster is degraded*.

  Add `omnigraph_cluster::resolve_graph_storage_uri(cluster, graph_id)`: read the state ledger, confirm the graph is in the applied revision, return `graph_root(id)` — the URI is deterministically derivable, no catalog validation. The CLI's cluster resolver now calls it.

  Test: `optimize --cluster … --cluster-graph …` still resolves after the catalog payloads (`__cluster/resources/`) are removed — the ledger-only path is not blocked by degraded/unrelated catalog state.

  Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
d6cf5b298c
commit
6144bb18d6
13 changed files with 401 additions and 14 deletions
|
|
@ -950,3 +950,138 @@ graphs:
|
|||
assert!(!leaked.contains("phantom") && !leaked.contains("9999"), "{leaked}");
|
||||
}
|
||||
|
||||
|
||||
// ── RFC-010 Slice 3: cluster-managed maintenance addressing + init signpost ──
|
||||
|
||||
/// Stand up an applied, served cluster with the `knowledge` graph and return
|
||||
/// its directory guard. Mirrors the e2e setup (fixture → init → import → apply).
|
||||
fn applied_knowledge_cluster() -> tempfile::TempDir {
|
||||
let temp = tempdir().unwrap();
|
||||
write_cluster_config_fixture(temp.path());
|
||||
init_cluster_derived_graph(temp.path());
|
||||
let import = cluster_json(temp.path(), "import");
|
||||
assert_eq!(import["ok"], true, "{import}");
|
||||
let apply = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(apply["converged"], true, "{apply}");
|
||||
temp
|
||||
}
|
||||
|
||||
/// `optimize --cluster <dir> --cluster-graph <id>` succeeds against an applied
/// cluster and emits a JSON payload carrying a `tables` array.
#[test]
fn optimize_resolves_a_cluster_graph_by_id() {
    let cluster = applied_knowledge_cluster();

    // The graph is addressed by cluster directory + graph id; no storage path
    // is typed out by hand.
    let out = output_success(
        cli()
            .args(["optimize", "--cluster"])
            .arg(cluster.path())
            .args(["--cluster-graph", "knowledge", "--json"]),
    );

    let report = parse_stdout_json(&out);
    let has_tables = report["tables"].as_array().is_some();
    assert!(
        has_tables,
        "optimize did not run against the resolved cluster graph: {report}"
    );
}
|
||||
|
||||
/// Addressing a graph id that the cluster has not applied must fail, and the
/// error on stderr must both say the graph is not applied and point the
/// operator at `cluster apply`.
#[test]
fn optimize_unknown_cluster_graph_id_errors() {
    let cluster = applied_knowledge_cluster();

    let out = output_failure(
        cli()
            .args(["optimize", "--cluster"])
            .arg(cluster.path())
            .args(["--cluster-graph", "does-not-exist", "--json"]),
    );

    let stderr = String::from_utf8_lossy(&out.stderr);
    let names_unapplied_graph = stderr.contains("is not applied in cluster");
    let points_at_apply = stderr.contains("cluster apply");
    assert!(
        names_unapplied_graph && points_at_apply,
        "expected an unapplied-graph error pointing at cluster apply; got: {stderr}"
    );
}
|
||||
|
||||
/// `--cluster` without `--cluster-graph` must be rejected (both-or-neither),
/// and the diagnostic must name the missing flag.
#[test]
fn cluster_flag_requires_cluster_graph() {
    let out = output_failure(
        cli()
            .arg("optimize")
            .arg("--cluster")
            .arg(".")
            .arg("--json"),
    );
    let stderr = String::from_utf8_lossy(&out.stderr);

    // Require the flag name itself. Accepting any message that merely contains
    // "required" would let an unrelated failure (e.g. some other missing
    // argument) satisfy this test without the pairing being enforced at all.
    assert!(
        stderr.contains("cluster-graph"),
        "expected --cluster to require --cluster-graph; got: {stderr}"
    );
}
|
||||
|
||||
/// `init` aimed at `<cluster>/graphs/<id>.omni` inside an already-applied
/// cluster must fail, mention `cluster apply` on stderr, and leave the target
/// path uncreated.
#[test]
fn init_refuses_a_cluster_managed_path_and_signposts_cluster_apply() {
    let cluster = applied_knowledge_cluster();
    let root = cluster.path();

    // A brand-new graph id placed by hand into the established cluster's
    // storage layout.
    let schema = root.join("people.pg");
    let sneaky_graph = root.join("graphs").join("sneaky.omni");

    let out = output_failure(
        cli()
            .arg("init")
            .arg("--schema")
            .arg(&schema)
            .arg(&sneaky_graph),
    );

    let stderr = String::from_utf8_lossy(&out.stderr);
    assert!(
        stderr.contains("cluster apply"),
        "init into a cluster-managed path should signpost `cluster apply`; got: {stderr}"
    );

    // The refusal must happen before anything is written at the target.
    assert!(!sneaky_graph.exists());
}
|
||||
|
||||
/// Regression guard: an ordinary `init` (target not in a cluster layout) is
/// unaffected by the cluster-managed-path refusal.
#[test]
fn init_outside_a_cluster_still_works() {
    let temp = tempdir().unwrap();
    let schema = fixture("test.pg");
    let graph_path = temp.path().join("plain.omni");

    let out = output_success(
        cli()
            .arg("init")
            .arg("--schema")
            .arg(&schema)
            .arg(&graph_path),
    );
    assert!(stdout_string(&out).contains("initialized"));

    // Check the filesystem as well as the message, mirroring the refusal test
    // (which asserts its target path was NOT created): a success banner alone
    // would not catch an init that printed "initialized" but wrote nothing.
    assert!(
        graph_path.exists(),
        "init reported success but {} was not created",
        graph_path.display()
    );
}
|
||||
|
||||
/// Robustness check (Greptile review, #221): cluster-addressed maintenance
/// derives the graph URI from the state ledger alone, so missing or corrupt
/// catalog payloads — or a pending recovery sweep — must not block it, unlike
/// a full serving-snapshot read. This is what keeps `repair --cluster` usable
/// on a degraded cluster.
#[test]
fn optimize_by_cluster_works_when_catalog_payloads_are_degraded() {
    let cluster = applied_knowledge_cluster();

    // Drop the verified catalog payloads (queries/policies). A serving read
    // would refuse with a catalog-payload diagnostic; the ledger-only resolve
    // must not care.
    // NOTE(review): when this directory is absent nothing is removed and the
    // test only re-checks the plain resolve path — confirm the fixture always
    // produces `__cluster/resources`.
    let catalog_payloads = cluster.path().join("__cluster").join("resources");
    if catalog_payloads.exists() {
        fs::remove_dir_all(&catalog_payloads).unwrap();
    }

    let out = output_success(
        cli()
            .args(["optimize", "--cluster"])
            .arg(cluster.path())
            .args(["--cluster-graph", "knowledge", "--json"]),
    );

    let report = parse_stdout_json(&out);
    assert!(
        report["tables"].as_array().is_some(),
        "optimize should resolve via the ledger despite degraded catalog payloads"
    );
}
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ fn optimize_with_server_flag_errors_wrong_plane() {
|
|||
assert!(
|
||||
stderr.contains("`optimize` is a storage-plane command")
|
||||
&& stderr.contains("--server/--graph address the data plane and do not apply")
|
||||
&& stderr.contains("Use --target <name> or a storage URI."),
|
||||
&& stderr.contains("Use --target <name>, a storage URI, or --cluster <dir> --cluster-graph <id>."),
|
||||
"wrong-plane guard message not found; got: {stderr}"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ fn schema_plan_with_server_flag_errors_wrong_plane() {
|
|||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("`schema plan` is a storage-plane command")
|
||||
&& stderr.contains("Use --target <name> or a storage URI."),
|
||||
&& stderr.contains("Use --target <name>, a storage URI, or --cluster <dir> --cluster-graph <id>."),
|
||||
"schema plan wrong-plane message not found; got: {stderr}"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue