diff --git a/crates/omnigraph-cli/src/main.rs b/crates/omnigraph-cli/src/main.rs index da4f8e8..62fff60 100644 --- a/crates/omnigraph-cli/src/main.rs +++ b/crates/omnigraph-cli/src/main.rs @@ -6,7 +6,7 @@ use std::path::PathBuf; use std::sync::Arc; use clap::{Arg, ArgAction, Args, CommandFactory, FromArgMatches, Parser, Subcommand, ValueEnum}; -use color_eyre::eyre::{Result, bail}; +use color_eyre::eyre::{Result, WrapErr, bail}; use omnigraph::db::{Omnigraph, ReadTarget, SnapshotId}; use omnigraph::loader::LoadMode; use omnigraph::storage::normalize_root_uri; @@ -1251,6 +1251,25 @@ async fn open_local_db_with_policy(graph: &ResolvedCliGraph) -> Result) -> Result> { + if let Some(actor) = cli_as { + return Ok(Some(actor.to_string())); + } + let config = load_config(None).wrap_err( + "resolving the default actor from the per-operator omnigraph.yaml (pass --as to skip this lookup)", + )?; + Ok(config.cli.actor.clone()) +} + /// Resolve the CLI's effective actor identity for engine-layer policy /// (MR-722). Precedence: `--as ` (top-level flag) overrides /// `cli.actor` from `omnigraph.yaml`; both unset returns `None`. When @@ -3610,16 +3629,12 @@ async fn main() -> Result<()> { finish_cluster_plan(&output, json)?; } ClusterCommand::Apply { config, json } => { - // The global --as actor attributes graph-moving operations - // (sidecars, audit entries, engine schema-apply commits). - // Cluster config stays unlayered: no omnigraph.yaml fallback. - let output = apply_config_dir_with_options( - config, - ApplyOptions { - actor: cli.as_actor.clone(), - }, - ) - .await; + // The actor attributes graph-moving operations (sidecars, + // audit entries, engine schema-apply commits). Cluster FACTS + // stay unlayered; the operator's identity resolves --as flag + // first, then the per-operator omnigraph.yaml `cli.actor`. + let actor = resolve_cluster_actor(cli.as_actor.as_deref())?; + let output = apply_config_dir_with_options(config, ApplyOptions { actor }).await; finish_cluster_apply(&output, json)?; } ClusterCommand::Approve { @@ -3627,12 +3642,12 @@ async fn main() -> Result<()> { config, json, } => { - let Some(approver) = cli.as_actor.as_deref() else { + let Some(approver) = resolve_cluster_actor(cli.as_actor.as_deref())? else { bail!( - "`cluster approve` requires the global --as flag: an approval without an approver is meaningless" + "`cluster approve` requires an approver: pass the global --as flag or set `cli.actor` in your omnigraph.yaml — an approval without an approver is meaningless" ); }; - let output = approve_config_dir(config, &resource, approver).await; + let output = approve_config_dir(config, &resource, &approver).await; finish_cluster_approve(&output, json)?; } ClusterCommand::Status { config, json } => { diff --git a/crates/omnigraph-cli/tests/cli.rs b/crates/omnigraph-cli/tests/cli.rs index e4590f6..ab3c23b 100644 --- a/crates/omnigraph-cli/tests/cli.rs +++ b/crates/omnigraph-cli/tests/cli.rs @@ -4325,3 +4325,212 @@ fn queries_validate_positional_uri_ignores_default_graph() { "positional URI must validate the top-level registry, not the cli.graph default; stdout:\n{stdout}" ); } + +// ---- per-operator local config (omnigraph.yaml) vs the cluster surfaces ---- + +/// Cluster ops resolve operator identity per-operator: --as wins, and +/// without it the cwd omnigraph.yaml's `cli.actor` is the default. +#[test] +fn cluster_apply_uses_cli_actor_from_local_config() { + let temp = tempdir().unwrap(); + write_cluster_config_fixture(temp.path()); + fs::write( + temp.path().join("omnigraph.yaml"), + "cli:\n actor: act-local\n", + ) + .unwrap(); + // Phase 1: import once (setup, not under test). + let output = cli() + .current_dir(temp.path()) + .arg("cluster") + .arg("import") + .arg("--config") + .arg(temp.path()) + .output() + .unwrap(); + assert!(output.status.success(), "{output:?}"); + + // Phase 2: apply alone, capturing the echoed actor (idempotent re-runs). + let apply = |extra: &[&str]| { + let mut command = cli(); + command.current_dir(temp.path()); + for arg in extra { + command.arg(arg); + } + let output = command + .arg("cluster") + .arg("apply") + .arg("--config") + .arg(temp.path()) + .arg("--json") + .output() + .unwrap(); + let json: serde_json::Value = + serde_json::from_str(String::from_utf8_lossy(&output.stdout).trim()).unwrap(); + json["actor"].clone() + }; + assert_eq!(apply(&[]), "act-local", "cli.actor is the no-flag default"); + assert_eq!(apply(&["--as", "andrew"]), "andrew", "--as overrides cli.actor"); +} + +#[test] +fn cluster_approve_uses_cli_actor_fallback() { + let temp = tempdir().unwrap(); + write_cluster_config_fixture(temp.path()); + fs::write( + temp.path().join("omnigraph.yaml"), + "cli:\n actor: act-local\n", + ) + .unwrap(); + // Converge, then remove the graph so a gated delete is pending. + for command in ["import", "apply"] { + let output = cli() + .current_dir(temp.path()) + .arg("cluster") + .arg(command) + .arg("--config") + .arg(temp.path()) + .output() + .unwrap(); + assert!(output.status.success(), "cluster {command} failed"); + } + fs::write(temp.path().join("cluster.yaml"), "version: 1\ngraphs: {}\n").unwrap(); + + let output = cli() + .current_dir(temp.path()) + .arg("cluster") + .arg("approve") + .arg("graph.knowledge") + .arg("--config") + .arg(temp.path()) + .arg("--json") + .output() + .unwrap(); + assert!(output.status.success(), "{output:?}"); + let json: serde_json::Value = + serde_json::from_str(String::from_utf8_lossy(&output.stdout).trim()).unwrap(); + assert_eq!(json["approved_by"], "act-local"); + + // With neither flag nor config: refused with the actionable message. + let bare = tempdir().unwrap(); + write_cluster_config_fixture(bare.path()); + let output = output_failure( + cli() + .current_dir(bare.path()) + .arg("cluster") + .arg("approve") + .arg("graph.knowledge") + .arg("--config") + .arg(bare.path()), + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!(stderr.contains("--as"), "{stderr}"); + assert!(stderr.contains("cli.actor"), "{stderr}"); +} + +/// A malformed omnigraph.yaml in the cwd must never break cluster commands; +/// it is read for exactly one thing (the actor default when --as is absent), +/// and only that path fails loudly. +#[test] +fn cluster_commands_ignore_malformed_local_config() { + let temp = tempdir().unwrap(); + write_cluster_config_fixture(temp.path()); + fs::write(temp.path().join("omnigraph.yaml"), "{{{{ not yaml").unwrap(); + + for command in ["validate", "plan", "status"] { + let output = cli() + .current_dir(temp.path()) + .arg("cluster") + .arg(command) + .arg("--config") + .arg(temp.path()) + .arg("--json") + .output() + .unwrap(); + assert!( + output.status.success() || command == "plan", // plan warns state-missing pre-import; still must not config-error + "cluster {command} affected by malformed omnigraph.yaml: {output:?}" + ); + assert!( + !String::from_utf8_lossy(&output.stderr).contains("omnigraph.yaml"), + "cluster {command} touched omnigraph.yaml" + ); + } + // import + apply with an explicit --as: the config is never loaded. + for (command, args) in [("import", vec![]), ("apply", vec!["--as", "andrew"])] { + let mut invocation = cli(); + invocation.current_dir(temp.path()); + for arg in &args { + invocation.arg(arg); + } + let output = invocation + .arg("cluster") + .arg(command) + .arg("--config") + .arg(temp.path()) + .output() + .unwrap(); + assert!( + output.status.success(), + "cluster {command} affected by malformed omnigraph.yaml: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + // Only the no-flag actor lookup is allowed to fail, and loudly. + let output = output_failure( + cli() + .current_dir(temp.path()) + .arg("cluster") + .arg("apply") + .arg("--config") + .arg(temp.path()), + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("omnigraph.yaml") && stderr.contains("--as"), + "the actor-default config read must fail loudly and actionably: {stderr}" + ); +} + +/// A well-formed omnigraph.yaml with a CONFLICTING world view (different +/// graphs, server bind) leaks nothing into cluster outputs. +#[test] +fn cluster_commands_ignore_conflicting_local_config() { + let baseline = tempdir().unwrap(); + write_cluster_config_fixture(baseline.path()); + let with_config = tempdir().unwrap(); + write_cluster_config_fixture(with_config.path()); + fs::write( + with_config.path().join("omnigraph.yaml"), + r#" +server: + bind: 0.0.0.0:9999 +graphs: + phantom: + uri: ./phantom.omni +"#, + ) + .unwrap(); + + let validate = |dir: &std::path::Path| { + let output = cli() + .current_dir(dir) + .arg("cluster") + .arg("validate") + .arg("--config") + .arg(dir) + .arg("--json") + .output() + .unwrap(); + assert!(output.status.success(), "{output:?}"); + serde_json::from_str::(String::from_utf8_lossy(&output.stdout).trim()) + .unwrap() + }; + let (a, b) = (validate(baseline.path()), validate(with_config.path())); + // Compare the path-free invariants (paths embed each tempdir). + for key in ["ok", "diagnostics", "resource_digests", "dependencies"] { + assert_eq!(a[key], b[key], "conflicting omnigraph.yaml leaked into cluster validate ({key})"); + } + let leaked = b.to_string(); + assert!(!leaked.contains("phantom") && !leaked.contains("9999"), "{leaked}"); +} diff --git a/crates/omnigraph-cli/tests/support/mod.rs b/crates/omnigraph-cli/tests/support/mod.rs index 855d8e0..c30ed28 100644 --- a/crates/omnigraph-cli/tests/support/mod.rs +++ b/crates/omnigraph-cli/tests/support/mod.rs @@ -218,6 +218,18 @@ pub fn spawn_server_with_cluster(cluster_dir: &Path) -> TestServer { spawn_server_process(command) } +/// Cluster boot with the server process's cwd set explicitly — used to prove +/// rule 0 never touches the cwd omnigraph.yaml search. +pub fn spawn_server_with_cluster_in(cluster_dir: &Path, cwd: &Path) -> TestServer { + let mut command = server_process(); + command + .arg("--cluster") + .arg(cluster_dir) + .arg("--unauthenticated") + .current_dir(cwd); + spawn_server_process(command) +} + pub fn spawn_server_with_cluster_env(cluster_dir: &Path, envs: &[(&str, &str)]) -> TestServer { let mut command = server_process(); command.arg("--cluster").arg(cluster_dir); diff --git a/crates/omnigraph-cli/tests/system_local.rs b/crates/omnigraph-cli/tests/system_local.rs index 4b5e4b6..adb5dc8 100644 --- a/crates/omnigraph-cli/tests/system_local.rs +++ b/crates/omnigraph-cli/tests/system_local.rs @@ -595,8 +595,12 @@ policy: {{}} ), ); + // current_dir matters: `init` scaffolds an omnigraph.yaml into its cwd, + // and without this it pollutes the crate dir, breaking unrelated tests + // (anything resolving a graph target from the cwd config). output_success( cli() + .current_dir(query_root) .arg("init") .arg("--schema") .arg(fixture("test.pg")) @@ -604,6 +608,7 @@ policy: {{}} ); output_success( cli() + .current_dir(query_root) .arg("load") .arg("--data") .arg(fixture("test.jsonl")) @@ -2152,3 +2157,37 @@ policies: // unknown query — the server's anti-probing contract. assert_eq!(invoke("admin-token").status().as_u16(), 404); } + +/// Rule 0 (axiom 15): a --cluster server never reads omnigraph.yaml — not +/// even the implicit cwd search. A MALFORMED config in the process cwd must +/// not affect boot or serving. +#[test] +fn cluster_server_boot_ignores_local_config_in_cwd() { + let cluster = tempfile::tempdir().unwrap(); + std::fs::write( + cluster.path().join("people.pg"), + "\nnode Person {\n name: String @key\n}\n", + ) + .unwrap(); + std::fs::write( + cluster.path().join("cluster.yaml"), + "version: 1\ngraphs:\n knowledge:\n schema: ./people.pg\n", + ) + .unwrap(); + for command in ["import", "apply"] { + let output = cli() + .arg("cluster") + .arg(command) + .arg("--config") + .arg(cluster.path()) + .output() + .unwrap(); + assert!(output.status.success(), "cluster {command} failed"); + } + let cwd = tempfile::tempdir().unwrap(); + std::fs::write(cwd.path().join("omnigraph.yaml"), "{{{{ not yaml").unwrap(); + + let server = spawn_server_with_cluster_in(cluster.path(), cwd.path()); + let response = reqwest::blocking::get(format!("{}/healthz", server.base_url)).unwrap(); + assert!(response.status().is_success()); +} diff --git a/docs/user/cli-reference.md b/docs/user/cli-reference.md index 6d864cc..ecb44b5 100644 --- a/docs/user/cli-reference.md +++ b/docs/user/cli-reference.md @@ -19,7 +19,7 @@ Top-level command families and subcommands. Graph-targeting commands accept eith | `commit list \| show` | inspect commit graph | | `schema plan \| apply \| show (alias: get)` | migrations | | `lint` (alias: `check`) | offline / graph-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` | -| `cluster validate \| plan \| apply \| approve \| status \| refresh \| import \| force-unlock` | declarative cluster control plane. `validate` checks a local `cluster.yaml` folder and referenced schema/query/policy files; `plan` diffs it against local JSON state at `__cluster/state.json`, annotates dispositions, and embeds real schema-migration previews; `apply` converges the cluster — stored-query/policy catalog writes (content-addressed under `__cluster/resources/`), graph creates, schema updates (soft drops only; `--as` records the actor), and graph deletes behind a digest-bound approval from `cluster approve --as `; what apply converges is what an `omnigraph-server --cluster ` deployment serves on its next restart (omnigraph.yaml deployments are unaffected); `status` reads the state ledger; `refresh`/`import` explicitly update local JSON state from read-only graph observations; `force-unlock ` manually removes a held local state lock by exact id | +| `cluster validate \| plan \| apply \| approve \| status \| refresh \| import \| force-unlock` | declarative cluster control plane. `validate` checks a local `cluster.yaml` folder and referenced schema/query/policy files; `plan` diffs it against local JSON state at `__cluster/state.json`, annotates dispositions, and embeds real schema-migration previews; `apply` converges the cluster — stored-query/policy catalog writes (content-addressed under `__cluster/resources/`), graph creates, schema updates (soft drops only; `--as` records the actor), and graph deletes behind a digest-bound approval from `cluster approve --as ` (`apply`/`approve` default the actor from the per-operator `omnigraph.yaml`'s `cli.actor` when `--as` is omitted; nothing else in that file affects cluster commands); what apply converges is what an `omnigraph-server --cluster ` deployment serves on its next restart (omnigraph.yaml deployments are unaffected); `status` reads the state ledger; `refresh`/`import` explicitly update local JSON state from read-only graph observations; `force-unlock ` manually removes a held local state lock by exact id | | `optimize` | non-destructive Lance compaction (skips tables with `Blob` columns or uncovered drift; `--json` reports `skipped`) | | `repair [--confirm] [--force]` | preview or explicitly publish uncovered manifest/head drift. `--confirm` heals verified maintenance drift and exits non-zero if suspicious/unverifiable drift is refused; `--force --confirm` publishes suspicious/unverifiable drift after operator review | | `cleanup --keep N --older-than 7d --confirm` | destructive version GC | diff --git a/docs/user/cluster-config.md b/docs/user/cluster-config.md index 5847d8e..081bfa2 100644 --- a/docs/user/cluster-config.md +++ b/docs/user/cluster-config.md @@ -36,15 +36,26 @@ omnigraph cluster force-unlock --config ./company-brain --json ## Relationship to `omnigraph.yaml` `cluster.yaml` does not replace `omnigraph.yaml`, and the two never describe -the same fact. `omnigraph.yaml` remains how the CLI and server are configured -today (graph targets, server bind, CLI defaults, credential env references) and -is its long-term home for per-operator settings. `cluster.yaml` is the shared -desired state of a whole deployment, read only by the `cluster` commands via -`--config`. In the current stage, nothing recorded in the cluster state ledger -affects what a server serves or what other CLI commands target — the cluster -catalog is inspectable, not live. When server boot from cluster state ships in -a later stage, it will be an explicit per-deployment mode switch, not a merge -of the two files. +the same fact. `omnigraph.yaml` is the permanent **per-operator** layer (CLI +defaults, the operator's identity and credential references, graph targets +for data-plane commands); `cluster.yaml` is the shared desired state of a +whole deployment, read only by the `cluster` commands via `--config`. + +The exact contract: + +- **Cluster commands read `omnigraph.yaml` for exactly one thing**: the + `cli.actor` default used by `apply`/`approve` when `--as` is omitted — + operator identity is a per-operator fact. With `--as` present, no config + is read at all. Nothing else (its graph set, targets, bind, queries, + policies) ever influences a cluster command; a malformed `omnigraph.yaml` + breaks only the no-flag actor lookup, loudly. +- **A `--cluster` server reads `omnigraph.yaml` for nothing** — not even the + implicit current-directory search runs (mode-inference rule 0). Boot from + cluster state XOR `omnigraph.yaml`, never a merge. +- **The other direction is ergonomics, not coupling**: a per-operator + `omnigraph.yaml` may point `graphs..uri` at a cluster's derived root + (`./company-brain/graphs/knowledge.omni`) so data-plane commands can use + `--target ` — an ordinary local path, no special handling. ## Supported `cluster.yaml` diff --git a/docs/user/cluster.md b/docs/user/cluster.md index 6241378..dcc4b2b 100644 --- a/docs/user/cluster.md +++ b/docs/user/cluster.md @@ -109,8 +109,10 @@ omnigraph cluster apply --config ./company-brain --as andrew ``` `--as ` attributes the run: it is recorded in recovery sidecars and -audit entries and threaded into the engine's commit history. Make it a habit -on every apply (it is required for `approve`). +audit entries and threaded into the engine's commit history. Set +`cli: { actor: }` in your per-operator `omnigraph.yaml` to make it the +default when `--as` is omitted (the flag always wins; `approve` requires one +of the two). What each change kind does: @@ -234,9 +236,12 @@ with an in-flight apply. - **CI-driven convergence**: `validate` and `plan --json` are read-only and safe in pipelines; gate `apply --as ci` on plan review. Approvals are the human step by design — keep `cluster approve` out of automation. -- **`omnigraph.yaml` still has a job**: per-operator settings (CLI defaults, - credentials, active context). It just no longer describes the deployment — - a server boots from one source or the other, never a merge of both. +- **`omnigraph.yaml` still has a job**: per-operator settings — your + `cli.actor` default for `--as`, CLI defaults, credentials, and data-plane + ergonomics (point `graphs..uri` at a derived root like + `./company-brain/graphs/knowledge.omni` to use `--target ` for + loads). It just no longer describes the deployment — a server boots from + one source or the other, never a merge of both. ## What the control plane does not do (yet)