From d3ae31be088ae094058c03057018a8a78e858b9e Mon Sep 17 00:00:00 2001 From: aaltshuler Date: Wed, 10 Jun 2026 22:35:58 +0300 Subject: [PATCH 1/3] feat(docker): cluster-mode entrypoint and the CLI in the image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OMNIGRAPH_CLUSTER boots the container from a mounted cluster directory's applied revision — checked first and exclusive (exit 64 when combined with OMNIGRAPH_TARGET_URI/CONFIG/TARGET), the entrypoint-level mirror of the server's mode-inference rule 0. The omnigraph CLI joins the image so the day-2 loop (cluster apply/approve/status, data loads by explicit URI) runs in-container via docker/ECS exec or railway shell — no omnigraph.yaml required, which the cluster-local-config PR pins. entrypoint_test gains the cluster case plus all three exclusivity refusals. Co-Authored-By: Claude Fable 5 --- .dockerignore | 1 + Dockerfile | 6 +++++- docker/entrypoint.sh | 13 +++++++++++++ docker/entrypoint_test.sh | 20 ++++++++++++++++++++ 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index ab6a1f8..05ec59a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,3 +2,4 @@ !Dockerfile !docker/entrypoint.sh !target/release/omnigraph-server +!target/release/omnigraph diff --git a/Dockerfile b/Dockerfile index e49a6c7..ca22a93 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,9 +11,13 @@ RUN groupadd --system omnigraph \ && useradd --system --gid omnigraph --create-home --home-dir /var/lib/omnigraph omnigraph COPY target/release/omnigraph-server /usr/local/bin/omnigraph-server +# The CLI ships in the image so the cluster day-2 loop (cluster +# apply/approve/status, data loads by explicit URI) runs in-container via +# `docker exec` / ECS exec / `railway shell` — no omnigraph.yaml required. 
+COPY target/release/omnigraph /usr/local/bin/omnigraph COPY docker/entrypoint.sh /usr/local/bin/omnigraph-entrypoint -RUN chmod 0755 /usr/local/bin/omnigraph-server /usr/local/bin/omnigraph-entrypoint +RUN chmod 0755 /usr/local/bin/omnigraph-server /usr/local/bin/omnigraph /usr/local/bin/omnigraph-entrypoint ENV OMNIGRAPH_BIND=0.0.0.0:8080 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index a5fb275..98587aa 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -9,6 +9,17 @@ fi bind="${OMNIGRAPH_BIND:-0.0.0.0:8080}" +# Cluster mode first, and exclusive (the server's mode-inference rule 0): +# a deployment serves from cluster state XOR omnigraph.yaml, never a merge. +# Fail fast here with the same contract the server enforces. +if [ -n "${OMNIGRAPH_CLUSTER:-}" ]; then + if [ -n "${OMNIGRAPH_TARGET_URI:-}" ] || [ -n "${OMNIGRAPH_CONFIG:-}" ] || [ -n "${OMNIGRAPH_TARGET:-}" ]; then + echo "OMNIGRAPH_CLUSTER is an exclusive boot source; unset OMNIGRAPH_TARGET_URI/OMNIGRAPH_CONFIG/OMNIGRAPH_TARGET" >&2 + exit 64 + fi + exec "$SERVER_BIN" --cluster "${OMNIGRAPH_CLUSTER}" --bind "${bind}" +fi + # URI comes from the env var (the positional arg wins over any config # `graphs` block in resolve_target_uri). 
OMNIGRAPH_CONFIG, when also set, # is forwarded as --config purely to supply a policy file — the two @@ -28,6 +39,8 @@ fi cat >&2 <<'EOF' omnigraph-server container startup requires one of: + - OMNIGRAPH_CLUSTER (serve a cluster directory's applied revision; + exclusive — cannot combine with the others) - OMNIGRAPH_TARGET_URI - OMNIGRAPH_CONFIG diff --git a/docker/entrypoint_test.sh b/docker/entrypoint_test.sh index 01fbee2..3ee668f 100755 --- a/docker/entrypoint_test.sh +++ b/docker/entrypoint_test.sh @@ -58,6 +58,26 @@ got=$(sh "$ep" some-uri --bind 1.2.3.4:9 --extra) check "explicit args passthrough" \ "ARGS: some-uri --bind 1.2.3.4:9 --extra" "$got" +got=$(OMNIGRAPH_CLUSTER="/var/lib/omnigraph/company-brain" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep") +check "CLUSTER only (Phase 5 mode switch)" \ + "ARGS: --cluster /var/lib/omnigraph/company-brain --bind 0.0.0.0:8080" "$got" + +# Exclusivity: OMNIGRAPH_CLUSTER refuses every combination, exit 64. +for combo in "OMNIGRAPH_TARGET_URI=s3://b/g" "OMNIGRAPH_CONFIG=/etc/o.yaml" "OMNIGRAPH_TARGET=active"; do + if out=$(env "$combo" OMNIGRAPH_CLUSTER="/data/cluster" sh "$ep" 2>&1); then + echo "FAIL: CLUSTER + ${combo%%=*} unexpectedly succeeded: $out" + fail=1 + else + status=$? + if [ "$status" -ne 64 ]; then + echo "FAIL: CLUSTER + ${combo%%=*} exited $status, want 64" + fail=1 + else + echo "ok: CLUSTER + ${combo%%=*} refused (64)" + fi + fi +done + if [ "$fail" -ne 0 ]; then echo "entrypoint_test: FAILED" exit 1 From 6b3ae7ac79723d097ee6a50b17235685103a6343 Mon Sep 17 00:00:00 2001 From: aaltshuler Date: Wed, 10 Jun 2026 22:45:30 +0300 Subject: [PATCH 2/3] docs(deploy): AWS and Railway cluster-mode recipes The container contract (OMNIGRAPH_CLUSTER + mounted volume + token env), ECS/Fargate+EFS and Railway-volume walkthroughs, the in-container day-2 loop, and the honest constraints list (volume mandatory, no hot reload, single-writer apply, shared-volume replicas unvalidated). Operator guide links the recipes. 
Co-Authored-By: Claude Fable 5 --- docs/user/cluster.md | 3 +- docs/user/deployment.md | 65 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/docs/user/cluster.md b/docs/user/cluster.md index 6241378..a4a4cae 100644 --- a/docs/user/cluster.md +++ b/docs/user/cluster.md @@ -227,7 +227,8 @@ with an in-flight apply. - **Replicas**: any number of `--cluster` servers can serve the same config directory; boot is read-only. Roll out a change by `apply` once, then restarting replicas (serving is static per process — there is no hot - reload yet). + reload yet). Container/cloud recipes (AWS ECS+EFS, Railway volumes): + [deployment.md](deployment.md#cluster-mode-in-containers-aws-railway). - **The directory is the deployable unit**: config, catalog, ledger, approvals, and graph data all live under it. Back it up as a whole; version the *config files* (not `__cluster/` or `graphs/`) in git. diff --git a/docs/user/deployment.md b/docs/user/deployment.md index 328784f..eb181e3 100644 --- a/docs/user/deployment.md +++ b/docs/user/deployment.md @@ -45,6 +45,71 @@ omnigraph-server s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0 \ --bind 0.0.0.0:8080 ``` +## Cluster Mode in Containers (AWS, Railway) + +A cluster-booted deployment serves a **cluster directory** (config + state +ledger + content-addressed catalog + graph data) from a mounted volume — the +one structural difference from the stateless S3 single-graph shape, which +needs no volume at all. The container contract: + +```bash +docker run -d \ + -v /srv/company-brain:/var/lib/omnigraph/cluster \ + -e OMNIGRAPH_CLUSTER=/var/lib/omnigraph/cluster \ + -e OMNIGRAPH_SERVER_BEARER_TOKEN=... \ + -p 8080:8080 <image> +``` + +`OMNIGRAPH_CLUSTER` is exclusive: combining it with `OMNIGRAPH_TARGET_URI`, +`OMNIGRAPH_CONFIG`, or `OMNIGRAPH_TARGET` fails fast (exit 64), the same +rule the server itself enforces. 
The image also ships the `omnigraph` CLI, +so the day-2 loop runs in-container with no `omnigraph.yaml`: + +```bash +docker exec -it <container> sh -c \ + 'omnigraph cluster apply --as andrew --config /var/lib/omnigraph/cluster' +# then restart the container to pick up the applied state +``` + +### AWS (ECS/Fargate + EFS) + +1. Push the image to ECR (the `package.yml` workflow builds it). +2. Create an EFS filesystem; mount it in the task definition at + `/var/lib/omnigraph/cluster`. +3. Task environment: `OMNIGRAPH_CLUSTER=/var/lib/omnigraph/cluster`, bearer + tokens via Secrets Manager/SSM into `OMNIGRAPH_SERVER_BEARER_TOKENS_JSON` + (or the `--features aws` build's native Secrets Manager source). +4. ALB in front for TLS; target the container's 8080 with `/healthz` checks. +5. Day-2: ECS exec into the task → edit/upload config on the volume → + `omnigraph cluster apply --as <actor>` → force a new deployment (restart). + +For a deployment that doesn't need the cluster control plane, the classic +stateless shape — `OMNIGRAPH_TARGET_URI=s3://bucket/graph.omni`, no volume — +remains the simplest AWS architecture (see Binary/Container Deployment +above). + +### Railway + +1. Create a service from the image; attach a **volume** mounted at + `/var/lib/omnigraph/cluster`. +2. Variables: `OMNIGRAPH_CLUSTER=/var/lib/omnigraph/cluster`, + `OMNIGRAPH_SERVER_BEARER_TOKEN=<token>`. Railway terminates TLS at its + edge and routes to the exposed 8080. +3. Day-2: `railway shell` (or `railway run`) → `omnigraph cluster apply + --as <actor> --config /var/lib/omnigraph/cluster` → redeploy/restart the + service. + +### Constraints (current honest list) + +- **Cluster directories are local-filesystem** — the volume is mandatory; + S3-hosted cluster dirs are not supported. +- **No hot reload** — applied changes serve on the next restart. +- **Single-writer apply** — run `cluster apply` from one place at a time + (the state lock enforces this; CI or one operator shell, not both). 
+- **Multi-replica serving off a shared volume (EFS) is documented but + unvalidated** — boot is lock-free read-only so it should compose, but it + is not yet exercised by tests. + ## One-Command Local RustFS Bootstrap The easiest local S3-backed deployment path is: From f165145b63f8380de38e30d0bb52453df2189c5b Mon Sep 17 00:00:00 2001 From: aaltshuler Date: Wed, 10 Jun 2026 22:54:26 +0300 Subject: [PATCH 3/3] =?UTF-8?q?docs(deploy):=20address=20review=20?= =?UTF-8?q?=E2=80=94=20consistent=20placeholders,=20complete=20ECS=20comma?= =?UTF-8?q?nd?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ECS day-2 apply gains its required --config flag (the image ships no omnigraph.yaml, so the CLI cannot locate the cluster dir without it), and the docker-exec example uses the placeholder convention instead of a real-looking actor name. Co-Authored-By: Claude Fable 5 --- docs/user/deployment.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/user/deployment.md b/docs/user/deployment.md index eb181e3..563a501 100644 --- a/docs/user/deployment.md +++ b/docs/user/deployment.md @@ -67,7 +67,7 @@ so the day-2 loop runs in-container with no `omnigraph.yaml`: ```bash docker exec -it <container> sh -c \ - 'omnigraph cluster apply --as andrew --config /var/lib/omnigraph/cluster' + 'omnigraph cluster apply --as <actor> --config /var/lib/omnigraph/cluster' # then restart the container to pick up the applied state ``` @@ -81,7 +81,8 @@ docker exec -it <container> sh -c \ (or the `--features aws` build's native Secrets Manager source). 4. ALB in front for TLS; target the container's 8080 with `/healthz` checks. 5. Day-2: ECS exec into the task → edit/upload config on the volume → - `omnigraph cluster apply --as <actor>` → force a new deployment (restart). + `omnigraph cluster apply --as <actor> --config /var/lib/omnigraph/cluster` + → force a new deployment (restart). 
For a deployment that doesn't need the cluster control plane, the classic stateless shape — `OMNIGRAPH_TARGET_URI=s3://bucket/graph.omni`, no volume —