diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 34dd9c6..d4ecfa5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,11 +8,11 @@ # CI fails if this file drifts from its source, and rejects PRs that # edit this file directly without also editing the yml. -* @aaltshuler +* @ragnorc -crates/** @aaltshuler -docs/** @aaltshuler @ragnorc -README.md @aaltshuler @ragnorc -AGENTS.md @aaltshuler @ragnorc -CLAUDE.md @aaltshuler @ragnorc -SECURITY.md @aaltshuler @ragnorc +crates/** @ragnorc +docs/** @ragnorc +README.md @ragnorc +AGENTS.md @ragnorc +CLAUDE.md @ragnorc +SECURITY.md @ragnorc diff --git a/.github/branch-protection.json b/.github/branch-protection.json index d472de8..61b7d33 100644 --- a/.github/branch-protection.json +++ b/.github/branch-protection.json @@ -11,7 +11,7 @@ "CODEOWNERS / noedit" ] }, - "enforce_admins": true, + "enforce_admins": false, "required_pull_request_reviews": { "dismissal_restrictions": {}, "dismiss_stale_reviews": true, diff --git a/.github/codeowners-roles.yml b/.github/codeowners-roles.yml index 9fdc8e5..c5e36a9 100644 --- a/.github/codeowners-roles.yml +++ b/.github/codeowners-roles.yml @@ -19,18 +19,15 @@ roles: engineering: description: > All production code under crates/**. Engine, CLI, server, - compiler. Single owner; review must come from this person. + compiler. members: - - aaltshuler + - ragnorc docs: description: > Documentation under docs/**, plus repo-level docs (README.md, - AGENTS.md, CLAUDE.md symlink, SECURITY.md). Either named member - can approve; both are listed so reviews can route to whoever is - available. + AGENTS.md, CLAUDE.md symlink, SECURITY.md). members: - - aaltshuler - ragnorc # Path → role mapping. 
GitHub CODEOWNERS uses "last match wins" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f36b31e..918a472 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -249,6 +249,63 @@ jobs: if: needs.classify_changes.outputs.run_full_ci == 'true' run: cargo test --locked -p omnigraph-server --features aws + test_windows_binaries: + name: Test Windows release binaries + needs: classify_changes + runs-on: windows-latest + timeout-minutes: 75 + permissions: + contents: read + env: + CARGO_TERM_COLOR: always + steps: + - name: Skip for text-only changes + if: needs.classify_changes.outputs.run_full_ci != 'true' + run: Write-Host "Text-only change detected; skipping Windows binary build." + + - name: Checkout source + if: needs.classify_changes.outputs.run_full_ci == 'true' + uses: actions/checkout@v5.0.1 + + - name: Install system dependencies + if: needs.classify_changes.outputs.run_full_ci == 'true' + run: choco install protoc -y + + - name: Install Rust stable + if: needs.classify_changes.outputs.run_full_ci == 'true' + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + + - name: Cache Rust build data + if: needs.classify_changes.outputs.run_full_ci == 'true' + uses: Swatinem/rust-cache@v2 + with: + workspaces: | + . 
-> target + key: windows-release-binaries + + - name: Build Windows binaries + if: needs.classify_changes.outputs.run_full_ci == 'true' + run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server + + - name: Smoke test Windows binaries + if: needs.classify_changes.outputs.run_full_ci == 'true' + run: | + & ./target/release/omnigraph.exe version + & ./target/release/omnigraph-server.exe --help + + - name: Check PowerShell installer syntax + if: needs.classify_changes.outputs.run_full_ci == 'true' + run: | + $tokens = $null + $errors = $null + [System.Management.Automation.Language.Parser]::ParseFile("scripts/install.ps1", [ref]$tokens, [ref]$errors) | Out-Null + if ($errors.Count -gt 0) { + $errors | Format-List + exit 1 + } + rustfs_integration: name: RustFS S3 Integration needs: @@ -291,6 +348,14 @@ jobs: . -> target - name: Start RustFS + # Pinned to 1.0.0-beta.3 (2026-05-14) — the last known-good tag. + # `rustfs/rustfs:latest` (1.0.0-beta.4, 2026-05-21) added a + # credentials-policy check that refuses to start when + # AWS_ACCESS_KEY_ID/SECRET_ACCESS_KEY are values it considers + # "default" (rustfsadmin/rustfsadmin in our case). Bumping to + # beta.4+ requires either rotating those creds to less-default + # values or setting RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true + # — deliberate work, not an emergency. Pin first; upgrade later. 
run: | docker rm -f rustfs >/dev/null 2>&1 || true docker run -d \ @@ -299,7 +364,7 @@ jobs: -p 9001:9001 \ -e RUSTFS_ACCESS_KEY="${AWS_ACCESS_KEY_ID}" \ -e RUSTFS_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}" \ - rustfs/rustfs:latest \ + rustfs/rustfs:1.0.0-beta.3 \ /data - name: Install AWS CLI diff --git a/.github/workflows/publish-crates.yml b/.github/workflows/publish-crates.yml index d7f783f..9484b98 100644 --- a/.github/workflows/publish-crates.yml +++ b/.github/workflows/publish-crates.yml @@ -80,8 +80,15 @@ jobs: version=$(cargo metadata --format-version=1 --no-deps \ | jq -r --arg c "$crate" '.packages[] | select(.name==$c) | .version') + # crates.io API requires a User-Agent header — without it the + # API responds 403 and the skip check below would silently + # fall through to a real publish attempt that errors with + # "already exists on crates.io index" when re-running after a + # partial publish. Send a UA naming the workflow. local current - current=$(curl -fsSL "https://crates.io/api/v1/crates/${crate}" \ + current=$(curl -fsSL \ + -A 'ModernRelay-omnigraph-ci (https://github.com/ModernRelay/omnigraph)' \ + "https://crates.io/api/v1/crates/${crate}" \ | jq -r '.crate.max_version' || echo "") if [[ "$current" == "$version" ]]; then @@ -90,10 +97,28 @@ jobs: fi echo "==> publishing ${crate} ${version} (current crates.io: ${current:-none})" - cargo publish -p "$crate" --locked + # Defense in depth: if the skip check missed an existing + # version (e.g. crates.io API hiccup), cargo publish errors + # with "already exists on crates.io index". Treat that as + # success so the workflow can be re-run idempotently. + local output + if ! 
output=$(cargo publish -p "$crate" --locked 2>&1); then + echo "$output" + if echo "$output" | grep -q "already exists on crates.io"; then + echo "==> ${crate} ${version} was already published; treating as success" + return 0 + fi + return 1 + fi + echo "$output" } + # Order matters: each crate must precede anything that depends on it. + # omnigraph-compiler and omnigraph-policy have no internal deps; + # omnigraph-engine depends on both; server depends on engine + the + # two leaf crates; cli depends on everything. publish_if_new omnigraph-compiler + publish_if_new omnigraph-policy publish_if_new omnigraph-engine publish_if_new omnigraph-server publish_if_new omnigraph-cli diff --git a/.github/workflows/release-edge.yml b/.github/workflows/release-edge.yml index 6147646..3996e65 100644 --- a/.github/workflows/release-edge.yml +++ b/.github/workflows/release-edge.yml @@ -43,6 +43,8 @@ jobs: asset_name: omnigraph-linux-x86_64 - runner: macos-14 asset_name: omnigraph-macos-arm64 + - runner: windows-latest + asset_name: omnigraph-windows-x86_64 env: CARGO_TERM_COLOR: always steps: @@ -59,6 +61,10 @@ jobs: if: runner.os == 'macOS' run: brew install protobuf + - name: Install Windows dependencies + if: runner.os == 'Windows' + run: choco install protoc -y + - name: Install Rust stable uses: dtolnay/rust-toolchain@stable with: @@ -73,7 +79,8 @@ jobs: - name: Build release binaries run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server - - name: Package release archive + - name: Package Unix release archive + if: runner.os != 'Windows' run: | mkdir -p release install -m 0755 target/release/omnigraph release/omnigraph @@ -81,6 +88,22 @@ jobs: tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256" + - name: Package Windows release archive + if: runner.os == 'Windows' + run: | + New-Item -ItemType Directory -Force -Path release | Out-Null + 
Copy-Item target/release/omnigraph.exe release/omnigraph.exe + Copy-Item target/release/omnigraph-server.exe release/omnigraph-server.exe + Compress-Archive -Path release/omnigraph.exe, release/omnigraph-server.exe -DestinationPath "${{ matrix.asset_name }}.zip" -Force + $hash = (Get-FileHash "${{ matrix.asset_name }}.zip" -Algorithm SHA256).Hash.ToLowerInvariant() + "$hash ${{ matrix.asset_name }}.zip" | Out-File -FilePath "${{ matrix.asset_name }}.sha256" -Encoding ascii + New-Item -ItemType Directory -Force -Path verify | Out-Null + Expand-Archive -Path "${{ matrix.asset_name }}.zip" -DestinationPath verify -Force + $items = Get-ChildItem -Path verify -File + if ($items.Count -ne 2 -or !(Test-Path verify/omnigraph.exe) -or !(Test-Path verify/omnigraph-server.exe)) { + throw "Windows release archive is missing expected binaries" + } + - name: Publish edge release assets uses: softprops/action-gh-release@v2.5.0 with: @@ -91,5 +114,22 @@ jobs: body: | Rolling prerelease from `${{ github.sha }}`. 
files: | - ${{ matrix.asset_name }}.tar.gz - ${{ matrix.asset_name }}.sha256 + ${{ matrix.asset_name }}.* + + smoke_windows_installer: + name: Smoke Windows installer + needs: build_release + runs-on: windows-latest + permissions: + contents: read + steps: + - name: Checkout source + uses: actions/checkout@v5.0.1 + + - name: Install from edge release + run: ./scripts/install.ps1 -ReleaseChannel edge -InstallDir "$env:RUNNER_TEMP/omnigraph-bin" + + - name: Smoke installed binaries + run: | + & "$env:RUNNER_TEMP/omnigraph-bin/omnigraph.exe" version + & "$env:RUNNER_TEMP/omnigraph-bin/omnigraph-server.exe" --help diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e7fc75f..48ab38c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,6 +20,8 @@ jobs: asset_name: omnigraph-linux-x86_64 - runner: macos-14 asset_name: omnigraph-macos-arm64 + - runner: windows-latest + asset_name: omnigraph-windows-x86_64 env: CARGO_TERM_COLOR: always steps: @@ -36,6 +38,10 @@ jobs: if: runner.os == 'macOS' run: brew install protobuf + - name: Install Windows dependencies + if: runner.os == 'Windows' + run: choco install protoc -y + - name: Install Rust stable uses: dtolnay/rust-toolchain@stable with: @@ -50,7 +56,8 @@ jobs: - name: Build release binaries run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server - - name: Package release archive + - name: Package Unix release archive + if: runner.os != 'Windows' run: | mkdir -p release install -m 0755 target/release/omnigraph release/omnigraph @@ -58,12 +65,27 @@ jobs: tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256" + - name: Package Windows release archive + if: runner.os == 'Windows' + run: | + New-Item -ItemType Directory -Force -Path release | Out-Null + Copy-Item target/release/omnigraph.exe release/omnigraph.exe + Copy-Item 
target/release/omnigraph-server.exe release/omnigraph-server.exe + Compress-Archive -Path release/omnigraph.exe, release/omnigraph-server.exe -DestinationPath "${{ matrix.asset_name }}.zip" -Force + $hash = (Get-FileHash "${{ matrix.asset_name }}.zip" -Algorithm SHA256).Hash.ToLowerInvariant() + "$hash ${{ matrix.asset_name }}.zip" | Out-File -FilePath "${{ matrix.asset_name }}.sha256" -Encoding ascii + New-Item -ItemType Directory -Force -Path verify | Out-Null + Expand-Archive -Path "${{ matrix.asset_name }}.zip" -DestinationPath verify -Force + $items = Get-ChildItem -Path verify -File + if ($items.Count -ne 2 -or !(Test-Path verify/omnigraph.exe) -or !(Test-Path verify/omnigraph-server.exe)) { + throw "Windows release archive is missing expected binaries" + } + - name: Publish GitHub release assets uses: softprops/action-gh-release@v2.5.0 with: files: | - ${{ matrix.asset_name }}.tar.gz - ${{ matrix.asset_name }}.sha256 + ${{ matrix.asset_name }}.* update_homebrew_tap: name: Update Homebrew tap @@ -113,3 +135,22 @@ jobs: git add Formula/omnigraph.rb git commit -m "Update Omnigraph formula to ${GITHUB_REF_NAME}" git push origin HEAD:main + + smoke_windows_installer: + name: Smoke Windows installer + needs: build_release + if: startsWith(github.ref, 'refs/tags/v') + runs-on: windows-latest + permissions: + contents: read + steps: + - name: Checkout source + uses: actions/checkout@v5.0.1 + + - name: Install from tagged release + run: ./scripts/install.ps1 -Version "$env:GITHUB_REF_NAME" -InstallDir "$env:RUNNER_TEMP/omnigraph-bin" + + - name: Smoke installed binaries + run: | + & "$env:RUNNER_TEMP/omnigraph-bin/omnigraph.exe" version + & "$env:RUNNER_TEMP/omnigraph-bin/omnigraph-server.exe" --help diff --git a/.gitignore b/.gitignore index 919d9d8..2248d5a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ __pycache__/ *.pyc demo/*.omni/ .omnigraph-rustfs-demo/ +/docs/internal # Local-only working files (not for the public repo) .claude/ diff --git 
a/AGENTS.md b/AGENTS.md index 10f2e41..3fc78f7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,24 +1,24 @@ # OmniGraph — Agent Guide -This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this repo. It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer. +This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this codebase. It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer. **Required reading every session, every change:** -1. **[docs/invariants.md](docs/invariants.md)** — the architectural invariants and §IX deny-list. Apply to every PR, not only architecture work. -2. **[docs/lance.md](docs/lance.md)** — the curated index of upstream Lance docs. **Consult it before every task** to identify which Lance pages are relevant. **Then fetch every page in the matching domain section, plus every page that is even slightly relevant** — not just the page whose title most obviously matches the task. Behavior is interlocked across pages (transactions reference index lifecycle; index lifecycle references compaction; compaction references row-id lineage), and skipping a "slightly relevant" page is how alignment misses happen. The index itself is not a substitute for reading the pages — never act on the index alone. **Always fetch the FULL page content, not summaries** — use `npx mdrip ` (or `npx mdrip --max-chars 200000 ` for very long pages). 
Tools that summarize pages (like Claude's `WebFetch`) drop load-bearing details — we have caught alignment misses (default flags, `pub(crate)` blockers, three-page sub-specs hidden behind navigation hubs) only after dumping the full markdown. If `npx mdrip` is unavailable, fall back to `curl | pandoc -f html -t markdown` or paste the rendered page text manually; never act on a summarized fetch alone. -3. **[docs/testing.md](docs/testing.md)** — the test-coverage map. **Always check what already covers your change before writing a new test.** Extending an existing test (an assertion, a fixture row, a parameterization) is preferred over a duplicated `init_and_load()` block. Walk the before-every-task checklist to identify existing coverage, run those tests as a clean baseline, and only add a new test fn or file when no existing one owns the area. +1. **[docs/dev/invariants.md](docs/dev/invariants.md)** — the architectural invariants and deny-list. Apply to every PR, not only architecture work. +2. **[docs/dev/lance.md](docs/dev/lance.md)** — the curated index of upstream Lance docs. **Consult it before every task** to identify which Lance pages are relevant. **Then fetch every page in the matching domain section, plus every page that is even slightly relevant** — not just the page whose title most obviously matches the task. Behavior is interlocked across pages (transactions reference index lifecycle; index lifecycle references compaction; compaction references row-id lineage), and skipping a "slightly relevant" page is how alignment misses happen. The index itself is not a substitute for reading the pages — never act on the index alone. **Always fetch the FULL page content, not summaries** — use `curl -sL <url> | pandoc -f html -t markdown` or paste the rendered page text manually. 
Tools that summarize pages (like Claude's `WebFetch`) drop load-bearing details — we have caught alignment misses (default flags, `pub(crate)` blockers, three-page sub-specs hidden behind navigation hubs) only after dumping the full markdown. +3. **[docs/dev/testing.md](docs/dev/testing.md)** — the test-coverage map. **Always check what already covers your change before writing a new test.** Extending an existing test (an assertion, a fixture row, a parameterization) is preferred over a duplicated `init_and_load()` block. Walk the before-every-task checklist to identify existing coverage, run those tests as a clean baseline, and only add a new test fn or file when no existing one owns the area. Tools that support `@`-imports (Claude Code) auto-include all three files via the imports below — note these must sit at column 0 (not inside a blockquote) for the parser to recognize them. Other agents (Codex, Cursor, Cline, …) must open them explicitly at the start of each session. -@docs/invariants.md -@docs/lance.md -@docs/testing.md +@docs/dev/invariants.md +@docs/dev/lance.md +@docs/dev/testing.md `CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`. -**Version surveyed:** 0.4.2 -**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-cli`, `omnigraph-server` -**Storage substrate:** Lance 4.x (columnar, versioned, branchable) +**Version surveyed:** 0.6.0 +**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-policy`, `omnigraph-cli`, `omnigraph-server` +**Storage substrate:** Lance 6.x (columnar, versioned, branchable) **License:** MIT **Toolchain:** Rust stable, edition 2024 @@ -33,7 +33,7 @@ OmniGraph is a typed property-graph engine built as a coordination layer over ma - **Multi-modal querying**: vector ANN (`nearest`), full-text (`search`/`fuzzy`/`match_text`/`bm25`), Reciprocal Rank Fusion (`rrf`), and graph traversal (`Expand`, anti-join `not { … }`) in one runtime. 
- **Branches and commits across the whole graph**: Git-style — every successful publish appends to a commit DAG; merges are three-way at the row level. - **Atomic per-query writes**: `mutate_as` and `load` accumulate insert/update batches into an in-memory `MutationStaging.pending` per touched table; one `stage_*` + `commit_staged` per table runs at end-of-query, then `ManifestBatchPublisher::publish` commits the manifest atomically with per-table `expected_table_versions` CAS. A mid-query failure leaves Lance HEAD untouched on staged tables — no drift, no run state machine, no staging branches. Deletes still inline-commit; D₂ at parse time prevents inserts/updates and deletes from coexisting in one query. -- **HTTP server**: Axum + utoipa OpenAPI, bearer auth (SHA-256 hashed, optional AWS Secrets Manager), Cedar policy gating. +- **HTTP server**: Axum + utoipa OpenAPI, bearer auth (SHA-256 hashed, optional AWS Secrets Manager). Cedar policy enforcement is engine-wide — every `_as` writer calls `Omnigraph::enforce(action, scope, actor)`, so HTTP, CLI, and embedded SDK consumers all hit the same gate. **Two modes** (v0.6.0+): single-graph (legacy flat routes) and multi-graph (`/graphs/{graph_id}/...` cluster routes + read-only `GET /graphs` enumeration). Per-graph + server-level Cedar policies. Runtime add/remove (`POST /graphs`, `DELETE /graphs/{id}`) is not exposed — operators edit `omnigraph.yaml` and restart. - **CLI** driven by a single `omnigraph.yaml`; multi-format output (json/jsonl/csv/kv/table). Throughout the docs, capabilities are split into **L1 — Inherited from Lance** vs **L2 — Added by OmniGraph**. 
@@ -50,16 +50,16 @@ CLI (omnigraph) HTTP Server (omnigraph-server, Axum) omnigraph-compiler ── Pest grammars, catalog, IR, lowering, lint, migration plan │ ▼ - omnigraph (engine) ── ManifestRepo, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec + omnigraph (engine) ── ManifestCoordinator, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec │ ▼ - Lance 4.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes + Lance 6.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes │ ▼ Object store (file / s3 / RustFS / MinIO / S3-compat) ``` -Full diagram and concurrency model: [docs/architecture.md](docs/architecture.md). +Full diagram and concurrency model: [docs/dev/architecture.md](docs/dev/architecture.md). --- @@ -67,35 +67,37 @@ Full diagram and concurrency model: [docs/architecture.md](docs/architecture.md) | Area | Read | |---|---| -| **Architectural invariants & deny-list (read before any non-trivial proposal or review)** | **[docs/invariants.md](docs/invariants.md)** | -| **Lance docs index — fetch upstream Lance docs by problem domain** | **[docs/lance.md](docs/lance.md)** | -| **Test coverage map — what's covered, what helpers to reuse, before-every-task checklist** | **[docs/testing.md](docs/testing.md)** | -| Architecture, L1/L2 framing, concurrency model | [docs/architecture.md](docs/architecture.md) | -| Storage layout, `__manifest` schema, URI schemes, S3 env vars | [docs/storage.md](docs/storage.md) | -| `.pg` schema language, types, constraints, annotations, migration planning | [docs/schema-language.md](docs/schema-language.md) | -| Schema-lint codes (`OG-XXX-NNN`), families, severity, suppression | [docs/schema-lint.md](docs/schema-lint.md) | -| `.gq` query language, MATCH/RETURN/ORDER, search funcs, mutations, IR ops, lint codes | [docs/query-language.md](docs/query-language.md) | -| Indexes (BTREE / inverted / vector / graph topology) | [docs/indexes.md](docs/indexes.md) | -| Embeddings (compiler + engine clients, 
env vars, `@embed`) | [docs/embeddings.md](docs/embeddings.md) | -| Branches, commit graph, snapshots, system branches | [docs/branches-commits.md](docs/branches-commits.md) | -| Transactions and atomicity (per-query atomic; branches as multi-query transactions) | [docs/transactions.md](docs/transactions.md) | -| Direct-publish writes (the former Run state machine, now demoted to publisher CAS) | [docs/runs.md](docs/runs.md) | -| Three-way merge and conflict kinds | [docs/merge.md](docs/merge.md) | -| Diff / change feed (`diff_between`, `diff_commits`) | [docs/changes.md](docs/changes.md) | -| Query execution, mutation execution, bulk loader, `load` vs `ingest` | [docs/execution.md](docs/execution.md) | -| `optimize` (compaction) and `cleanup` (version GC) | [docs/maintenance.md](docs/maintenance.md) | -| Cedar policy actions, scopes, CLI | [docs/policy.md](docs/policy.md) | -| HTTP server endpoints, auth, error model, body limits | [docs/server.md](docs/server.md) | -| CLI quick-start | [docs/cli.md](docs/cli.md) | -| CLI command surface and `omnigraph.yaml` schema | [docs/cli-reference.md](docs/cli-reference.md) | -| Audit / actor tracking | [docs/audit.md](docs/audit.md) | -| Error taxonomy and result serialization | [docs/errors.md](docs/errors.md) | -| Install (binary / Homebrew / source / channels) | [docs/install.md](docs/install.md) | -| Deployment (binary / container / RustFS bootstrap / auth / build variants) | [docs/deployment.md](docs/deployment.md) | -| CI / release workflows | [docs/ci.md](docs/ci.md) | -| Code ownership (CODEOWNERS source of truth, roles, regeneration) | [docs/codeowners.md](docs/codeowners.md) | -| Branch protection policy (declarative, applied via `scripts/apply-branch-protection.sh`) | [docs/branch-protection.md](docs/branch-protection.md) | -| Constants & tunables cheat sheet | [docs/constants.md](docs/constants.md) | +| **User docs entry point (public CLI/API/operator docs)** | **[docs/user/index.md](docs/user/index.md)** | +| 
**Developer docs entry point (architecture, invariants, testing, internals)** | **[docs/dev/index.md](docs/dev/index.md)** | +| **Architectural invariants & deny-list (read before any non-trivial proposal or review)** | **[docs/dev/invariants.md](docs/dev/invariants.md)** | +| **Lance docs index — fetch upstream Lance docs by problem domain** | **[docs/dev/lance.md](docs/dev/lance.md)** | +| **Test coverage map — what's covered, what helpers to reuse, before-every-task checklist** | **[docs/dev/testing.md](docs/dev/testing.md)** | +| Architecture, L1/L2 framing, concurrency model | [docs/dev/architecture.md](docs/dev/architecture.md) | +| Storage layout, `__manifest` schema, URI schemes, S3 env vars | [docs/user/storage.md](docs/user/storage.md) | +| `.pg` schema language, types, constraints, annotations, migration planning | [docs/user/schema-language.md](docs/user/schema-language.md) | +| Schema-lint codes (`OG-XXX-NNN`), families, severity, suppression | [docs/user/schema-lint.md](docs/user/schema-lint.md) | +| `.gq` query language, MATCH/RETURN/ORDER, search funcs, mutations, IR ops, lint codes | [docs/user/query-language.md](docs/user/query-language.md) | +| Indexes (BTREE / inverted / vector / graph topology) | [docs/user/indexes.md](docs/user/indexes.md) | +| Embeddings (compiler + engine clients, env vars, `@embed`) | [docs/user/embeddings.md](docs/user/embeddings.md) | +| Branches, commit graph, snapshots, system branches | [docs/user/branches-commits.md](docs/user/branches-commits.md) | +| Transactions and atomicity (per-query atomic; branches as multi-query transactions) | [docs/user/transactions.md](docs/user/transactions.md) | +| Direct-publish writes (the former Run state machine, now demoted to publisher CAS) | [docs/dev/runs.md](docs/dev/runs.md) | +| Three-way merge and conflict kinds | [docs/dev/merge.md](docs/dev/merge.md) | +| Diff / change feed (`diff_between`, `diff_commits`) | [docs/user/changes.md](docs/user/changes.md) | +| Query execution, 
mutation execution, bulk loader, `load` vs `ingest` | [docs/dev/execution.md](docs/dev/execution.md) | +| `optimize` (compaction) and `cleanup` (version GC) | [docs/user/maintenance.md](docs/user/maintenance.md) | +| Cedar policy actions, scopes, CLI | [docs/user/policy.md](docs/user/policy.md) | +| HTTP server endpoints, auth, error model, body limits | [docs/user/server.md](docs/user/server.md) | +| CLI quick-start | [docs/user/cli.md](docs/user/cli.md) | +| CLI command surface and `omnigraph.yaml` schema | [docs/user/cli-reference.md](docs/user/cli-reference.md) | +| Audit / actor tracking | [docs/user/audit.md](docs/user/audit.md) | +| Error taxonomy and result serialization | [docs/user/errors.md](docs/user/errors.md) | +| Install (binary / Homebrew / source / channels) | [docs/user/install.md](docs/user/install.md) | +| Deployment (binary / container / RustFS bootstrap / auth / build variants) | [docs/user/deployment.md](docs/user/deployment.md) | +| CI / release workflows | [docs/dev/ci.md](docs/dev/ci.md) | +| Code ownership (CODEOWNERS source of truth, roles, regeneration) | [docs/dev/codeowners.md](docs/dev/codeowners.md) | +| Branch protection policy (declarative, applied via `scripts/apply-branch-protection.sh`) | [docs/dev/branch-protection.md](docs/dev/branch-protection.md) | +| Constants & tunables cheat sheet | [docs/user/constants.md](docs/user/constants.md) | | Per-version release notes | [docs/releases/](docs/releases/) | --- @@ -119,15 +121,15 @@ When evaluating a design, ask: *"what does this look like after 5 more changes l ### Tiebreakers when liability alone is silent - **Correctness > simplicity > performance.** Lexicographic — give up performance for simpler code; give up simplicity for correct code; never give up correctness. The deny-list ("no silent failures," "no acks before durable persistence," "no reads of partial commits") is this rule's hard floor. 
-- **Reversibility shapes evidence demand.** Reversible changes wait for evidence: prefer prod metrics over napkin math over RFCs. Irreversible changes (substrate choice, on-disk format, the §VI database guarantees) earn an RFC, because by the time prod tells you they were wrong, you've shipped years of dependent code. Reviewers should spot both failure modes — RFC-ing a one-line config, and measuring-your-way into a substrate decision. +- **Reversibility shapes evidence demand.** Reversible changes wait for evidence: prefer prod metrics over napkin math over RFCs. Irreversible changes (substrate choice, on-disk format, database guarantees) earn an RFC, because by the time prod tells you they were wrong, you've shipped years of dependent code. Reviewers should spot both failure modes — RFC-ing a one-line config, and measuring-your-way into a substrate decision. -The always-on rules below and the §IX deny-list in [docs/invariants.md](docs/invariants.md) are specific applications of this principle; when the rules are silent, fall back to it. +The always-on rules below and the deny-list in [docs/dev/invariants.md](docs/dev/invariants.md) are specific applications of this principle; when the rules are silent, fall back to it. --- ## Always-on rules (load these into your working memory) -These are architectural rules that need to be in scope on every change. They're framed at the level that survives renames and refactors — the deeper implementation specifics (function names, lock names, branch-prefix conventions, enforcement points) live in the per-area docs and may evolve. The full architectural invariants and deny-list are in [docs/invariants.md](docs/invariants.md); §IX (deny-list) is the fastest first-pass when reviewing any change. +These are architectural rules that need to be in scope on every change. 
They're framed at the level that survives renames and refactors — the deeper implementation specifics (function names, lock names, branch-prefix conventions, enforcement points) live in the per-area docs and may evolve. The full architectural invariants and deny-list are in [docs/dev/invariants.md](docs/dev/invariants.md); the deny-list is the fastest first-pass when reviewing any change. 1. **Multi-dataset publish is atomic across the whole graph.** A graph commit flips every relevant sub-table version visible together, in one manifest write. Don't introduce code paths that publish per sub-table outside the unified publish path — that loses cross-table snapshot isolation. 2. **Snapshot isolation per query.** A query holds one snapshot for its lifetime. Don't re-read the current head mid-query. @@ -136,7 +138,7 @@ These are architectural rules that need to be in scope on every change. They're 5. **Reads always see the current index state for the branch they're reading.** Indexes track the branch head, not historical snapshots. If you change index lifecycle, preserve this guarantee. 6. **Stable type IDs survive renames.** Schema migration relies on identity that's stable across rename — don't mint new IDs on rename. -### Deny-list (fast-pass review filter — full reasoning in [docs/invariants.md §IX](docs/invariants.md)) +### Deny-list (fast-pass review filter — full reasoning in [docs/dev/invariants.md](docs/dev/invariants.md)) If a proposal fits one of these, the burden is on the proposer to justify why this case is the exception: @@ -162,38 +164,64 @@ If a proposal fits one of these, the burden is on the proposer to justify why th --- +## Build, test, lint + +Rust stable workspace (edition 2024). `protoc` is a build dependency (`brew install protobuf` / `apt-get install protobuf-compiler libprotobuf-dev`). **Crate dir ≠ package name** for the engine: the directory is `crates/omnigraph` but its Cargo package is `omnigraph-engine` (use that in `-p`). 
The CLI binary built from `omnigraph-cli` is named `omnigraph`. + +```bash +cargo build --workspace --locked # build everything +cargo test --workspace --locked # the canonical CI gate (matches CI exactly) +cargo run -p omnigraph-cli -- # run the `omnigraph` CLI from source +cargo run -p omnigraph-server -- --bind 0.0.0.0:8080 # run the server from source + +# Run one crate / one test file / one test fn +cargo test -p omnigraph-engine --test traversal # one integration-test file (see docs/dev/testing.md) +cargo test -p omnigraph-engine --test runs concurrent # one test fn by name substring +cargo test -p omnigraph-engine some_inline_test -- --nocapture # show stdout + +# Feature-gated suites (each is its own job in CI, not part of the default run) +cargo test -p omnigraph-engine --features failpoints --test failpoints # fault injection +cargo build -p omnigraph-server --features aws # AWS Secrets Manager bearer-token source +``` + +S3-backed tests (`s3_storage`, and the S3 paths in server/CLI system tests) **skip** unless `OMNIGRAPH_S3_TEST_BUCKET` + `AWS_*` (incl. `AWS_ENDPOINT_URL_S3` for non-AWS) are set; CI runs them against containerized RustFS. `scripts/local-rustfs-bootstrap.sh` stands up a local S3 environment. + +CI does **not** run `clippy` or `rustfmt` as gates — but `cargo test --workspace --locked` is the exact gate, so run it before pushing. Two non-test CI checks: `scripts/check-agents-md.sh` (doc cross-link integrity — run it after moving/renaming docs) and OpenAPI drift (`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json`; set `OMNIGRAPH_UPDATE_OPENAPI=1` to update the checked-in copy when a server/API change is intentional). 
+ +--- + ## Quick-reference flows ```bash -# Initialize an S3-backed repo -omnigraph init --schema ./schema.pg s3://my-bucket/repo.omni +# Initialize an S3-backed graph +omnigraph init --schema ./schema.pg s3://my-bucket/graph.omni # Bulk load -omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/repo.omni +omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/graph.omni # Branch + ingest a review batch -omnigraph branch create --from main review/2026-04-25 s3://my-bucket/repo.omni -omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/repo.omni +omnigraph branch create --from main review/2026-04-25 s3://my-bucket/graph.omni +omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/graph.omni # Run a hybrid (vector + BM25) query omnigraph read --query ./queries.gq --name find_similar \ - --params '{"q":"trends in AI safety"}' --format table s3://my-bucket/repo.omni + --params '{"q":"trends in AI safety"}' --format table s3://my-bucket/graph.omni # Plan + apply schema migration -omnigraph schema plan --schema ./next.pg s3://my-bucket/repo.omni -omnigraph schema apply --schema ./next.pg s3://my-bucket/repo.omni --json +omnigraph schema plan --schema ./next.pg s3://my-bucket/graph.omni +omnigraph schema apply --schema ./next.pg s3://my-bucket/graph.omni --json # Merge review branch back -omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/repo.omni +omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/graph.omni # Compact + GC (preview, then confirm) -omnigraph optimize s3://my-bucket/repo.omni -omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/repo.omni -omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/repo.omni +omnigraph optimize s3://my-bucket/graph.omni +omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/graph.omni +omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/graph.omni # Stand up the HTTP server 
(token from env) OMNIGRAPH_SERVER_BEARER_TOKEN=xxxx \ - omnigraph-server s3://my-bucket/repo.omni --bind 0.0.0.0:8080 + omnigraph-server s3://my-bucket/graph.omni --bind 0.0.0.0:8080 # Cedar policy explain omnigraph policy explain --actor act-alice --action change --branch main @@ -220,12 +248,12 @@ omnigraph policy explain --actor act-alice --action change --branch main | Schema language | — | `.pg` + Pest grammar + catalog + interfaces + constraints + annotations | | Query language | — | `.gq` + Pest grammar + IR + lowering + linter | | Schema migration planning | — | `plan_schema_migration` + `apply_schema` step types + `__schema_apply_lock__` | -| Commit graph (DAG) across whole repo | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map | +| Commit graph (DAG) across whole graph | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map | | Per-query atomic writes | — | In-memory `MutationStaging.pending` accumulator + `stage_*` / `commit_staged` per touched table at end-of-query + publisher CAS via `commit_with_expected` (single manifest commit per `mutate_as` / `load`); D₂ parse-time rule keeps inserts/updates and deletes from mixing | | Three-way row-level merge | — | `OrderedTableCursor` + `StagedTableWriter`, structured `MergeConflictKind` | | Change feeds | — | `diff_between` / `diff_commits` with manifest fast path + ID streaming | -| Cedar policy | — | 8 actions, branch / target_branch / protected scopes, validate/test/explain CLI | -| HTTP server | — | Axum, OpenAPI via utoipa, bearer auth (SHA-256, AWS Secrets Manager option), policy gating, NDJSON streaming export | +| Cedar policy | — | Per-graph actions plus server-scoped actions (see [docs/user/policy.md](docs/user/policy.md) for the current list), branch / target_branch / protected scopes, validate/test/explain CLI. 
**Engine-wide enforcement** (MR-722): every `_as` writer (`apply_schema_as`, `mutate_as`, `load_as`, `ingest_as`, `branch_create_as` / `branch_create_from_as`, `branch_delete_as`, `branch_merge_as`) calls `Omnigraph::enforce(action, scope, actor)` — HTTP, CLI, embedded SDK all hit the same gate. | +| HTTP server | — | Axum, OpenAPI via utoipa, bearer auth (SHA-256, AWS Secrets Manager option), `authorize_request` at the HTTP boundary (resolves bearer→actor, applies admission control), NDJSON streaming export, **multi-graph mode (v0.6.0+) with cluster routes + read-only `GET /graphs` enumeration + per-graph + server-level Cedar policies. Add/remove graphs by editing `omnigraph.yaml` and restarting.** | | CLI with config | — | `omnigraph.yaml`, aliases, multi-format output (json/jsonl/csv/kv/table) | | Audit / actor tracking | — | `_as` write APIs + actor map in commit graph | | Local RustFS bootstrap | — | `scripts/local-rustfs-bootstrap.sh` one-shot S3-backed dev environment | @@ -234,14 +262,14 @@ omnigraph policy explain --actor act-alice --action change --branch main ## Maintenance contract for agents -When you change something user-visible, **update the relevant `docs/<topic>.md` in the same change**. Pointers from this file to that doc must keep working — CI enforces cross-link integrity via `scripts/check-agents-md.sh`. +When you change something user-visible, **update the relevant `docs/user/<topic>.md` in the same change**. Use [docs/user/index.md](docs/user/index.md) for public behavior and [docs/dev/index.md](docs/dev/index.md) for contributor/internal mechanics. Pointers from this file to those docs must keep working — CI enforces cross-link integrity via `scripts/check-agents-md.sh`. -When proposing or reviewing a non-trivial change, walk [docs/invariants.md](docs/invariants.md) — at minimum the §IX deny-list and §X review checklist. Add to the deny-list when a new anti-pattern surfaces; relaxing an invariant requires the same review process as code.
+When proposing or reviewing a non-trivial change, walk [docs/dev/invariants.md](docs/dev/invariants.md) — at minimum the deny-list and review checklist. Add to the deny-list when a new anti-pattern surfaces; relaxing an invariant requires the same review process as code. Rules: 1. **Update in the same PR.** New endpoint, query function, CLI flag, env var, constant, schema construct, or invariant: update both the source code and the doc in the same change. Never split documentation drift into a follow-up. -2. **Bump version on release.** When a release boundary crosses (e.g. v0.3.1 → v0.3.2), update the version line at the top of this file and add a `docs/releases/<version>.md` describing the user-visible delta. Update [docs/architecture.md](docs/architecture.md) only if the architecture itself changed. +2. **Bump version on release.** When a release boundary crosses (e.g. v0.3.1 → v0.3.2), update the version line at the top of this file and add a `docs/releases/<version>.md` describing the user-visible delta. Update [docs/dev/architecture.md](docs/dev/architecture.md) only if the architecture itself changed. 3. **Write OSS-facing release notes.** Release docs are public project history. Describe capabilities, behavior changes, breaking changes, upgrade notes, and user impact; do not reference private ticket systems, internal codenames, or planning shorthand that an outside contributor cannot inspect. 4. **Keep versioning coherent.** A release bump must update every published crate manifest, local path dependency constraint, `Cargo.lock`, generated API metadata such as `openapi.json`, and this file's surveyed version. Do not leave mixed package versions unless the release plan explicitly calls for them. 5. **Keep docs audience-neutral.** Prefer stable public identifiers (versions, PR numbers, public issue links, crate names, endpoint names) over organization-specific labels.
If internal context is useful for maintainers, translate it into a durable public rationale before committing it. @@ -249,9 +277,9 @@ Rules: 7. **Re-verify before recommending.** If you cite a flag, env var, endpoint, or constant to the user or in code, grep for it in source first. Memory and docs go stale; the code is authoritative. 8. **Keep AGENTS.md short.** This file is always loaded into agent context, so every added line has a recurring context-window cost. Prefer pointers and terse invariants here; put detail in `docs/`. 9. **Keep AGENTS.md a map, not an encyclopedia.** New deep content goes into `docs/`. Add an entry to "Where to find each topic" instead of pasting prose into this file. The "Always-on rules" section is the exception — it's for invariants that should always be in scope. -10. **Re-read on schema/query/IR changes.** Edits to `schema.pest`, `query.pest`, `ir/lower.rs`, `query/typecheck.rs`, or `query/lint.rs` should trigger a re-read of [docs/schema-language.md](docs/schema-language.md), [docs/query-language.md](docs/query-language.md), and [docs/execution.md](docs/execution.md) to confirm they still describe reality. +10. **Re-read on schema/query/IR changes.** Edits to `schema.pest`, `query.pest`, `ir/lower.rs`, `query/typecheck.rs`, or `query/lint.rs` should trigger a re-read of [docs/user/schema-language.md](docs/user/schema-language.md), [docs/user/query-language.md](docs/user/query-language.md), and [docs/dev/execution.md](docs/dev/execution.md) to confirm they still describe reality. 11. **Always make smaller commits.** Each commit does one thing, compiles, and passes tests; mechanical refactors land separately from the behavior changes they enable. 12. **Test-first for bug fixes.** When fixing an identified bug, write a regression test that reproduces the failure first. Confirm it fails against the current code with the predicted symptom (not an unrelated error). Then land the fix in a separate commit and confirm the test turns green. 
The test commit lands just before the fix commit so the red → green pair is visible in `git log` and a reviewer can check out the test commit alone and reproduce the failure. 13. **Correct by design over symptomatic patches.** When a bug surfaces, identify the root cause and make the fix correct by construction. Don't patch the symptom. If the design admits the bug class, the fix is to close the class, not to add a guard around the latest instance. A symptomatic patch is acceptable only as a stop-gap, with an explicit note in the commit message and a follow-up issue tracking the design fix. -CI check: `scripts/check-agents-md.sh` verifies that every `docs/*.md` link in this file resolves and that every doc in the canonical set is linked. Run it locally before opening a PR if you've moved or renamed docs. +CI check: `scripts/check-agents-md.sh` verifies that docs links in this file and the audience indexes resolve, and that every canonical doc is linked from either [docs/user/index.md](docs/user/index.md) or [docs/dev/index.md](docs/dev/index.md). Run it locally before opening a PR if you've moved or renamed docs. 
diff --git a/Cargo.lock b/Cargo.lock index bac2a34..a3d6d62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,9 +175,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -196,9 +196,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", @@ -210,9 +210,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" dependencies = [ "ahash", "arrow-buffer", @@ -221,7 +221,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.1", + "hashbrown 0.17.1", "num-complex", "num-integer", "num-traits", @@ -229,9 +229,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" dependencies = [ "bytes", "half", @@ -241,9 +241,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", @@ -263,9 +263,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de" dependencies = [ "arrow-array", "arrow-cast", @@ -278,9 +278,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" dependencies = [ "arrow-array", "arrow-buffer", @@ -301,21 +301,22 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", - "lz4_flex 0.12.1", + "lz4_flex", "zstd", ] [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", - "arrow-data", + "arrow-ord", "arrow-schema", + "arrow-select", "chrono", "half", "indexmap 2.13.0", @@ -331,9 +332,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version 
= "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -344,9 +345,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -357,9 +358,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" dependencies = [ "bitflags", "serde_core", @@ -368,9 +369,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash", "arrow-array", @@ -382,9 +383,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -464,7 +465,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] 
[[package]] @@ -475,7 +476,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -958,7 +959,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1105,31 +1106,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bon" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" -dependencies = [ - "bon-macros", - "rustversion", -] - -[[package]] -name = "bon-macros" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" -dependencies = [ - "darling", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.115", -] - [[package]] name = "borsh" version = "1.6.1" @@ -1290,12 +1266,6 @@ dependencies = [ "smol_str", ] -[[package]] -name = "census" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" - [[package]] name = "cfg-if" version = "1.0.4" @@ -1310,9 +1280,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -1373,7 +1343,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1645,7 +1615,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 
2.0.115", + "syn 2.0.117", ] [[package]] @@ -1656,7 +1626,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -1675,9 +1645,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c18ba387f9c05ac1f3be32a73f8f3cc6c1cfc43e5d4b7a8e5b0d3a5eb48dc7" +checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" dependencies = [ "arrow", "arrow-schema", @@ -1711,7 +1681,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.2", "regex", @@ -1724,9 +1694,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c75a4ce672b27fb8423810efb92a3600027717a1664d06a2c307eeeabcec694" +checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" dependencies = [ "arrow", "async-trait", @@ -1742,16 +1712,16 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "tokio", ] [[package]] name = "datafusion-catalog-listing" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8b9a3795ffb46bf4957a34c67d89a67558b311ae455c8d4295ff2115eeea50" +checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" dependencies = [ "arrow", "async-trait", @@ -1767,14 +1737,14 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", ] [[package]] name = "datafusion-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205dc1e20441973f470e6b7ef87626a3b9187970e5106058fef1b713047f770c" 
+checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" dependencies = [ "ahash", "arrow", @@ -1783,9 +1753,10 @@ dependencies = [ "half", "hashbrown 0.16.1", "indexmap 2.13.0", + "itertools 0.14.0", "libc", "log", - "object_store", + "object_store 0.13.2", "paste", "sqlparser", "tokio", @@ -1794,9 +1765,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf5880c02ff6f5f11fb5bc19211789fb32fd3c53d79b7d6cb2b12e401312ba0" +checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" dependencies = [ "futures", "log", @@ -1805,9 +1776,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc614d6e709450e29b7b032a42c1bdb705f166a6b2edef7bed7c7897eb905499" +checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" dependencies = [ "arrow", "async-trait", @@ -1826,7 +1797,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.2", "rand 0.9.2", "tokio", "url", @@ -1834,9 +1805,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e497d5fc48dac7ce86f6b4fb09a3a494385774af301ff20ec91aebfae9b05b4" +checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" dependencies = [ "arrow", "arrow-ipc", @@ -1852,15 +1823,15 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.2", "tokio", ] [[package]] name = "datafusion-datasource-csv" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dfc250cad940d0327ca2e9109dc98830892d17a3d6b2ca11d68570e872cf379" 
+checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" dependencies = [ "arrow", "async-trait", @@ -1874,16 +1845,16 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", "regex", "tokio", ] [[package]] name = "datafusion-datasource-json" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91e9677ed62833b0e8129dec0d1a8f3c9bb7590bd6dd714a43e4c3b663e4aa0" +checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" dependencies = [ "arrow", "async-trait", @@ -1897,31 +1868,35 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store", + "object_store 0.13.2", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-doc" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e13e5fe3447baa0584b61ee8644086e007e1ef6e58f4be48bc8a72417854729" +checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" [[package]] name = "datafusion-execution" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48a6cc03e34899a54546b229235f7b192634c8e832f78a267f0989b18216c56d" +checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" dependencies = [ "arrow", + "arrow-buffer", "async-trait", "chrono", "dashmap", "datafusion-common", "datafusion-expr", + "datafusion-physical-expr-common", "futures", "log", - "object_store", + "object_store 0.13.2", "parking_lot", "rand 0.9.2", "tempfile", @@ -1930,9 +1905,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee3315d87eca7a7df58e52a1fb43b4c4171b545fd30ffc3102945c162a9f6ddb" +checksum = 
"574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" dependencies = [ "arrow", "async-trait", @@ -1952,9 +1927,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c6d83feae0753799f933a2c47dfd15980c6947960cb95ed60f5c1f885548b3" +checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", @@ -1965,9 +1940,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b82962015cc3db4d7662459c9f7fcda0591b5edacb8af1cf3bc3031f274800" +checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" dependencies = [ "arrow", "arrow-buffer", @@ -1986,6 +1961,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -1996,9 +1972,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e42c227d9e55a6c8041785d4a8a117e4de531033d480aae10984247ac62e27e" +checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" dependencies = [ "ahash", "arrow", @@ -2012,14 +1988,15 @@ dependencies = [ "datafusion-physical-expr-common", "half", "log", + "num-traits", "paste", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cead3cfed825b0b688700f4338d281cd7857e4907775a5b9554c083edd5f3f95" +checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" dependencies = [ "ahash", "arrow", @@ -2030,9 +2007,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.4.0" +version = 
"53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ea99612970aebab8cf864d02eb3d296bbab7f4881e1023d282b57fe431b201" +checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" dependencies = [ "arrow", "arrow-ord", @@ -2046,16 +2023,18 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] [[package]] name = "datafusion-functions-table" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83dbf3ab8b9af6f209b068825a7adbd3b88bf276f2a1ec14ba09567b97f5674" +checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" dependencies = [ "arrow", "async-trait", @@ -2069,9 +2048,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732edabe07496e2fc5a1e57a284d7a36edcea445a2821119770a0dea624b472c" +checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" dependencies = [ "arrow", "datafusion-common", @@ -2087,9 +2066,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c6e30e09700799bd52adce8c377ab03dda96e73a623e4803a31ad94fe7ce14" +checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2097,20 +2076,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402f2a8ed70fb99a18f71580a1fe338604222a3d32ddeac6e72c5b34feea2d4d" +checksum = 
"2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] name = "datafusion-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f32edb8ba12f08138f86c09b80fae3d4a320551262fa06b91d8a8cb3065a5b" +checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" dependencies = [ "arrow", "chrono", @@ -2127,9 +2106,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "987c5e29e96186589301b42e25aa7d11bbe319a73eb02ef8d755edc55b5b89fc" +checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" dependencies = [ "ahash", "arrow", @@ -2150,9 +2129,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de89d0afa08b6686697bd8a6bac4ba2cd44c7003356e1bce6114d5a93f94b5c" +checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" dependencies = [ "arrow", "datafusion-common", @@ -2165,9 +2144,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602d1970c0fe87f1c3a36665d131fbfe1c4379d35f8fc5ec43a362229ad2954d" +checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" dependencies = [ "ahash", "arrow", @@ -2182,9 +2161,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b24d704b6385ebe27c756a12e5ba15684576d3b47aeca79cc9fb09480236dc32" +checksum = 
"7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" dependencies = [ "arrow", "datafusion-common", @@ -2200,9 +2179,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c21d94141ea5043e98793f170798e9c1887095813b8291c5260599341e383a38" +checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" dependencies = [ "ahash", "arrow", @@ -2224,6 +2203,7 @@ dependencies = [ "indexmap 2.13.0", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -2231,9 +2211,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68cce43d18c0dfac95cacd74e70565f7e2fb12b9ed41e2d312f0fa837626b1" +checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" dependencies = [ "arrow", "datafusion-common", @@ -2248,9 +2228,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4e1c40a0b1896aed4a4504145c2eb7fa9b9da13c2d04b40a4767a09f076199" +checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" dependencies = [ "async-trait", "datafusion-common", @@ -2262,15 +2242,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.4.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f1891e5b106d1d73c7fe403bd8a265d19c3977edc17f60808daf26c2fe65ffb" +checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" dependencies = [ "arrow", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-functions-nested", "indexmap 2.13.0", "log", "regex", @@ -2365,7 +2346,7 @@ checksum = 
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2377,12 +2358,6 @@ dependencies = [ "const-random", ] -[[package]] -name = "downcast-rs" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" - [[package]] name = "dunce" version = "1.0.5" @@ -2404,7 +2379,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2448,7 +2423,7 @@ checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2521,12 +2496,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" -[[package]] -name = "fastdivide" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" - [[package]] name = "fastrand" version = "2.3.0" @@ -2601,16 +2570,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs4" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" -dependencies = [ - "rustix 0.38.44", - "windows-sys 0.52.0", -] - [[package]] name = "fs_extra" version = "1.3.0" @@ -2619,9 +2578,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2195cc7f87e84bd695586137de99605e7e9579b26ec5e01b82960ddb4d0922f2" +checksum = "83cf860f6a6bf0a6a60fdfe5a36c75121fad5ea4332d1d12deee3e65b6047727" dependencies = [ "arrow-array", 
"rand 0.9.2", @@ -2698,7 +2657,7 @@ checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -2887,8 +2846,6 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", "foldhash 0.1.5", ] @@ -2903,6 +2860,12 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" @@ -2939,12 +2902,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "htmlescape" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" - [[package]] name = "http" version = "0.2.12" @@ -3307,6 +3264,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "io-uring" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d09b98f7eace8982db770e4408e7470b028ce513ac28fecdc6bf4c30fe92b62" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -3376,7 +3344,7 @@ checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -3425,7 +3393,7 @@ dependencies = [ "fast-float2", "itoa", "jiff", - "nom 8.0.0", + "nom", "num-traits", "ordered-float", "rand 0.9.2", @@ -3492,14 +3460,15 @@ dependencies = [ [[package]] name = "lance" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"efe6c3ddd79cdfd2b7e1c23cafae52806906bc40fbd97de9e8cf2f8c7a75fc04" +checksum = "d34e854994e84d043897f5ec9fb609221e9e69e3fd52996cd715d979fcd349f6" dependencies = [ "arrow", "arrow-arith", "arrow-array", "arrow-buffer", + "arrow-cast", "arrow-ipc", "arrow-ord", "arrow-row", @@ -3535,12 +3504,14 @@ dependencies = [ "lance-linalg", "lance-namespace", "lance-table", + "lance-tokenizer", "log", "moka", - "object_store", + "object_store 0.12.5", "permutation", "pin-project", "prost", + "prost-build", "prost-types", "rand 0.9.2", "roaring", @@ -3548,7 +3519,6 @@ dependencies = [ "serde", "serde_json", "snafu", - "tantivy", "tokio", "tokio-stream", "tokio-util", @@ -3559,14 +3529,15 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d9f5d95bdda2a2b790f1fb8028b5b6dcf661abeb3133a8bca0f3d24b054af87" +checksum = "7827fe404358c27d120ee8ea8ef7b9415c2911d54072bec83dd689d750ae65da" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", + "arrow-ipc", "arrow-ord", "arrow-schema", "arrow-select", @@ -3581,9 +3552,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f827d6ab9f8f337a9509d5ad66a12f3314db8713868260521c344ef6135eb4e4" +checksum = "2cd0b31570d50fe13c7e4e36b03e1f1c99c3d8e5a34845b24b0665b51b40570d" dependencies = [ "arrayref", "paste", @@ -3592,9 +3563,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1e25df6a79bf72ee6bcde0851f19b1cd36c5848c1b7db83340882d3c9fdecb" +checksum = "b128c213c676cb8e03c62a68670642770825171e64097cc2da97cbb19fe35d29" dependencies = [ "arrow-array", "arrow-buffer", @@ -3614,7 +3585,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 
0.12.5", "pin-project", "prost", "rand 0.9.2", @@ -3631,13 +3602,14 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93146de8ae720cb90edef81c2f2d0a1b065fc2f23ecff2419546f389b0fa70a4" +checksum = "e03b2de71cbcd09b10bf1a17c83cacbc0176ecd97203fb72b9e59d9b8f9a3743" dependencies = [ "arrow", "arrow-array", "arrow-buffer", + "arrow-cast", "arrow-ord", "arrow-schema", "arrow-select", @@ -3663,9 +3635,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccec8ce4d8e0a87a99c431dab2364398029f2ffb649c1a693c60c79e05ed30dd" +checksum = "2fe7c7ea7fd397e495a1646fec360e46ee0cbd75718f1c0e887aad657c5f2944" dependencies = [ "arrow", "arrow-array", @@ -3676,16 +3648,16 @@ dependencies = [ "half", "hex", "rand 0.9.2", - "rand_distr 0.5.1", + "rand_distr", "rand_xoshiro", "random_word", ] [[package]] name = "lance-encoding" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1aec0bbbac6bce829bc10f1ba066258126100596c375fb71908ecf11c2c2a5" +checksum = "fe3f8070835b407d8db9ea8728386bc3207ba23c66a9c22d344e231ef12b77ca" dependencies = [ "arrow-arith", "arrow-array", @@ -3722,9 +3694,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14a8c548804f5b17486dc2d3282356ed1957095a852780283bc401fdd69e9075" +checksum = "a6dfcf654549330df3aef708cd7c12e170feecddd34d6c19dd005b4153213268" dependencies = [ "arrow-arith", "arrow-array", @@ -3745,7 +3717,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -3756,9 +3728,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "4.0.0" +version 
= "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da212f0090ea59f79ac3686660f596520c167fe1cb5f408900cf71d215f0e03" +checksum = "4fb8ad0bd10efa2608634a2518b7dd501231e76c56a65fbd6519e23914cc425a" dependencies = [ "arrow", "arrow-arith", @@ -3795,16 +3767,17 @@ dependencies = [ "lance-io", "lance-linalg", "lance-table", + "lance-tokenizer", "libm", "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", "rand 0.9.2", - "rand_distr 0.5.1", + "rand_distr", "rangemap", "rayon", "roaring", @@ -3812,7 +3785,6 @@ dependencies = [ "serde_json", "smallvec", "snafu", - "tantivy", "tempfile", "tokio", "tracing", @@ -3822,9 +3794,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d958eb4b56f03bbe0f5f85eb2b4e9657882812297b6f711f201ffc995f259f" +checksum = "ef5314703fa8c8baed04193cc669da80ab42521c6319d3cc921a4a997690dcc0" dependencies = [ "arrow", "arrow-arith", @@ -3844,11 +3816,14 @@ dependencies = [ "deepsize", "futures", "http 1.4.0", + "io-uring", "lance-arrow", "lance-core", "lance-namespace", + "libc", "log", - "object_store", + "moka", + "object_store 0.12.5", "object_store_opendal", "opendal", "path_abs", @@ -3865,9 +3840,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0285b70da35def7ed95e150fae1d5308089554e1290470403ed3c50cb235bc5e" +checksum = "51aa9b73279f505b2bec0f194c7a2390ca74ad3260131e631a7bef8d97d54b2e" dependencies = [ "arrow-array", "arrow-buffer", @@ -3883,9 +3858,9 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f78e2a828b654e062a495462c6e3eb4fcf0e7e907d761b8f217fc09ccd3ceac" +checksum = 
"39cd01581f55ce45c49cbe494ee86c7ba7ca4ca3654690fd820941cd9105a46e" dependencies = [ "arrow", "async-trait", @@ -3898,9 +3873,9 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2392314f3da38f00d166295e44244208a65ccfc256e274fa8631849fc3f4d94" +checksum = "c2cb89f3933060f01350ad05a5a3fbda952e8ba638799bf8ac4cd2368416ee46" dependencies = [ "arrow", "arrow-ipc", @@ -3913,10 +3888,11 @@ dependencies = [ "lance-core", "lance-index", "lance-io", + "lance-linalg", "lance-namespace", "lance-table", "log", - "object_store", + "object_store 0.12.5", "rand 0.9.2", "serde_json", "snafu", @@ -3926,22 +3902,23 @@ dependencies = [ [[package]] name = "lance-namespace-reqwest-client" -version = "0.6.1" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2e48de899e2931afb67fcddd0a08e439bf5d8b6ea2a2ed9cb8f4df669bd5cc" +checksum = "6369eee4682fb11edf538388b43c61ce288b8302fe89bb40944d7daa7faaae99" dependencies = [ "reqwest", "serde", "serde_json", "serde_repr", + "serde_with", "url", ] [[package]] name = "lance-table" -version = "4.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df9c4adca3eb2074b3850432a9fb34248a3d90c3d6427d158b13ff9355664ee" +checksum = "5db70650465a1af174b7dfe6948ec91a3d466ada12e11274eb66e51132173aa0" dependencies = [ "arrow", "arrow-array", @@ -3959,7 +3936,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "prost", "prost-build", "prost-types", @@ -3976,6 +3953,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "lance-tokenizer" +version = "6.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb08ef9382c9d58036c323db2c19cc097e02d1d0d87714fc7176b5d3b36a31aa" +dependencies = [ + "rust-stemmers", + "serde", + "unicode-normalization", +] + [[package]] name = 
"lazy_static" version = "1.5.0" @@ -3991,12 +3979,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" -[[package]] -name = "levenshtein_automata" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" - [[package]] name = "lexical-core" version = "1.0.6" @@ -4093,12 +4075,6 @@ dependencies = [ "linked-hash-map", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -4146,7 +4122,7 @@ dependencies = [ "quote", "regex-automata", "regex-syntax", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -4171,15 +4147,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "lru" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" -dependencies = [ - "hashbrown 0.15.5", -] - [[package]] name = "lru-slab" version = "0.1.2" @@ -4207,15 +4174,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.11.6" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" - -[[package]] -name = "lz4_flex" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "7ef0d4ed8669f8f8826eb00dc878084aa8f253506c4fd5e8f58f5bce72ddb97e" dependencies = [ "twox-hash", ] @@ -4258,30 +4219,12 @@ dependencies = [ "digest", ] -[[package]] -name = "measure_time" -version = "0.9.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" -dependencies = [ - "log", -] - [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" -[[package]] -name = "memmap2" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" -dependencies = [ - "libc", -] - [[package]] name = "miette" version = "7.6.0" @@ -4302,7 +4245,7 @@ checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -4321,12 +4264,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -4380,12 +4317,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "murmurhash32" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" - [[package]] name = "ndarray" version = "0.16.1" @@ -4407,16 +4338,6 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - 
[[package]] name = "nom" version = "8.0.0" @@ -4578,6 +4499,32 @@ dependencies = [ "web-time", ] +[[package]] +name = "object_store" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "object_store_opendal" version = "0.55.0" @@ -4588,7 +4535,7 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store", + "object_store 0.12.5", "opendal", "pin-project", "tokio", @@ -4596,14 +4543,16 @@ dependencies = [ [[package]] name = "omnigraph-cli" -version = "0.4.2" +version = "0.6.0" dependencies = [ "assert_cmd", "clap", "color-eyre", + "lance", "lance-index", "omnigraph-compiler", "omnigraph-engine", + "omnigraph-policy", "omnigraph-server", "predicates", "reqwest", @@ -4616,7 +4565,7 @@ dependencies = [ [[package]] name = "omnigraph-compiler" -version = "0.4.2" +version = "0.6.0" dependencies = [ "ahash", "arrow-array", @@ -4637,7 +4586,7 @@ dependencies = [ [[package]] name = "omnigraph-engine" -version = "0.4.2" +version = "0.6.0" dependencies = [ "arc-swap", "arrow-array", @@ -4659,8 +4608,9 @@ dependencies = [ "lance-namespace", "lance-namespace-impls", "lance-table", - "object_store", + "object_store 0.12.5", "omnigraph-compiler", + "omnigraph-policy", "regex", "reqwest", "serde", @@ -4676,21 +4626,37 @@ dependencies = [ ] [[package]] -name = "omnigraph-server" -version = "0.4.2" +name = "omnigraph-policy" +version = "0.6.0" dependencies = [ + "cedar-policy", + "clap", + "color-eyre", + "serde", + "serde_json", + "serde_yaml", + "tempfile", +] + +[[package]] +name = "omnigraph-server" +version = "0.6.0" +dependencies = [ 
+ "arc-swap", "async-trait", "aws-config", "aws-sdk-secretsmanager", "axum", - "cedar-policy", "clap", "color-eyre", "dashmap", "futures", + "lance", "lance-index", "omnigraph-compiler", "omnigraph-engine", + "omnigraph-policy", + "regex", "serde", "serde_json", "serde_yaml", @@ -4698,6 +4664,7 @@ dependencies = [ "sha2", "subtle", "tempfile", + "thiserror", "tokio", "tower", "tower-http", @@ -4718,12 +4685,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" -[[package]] -name = "oneshot" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" - [[package]] name = "opendal" version = "0.55.0" @@ -4791,15 +4752,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "ownedbytes" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" -dependencies = [ - "stable_deref_trait", -] - [[package]] name = "owo-colors" version = "4.2.3" @@ -4924,7 +4876,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5009,7 +4961,7 @@ checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5161,7 +5113,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5198,7 +5150,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.115", + "syn 2.0.117", "tempfile", ] @@ -5212,7 +5164,7 
@@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5395,16 +5347,6 @@ dependencies = [ "getrandom 0.3.4", ] -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand 0.8.5", -] - [[package]] name = "rand_distr" version = "0.5.1" @@ -5506,7 +5448,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -5715,19 +5657,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.1.4" @@ -5737,7 +5666,7 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.12.1", + "linux-raw-sys", "windows-sys 0.61.2", ] @@ -5994,7 +5923,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6030,7 +5959,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6073,7 +6002,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6112,7 +6041,7 @@ checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6212,15 +6141,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" -[[package]] -name = "sketches-ddsketch" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" -dependencies = [ - "serde", -] - [[package]] name = "slab" version = "0.4.12" @@ -6261,7 +6181,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6302,9 +6222,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "sqlparser_derive", @@ -6312,13 +6232,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6389,7 +6309,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6411,9 +6331,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.115" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -6437,7 +6357,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6446,152 +6366,6 @@ 
version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" -[[package]] -name = "tantivy" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" -dependencies = [ - "aho-corasick", - "arc-swap", - "base64", - "bitpacking", - "bon", - "byteorder", - "census", - "crc32fast", - "crossbeam-channel", - "downcast-rs", - "fastdivide", - "fnv", - "fs4", - "htmlescape", - "hyperloglogplus", - "itertools 0.14.0", - "levenshtein_automata", - "log", - "lru", - "lz4_flex 0.11.6", - "measure_time", - "memmap2", - "once_cell", - "oneshot", - "rayon", - "regex", - "rust-stemmers", - "rustc-hash", - "serde", - "serde_json", - "sketches-ddsketch", - "smallvec", - "tantivy-bitpacker", - "tantivy-columnar", - "tantivy-common", - "tantivy-fst", - "tantivy-query-grammar", - "tantivy-stacker", - "tantivy-tokenizer-api", - "tempfile", - "thiserror", - "time", - "uuid", - "winapi", -] - -[[package]] -name = "tantivy-bitpacker" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" -dependencies = [ - "bitpacking", -] - -[[package]] -name = "tantivy-columnar" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" -dependencies = [ - "downcast-rs", - "fastdivide", - "itertools 0.14.0", - "serde", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-sstable", - "tantivy-stacker", -] - -[[package]] -name = "tantivy-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" -dependencies = [ - "async-trait", - "byteorder", - "ownedbytes", - "serde", - "time", 
-] - -[[package]] -name = "tantivy-fst" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" -dependencies = [ - "byteorder", - "regex-syntax", - "utf8-ranges", -] - -[[package]] -name = "tantivy-query-grammar" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" -dependencies = [ - "nom 7.1.3", - "serde", - "serde_json", -] - -[[package]] -name = "tantivy-sstable" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" -dependencies = [ - "futures-util", - "itertools 0.14.0", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-fst", - "zstd", -] - -[[package]] -name = "tantivy-stacker" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" -dependencies = [ - "murmurhash32", - "rand_distr 0.4.3", - "tantivy-common", -] - -[[package]] -name = "tantivy-tokenizer-api" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" -dependencies = [ - "serde", -] - [[package]] name = "tap" version = "1.0.1" @@ -6607,7 +6381,7 @@ dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", - "rustix 1.1.4", + "rustix", "windows-sys 0.61.2", ] @@ -6643,7 +6417,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6754,7 +6528,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -6786,6 +6560,7 @@ 
dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -6873,7 +6648,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7098,7 +6873,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7229,7 +7004,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -7318,22 +7093,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.11" @@ -7343,12 +7102,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.62.2" @@ -7370,7 +7123,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7381,7 +7134,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7603,7 +7356,7 @@ dependencies = [ "heck", "indexmap 2.13.0", "prettyplease", - "syn 2.0.115", + "syn 2.0.117", "wasm-metadata", 
"wit-bindgen-core", "wit-component", @@ -7619,7 +7372,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -7707,7 +7460,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "synstructure", ] @@ -7728,7 +7481,7 @@ checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] @@ -7748,7 +7501,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", "synstructure", ] @@ -7788,7 +7541,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.115", + "syn 2.0.117", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 761f29b..66bfc01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "crates/omnigraph-compiler", "crates/omnigraph", "crates/omnigraph-cli", + "crates/omnigraph-policy", "crates/omnigraph-server", ] default-members = [ @@ -13,29 +14,29 @@ default-members = [ ] [workspace.dependencies] -arrow-array = "57" -arrow-ipc = "57" -arrow-schema = "57" -arrow-select = "57" -arrow-cast = { version = "57", features = ["prettyprint"] } -arrow-ord = "57" +arrow-array = "58" +arrow-ipc = "58" +arrow-schema = "58" +arrow-select = "58" +arrow-cast = { version = "58", features = ["prettyprint"] } +arrow-ord = "58" -datafusion = { version = "52", default-features = false } -datafusion-physical-plan = "52" -datafusion-physical-expr = "52" -datafusion-execution = "52" -datafusion-common = "52" -datafusion-expr = "52" -datafusion-functions-aggregate = "52" +datafusion = { version = "53", default-features = false, features = ["nested_expressions"] } +datafusion-physical-plan = "53" 
+datafusion-physical-expr = "53" +datafusion-execution = "53" +datafusion-common = "53" +datafusion-expr = "53" +datafusion-functions-aggregate = "53" -lance = { version = "4.0.0", default-features = false, features = ["aws"] } -lance-datafusion = "4.0.0" -lance-file = "4.0.0" -lance-index = "4.0.0" -lance-linalg = "4.0.0" -lance-namespace = "4.0.0" -lance-namespace-impls = "4.0.0" -lance-table = "4.0.0" +lance = { version = "6.0.1", default-features = false, features = ["aws"] } +lance-datafusion = "6.0.1" +lance-file = "6.0.1" +lance-index = "6.0.1" +lance-linalg = "6.0.1" +lance-namespace = "6.0.1" +lance-namespace-impls = "6.0.1" +lance-table = "6.0.1" ulid = "1" futures = "0.3" diff --git a/README.md b/README.md index 7490cc1..0f6ebea 100644 --- a/README.md +++ b/README.md @@ -5,32 +5,35 @@ [![Crates.io](https://img.shields.io/crates/v/omnigraph-cli.svg)](https://crates.io/crates/omnigraph-cli) [![CI](https://github.com/ModernRelay/omnigraph/actions/workflows/ci.yml/badge.svg)](https://github.com/ModernRelay/omnigraph/actions/workflows/ci.yml) -**Lakehouse-native graph engine with git-style workflows.** +**Lakehouse-native graph engine built for context assembly** -Branch, commit, and merge typed graph data like source code. Multi-modal, self-hosted, open source. +Omnigraph acts as an operational state & coordination layer for agents -Built on Rust, Arrow, DataFusion and Lance. 
+- Git-style versioning & branching +- Multimodal retrieval (graph+vector/fts+filters) optimized for context assembly +- Object storage native (S3, RustFS) +- Native blob-as-data support (docs, images, videos, etc) +- VPC, On-prem, hybrid deployment +- [`Lance`](https://github.com/lance-format/lance) format as open storage layer -Join the [Omnigraph Slack community](https://join.slack.com/t/omnigraphworkspace/shared_invite/zt-3wfpglyxj-lHvJGhuySPfqLtN35uJZNw) +| AS CODE | What it means | +|---|---| +| **Schema AS CODE** | Typed `.pg` schemas, planned, applied, enforced | +| **Context AS CODE** | Linted queries & agentic nudges, versioned and reusable | +| **Security AS CODE** | Cedar policies enforced server-side on every mutation | +| **Dashboards AS CODE** | Declarative views & controls over the graph *(coming)* | -## Use Cases +## Core Use Cases -- Company brains -- Context graphs -- Backbone for multi-agent research -- Incident response graphs -- Compliance & audit graphs -- Enterprise knowledge systems - -## Capabilities - -- Typed schema, typed queries, and typed mutations -- Schema-as-code, query validation and linting -- Git-style graph workflows: branches, commits, merges, and transactional runs -- Local, on-prem & cloud S3-native storage with snapshot-pinned reads -- Graph traversal + text, fuzzy, BM25, vector, and RRF search in one runtime -- Policy-as-code for server-side access control -- Single CLI for multiple deployments +| Use case | What it's for | +|---|---| +| **Company brain** | Org knowledge unified into one queryable graph | +| **Context graph** | Decision traces and codified tribal knowledge | +| **Agentic memory** | Durable, versioned memory for long-running agents | +| **Dev graph** | Issues & dependency model for coding agents | +| **R&D data layer** | Experiments & trials data written into branches | +| **ML workflows** | Versioned, branchable graphs for training & eval | +| **Karpathy's LLM wiki** | A living, agent-updatable knowledge
base | ## Quick Install @@ -59,7 +62,7 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/ That bootstrap: - starts RustFS on `127.0.0.1:9000` -- creates a bucket and S3-backed repo +- creates a bucket and S3-backed graph - loads the checked-in context fixture - launches `omnigraph-server` on `127.0.0.1:8080` @@ -68,8 +71,8 @@ Docker must be installed and running first. The RustFS bootstrap prefers the rolling `edge` binaries and only falls back to source builds when release assets are unavailable. -If a previous run left objects under the same repo prefix but did not finish -initializing the repo, rerun with `RESET_REPO=1` or set `PREFIX` to a new +If a previous run left objects under the same graph prefix but did not finish +initializing the graph, rerun with `RESET_REPO=1` or set `PREFIX` to a new value. ## Common Commands @@ -77,15 +80,15 @@ value. The same URI works for local paths, `s3://…`, or `http://host:port`. ```bash -omnigraph init --schema ./schema.pg ./repo.omni -omnigraph load --data ./data.jsonl ./repo.omni -omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./repo.omni -omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./repo.omni -omnigraph branch create --from main feature-x ./repo.omni -omnigraph branch merge feature-x --into main ./repo.omni +omnigraph init --schema ./schema.pg ./graph.omni +omnigraph load --data ./data.jsonl ./graph.omni +omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./graph.omni +omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./graph.omni +omnigraph branch create --from main feature-x ./graph.omni +omnigraph branch merge feature-x --into main ./graph.omni ``` -See [docs/cli.md](docs/cli.md) for schema apply, snapshots, ingest, runs, and policy commands. 
+See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, ingest, commits, and policy commands. ## Clients @@ -107,9 +110,8 @@ Both packages are versioned in lockstep with `omnigraph-server` on major.minor: ## Docs -- [Install guide](docs/install.md) -- [CLI guide](docs/cli.md) -- [Deployment guide](docs/deployment.md) +- [Install guide](docs/user/install.md) +- [Deployment guide](docs/user/deployment.md) ## Build And Test @@ -130,8 +132,8 @@ Notes: - `crates/omnigraph-compiler`: shared schema/query parser, typechecker, catalog, and IR lowering - `crates/omnigraph`: storage/runtime, branching, merge, change detection, and query execution -- `crates/omnigraph-cli`: CLI for init/load/ingest/read/change/branch/snapshot/export/policy operations -- `crates/omnigraph-server`: Axum HTTP server for remote reads, changes, ingest, export, branches, commits, and runs +- `crates/omnigraph-cli`: CLI for graph lifecycle (init/load/ingest), query/mutate, branch/commit/merge, schema/lint, snapshot/export, policy, and maintenance (optimize/cleanup) +- `crates/omnigraph-server`: Axum HTTP server for remote reads, changes, ingest, export, branches, and commits ## Contributing diff --git a/crates/omnigraph-cli/Cargo.toml b/crates/omnigraph-cli/Cargo.toml index 2da4384..0d35ed8 100644 --- a/crates/omnigraph-cli/Cargo.toml +++ b/crates/omnigraph-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-cli" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "CLI for the Omnigraph graph database." 
license = "MIT" @@ -13,9 +13,10 @@ name = "omnigraph" path = "src/main.rs" [dependencies] -omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" } -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } -omnigraph-server = { path = "../omnigraph-server", version = "0.4.2" } +omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" } +omnigraph-server = { path = "../omnigraph-server", version = "0.6.0" } clap = { workspace = true } color-eyre = { workspace = true } serde = { workspace = true } @@ -29,4 +30,5 @@ assert_cmd = "2" predicates = "3" serde_json = { workspace = true } tempfile = { workspace = true } +lance = { workspace = true } lance-index = { workspace = true } diff --git a/crates/omnigraph-cli/src/main.rs b/crates/omnigraph-cli/src/main.rs index 35740cc..b7e3041 100644 --- a/crates/omnigraph-cli/src/main.rs +++ b/crates/omnigraph-cli/src/main.rs @@ -1,7 +1,9 @@ +use std::ffi::OsString; use std::fs; use std::io::{self, Write}; use std::path::Path; use std::path::PathBuf; +use std::sync::Arc; use clap::{Arg, ArgAction, Args, CommandFactory, FromArgMatches, Parser, Subcommand, ValueEnum}; use color_eyre::eyre::{Result, bail}; @@ -16,10 +18,11 @@ use omnigraph_compiler::{ }; use omnigraph_server::api::{ BranchCreateOutput, BranchCreateRequest, BranchDeleteOutput, BranchListOutput, - BranchMergeOutput, BranchMergeRequest, ChangeOutput, ChangeRequest, CommitListOutput, - CommitOutput, ErrorOutput, ExportRequest, IngestOutput, IngestRequest, ReadOutput, ReadRequest, - SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotOutput, SnapshotTableOutput, - commit_output, ingest_output, read_output, schema_apply_output, snapshot_payload, + BranchMergeOutput, BranchMergeRequest, ChangeOutput, CommitListOutput, CommitOutput, + 
ErrorOutput, ExportRequest, GraphListResponse, IngestOutput, IngestRequest, ReadOutput, + ReadRequest, SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotOutput, + SnapshotTableOutput, commit_output, ingest_output, read_output, schema_apply_output, + snapshot_payload, }; use omnigraph_server::{ AliasCommand, OmnigraphConfig, PolicyAction, PolicyDecision, PolicyEngine, PolicyRequest, @@ -44,6 +47,17 @@ const DEFAULT_BEARER_TOKEN_ENV: &str = "OMNIGRAPH_BEARER_TOKEN"; #[command(about = "Omnigraph graph database CLI")] #[command(version = env!("CARGO_PKG_VERSION"), disable_version_flag = true)] struct Cli { + /// Actor identity for direct-engine writes (MR-722). Overrides + /// `cli.actor` from `omnigraph.yaml`. When the configured policy + /// is in effect, Cedar evaluates this actor against the requested + /// action and scope; with policy configured but neither this flag + /// nor `cli.actor` set, the engine-layer footgun guard fires and + /// the write is denied (no silent bypass). Has no effect on remote + /// HTTP writes — those resolve their actor server-side from the + /// bearer token. + #[arg(long = "as", global = true, value_name = "ACTOR")] + as_actor: Option, + #[command(subcommand)] command: Command, } @@ -54,16 +68,23 @@ enum Command { Version, /// Generate, clean, or refresh explicit seed embeddings Embed(EmbedArgs), - /// Initialize a new repo from a schema + /// Initialize a new graph from a schema Init { #[arg(long)] schema: PathBuf, - /// Repo URI (local path or s3://) + /// Graph URI (local path or s3://) uri: String, + /// Overwrite existing schema artifacts at the URI. Without + /// this flag, init refuses to touch a URI that already holds + /// `_schema.pg`, `_schema.ir.json`, or `__schema_state.json` + /// — closes the re-init footgun (MR-668 follow-up). With the + /// flag, the operator opts in to destructive semantics. 
+ #[arg(long)] + force: bool, }, - /// Load data into a repo + /// Load data into a graph Load { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -80,7 +101,7 @@ enum Command { }, /// Ingest data into a reviewable named branch Ingest { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -107,14 +128,34 @@ enum Command { #[command(subcommand)] command: SchemaCommand, }, - /// Query validation and linting - Query { - #[command(subcommand)] - command: QueryCommand, + /// Validate queries against a schema (offline) or repo (repo-backed). + /// + /// Canonical name is `lint` (matches the `omnigraph_compiler::lint` + /// module and the `OG-XXX-NNN` lint-code vocabulary). Replaces the + /// deprecated `omnigraph query lint` / `omnigraph query check` / + /// `omnigraph check` invocations — each is kept as an argv-level + /// shim that prints a one-line stderr warning and rewrites to + /// `omnigraph lint`. Aliases are deliberately *not* exposed via + /// clap's `visible_alias` because that would advertise two + /// equivalent canonical names, which agents emit interchangeably + /// (see MR-981). + Lint { + /// Graph URI + uri: Option, + #[arg(long)] + target: Option, + #[arg(long)] + config: Option, + #[arg(long)] + query: PathBuf, + #[arg(long)] + schema: Option, + #[arg(long)] + json: bool, }, - /// Show repo snapshot + /// Show graph snapshot Snapshot { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -127,7 +168,7 @@ enum Command { }, /// Export a full graph snapshot as JSONL Export { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -147,9 +188,14 @@ enum Command { #[command(subcommand)] command: CommitCommand, }, - /// Execute a read query against a branch or snapshot - Read { - /// Repo URI + /// Execute a read query against a branch or snapshot. + /// + /// Canonical read endpoint. 
The previous name `omnigraph read` is + /// kept as a visible alias and prints a one-line deprecation warning + /// when used. Pairs with `omnigraph mutate` on the write side. + #[command(visible_alias = "read")] + Query { + /// Graph URI #[arg(long)] uri: Option, #[arg(hide = true)] @@ -158,10 +204,13 @@ enum Command { target: Option, #[arg(long)] config: Option, - #[arg(long)] + #[arg(long, conflicts_with_all = ["query", "query_string"])] alias: Option, - #[arg(long)] + #[arg(long, conflicts_with_all = ["alias", "query_string"])] query: Option, + /// Inline GQ source — alternative to `--query ` and `--alias `. + #[arg(short = 'e', long = "query-string", value_name = "GQ", conflicts_with_all = ["query", "alias"])] + query_string: Option, #[arg(long)] name: Option, #[command(flatten)] @@ -177,9 +226,14 @@ enum Command { #[arg()] alias_args: Vec, }, - /// Execute a graph change query against a branch - Change { - /// Repo URI + /// Execute a graph mutation query against a branch. + /// + /// Canonical mutation endpoint. The previous name `omnigraph change` + /// is kept as a visible alias and prints a one-line deprecation + /// warning when used. Pairs with `omnigraph query` on the read side. + #[command(visible_alias = "change")] + Mutate { + /// Graph URI #[arg(long)] uri: Option, #[arg(hide = true)] @@ -188,10 +242,13 @@ enum Command { target: Option, #[arg(long)] config: Option, - #[arg(long)] + #[arg(long, conflicts_with_all = ["query", "query_string"])] alias: Option, - #[arg(long)] + #[arg(long, conflicts_with_all = ["alias", "query_string"])] query: Option, + /// Inline GQ source — alternative to `--query ` and `--alias `. 
+ #[arg(short = 'e', long = "query-string", value_name = "GQ", conflicts_with_all = ["query", "alias"])] + query_string: Option, #[arg(long)] name: Option, #[command(flatten)] @@ -208,9 +265,9 @@ enum Command { #[command(subcommand)] command: PolicyCommand, }, - /// Compact small Lance fragments in every table of the repo + /// Compact small Lance fragments in every table of the graph Optimize { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -219,9 +276,9 @@ enum Command { #[arg(long)] json: bool, }, - /// Remove old Lance versions from every table of the repo (destructive) + /// Remove old Lance versions from every table of the graph (destructive) Cleanup { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -241,13 +298,40 @@ enum Command { #[arg(long)] json: bool, }, + /// Manage graphs on a multi-graph server (MR-668) + Graphs { + #[command(subcommand)] + command: GraphsCommand, + }, +} + +/// Operations on the graph registry of a multi-graph server (MR-668). +/// +/// All operations target a remote multi-graph server URL (http:// or +/// https://). Local-URI invocations return a clear error. To add or +/// remove graphs, operators edit `omnigraph.yaml` directly and restart +/// the server — runtime mutation is not exposed in v0.6.0. +#[derive(Debug, Subcommand)] +enum GraphsCommand { + /// List every graph registered with the multi-graph server. + List { + /// Remote server URL (e.g. `https://server.example.com`). 
+ #[arg(long)] + uri: Option, + #[arg(long)] + target: Option, + #[arg(long)] + config: Option, + #[arg(long)] + json: bool, + }, } #[derive(Debug, Subcommand)] enum BranchCommand { /// Create a new branch Create { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -262,7 +346,7 @@ enum BranchCommand { }, /// List branches List { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -274,7 +358,7 @@ enum BranchCommand { }, /// Delete a branch Delete { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -287,7 +371,7 @@ enum BranchCommand { }, /// Merge a source branch into a target branch Merge { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -306,7 +390,7 @@ enum BranchCommand { enum SchemaCommand { /// Plan a schema migration against the accepted persisted schema Plan { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -316,10 +400,15 @@ enum SchemaCommand { schema: PathBuf, #[arg(long)] json: bool, + /// Show the plan as it would execute with `--allow-data-loss`. + /// Promotes every `DropMode::Soft` step to `DropMode::Hard` + /// so the plan output reflects the destructive intent. + #[arg(long, default_value_t = false)] + allow_data_loss: bool, }, /// Apply a supported schema migration Apply { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -329,11 +418,22 @@ enum SchemaCommand { schema: PathBuf, #[arg(long)] json: bool, + /// Allow destructive (data-loss) schema changes. + /// + /// Without this flag, drops are "soft": the column or table + /// is removed from the current manifest version but prior + /// versions are retained, so `snapshot_at_version(pre_drop)` + /// can still read the dropped data until `omnigraph cleanup` + /// runs. With this flag, drops are "hard": `cleanup_old_versions` + /// runs on the affected datasets immediately after the apply, + /// making the prior data unreachable. 
+ #[arg(long, default_value_t = false)] + allow_data_loss: bool, }, /// Show the current accepted schema source #[command(alias = "get")] Show { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -345,30 +445,11 @@ enum SchemaCommand { } #[derive(Debug, Subcommand)] -enum QueryCommand { - /// Validate queries and report higher-level drift warnings - #[command(visible_alias = "check")] - Lint { - /// Repo URI - uri: Option, - #[arg(long)] - target: Option, - #[arg(long)] - config: Option, - #[arg(long)] - query: PathBuf, - #[arg(long)] - schema: Option, - #[arg(long)] - json: bool, - }, -} -#[derive(Debug, Subcommand)] enum CommitCommand { /// List graph commits List { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -381,7 +462,7 @@ enum CommitCommand { }, /// Show a graph commit Show { - /// Repo URI + /// Graph URI #[arg(long)] uri: Option, #[arg(long)] @@ -554,7 +635,7 @@ fn finish_query_lint(output: &QueryLintOutput, json: bool) -> Result<()> { Ok(()) } -fn ensure_local_repo_parent(uri: &str) -> Result<()> { +fn ensure_local_graph_parent(uri: &str) -> Result<()> { if !uri.contains("://") { fs::create_dir_all(uri)?; } @@ -666,7 +747,35 @@ fn resolve_policy_engine(config: &OmnigraphConfig) -> Result { let policy_file = config .resolve_policy_file() .ok_or_else(|| color_eyre::eyre::eyre!("policy.file must be set in omnigraph.yaml"))?; - PolicyEngine::load(&policy_file, &policy_repo_id(config)) + PolicyEngine::load_graph(&policy_file, &policy_graph_id(config)) +} + +/// Open a local-URI graph and, when `policy.file` is configured in +/// `omnigraph.yaml`, install the resolved `PolicyEngine` on the engine +/// handle so every direct-engine write goes through +/// `Omnigraph::enforce(...)` (MR-722). Without a configured policy this +/// is identical to a bare `Omnigraph::open`. +/// +/// Returns owned `Omnigraph`; chained on top of `Omnigraph::open(...)`'s +/// existing future to keep call sites narrow. 
+async fn open_local_db_with_policy(uri: &str, config: &OmnigraphConfig) -> Result { + let db = Omnigraph::open(uri).await?; + if config.resolve_policy_file().is_some() { + let engine = Arc::new(resolve_policy_engine(config)?); + Ok(db.with_policy(engine as Arc)) + } else { + Ok(db) + } +} + +/// Resolve the CLI's effective actor identity for engine-layer policy +/// (MR-722). Precedence: `--as ` (top-level flag) overrides +/// `cli.actor` from `omnigraph.yaml`; both unset returns `None`. When +/// policy is configured and this returns `None`, the engine-layer +/// footgun guard intentionally denies — silent bypass via "I forgot the +/// actor" is what the guard prevents. +fn resolve_cli_actor<'a>(cli_as: Option<&'a str>, config: &'a OmnigraphConfig) -> Option<&'a str> { + cli_as.or(config.cli.actor.as_deref()) } fn resolve_policy_tests_path(config: &OmnigraphConfig) -> Result { @@ -677,7 +786,7 @@ fn resolve_policy_tests_path(config: &OmnigraphConfig) -> Result { }) } -fn policy_repo_id(config: &OmnigraphConfig) -> String { +fn policy_graph_id(config: &OmnigraphConfig) -> String { if let Some(name) = &config.project.name { return name.clone(); } @@ -775,8 +884,15 @@ fn parse_duration_arg(s: &str) -> Result { if s.is_empty() { bail!("duration is empty"); } - let (num_part, unit) = match s.char_indices().rev().find(|(_, c)| c.is_ascii_alphabetic()) { - Some((i, _)) => (&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], &s[i..]), + let (num_part, unit) = match s + .char_indices() + .rev() + .find(|(_, c)| c.is_ascii_alphabetic()) + { + Some((i, _)) => ( + &s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], + &s[i..], + ), None => (s, ""), }; let n: u64 = num_part @@ -802,7 +918,7 @@ fn resolve_local_uri( let uri = resolve_uri(config, cli_uri, cli_target)?; if is_remote_uri(&uri) { bail!( - "{} is only supported against local repo URIs in this milestone", + "{} is only supported against local graph URIs in this milestone", operation ); } @@ -847,7 +963,9 @@ 
fn resolve_query_path( .map(PathBuf::from) .or_else(|| alias_query.map(PathBuf::from)) .ok_or_else(|| { - color_eyre::eyre::eyre!("exactly one of --query or --alias must be provided") + color_eyre::eyre::eyre!( + "exactly one of --query, --query-string, or --alias must be provided" + ) }) .and_then(|query_path| config.resolve_query_path(&query_path)) } @@ -855,8 +973,15 @@ fn resolve_query_path( fn resolve_query_source( config: &OmnigraphConfig, explicit_query: Option<&PathBuf>, + inline_query: Option<&str>, alias_query: Option<&str>, ) -> Result { + if let Some(inline) = inline_query { + if inline.trim().is_empty() { + bail!("--query-string must not be empty"); + } + return Ok(inline.to_string()); + } Ok(fs::read_to_string(resolve_query_path( config, explicit_query, @@ -1035,14 +1160,46 @@ fn render_schema_plan_step(step: &SchemaMigrationStep) -> String { type_name, render_annotations(annotations) ), - SchemaMigrationStep::UnsupportedChange { - entity, - reason, - code, - } => match code { - Some(c) => format!("unsupported change on {} [{}]: {}", entity, c, reason), - None => format!("unsupported change on {}: {}", entity, reason), - }, + SchemaMigrationStep::DropType { + type_kind, + name, + mode, + } => format!( + "drop {} type '{}' ({} mode)", + schema_type_kind_label(*type_kind), + name, + drop_mode_label(*mode), + ), + SchemaMigrationStep::DropProperty { + type_kind, + type_name, + property_name, + mode, + } => format!( + "drop property '{}.{}' of {} '{}' ({} mode)", + type_name, + property_name, + schema_type_kind_label(*type_kind), + type_name, + drop_mode_label(*mode), + ), + SchemaMigrationStep::UnsupportedChange { entity, reason, .. } => { + // When a schema-lint code is attached, render code + tier + // so operators see at-a-glance the kind of risk (destructive + // / validated / safe) — not just the rule identifier. + // Reach the diagnostic via the `diagnostic()` helper so the + // CLI doesn't need to know how the lookup works. 
+ match step.diagnostic() { + Some(diag) => format!( + "unsupported change on {} [{}, {}]: {}", + entity, + diag.code, + schema_lint_tier_label(diag.tier), + reason, + ), + None => format!("unsupported change on {}: {}", entity, reason), + } + } } } @@ -1054,6 +1211,21 @@ fn schema_type_kind_label(kind: omnigraph_compiler::SchemaTypeKind) -> &'static } } +fn schema_lint_tier_label(tier: omnigraph_compiler::SafetyTier) -> &'static str { + match tier { + omnigraph_compiler::SafetyTier::Safe => "safe", + omnigraph_compiler::SafetyTier::Validated => "validated", + omnigraph_compiler::SafetyTier::Destructive => "destructive", + } +} + +fn drop_mode_label(mode: omnigraph_compiler::DropMode) -> &'static str { + match mode { + omnigraph_compiler::DropMode::Soft => "soft", + omnigraph_compiler::DropMode::Hard => "hard", + } +} + fn render_prop_type(prop_type: &omnigraph_compiler::PropType) -> String { let base = if let Some(values) = &prop_type.enum_values { format!("Enum({})", values.join("|")) @@ -1195,12 +1367,12 @@ fn print_commit_human(commit: &CommitOutput) { println!("created_at: {}", commit.created_at); } -fn print_policy_explain(decision: &PolicyDecision, request: &PolicyRequest) { +fn print_policy_explain(decision: &PolicyDecision, actor_id: &str, request: &PolicyRequest) { println!( "decision: {}", if decision.allowed { "allow" } else { "deny" } ); - println!("actor: {}", request.actor_id); + println!("actor: {}", actor_id); println!("action: {}", request.action); if let Some(branch) = &request.branch { println!("branch: {}", branch); @@ -1421,10 +1593,10 @@ async fn execute_query_lint( )); } - let has_repo_target = + let has_graph_target = cli_uri.is_some() || cli_target.is_some() || config.cli_graph_name().is_some(); - if !has_repo_target { - bail!("query lint requires --schema or a resolvable repo target"); + if !has_graph_target { + bail!("query lint requires --schema or a resolvable graph target"); } let uri = resolve_local_uri(config, cli_uri, cli_target,
"query lint")?; @@ -1433,7 +1605,7 @@ async fn execute_query_lint( &db.catalog(), &query_source, query_path, - QueryLintSchemaSource::repo(uri), + QueryLintSchemaSource::graph(uri), )) } @@ -1488,22 +1660,52 @@ async fn execute_change( query_name: Option<&str>, branch: &str, params_json: Option<&Value>, + config: &OmnigraphConfig, + cli_as_actor: Option<&str>, ) -> Result { let (selected_name, query_params) = select_named_query(query_source, query_name)?; let params = query_params_from_json(&query_params, params_json)?; - let mut db = Omnigraph::open(uri).await?; + let db = open_local_db_with_policy(uri, config).await?; + let actor = resolve_cli_actor(cli_as_actor, config); let result = db - .mutate(branch, query_source, &selected_name, &params) + .mutate_as(branch, query_source, &selected_name, &params, actor) .await?; Ok(ChangeOutput { branch: branch.to_string(), query_name: selected_name, affected_nodes: result.affected_nodes, affected_edges: result.affected_edges, - actor_id: None, + actor_id: actor.map(String::from), }) } +/// Build the JSON body for `POST /change` using the legacy wire shape. +/// +/// `ChangeRequest`'s Rust field names are now `query` / `name` (the canonical +/// wire shape going forward), but old `omnigraph-server` builds still require +/// the legacy `query_source` / `query_name` keys on `/change`. Hand-rolling +/// the JSON with the legacy names keeps a newer CLI talking to an older +/// server intact -- the same byte-stability contract we apply to +/// `execute_read_remote` against `/read`.
+fn legacy_change_request_body( + query_source: &str, + query_name: Option<&str>, + branch: &str, + params_json: Option<&Value>, +) -> Value { + let mut body = serde_json::json!({ + "query_source": query_source, + "branch": branch, + }); + if let Some(name) = query_name { + body["query_name"] = Value::String(name.to_string()); + } + if let Some(params) = params_json { + body["params"] = params.clone(); + } + body +} + async fn execute_change_remote( client: &reqwest::Client, uri: &str, @@ -1517,12 +1719,12 @@ async fn execute_change_remote( client, Method::POST, remote_url(uri, "/change"), - Some(serde_json::to_value(ChangeRequest { - query_source: query_source.to_string(), - query_name: query_name.map(ToOwned::to_owned), - params: params_json.cloned(), - branch: Some(branch.to_string()), - })?), + Some(legacy_change_request_body( + query_source, + query_name, + branch, + params_json, + )), bearer_token, ) .await @@ -1577,10 +1779,74 @@ async fn execute_export_remote_to_writer( Ok(()) } +/// Rewrite deprecated CLI invocations into their canonical form. +/// +/// The current rename pass moves four subcommands: +/// - `omnigraph read` -> `omnigraph query` (clap `visible_alias` handles parsing; we warn) +/// - `omnigraph change` -> `omnigraph mutate` (clap `visible_alias` handles parsing; we warn) +/// - `omnigraph check` -> `omnigraph lint` (rewrite required; no visible_alias by design) +/// - `omnigraph query lint` -> `omnigraph lint` (rewrite required; `query` is now the read-runner) +/// - `omnigraph query check` -> `omnigraph lint` (rewrite required) +/// +/// `check` is *not* a clap visible_alias on `lint` even though they're +/// semantically equivalent. Visible aliases create two canonical names +/// that agents emit interchangeably depending on training-data drift +/// (see MR-981 §6 for the policy). The argv-shim + stderr warning +/// pattern preserves back-compat for human users while pointing every +/// caller at the single canonical name in `--help`. 
+/// +/// Returns the (possibly rewritten) argv that clap should parse. +fn rewrite_deprecated_argv(args: Vec) -> Vec { + if args.len() >= 3 { + let sub = args[1].to_str(); + let sub2 = args[2].to_str(); + if sub == Some("query") && matches!(sub2, Some("lint") | Some("check")) { + let suffix = sub2.unwrap(); + eprintln!( + "warning: `omnigraph query {suffix}` is deprecated; use `omnigraph lint` instead" + ); + // Drop the leading `query` token AND normalize `check` -> `lint`. + // `check` is no longer a clap visible_alias (MR-981 §6), so the + // rewritten argv must reach the canonical `lint` subcommand + // directly. Result for `omnigraph query check --query foo.gq`: + // `omnigraph lint --query foo.gq`. + let mut out = Vec::with_capacity(args.len() - 1); + out.push(args[0].clone()); + out.push(OsString::from("lint")); + out.extend(args[3..].iter().cloned()); + return out; + } + } + if let Some(sub) = args.get(1).and_then(|s| s.to_str()) { + match sub { + "read" => eprintln!( + "warning: `omnigraph read` is deprecated; use `omnigraph query` instead" + ), + "change" => eprintln!( + "warning: `omnigraph change` is deprecated; use `omnigraph mutate` instead" + ), + "check" => { + eprintln!( + "warning: `omnigraph check` is deprecated; use `omnigraph lint` instead" + ); + // Rewrite the top-level subcommand to `lint`; pass through the rest. + let mut out = Vec::with_capacity(args.len()); + out.push(args[0].clone()); + out.push(OsString::from("lint")); + out.extend(args[2..].iter().cloned()); + return out; + } + _ => {} + } + } + args +} + #[tokio::main] async fn main() -> Result<()> { color_eyre::install()?; let cli = { + let raw_args = rewrite_deprecated_argv(std::env::args_os().collect()); let matches = Cli::command() .arg( Arg::new("version") @@ -1589,7 +1855,7 @@ async fn main() -> Result<()> { .action(ArgAction::Version) .help("Print version"), ) - .get_matches(); + .get_matches_from(raw_args); Cli::from_arg_matches(&matches)? 
}; let http_client = build_http_client()?; @@ -1605,10 +1871,15 @@ async fn main() -> Result<()> { print_embed_human(&output); } } - Command::Init { schema, uri } => { + Command::Init { schema, uri, force } => { let schema_source = fs::read_to_string(&schema)?; - ensure_local_repo_parent(&uri)?; - Omnigraph::init(&uri, &schema_source).await?; + ensure_local_graph_parent(&uri)?; + Omnigraph::init_with_options( + &uri, + &schema_source, + omnigraph::db::InitOptions { force }, + ) + .await?; scaffold_config_if_missing(&uri)?; println!("initialized {}", uri); } @@ -1624,9 +1895,10 @@ async fn main() -> Result<()> { let config = load_cli_config(config.as_ref())?; let uri = resolve_local_uri(&config, uri, target.as_deref(), "load")?; let branch = resolve_branch(&config, branch, None, "main"); - let mut db = Omnigraph::open(&uri).await?; + let db = open_local_db_with_policy(&uri, &config).await?; + let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config); let result = db - .load_file(&branch, &data.to_string_lossy(), mode.into()) + .load_file_as(&branch, &data.to_string_lossy(), mode.into(), actor) .await?; let payload = LoadOutput { uri: &uri, @@ -1683,9 +1955,16 @@ async fn main() -> Result<()> { ) .await? } else { - let mut db = Omnigraph::open(&uri).await?; + let db = open_local_db_with_policy(&uri, &config).await?; + let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config); let result = db - .ingest_file(&branch, Some(&from), &data.to_string_lossy(), mode.into()) + .ingest_file_as( + &branch, + Some(&from), + &data.to_string_lossy(), + mode.into(), + actor, + ) .await?; ingest_output(&uri, &result, None) }; @@ -1722,14 +2001,15 @@ async fn main() -> Result<()> { ) .await? 
} else { - let mut db = Omnigraph::open(&uri).await?; - db.branch_create_from(ReadTarget::branch(&from), &name) + let db = open_local_db_with_policy(&uri, &config).await?; + let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config); + db.branch_create_from_as(ReadTarget::branch(&from), &name, actor) .await?; BranchCreateOutput { uri: uri.clone(), from: from.clone(), name: name.clone(), - actor_id: None, + actor_id: actor.map(String::from), } }; if json { @@ -1792,12 +2072,13 @@ async fn main() -> Result<()> { ) .await? } else { - let mut db = Omnigraph::open(&uri).await?; - db.branch_delete(&name).await?; + let db = open_local_db_with_policy(&uri, &config).await?; + let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config); + db.branch_delete_as(&name, actor).await?; BranchDeleteOutput { uri: uri.clone(), name: name.clone(), - actor_id: None, + actor_id: actor.map(String::from), } }; if json { @@ -1832,13 +2113,14 @@ async fn main() -> Result<()> { ) .await? } else { - let mut db = Omnigraph::open(&uri).await?; - let outcome = db.branch_merge(&source, &into).await?; + let db = open_local_db_with_policy(&uri, &config).await?; + let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config); + let outcome = db.branch_merge_as(&source, &into, actor).await?; BranchMergeOutput { source: source.clone(), target: into.clone(), outcome: outcome.into(), - actor_id: None, + actor_id: actor.map(String::from), } }; if json { @@ -1931,12 +2213,18 @@ async fn main() -> Result<()> { config, schema, json, + allow_data_loss, } => { let config = load_cli_config(config.as_ref())?; let uri = resolve_local_uri(&config, uri, target.as_deref(), "schema plan")?; let schema_source = fs::read_to_string(&schema)?; let db = Omnigraph::open(&uri).await?; - let plan = db.plan_schema(&schema_source).await?; + let plan = db + .plan_schema_with_options( + &schema_source, + omnigraph::db::SchemaApplyOptions { allow_data_loss }, + ) + .await?; let output = SchemaPlanOutput { uri: &uri, 
supported: plan.supported, @@ -1955,6 +2243,7 @@ async fn main() -> Result<()> { config, schema, json, + allow_data_loss, } => { let config = load_cli_config(config.as_ref())?; let bearer_token = @@ -1962,19 +2251,33 @@ async fn main() -> Result<()> { let uri = resolve_uri(&config, uri, target.as_deref())?; let schema_source = fs::read_to_string(&schema)?; let output = if is_remote_uri(&uri) { + // MR-694 PR B: SchemaApplyRequest gained an + // allow_data_loss field so Hard-mode drops are no + // longer CLI-only. The previous bail is gone; the + // field is forwarded into the JSON payload, and + // the server's `server_schema_apply` honors it. remote_json::( &http_client, Method::POST, remote_url(&uri, "/schema/apply"), Some(serde_json::to_value(SchemaApplyRequest { schema_source: schema_source.clone(), + allow_data_loss, })?), bearer_token.as_deref(), ) .await? } else { - let mut db = Omnigraph::open(&uri).await?; - schema_apply_output(&uri, db.apply_schema(&schema_source).await?) + let db = open_local_db_with_policy(&uri, &config).await?; + let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config); + let result = db + .apply_schema_as( + &schema_source, + omnigraph::db::SchemaApplyOptions { allow_data_loss }, + actor, + ) + .await?; + schema_apply_output(&uri, result) }; if json { print_json(&output)?; @@ -2014,22 +2317,20 @@ async fn main() -> Result<()> { } } }, - Command::Query { command } => match command { - QueryCommand::Lint { - uri, - target, - config, - query, - schema, - json, - } => { - let config = load_cli_config(config.as_ref())?; - let output = - execute_query_lint(&config, uri, target.as_deref(), schema.as_ref(), &query) - .await?; - finish_query_lint(&output, json)?; - } - }, + Command::Lint { + uri, + target, + config, + query, + schema, + json, + } => { + let config = load_cli_config(config.as_ref())?; + let output = + execute_query_lint(&config, uri, target.as_deref(), schema.as_ref(), &query) + .await?; + finish_query_lint(&output, 
json)?; + } Command::Snapshot { uri, target, @@ -2099,13 +2400,14 @@ async fn main() -> Result<()> { .await?; } } - Command::Read { + Command::Query { uri, legacy_uri, target, config, alias, query, + query_string, name, params, branch, @@ -2114,8 +2416,8 @@ async fn main() -> Result<()> { json, alias_args, } => { - if alias.is_some() == query.is_some() { - bail!("exactly one of --alias or --query must be provided"); + if alias.is_none() && query.is_none() && query_string.is_none() { + bail!("exactly one of --query, --query-string, or --alias must be provided"); } let config = load_cli_config(config.as_ref())?; @@ -2138,6 +2440,7 @@ async fn main() -> Result<()> { let query_source = resolve_query_source( &config, query.as_ref(), + query_string.as_deref(), alias_config.map(|a| a.query.as_str()), )?; let params_json = merged_params_json( @@ -2184,21 +2487,22 @@ async fn main() -> Result<()> { ); print_read_output(&output, format, &config)?; } - Command::Change { + Command::Mutate { uri, legacy_uri, target, config, alias, query, + query_string, name, params, branch, json, alias_args, } => { - if alias.is_some() == query.is_some() { - bail!("exactly one of --alias or --query must be provided"); + if alias.is_none() && query.is_none() && query_string.is_none() { + bail!("exactly one of --query, --query-string, or --alias must be provided"); } let config = load_cli_config(config.as_ref())?; @@ -2221,6 +2525,7 @@ async fn main() -> Result<()> { let query_source = resolve_query_source( &config, query.as_ref(), + query_string.as_deref(), alias_config.map(|a| a.query.as_str()), )?; let params_json = merged_params_json( @@ -2256,6 +2561,8 @@ async fn main() -> Result<()> { query_name.as_deref(), &branch, params_json.as_ref(), + &config, + cli.as_actor.as_deref(), ) .await? 
}; @@ -2296,13 +2603,12 @@ async fn main() -> Result<()> { let config = load_cli_config(config.as_ref())?; let engine = resolve_policy_engine(&config)?; let request = PolicyRequest { - actor_id: actor, action, branch, target_branch, }; - let decision = engine.authorize(&request)?; - print_policy_explain(&decision, &request); + let decision = engine.authorize(&actor, &request)?; + print_policy_explain(&decision, &actor, &request); } }, Command::Optimize { @@ -2313,7 +2619,7 @@ async fn main() -> Result<()> { } => { let config = load_cli_config(config.as_ref())?; let uri = resolve_uri(&config, uri, target.as_deref())?; - let mut db = Omnigraph::open(&uri).await?; + let db = Omnigraph::open(&uri).await?; let stats = db.optimize().await?; if json { let value = serde_json::json!({ @@ -2354,17 +2660,16 @@ async fn main() -> Result<()> { let config = load_cli_config(config.as_ref())?; let uri = resolve_uri(&config, uri, target.as_deref())?; - let older_than_dur = older_than - .as_deref() - .map(parse_duration_arg) - .transpose()?; + let older_than_dur = older_than.as_deref().map(parse_duration_arg).transpose()?; if keep.is_none() && older_than_dur.is_none() { bail!("cleanup requires at least one of --keep or --older-than"); } let policy_desc = match (keep, older_than_dur) { - (Some(k), Some(d)) => format!("keep {} versions, remove anything older than {:?}", k, d), + (Some(k), Some(d)) => { + format!("keep {} versions, remove anything older than {:?}", k, d) + } (Some(k), None) => format!("keep {} versions", k), (None, Some(d)) => format!("remove anything older than {:?}", d), _ => unreachable!(), @@ -2410,6 +2715,41 @@ async fn main() -> Result<()> { ); } } + Command::Graphs { command } => match command { + GraphsCommand::List { + uri, + target, + config, + json, + } => { + let config = load_cli_config(config.as_ref())?; + let bearer_token = + resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?; + let uri = resolve_uri(&config, uri, 
target.as_deref())?; + if !is_remote_uri(&uri) { + bail!( + "`omnigraph graphs list` requires a remote multi-graph server URL \ + (http:// or https://). To enumerate local graphs, read `omnigraph.yaml` \ + directly." + ); + } + let payload = remote_json::( + &http_client, + Method::GET, + remote_url(&uri, "/graphs"), + None, + bearer_token.as_deref(), + ) + .await?; + if json { + print_json(&payload)?; + } else { + for entry in payload.graphs { + println!("{}\t{}", entry.graph_id, entry.uri); + } + } + } + }, } Ok(()) } @@ -2419,14 +2759,62 @@ mod tests { use std::fs; use super::{ - DEFAULT_BEARER_TOKEN_ENV, apply_bearer_token, bearer_token_from_env_file, load_cli_config, - load_env_file_into_process, normalize_bearer_token, parse_env_assignment, - resolve_remote_bearer_token, + DEFAULT_BEARER_TOKEN_ENV, apply_bearer_token, bearer_token_from_env_file, + legacy_change_request_body, load_cli_config, load_env_file_into_process, + normalize_bearer_token, parse_env_assignment, resolve_remote_bearer_token, }; use omnigraph_server::load_config; use reqwest::header::AUTHORIZATION; + use serde_json::json; use tempfile::tempdir; + #[test] + fn legacy_change_request_body_uses_legacy_field_names() { + // `execute_change_remote` hits `POST /change`, which old + // `omnigraph-server` builds deserialize as `ChangeRequest` with + // **required** `query_source` and optional `query_name` keys. + // Newer servers accept both spellings via serde alias, but a + // newer CLI must still emit the legacy keys on the wire so it + // can talk to an old server during a rolling upgrade. 
+ let body = legacy_change_request_body( + "query insert_person($n: String) { insert Person { name: $n } }", + Some("insert_person"), + "main", + Some(&json!({ "n": "Alice" })), + ); + assert_eq!( + body["query_source"].as_str(), + Some("query insert_person($n: String) { insert Person { name: $n } }"), + ); + assert_eq!(body["query_name"].as_str(), Some("insert_person")); + assert_eq!(body["branch"].as_str(), Some("main")); + assert_eq!(body["params"]["n"].as_str(), Some("Alice")); + // Crucially, the **new** field names must NOT appear -- old + // servers would silently treat them as unknown fields and then + // fail on missing required `query_source`. + assert!( + body.get("query").is_none(), + "legacy /change body must not carry the renamed `query` key; got {body}" + ); + assert!( + body.get("name").is_none(), + "legacy /change body must not carry the renamed `name` key; got {body}" + ); + } + + #[test] + fn legacy_change_request_body_omits_optional_fields_when_unset() { + let body = legacy_change_request_body( + "query find() { match { $p: Person } return { $p.name } }", + None, + "main", + None, + ); + assert_eq!(body["branch"].as_str(), Some("main")); + assert!(body.get("query_name").is_none()); + assert!(body.get("params").is_none()); + } + #[test] fn apply_bearer_token_adds_header_when_configured() { let client = reqwest::Client::new(); diff --git a/crates/omnigraph-cli/tests/cli.rs b/crates/omnigraph-cli/tests/cli.rs index 9dc7338..6e5de37 100644 --- a/crates/omnigraph-cli/tests/cli.rs +++ b/crates/omnigraph-cli/tests/cli.rs @@ -1,6 +1,6 @@ use std::fs; -use lance_index::traits::DatasetIndexExt; +use lance::index::DatasetIndexExt; use omnigraph::db::{Omnigraph, ReadTarget}; use serde_json::Value; use tempfile::tempdir; @@ -48,9 +48,9 @@ cases: expect: deny "#; -fn manifest_dataset_version(repo: &std::path::Path) -> u64 { +fn manifest_dataset_version(graph: &std::path::Path) -> u64 { tokio::runtime::Runtime::new().unwrap().block_on(async { - 
Omnigraph::open(repo.to_string_lossy().as_ref()) + Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap() .snapshot_of(ReadTarget::branch("main")) @@ -67,7 +67,7 @@ fn write_policy_config_fixture(root: &std::path::Path) -> (std::path::PathBuf, s &config, r#" project: - name: policy-test-repo + name: policy-test-graph policy: file: ./policy.yaml "#, @@ -221,26 +221,26 @@ fn embed_seed_preserves_non_entity_rows() { } #[test] -fn init_creates_repo_successfully_on_missing_local_directory() { +fn init_creates_graph_successfully_on_missing_local_directory() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema = fixture("test.pg"); - let output = output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo)); + let output = output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph)); let stdout = stdout_string(&output); assert!(stdout.contains("initialized")); - assert!(repo.join("_schema.pg").exists()); - assert!(repo.join("__manifest").exists()); + assert!(graph.join("_schema.pg").exists()); + assert!(graph.join("__manifest").exists()); assert!(temp.path().join("omnigraph.yaml").exists()); } #[test] fn schema_plan_json_reports_supported_additive_change() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("next.pg"); - init_repo(&repo); + init_graph(&graph); let next_schema = fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -255,7 +255,7 @@ fn schema_plan_json_reports_supported_additive_change() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -270,9 +270,9 @@ fn schema_plan_json_reports_supported_additive_change() { #[test] fn schema_plan_json_reports_unsupported_type_change() { let temp = tempdir().unwrap(); - let repo = 
repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("breaking.pg"); - init_repo(&repo); + init_graph(&graph); let breaking_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -286,7 +286,7 @@ fn schema_plan_json_reports_unsupported_type_change() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -303,9 +303,9 @@ fn schema_plan_json_reports_unsupported_type_change() { #[test] fn schema_apply_json_applies_supported_migration() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("next.pg"); - init_repo(&repo); + init_graph(&graph); let next_schema = fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -320,7 +320,7 @@ fn schema_apply_json_applies_supported_migration() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -330,7 +330,7 @@ fn schema_apply_json_applies_supported_migration() { let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.to_string_lossy().as_ref())) .unwrap(); assert!( db.catalog().node_types["Person"] @@ -342,9 +342,9 @@ fn schema_apply_json_applies_supported_migration() { #[test] fn schema_apply_human_reports_noop() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = fixture("test.pg"); - init_repo(&repo); + init_graph(&graph); let output = output_success( cli() @@ -352,7 +352,7 @@ fn schema_apply_human_reports_noop() { .arg("apply") .arg("--schema") .arg(&schema_path) - .arg(&repo), + .arg(&graph), ); let stdout = stdout_string(&output); @@ -363,9 +363,9 @@ fn schema_apply_human_reports_noop() { 
#[test] fn schema_apply_json_renames_type_and_updates_snapshot() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("rename.pg"); - init_repo(&repo); + init_graph(&graph); let renamed_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -384,14 +384,14 @@ fn schema_apply_json_renames_type_and_updates_snapshot() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.to_string_lossy().as_ref())) .unwrap(); let snapshot = tokio::runtime::Runtime::new() .unwrap() @@ -404,9 +404,9 @@ fn schema_apply_json_renames_type_and_updates_snapshot() { #[test] fn schema_apply_json_renames_property_and_updates_catalog() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("rename-property.pg"); - init_repo(&repo); + init_graph(&graph); let renamed_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -420,14 +420,14 @@ fn schema_apply_json_renames_property_and_updates_catalog() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.to_string_lossy().as_ref())) .unwrap(); let person = &db.catalog().node_types["Person"]; assert!(person.properties.contains_key("years")); @@ -437,12 +437,12 @@ fn schema_apply_json_renames_property_and_updates_catalog() { #[test] fn schema_apply_json_adds_index_for_existing_property() { let temp = 
tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("index.pg"); - init_repo(&repo); + init_graph(&graph); let before_index_count = tokio::runtime::Runtime::new().unwrap().block_on(async { - let db = Omnigraph::open(repo.to_string_lossy().as_ref()) + let db = Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap(); let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); @@ -462,13 +462,13 @@ fn schema_apply_json_adds_index_for_existing_property() { .arg("--schema") .arg(&schema_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["applied"], true); let after_index_count = tokio::runtime::Runtime::new().unwrap().block_on(async { - let db = Omnigraph::open(repo.to_string_lossy().as_ref()) + let db = Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap(); let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); @@ -481,9 +481,9 @@ fn schema_apply_json_adds_index_for_existing_property() { #[test] fn schema_apply_rejects_unsupported_plan() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("breaking.pg"); - init_repo(&repo); + init_graph(&graph); let breaking_schema = fs::read_to_string(fixture("test.pg")) .unwrap() @@ -496,7 +496,7 @@ fn schema_apply_rejects_unsupported_plan() { .arg("apply") .arg("--schema") .arg(&schema_path) - .arg(&repo), + .arg(&graph), ); let stderr = String::from_utf8_lossy(&output.stderr); assert!(stderr.contains("changing property type")); @@ -505,9 +505,9 @@ fn schema_apply_rejects_unsupported_plan() { #[test] fn schema_apply_rejects_when_non_main_branch_exists() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema_path = temp.path().join("next.pg"); - 
init_repo(&repo); + init_graph(&graph); output_success( cli() .arg("branch") @@ -515,7 +515,7 @@ fn schema_apply_rejects_when_non_main_branch_exists() { .arg("--from") .arg("main") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature"), ); @@ -531,10 +531,10 @@ fn schema_apply_rejects_when_non_main_branch_exists() { .arg("apply") .arg("--schema") .arg(&schema_path) - .arg(&repo), + .arg(&graph), ); let stderr = String::from_utf8_lossy(&output.stderr); - assert!(stderr.contains("schema apply requires a repo with only main")); + assert!(stderr.contains("schema apply requires a graph with only main")); } #[test] @@ -631,12 +631,208 @@ query list_people() { assert_eq!(stdout_string(&lint_output), stdout_string(&check_output)); } +/// `omnigraph lint` is the canonical top-level lint command after the +/// query/mutate rename. `omnigraph query lint` and `omnigraph query check` +/// are kept as deprecated argv shims (warning + rewrite). All three must +/// produce identical stdout output. #[test] -fn query_lint_can_use_local_repo_via_positional_uri() { +fn lint_top_level_matches_deprecated_query_lint_output() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let schema_path = temp.path().join("schema.pg"); let query_path = temp.path().join("queries.gq"); - init_repo(&repo); + write_file( + &schema_path, + r#" +node Person { + name: String +} +"#, + ); + write_query_file( + &query_path, + r#" +query list_people() { + match { $p: Person } + return { $p.name } +} +"#, + ); + + let canonical = output_success( + cli() + .arg("lint") + .arg("--query") + .arg(&query_path) + .arg("--schema") + .arg(&schema_path) + .arg("--json"), + ); + let deprecated_lint = output_success( + cli() + .arg("query") + .arg("lint") + .arg("--query") + .arg(&query_path) + .arg("--schema") + .arg(&schema_path) + .arg("--json"), + ); + let deprecated_check = output_success( + cli() + .arg("query") + .arg("check") + .arg("--query") + .arg(&query_path) + .arg("--schema") + 
.arg(&schema_path) + .arg("--json"), + ); + + assert_eq!(stdout_string(&canonical), stdout_string(&deprecated_lint)); + assert_eq!(stdout_string(&canonical), stdout_string(&deprecated_check)); + + // Canonical form must NOT emit the deprecation warning. + let canonical_stderr = String::from_utf8(canonical.stderr).unwrap(); + assert!( + !canonical_stderr.contains("deprecated"), + "`omnigraph lint` is canonical and must not warn; got stderr: {canonical_stderr}" + ); + + // Deprecated forms MUST emit the one-line warning, pointing at the + // new top-level `omnigraph lint`. + let lint_stderr = String::from_utf8(deprecated_lint.stderr).unwrap(); + assert!( + lint_stderr.contains("`omnigraph query lint` is deprecated") + && lint_stderr.contains("`omnigraph lint`"), + "expected deprecation warning pointing at `omnigraph lint`; got: {lint_stderr}" + ); + let check_stderr = String::from_utf8(deprecated_check.stderr).unwrap(); + assert!( + check_stderr.contains("`omnigraph query check` is deprecated") + && check_stderr.contains("`omnigraph lint`"), + "expected deprecation warning pointing at `omnigraph lint`; got: {check_stderr}" + ); +} + +/// Bare `omnigraph check` is NOT a clap `visible_alias` on `lint` (MR-981 §6: +/// visible aliases give agents two canonical names to emit interchangeably). +/// It's an argv-level shim: rewrites to `omnigraph lint`, prints a one-line +/// stderr deprecation warning, and produces identical stdout to the canonical +/// invocation. Cargo/Go users typing `check` keep working; help text shows +/// only `lint`. 
+#[test] +fn deprecated_check_top_level_rewrites_to_lint() { + let temp = tempdir().unwrap(); + let schema_path = temp.path().join("schema.pg"); + let query_path = temp.path().join("queries.gq"); + write_file( + &schema_path, + r#" +node Person { + name: String +} +"#, + ); + write_query_file( + &query_path, + r#" +query list_people() { + match { $p: Person } + return { $p.name } +} +"#, + ); + + let canonical = output_success( + cli() + .arg("lint") + .arg("--query") + .arg(&query_path) + .arg("--schema") + .arg(&schema_path) + .arg("--json"), + ); + let deprecated_check = output_success( + cli() + .arg("check") + .arg("--query") + .arg(&query_path) + .arg("--schema") + .arg(&schema_path) + .arg("--json"), + ); + + assert_eq!(stdout_string(&canonical), stdout_string(&deprecated_check)); + + let check_stderr = String::from_utf8(deprecated_check.stderr).unwrap(); + assert!( + check_stderr.contains("`omnigraph check` is deprecated") + && check_stderr.contains("`omnigraph lint`"), + "expected `omnigraph check` deprecation warning pointing at `omnigraph lint`; got: {check_stderr}" + ); + + // `check` must NOT appear in the canonical `omnigraph --help` output — + // agents copy the surface from help text and would otherwise emit both + // names interchangeably. + let help = cli().arg("--help").output().unwrap(); + let stdout = String::from_utf8(help.stdout).unwrap(); + let check_aliased = stdout + .lines() + .any(|line| line.trim_start().starts_with("lint") && line.contains("check")); + assert!( + !check_aliased, + "`check` must not be advertised as a visible alias of `lint`; help output: {stdout}" + ); +} + +/// `omnigraph read` and `omnigraph change` are kept as visible clap +/// aliases for the new canonical `query` / `mutate` subcommands, plus an +/// argv-level deprecation warning. The warning is emitted to stderr; the +/// command otherwise behaves identically to the canonical form. 
+#[test] +fn deprecated_read_and_change_subcommands_emit_warnings() { + // Both subcommands require `--query`/`--query-string`/`--alias`, so + // invoking them with no args will exit non-zero. That's fine -- + // we only care that the deprecation warning is printed before the + // argument-required error. + let output = cli().arg("read").output().unwrap(); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + stderr.contains("`omnigraph read` is deprecated") + && stderr.contains("`omnigraph query`"), + "expected `omnigraph read` deprecation warning; got: {stderr}" + ); + + let output = cli().arg("change").output().unwrap(); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + stderr.contains("`omnigraph change` is deprecated") + && stderr.contains("`omnigraph mutate`"), + "expected `omnigraph change` deprecation warning; got: {stderr}" + ); + + // Sanity check the inverse: the canonical names must NOT print the + // deprecation banner. + let output = cli().arg("query").arg("--help").output().unwrap(); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + !stderr.contains("deprecated"), + "`omnigraph query` is canonical and must not warn; got: {stderr}" + ); + let output = cli().arg("mutate").arg("--help").output().unwrap(); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + !stderr.contains("deprecated"), + "`omnigraph mutate` is canonical and must not warn; got: {stderr}" + ); +} + +#[test] +fn query_lint_can_use_local_graph_via_positional_uri() { + let temp = tempdir().unwrap(); + let graph = graph_path(temp.path()); + let query_path = temp.path().join("queries.gq"); + init_graph(&graph); write_query_file( &query_path, r#" @@ -654,24 +850,24 @@ query list_people() { .arg("--query") .arg(&query_path) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["status"], "ok"); - 
assert_eq!(payload["schema_source"]["kind"], "repo"); + assert_eq!(payload["schema_source"]["kind"], "graph"); assert_eq!( payload["schema_source"]["uri"].as_str(), - Some(repo.to_string_lossy().as_ref()) + Some(graph.to_string_lossy().as_ref()) ); } #[test] -fn query_lint_can_resolve_repo_and_query_from_config() { +fn query_lint_can_resolve_graph_and_query_from_config() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config_path = temp.path().join("omnigraph.yaml"); - init_repo(&repo); + init_graph(&graph); write_query_file( &temp.path().join("queries.gq"), r#" @@ -681,7 +877,7 @@ query list_people() { } "#, ); - write_config(&config_path, &local_yaml_config(&repo)); + write_config(&config_path, &local_yaml_config(&graph)); let output = output_success( cli() @@ -696,10 +892,10 @@ query list_people() { let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["status"], "ok"); - assert_eq!(payload["schema_source"]["kind"], "repo"); + assert_eq!(payload["schema_source"]["kind"], "graph"); assert_eq!( payload["schema_source"]["uri"].as_str(), - Some(repo.to_string_lossy().as_ref()) + Some(graph.to_string_lossy().as_ref()) ); } @@ -727,12 +923,12 @@ query list_people() { ); let stderr = String::from_utf8_lossy(&output.stderr); assert!( - stderr.contains("query lint is only supported against local repo URIs in this milestone") + stderr.contains("query lint is only supported against local graph URIs in this milestone") ); } #[test] -fn query_lint_requires_schema_or_resolvable_repo_target() { +fn query_lint_requires_schema_or_resolvable_graph_target() { let temp = tempdir().unwrap(); let query_path = temp.path().join("queries.gq"); write_query_file( @@ -754,7 +950,7 @@ query list_people() { ); let stderr = String::from_utf8_lossy(&output.stderr); assert!( - stderr.contains("query lint requires --schema or a resolvable repo target") + stderr.contains("query lint requires 
--schema or a resolvable graph target") ); } @@ -846,8 +1042,8 @@ query bad_update($slug: String) { #[test] fn load_json_outputs_summary_for_main_branch() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); let data = fixture("test.jsonl"); let output = output_success( @@ -856,7 +1052,7 @@ fn load_json_outputs_summary_for_main_branch() { .arg("--data") .arg(&data) .arg("--json") - .arg(&repo), + .arg(&graph), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); @@ -871,16 +1067,16 @@ fn load_json_outputs_summary_for_main_branch() { #[test] fn load_into_feature_branch_with_merge_mode_succeeds() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -901,7 +1097,7 @@ fn load_into_feature_branch_with_merge_mode_succeeds() { .arg("feature") .arg("--mode") .arg("merge") - .arg(&repo), + .arg(&graph), ); let stdout = stdout_string(&output); @@ -913,15 +1109,15 @@ fn load_into_feature_branch_with_merge_mode_succeeds() { #[test] fn read_json_outputs_rows_for_named_query() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let queries = fixture("test.gq"); let output = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -941,16 +1137,16 @@ fn read_json_outputs_rows_for_named_query() { #[test] fn export_jsonl_outputs_source_rows_for_selected_branch_and_type() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + 
let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -970,13 +1166,13 @@ fn export_jsonl_outputs_source_rows_for_selected_branch_and_type() { .arg("feature") .arg("--mode") .arg("append") - .arg(&repo), + .arg(&graph), ); let output = output_success( cli() .arg("export") - .arg(&repo) + .arg(&graph) .arg("--branch") .arg("feature") .arg("--type") @@ -1025,7 +1221,7 @@ fn policy_validate_fails_for_invalid_policy_file() { &config, r#" project: - name: policy-test-repo + name: policy-test-graph policy: file: ./policy.yaml "#, @@ -1117,11 +1313,11 @@ fn policy_explain_reports_decision_and_matched_rule() { #[test] fn read_can_resolve_uri_from_config() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); - init_repo(&repo); - load_fixture(&repo); - write_config(&config, &local_yaml_config(&repo)); + init_graph(&graph); + load_fixture(&graph); + write_config(&config, &local_yaml_config(&graph)); let output = output_success( cli() @@ -1143,11 +1339,11 @@ fn read_can_resolve_uri_from_config() { #[test] fn read_alias_from_yaml_config_runs_with_kv_output() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); let query = temp.path().join("aliases.gq"); - init_repo(&repo); - load_fixture(&repo); + init_graph(&graph); + load_fixture(&graph); write_query_file( &query, &std::fs::read_to_string(fixture("test.gq")).unwrap(), @@ -1156,7 +1352,7 @@ fn read_alias_from_yaml_config_runs_with_kv_output() { &config, &format!( "{}aliases:\n owner:\n command: read\n query: aliases.gq\n name: get_person\n args: [name]\n format: kv\n", - local_yaml_config(&repo) + local_yaml_config(&graph) ), ); @@ -1178,16 +1374,16 
@@ fn read_alias_from_yaml_config_runs_with_kv_output() { #[test] fn read_alias_uses_alias_target_without_cli_default_and_accepts_url_like_arg() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); let query = temp.path().join("aliases.gq"); let data = temp.path().join("url-like.jsonl"); - init_repo(&repo); + init_graph(&graph); write_jsonl( &data, r#"{"type":"Person","data":{"name":"https://example.com","age":30}}"#, ); - output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph)); write_query_file( &query, &std::fs::read_to_string(fixture("test.gq")).unwrap(), @@ -1196,7 +1392,7 @@ fn read_alias_uses_alias_target_without_cli_default_and_accepts_url_like_arg() { &config, &format!( "graphs:\n local:\n uri: '{}'\nquery:\n roots:\n - .\npolicy: {{}}\naliases:\n owner:\n command: read\n query: aliases.gq\n name: get_person\n args: [name]\n graph: local\n format: kv\n", - repo.to_string_lossy() + graph.to_string_lossy() ), ); @@ -1218,11 +1414,11 @@ fn read_alias_uses_alias_target_without_cli_default_and_accepts_url_like_arg() { #[test] fn change_alias_from_yaml_config_persists_changes() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); let query = temp.path().join("mutations.gq"); - init_repo(&repo); - load_fixture(&repo); + init_graph(&graph); + load_fixture(&graph); write_query_file( &query, r#" @@ -1235,7 +1431,7 @@ query insert_person($name: String, $age: I32) { &config, &format!( "{}aliases:\n add_person:\n command: change\n query: mutations.gq\n name: insert_person\n args: [name, age]\n", - local_yaml_config(&repo) + local_yaml_config(&graph) ), ); @@ -1256,7 +1452,7 @@ query insert_person($name: String, $age: I32) { let verify = output_success( cli() .arg("read") - 
.arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1272,14 +1468,14 @@ query insert_person($name: String, $age: I32) { #[test] fn read_csv_format_outputs_header_and_row_values() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let output = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1298,14 +1494,14 @@ fn read_csv_format_outputs_header_and_row_values() { #[test] fn read_jsonl_format_outputs_metadata_header_first() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let output = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1324,9 +1520,9 @@ fn read_jsonl_format_outputs_metadata_header_first() { #[test] fn change_json_outputs_affected_counts_and_persists() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let mutation_file = temp.path().join("mutations.gq"); write_query_file( &mutation_file, @@ -1340,7 +1536,7 @@ query insert_person($name: String, $age: I32) { let output = output_success( cli() .arg("change") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&mutation_file) .arg("--params") @@ -1356,7 +1552,7 @@ query insert_person($name: String, $age: I32) { let verify = output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -1373,11 +1569,11 @@ query insert_person($name: String, $age: I32) { #[test] fn change_can_resolve_uri_and_branch_from_config() { let temp = 
tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); - init_repo(&repo); - load_fixture(&repo); - write_config(&config, &local_yaml_config(&repo)); + init_graph(&graph); + load_fixture(&graph); + write_config(&config, &local_yaml_config(&graph)); let mutation_file = temp.path().join("config-mutations.gq"); write_query_file( &mutation_file, @@ -1407,14 +1603,14 @@ query insert_person($name: String, $age: I32) { #[test] fn read_requires_name_for_multi_query_files() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let output = output_failure( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(fixture("test.gq")), ); @@ -1422,18 +1618,114 @@ fn read_requires_name_for_multi_query_files() { assert!(stderr.contains("multiple queries")); } +#[test] +fn read_supports_inline_query_string() { + let temp = tempdir().unwrap(); + let repo = graph_path(temp.path()); + init_graph(&repo); + load_fixture(&repo); + + let output = output_success( + cli() + .arg("read") + .arg(&repo) + .arg("-e") + .arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }") + .arg("--params") + .arg(r#"{"name":"Alice"}"#) + .arg("--json"), + ); + let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); + assert_eq!(payload["query_name"], "find"); + assert_eq!(payload["row_count"], 1); + assert_eq!(payload["rows"][0]["p.name"], "Alice"); +} + +#[test] +fn change_supports_inline_query_string() { + let temp = tempdir().unwrap(); + let repo = graph_path(temp.path()); + init_graph(&repo); + load_fixture(&repo); + + let output = output_success( + cli() + .arg("change") + .arg(&repo) + .arg("--query-string") + .arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }") + 
.arg("--params") + .arg(r#"{"name":"Inline","age":42}"#) + .arg("--json"), + ); + let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); + assert_eq!(payload["query_name"], "add"); + assert_eq!(payload["affected_nodes"], 1); + + let verify = output_success( + cli() + .arg("read") + .arg(&repo) + .arg("-e") + .arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name } }") + .arg("--params") + .arg(r#"{"name":"Inline"}"#) + .arg("--json"), + ); + let verify_payload: Value = serde_json::from_slice(&verify.stdout).unwrap(); + assert_eq!(verify_payload["row_count"], 1); +} + +#[test] +fn read_rejects_query_string_combined_with_query() { + let temp = tempdir().unwrap(); + let repo = graph_path(temp.path()); + init_graph(&repo); + load_fixture(&repo); + + let output = output_failure( + cli() + .arg("read") + .arg(&repo) + .arg("--query") + .arg(fixture("test.gq")) + .arg("-e") + .arg("query whatever() { match { $p: Person } return { $p.name } }"), + ); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + stderr.contains("cannot be used") || stderr.contains("conflict"), + "expected clap conflict error, got: {stderr}" + ); +} + +#[test] +fn read_rejects_empty_query_string() { + let temp = tempdir().unwrap(); + let repo = graph_path(temp.path()); + init_graph(&repo); + load_fixture(&repo); + + let output = output_failure(cli().arg("read").arg(&repo).arg("-e").arg("")); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + stderr.contains("must not be empty"), + "expected empty-string rejection, got: {stderr}" + ); +} + #[test] fn branch_create_json_outputs_source_and_name() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); let output = output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature") @@ -1443,21 +1735,21 @@ 
fn branch_create_json_outputs_source_and_name() { assert_eq!(payload["from"], "main"); assert_eq!(payload["name"], "feature"); - assert_eq!(payload["uri"], repo.to_string_lossy().as_ref()); + assert_eq!(payload["uri"], graph.to_string_lossy().as_ref()); } #[test] fn branch_list_outputs_sorted_branches() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("zeta"), @@ -1467,13 +1759,13 @@ fn branch_list_outputs_sorted_branches() { .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("alpha"), ); - let output = output_success(cli().arg("branch").arg("list").arg("--uri").arg(&repo)); + let output = output_success(cli().arg("branch").arg("list").arg("--uri").arg(&graph)); let stdout = stdout_string(&output); let lines = stdout .lines() @@ -1487,15 +1779,15 @@ fn branch_list_outputs_sorted_branches() { #[test] fn branch_delete_json_outputs_name_and_removes_branch() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1506,15 +1798,15 @@ fn branch_delete_json_outputs_name_and_removes_branch() { .arg("branch") .arg("delete") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature") .arg("--json"), ); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["name"], "feature"); - assert_eq!(payload["uri"], repo.to_string_lossy().as_ref()); + assert_eq!(payload["uri"], graph.to_string_lossy().as_ref()); - let listed = output_success(cli().arg("branch").arg("list").arg("--uri").arg(&repo)); + let listed = 
output_success(cli().arg("branch").arg("list").arg("--uri").arg(&graph)); let stdout = stdout_string(&listed); let lines = stdout .lines() @@ -1527,15 +1819,15 @@ fn branch_delete_json_outputs_name_and_removes_branch() { #[test] fn branch_delete_rejects_main() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); let output = output_failure( cli() .arg("branch") .arg("delete") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("main"), ); let stderr = String::from_utf8(output.stderr).unwrap(); @@ -1545,16 +1837,16 @@ fn branch_delete_rejects_main() { #[test] fn branch_merge_defaults_target_to_main() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1574,7 +1866,7 @@ fn branch_merge_defaults_target_to_main() { .arg("feature") .arg("--mode") .arg("append") - .arg(&repo), + .arg(&graph), ); let merge_output = output_success( @@ -1582,7 +1874,7 @@ fn branch_merge_defaults_target_to_main() { .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature") .arg("--json"), ); @@ -1594,7 +1886,7 @@ fn branch_merge_defaults_target_to_main() { let snapshot_output = output_success( cli() .arg("snapshot") - .arg(&repo) + .arg(&graph) .arg("--branch") .arg("main") .arg("--json"), @@ -1614,16 +1906,16 @@ fn branch_merge_defaults_target_to_main() { #[test] fn branch_merge_supports_explicit_target() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + 
.arg(&graph) .arg("--from") .arg("main") .arg("feature"), @@ -1633,7 +1925,7 @@ fn branch_merge_supports_explicit_target() { .arg("branch") .arg("create") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("--from") .arg("main") .arg("experiment"), @@ -1653,7 +1945,7 @@ fn branch_merge_supports_explicit_target() { .arg("feature") .arg("--mode") .arg("append") - .arg(&repo), + .arg(&graph), ); let merge_output = output_success( @@ -1661,7 +1953,7 @@ fn branch_merge_supports_explicit_target() { .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("feature") .arg("--into") .arg("experiment") @@ -1675,17 +1967,17 @@ fn branch_merge_supports_explicit_target() { #[test] fn snapshot_json_returns_manifest_version_and_tables() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); - let output = output_success(cli().arg("snapshot").arg(&repo).arg("--json")); + let output = output_success(cli().arg("snapshot").arg(&graph).arg("--json")); let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); assert_eq!(payload["branch"], "main"); assert_eq!( payload["manifest_version"].as_u64().unwrap(), - manifest_dataset_version(&repo) + manifest_dataset_version(&graph) ); assert!(payload["tables"].as_array().unwrap().len() >= 4); } @@ -1755,11 +2047,11 @@ fn read_embedded_rows(path: std::path::PathBuf) -> Vec { #[test] fn snapshot_can_resolve_uri_from_config() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let config = temp.path().join("omnigraph.yaml"); - init_repo(&repo); - load_fixture(&repo); - write_config(&config, &local_yaml_config(&repo)); + init_graph(&graph); + load_fixture(&graph); + write_config(&config, &local_yaml_config(&graph)); let output = output_success( cli() @@ -1775,11 +2067,11 @@ fn snapshot_can_resolve_uri_from_config() { 
#[test] fn snapshot_human_output_includes_branch_and_table_summaries() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); - let output = output_success(cli().arg("snapshot").arg(&repo)); + let output = output_success(cli().arg("snapshot").arg(&graph)); let stdout = stdout_string(&output); assert!(stdout.contains("branch: main")); @@ -1791,11 +2083,11 @@ fn snapshot_human_output_includes_branch_and_table_summaries() { #[test] fn commit_show_accepts_long_uri_flag() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); - let list = output_success(cli().arg("commit").arg("list").arg(&repo).arg("--json")); + let list = output_success(cli().arg("commit").arg("list").arg(&graph).arg("--json")); let list_payload: Value = serde_json::from_slice(&list.stdout).unwrap(); let commit_id = list_payload["commits"][0]["graph_commit_id"] .as_str() @@ -1807,7 +2099,7 @@ fn commit_show_accepts_long_uri_flag() { .arg("commit") .arg("show") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg(&commit_id) .arg("--json"), ); @@ -1818,11 +2110,11 @@ fn commit_show_accepts_long_uri_flag() { } #[test] -fn cli_fails_for_missing_repo() { +fn cli_fails_for_missing_graph() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); - let output = output_failure(cli().arg("snapshot").arg(&repo)); + let output = output_failure(cli().arg("snapshot").arg(&graph)); let stderr = String::from_utf8(output.stderr).unwrap(); assert!( stderr.contains("_schema.pg") @@ -1834,7 +2126,7 @@ fn cli_fails_for_missing_repo() { #[test] fn cli_fails_for_missing_schema_or_data_file() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = 
graph_path(temp.path()); let missing_schema = temp.path().join("missing.pg"); let missing_data = temp.path().join("missing.jsonl"); @@ -1843,7 +2135,7 @@ fn cli_fails_for_missing_schema_or_data_file() { .arg("init") .arg("--schema") .arg(&missing_schema) - .arg(&repo), + .arg(&graph), ); assert!( String::from_utf8(init_output.stderr) @@ -1851,13 +2143,13 @@ fn cli_fails_for_missing_schema_or_data_file() { .contains("No such file") ); - init_repo(&repo); + init_graph(&graph); let load_output = output_failure( cli() .arg("load") .arg("--data") .arg(&missing_data) - .arg(&repo), + .arg(&graph), ); assert!( String::from_utf8(load_output.stderr) @@ -1869,16 +2161,16 @@ fn cli_fails_for_missing_schema_or_data_file() { #[test] fn cli_fails_for_invalid_merge_requests() { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); let missing_branch = output_failure( cli() .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("missing"), ); let missing_branch_stderr = String::from_utf8(missing_branch.stderr).unwrap(); @@ -1893,7 +2185,7 @@ fn cli_fails_for_invalid_merge_requests() { .arg("branch") .arg("merge") .arg("--uri") - .arg(&repo) + .arg(&graph) .arg("main") .arg("--into") .arg("main"), @@ -1909,3 +2201,178 @@ fn cli_fails_for_invalid_merge_requests() { // alongside the run state machine. Direct-to-target writes leave nothing // for these CLIs to manage. Audit history is now visible via // `omnigraph commit list` reading the commit graph. + +// ─── MR-694 PR B: --allow-data-loss flag end-to-end ────────────────────── +// +// The schema-lint chassis v1.2 (PR #100) shipped the `--allow-data-loss` +// flag at the CLI layer; the SDK suite verifies promotion to Hard mode +// via `apply_schema_with_options(.., SchemaApplyOptions { allow_data_loss })`. 
+// These CLI tests close the integration gap so a future change that +// drops the flag wiring in `main.rs` turns red. + +#[test] +fn schema_apply_allow_data_loss_flag_promotes_drops_to_hard() { + let temp = tempdir().unwrap(); + let graph = graph_path(temp.path()); + let schema_path = temp.path().join("drop-age.pg"); + init_graph(&graph); + + // Drop the nullable `age` column. + let next_schema = fs::read_to_string(fixture("test.pg")) + .unwrap() + .replace(" age: I32?\n", ""); + fs::write(&schema_path, next_schema).unwrap(); + + let output = output_success( + cli() + .arg("schema") + .arg("apply") + .arg("--schema") + .arg(&schema_path) + .arg("--allow-data-loss") + .arg("--json") + .arg(&graph), + ); + let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); + assert_eq!(payload["applied"], true); + + let drop_step = payload["steps"] + .as_array() + .unwrap() + .iter() + .find(|s| s["kind"] == "drop_property") + .expect("plan should include a drop_property step"); + assert_eq!( + drop_step["mode"], "hard", + "--allow-data-loss should promote Soft → Hard; full step: {drop_step}", + ); +} + +#[test] +fn schema_apply_without_allow_data_loss_keeps_soft_drops() { + // Symmetric to the above: same schema change without the flag → + // drops stay Soft. Pins default semantics against accidental Hard + // promotion if a future refactor changes the option threading. 
+ let temp = tempdir().unwrap(); + let graph = graph_path(temp.path()); + let schema_path = temp.path().join("drop-age-soft.pg"); + init_graph(&graph); + + let next_schema = fs::read_to_string(fixture("test.pg")) + .unwrap() + .replace(" age: I32?\n", ""); + fs::write(&schema_path, next_schema).unwrap(); + + let output = output_success( + cli() + .arg("schema") + .arg("apply") + .arg("--schema") + .arg(&schema_path) + .arg("--json") + .arg(&graph), + ); + let payload: Value = serde_json::from_slice(&output.stdout).unwrap(); + assert_eq!(payload["applied"], true); + + let drop_step = payload["steps"] + .as_array() + .unwrap() + .iter() + .find(|s| s["kind"] == "drop_property") + .expect("plan should include a drop_property step"); + assert_eq!( + drop_step["mode"], "soft", + "no flag should leave drops Soft; full step: {drop_step}", + ); +} + +#[test] +fn schema_plan_parity_cli_and_sdk() { + // Same .pg through `Omnigraph::plan_schema_with_options` (SDK) and + // `omnigraph schema plan --json` (CLI). Asserts the steps array is + // byte-identical after JSON round-trip. HTTP doesn't expose a + // separate /schema/plan route — that side of parity is covered by + // the HTTP soft/hard drop tests, which exercise apply with + // identical fixtures. + let temp = tempdir().unwrap(); + let graph = graph_path(temp.path()); + init_graph(&graph); + let schema_path = temp.path().join("plan-parity.pg"); + let next_schema = fs::read_to_string(fixture("test.pg")).unwrap().replace( + " age: I32?\n}", + " age: I32?\n nickname: String?\n}", + ); + fs::write(&schema_path, &next_schema).unwrap(); + + // CLI side. + let cli_output = output_success( + cli() + .arg("schema") + .arg("plan") + .arg("--schema") + .arg(&schema_path) + .arg("--json") + .arg(&graph), + ); + let cli_payload: Value = serde_json::from_slice(&cli_output.stdout).unwrap(); + + // SDK side: open graph, call plan_schema. 
+ let plan = tokio::runtime::Runtime::new().unwrap().block_on(async { + let db = Omnigraph::open(graph.to_string_lossy().as_ref()) + .await + .unwrap(); + db.plan_schema(&next_schema).await.unwrap() + }); + let sdk_steps = serde_json::to_value(&plan.steps).unwrap(); + + assert_eq!( + cli_payload["steps"], sdk_steps, + "CLI plan steps must match SDK plan steps for identical input", + ); + assert_eq!(cli_payload["supported"], plan.supported); +} + +// ─── MR-668 PR 8 — omnigraph graphs subcommand ───────────────────────────── + +/// `omnigraph graphs --help` lists only the read-only `list` +/// subcommand. Runtime add (`create`) and remove (`delete`) are +/// deferred — operators add/remove graphs by editing `omnigraph.yaml` +/// and restarting. This test pins the deferral against accidental +/// re-introduction. +#[test] +fn graphs_subcommand_help_lists_list_only() { + let output = output_success(cli().arg("graphs").arg("--help")); + let stdout = stdout_string(&output); + assert!( + stdout.contains("list"), + "expected `list` subcommand in help output:\n{stdout}" + ); + let lowered = stdout.to_lowercase(); + assert!( + !lowered.contains("create a new graph"), + "graph create should not be in v0.6.0 help; got:\n{stdout}" + ); + assert!( + !lowered.contains("delete a graph"), + "graph delete should not be in v0.6.0 help; got:\n{stdout}" + ); +} + +/// `omnigraph graphs list` against a local URI errors with a clear +/// message — the CLI only operates against remote multi-graph servers. 
+#[test] +fn graphs_list_against_local_uri_errors_with_remote_only_message() { + let output = output_failure( + cli() + .arg("graphs") + .arg("list") + .arg("--uri") + .arg("/tmp/local"), + ); + let stderr = String::from_utf8_lossy(&output.stderr).into_owned(); + assert!( + stderr.contains("remote multi-graph server URL"), + "expected 'remote multi-graph server URL' rejection in stderr; got:\n{stderr}" + ); +} diff --git a/crates/omnigraph-cli/tests/support/mod.rs b/crates/omnigraph-cli/tests/support/mod.rs index 31092ea..b62d861 100644 --- a/crates/omnigraph-cli/tests/support/mod.rs +++ b/crates/omnigraph-cli/tests/support/mod.rs @@ -52,7 +52,7 @@ pub fn fixture(name: &str) -> PathBuf { .join(name) } -pub fn repo_path(root: &Path) -> PathBuf { +pub fn graph_path(root: &Path) -> PathBuf { root.join("demo.omni") } @@ -86,14 +86,14 @@ pub fn parse_stdout_json(output: &Output) -> Value { serde_json::from_slice(&output.stdout).unwrap() } -pub fn init_repo(repo: &Path) { +pub fn init_graph(graph: &Path) { let schema = fixture("test.pg"); - output_success(cli().arg("init").arg("--schema").arg(&schema).arg(repo)); + output_success(cli().arg("init").arg("--schema").arg(&schema).arg(graph)); } -pub fn load_fixture(repo: &Path) { +pub fn load_fixture(graph: &Path) { let data = fixture("test.jsonl"); - output_success(cli().arg("load").arg("--data").arg(&data).arg(repo)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(graph)); } pub fn write_jsonl(path: &Path, rows: &str) { @@ -116,7 +116,7 @@ fn yaml_string(value: &str) -> String { format!("'{}'", value.replace('\'', "''")) } -pub fn local_yaml_config(repo: &Path) -> String { +pub fn local_yaml_config(graph: &Path) -> String { format!( "\ graphs: @@ -130,7 +130,7 @@ query: - . 
policy: {{}} ", - yaml_string(&repo.to_string_lossy()) + yaml_string(&graph.to_string_lossy()) ) } @@ -200,9 +200,9 @@ fn spawn_server_process(mut command: StdCommand) -> TestServer { panic!("server did not become healthy"); } -pub fn spawn_server(repo: &Path) -> TestServer { +pub fn spawn_server(graph: &Path) -> TestServer { let mut command = server_process(); - command.arg(repo); + command.arg(graph); spawn_server_process(command) } @@ -221,58 +221,57 @@ pub fn spawn_server_with_config_env(config: &Path, envs: &[(&str, &str)]) -> Tes spawn_server_process(command) } - -pub struct SystemRepo { +pub struct SystemGraph { _temp: TempDir, - repo: PathBuf, + graph: PathBuf, } -impl SystemRepo { +impl SystemGraph { pub fn initialized() -> Self { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - Self { _temp: temp, repo } + let graph = graph_path(temp.path()); + init_graph(&graph); + Self { _temp: temp, graph } } pub fn loaded() -> Self { let temp = tempdir().unwrap(); - let repo = repo_path(temp.path()); - init_repo(&repo); - load_fixture(&repo); - Self { _temp: temp, repo } + let graph = graph_path(temp.path()); + init_graph(&graph); + load_fixture(&graph); + Self { _temp: temp, graph } } pub fn path(&self) -> &Path { - &self.repo + &self.graph } pub fn write_query(&self, name: &str, source: &str) -> PathBuf { - let path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_query_file(&path, source); path } pub fn write_jsonl(&self, name: &str, rows: &str) -> PathBuf { - let path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_jsonl(&path, rows); path } pub fn write_config(&self, name: &str, source: &str) -> PathBuf { - let path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_config(&path, source); path } pub fn write_file(&self, name: &str, source: &str) -> PathBuf { - let 
path = self.repo.parent().unwrap().join(name); + let path = self.graph.parent().unwrap().join(name); write_file(&path, source); path } pub fn spawn_server(&self) -> TestServer { - spawn_server(&self.repo) + spawn_server(&self.graph) } pub fn spawn_server_with_config(&self, config: &Path) -> TestServer { diff --git a/crates/omnigraph-cli/tests/system_local.rs b/crates/omnigraph-cli/tests/system_local.rs index d890603..074b203 100644 --- a/crates/omnigraph-cli/tests/system_local.rs +++ b/crates/omnigraph-cli/tests/system_local.rs @@ -30,6 +30,21 @@ rules: actors: { group: admins } actions: [branch_merge] target_branch_scope: protected + - id: admins-write + allow: + actors: { group: admins } + actions: [change] + branch_scope: any + - id: admins-branch-ops + allow: + actors: { group: admins } + actions: [branch_create, branch_delete] + target_branch_scope: any + - id: admins-schema-apply + allow: + actors: { group: admins } + actions: [schema_apply] + target_branch_scope: any "#; const POLICY_E2E_TESTS_YAML: &str = r#" @@ -51,7 +66,7 @@ fn yaml_string(value: &str) -> String { format!("'{}'", value.replace('\'', "''")) } -fn local_policy_config(repo: &SystemRepo) -> String { +fn local_policy_config(graph: &SystemGraph) -> String { format!( "\ project: @@ -68,12 +83,12 @@ query: policy: file: ./policy.yaml ", - yaml_string(&repo.path().to_string_lossy()) + yaml_string(&graph.path().to_string_lossy()) ) } -fn insert_person_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf { - repo.write_query( +fn insert_person_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf { + graph.write_query( name, r#" query insert_person($name: String, $age: I32) { @@ -83,8 +98,8 @@ query insert_person($name: String, $age: I32) { ) } -fn add_friend_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf { - repo.write_query( +fn add_friend_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf { + graph.write_query( name, r#" query add_friend($from: String, $to: 
String) { @@ -94,13 +109,13 @@ query add_friend($from: String, $to: String) { ) } -fn snapshot_table_row_count(repo: &SystemRepo, table_key: &str) -> u64 { - snapshot_table_row_count_at(repo.path(), table_key) +fn snapshot_table_row_count(graph: &SystemGraph, table_key: &str) -> u64 { + snapshot_table_row_count_at(graph.path(), table_key) } -fn snapshot_table_row_count_at(repo: &std::path::Path, table_key: &str) -> u64 { +fn snapshot_table_row_count_at(graph: &std::path::Path, table_key: &str) -> u64 { let payload = parse_stdout_json(&output_success( - cli().arg("snapshot").arg(repo).arg("--json"), + cli().arg("snapshot").arg(graph).arg("--json"), )); payload["tables"] .as_array() @@ -163,7 +178,7 @@ fn format_vector(values: &[f32]) -> String { .join(", ") } -fn s3_test_repo_uri(suite: &str) -> Option { +fn s3_test_graph_uri(suite: &str) -> Option { let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?; let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX") .ok() @@ -178,21 +193,21 @@ fn s3_test_repo_uri(suite: &str) -> Option { #[test] fn local_cli_end_to_end_init_load_read_change_read_flow() { - let repo = SystemRepo::initialized(); - let mutation_file = insert_person_query(&repo, "system-local-init-change.gq"); + let graph = SystemGraph::initialized(); + let mutation_file = insert_person_query(&graph, "system-local-init-change.gq"); output_success( cli() .arg("load") .arg("--data") .arg(fixture("test.jsonl")) - .arg(repo.path()), + .arg(graph.path()), ); let read_before = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -207,7 +222,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { let change_payload = parse_stdout_json(&output_success( cli() .arg("change") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(&mutation_file) .arg("--params") @@ -220,7 +235,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { let read_after = 
parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -231,19 +246,50 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() { )); assert_eq!(read_after["row_count"], 1); assert_eq!(read_after["rows"][0]["p.name"], "Eve"); + + // Inline-source variants of the same read/change flow (CLI `-e` / + // `--query-string`). Confirms that file-less invocations reach the + // engine identically, including param binding and `branch=main` defaults. + let inline_change = parse_stdout_json(&output_success( + cli() + .arg("change") + .arg(graph.path()) + .arg("-e") + .arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }") + .arg("--params") + .arg(r#"{"name":"Inline","age":42}"#) + .arg("--json"), + )); + assert_eq!(inline_change["branch"], "main"); + assert_eq!(inline_change["query_name"], "add"); + assert_eq!(inline_change["affected_nodes"], 1); + + let inline_read = parse_stdout_json(&output_success( + cli() + .arg("read") + .arg(graph.path()) + .arg("--query-string") + .arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }") + .arg("--params") + .arg(r#"{"name":"Inline"}"#) + .arg("--json"), + )); + assert_eq!(inline_read["row_count"], 1); + assert_eq!(inline_read["rows"][0]["p.name"], "Inline"); + assert_eq!(inline_read["rows"][0]["p.age"], 42); } #[test] fn local_cli_end_to_end_branch_change_merge_flow() { - let repo = SystemRepo::loaded(); - let mutation_file = insert_person_query(&repo, "system-local-change.gq"); + let graph = SystemGraph::loaded(); + let mutation_file = insert_person_query(&graph, "system-local-change.gq"); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("--from") .arg("main") .arg("feature"), @@ -252,7 +298,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { let change_payload = 
parse_stdout_json(&output_success( cli() .arg("change") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(&mutation_file) .arg("--branch") @@ -267,7 +313,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { let feature_read = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -286,7 +332,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { .arg("branch") .arg("merge") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("feature") .arg("--json"), )); @@ -295,7 +341,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { let main_read = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -312,7 +358,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() { cli() .arg("commit") .arg("list") - .arg(repo.path()) + .arg(graph.path()) .arg("--branch") .arg("main") .arg("--json"), @@ -322,8 +368,8 @@ fn local_cli_end_to_end_branch_change_merge_flow() { #[test] fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { - let repo = SystemRepo::loaded(); - let ingest_data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let ingest_data = graph.write_jsonl( "system-local-ingest.jsonl", r#"{"type":"Person","data":{"name":"Zoe","age":33}} {"type":"Person","data":{"name":"Bob","age":26}}"#, @@ -336,7 +382,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { .arg(&ingest_data) .arg("--branch") .arg("feature-ingest") - .arg(repo.path()) + .arg(graph.path()) .arg("--json"), )); assert_eq!(ingest_payload["branch"], "feature-ingest"); @@ -349,7 +395,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { let feature_snapshot = parse_stdout_json(&output_success( cli() .arg("snapshot") - .arg(repo.path()) + .arg(graph.path()) .arg("--branch") .arg("feature-ingest") .arg("--json"), @@ -359,7 +405,7 
@@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { let zoe = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -376,7 +422,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { let bob = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -393,20 +439,20 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() { #[test] fn local_cli_export_round_trips_full_branch_graph() { - let repo = SystemRepo::loaded(); + let graph = SystemGraph::loaded(); output_success( cli() .arg("branch") .arg("create") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("--from") .arg("main") .arg("feature"), ); - let feature_data = repo.write_jsonl( + let feature_data = graph.write_jsonl( "system-local-export-feature.jsonl", r#"{"type":"Person","data":{"name":"Eve","age":29}} {"edge":"Knows","from":"Alice","to":"Eve"}"#, @@ -420,53 +466,56 @@ fn local_cli_export_round_trips_full_branch_graph() { .arg("feature") .arg("--mode") .arg("append") - .arg(repo.path()), + .arg(graph.path()), ); let exported = stdout_string(&output_success( cli() .arg("export") - .arg(repo.path()) + .arg(graph.path()) .arg("--branch") .arg("feature") .arg("--jsonl"), )); - let export_path = repo.write_jsonl("system-local-exported.jsonl", &exported); - let imported_repo = repo.path().parent().unwrap().join("imported-export.omni"); + let export_path = graph.write_jsonl("system-local-exported.jsonl", &exported); + let imported_graph = graph.path().parent().unwrap().join("imported-export.omni"); output_success( cli() .arg("init") .arg("--schema") .arg(fixture("test.pg")) - .arg(&imported_repo), + .arg(&imported_graph), ); output_success( cli() .arg("load") .arg("--data") .arg(&export_path) - .arg(&imported_repo), + .arg(&imported_graph), ); assert_eq!( - 
snapshot_table_row_count_at(&imported_repo, "node:Person"), + snapshot_table_row_count_at(&imported_graph, "node:Person"), 5 ); assert_eq!( - snapshot_table_row_count_at(&imported_repo, "node:Company"), + snapshot_table_row_count_at(&imported_graph, "node:Company"), 2 ); - assert_eq!(snapshot_table_row_count_at(&imported_repo, "edge:Knows"), 4); assert_eq!( - snapshot_table_row_count_at(&imported_repo, "edge:WorksAt"), + snapshot_table_row_count_at(&imported_graph, "edge:Knows"), + 4 + ); + assert_eq!( + snapshot_table_row_count_at(&imported_graph, "edge:WorksAt"), 2 ); let eve = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&imported_repo) + .arg(&imported_graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -481,7 +530,7 @@ fn local_cli_export_round_trips_full_branch_graph() { let friends = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&imported_repo) + .arg(&imported_graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -495,7 +544,7 @@ fn local_cli_export_round_trips_full_branch_graph() { #[test] fn local_cli_s3_end_to_end_init_load_read_flow() { - let Some(repo_uri) = s3_test_repo_uri("cli-local") else { + let Some(graph_uri) = s3_test_graph_uri("cli-local") else { eprintln!("skipping s3 cli test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -520,7 +569,7 @@ query: - . 
policy: {{}} ", - repo_uri + graph_uri ), ); @@ -529,14 +578,14 @@ policy: {{}} .arg("init") .arg("--schema") .arg(fixture("test.pg")) - .arg(&repo_uri), + .arg(&graph_uri), ); output_success( cli() .arg("load") .arg("--data") .arg(fixture("test.jsonl")) - .arg(&repo_uri), + .arg(&graph_uri), ); let read = parse_stdout_json(&output_success( @@ -569,13 +618,13 @@ policy: {{}} #[test] fn local_cli_failed_load_keeps_target_state_unchanged() { - let repo = SystemRepo::loaded(); - let bad_data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let bad_data = graph.write_jsonl( "system-bad-load.jsonl", r#"{"edge":"Knows","from":"Alice","to":"Missing"}"#, ); - let person_rows_before = snapshot_table_row_count(&repo, "node:Person"); - let knows_rows_before = snapshot_table_row_count(&repo, "edge:Knows"); + let person_rows_before = snapshot_table_row_count(&graph, "node:Person"); + let knows_rows_before = snapshot_table_row_count(&graph, "edge:Knows"); let output = output_failure( cli() @@ -584,17 +633,17 @@ fn local_cli_failed_load_keeps_target_state_unchanged() { .arg(&bad_data) .arg("--mode") .arg("append") - .arg(repo.path()), + .arg(graph.path()), ); let stderr = String::from_utf8(output.stderr).unwrap(); assert!(stderr.contains("not found") || stderr.contains("Missing")); assert_eq!( - snapshot_table_row_count(&repo, "node:Person"), + snapshot_table_row_count(&graph, "node:Person"), person_rows_before ); assert_eq!( - snapshot_table_row_count(&repo, "edge:Knows"), + snapshot_table_row_count(&graph, "edge:Knows"), knows_rows_before ); // Failed loads leave no run record (the run lifecycle has been @@ -603,13 +652,13 @@ fn local_cli_failed_load_keeps_target_state_unchanged() { #[test] fn local_cli_failed_change_keeps_target_state_unchanged() { - let repo = SystemRepo::loaded(); - let mutation_file = add_friend_query(&repo, "system-invalid-change.gq"); + let graph = SystemGraph::loaded(); + let mutation_file = add_friend_query(&graph, "system-invalid-change.gq"); 
let output = output_failure( cli() .arg("change") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(&mutation_file) .arg("--params") @@ -621,7 +670,7 @@ fn local_cli_failed_change_keeps_target_state_unchanged() { let friends_payload = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -637,8 +686,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() { #[test] fn local_cli_resolves_relative_query_against_config_base_dir() { - let repo = SystemRepo::loaded(); - let root = repo.path().parent().unwrap(); + let graph = SystemGraph::loaded(); + let root = graph.path().parent().unwrap(); let config_dir = root.join("config"); let query_dir = config_dir.join("queries"); let ambient_dir = root.join("ambient"); @@ -661,7 +710,7 @@ query: - queries policy: {{}} ", - repo.path().display() + graph.path().display() ), ); write_query_file( @@ -715,7 +764,7 @@ query get_person($name: String) { #[test] fn local_cli_datetime_and_list_types_round_trip_through_load_read_and_change() { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema = temp.path().join("datatypes.pg"); let data = temp.path().join("datatypes.jsonl"); let queries = temp.path().join("datatypes.gq"); @@ -790,13 +839,13 @@ query get_task($slug: String) { "#, ); - output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo)); - output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo)); + output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph)); let filtered = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -821,7 +870,7 @@ query get_task($slug: String) { let insert_payload = parse_stdout_json(&output_success( cli() .arg("change") 
- .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -837,7 +886,7 @@ query get_task($slug: String) { let update_payload = parse_stdout_json(&output_success( cli() .arg("change") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -851,7 +900,7 @@ query get_task($slug: String) { let gamma = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -878,7 +927,7 @@ query get_task($slug: String) { #[ignore = "requires GEMINI_API_KEY and network access"] fn local_cli_real_gemini_string_nearest_query_returns_expected_match() { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let schema = temp.path().join("gemini.pg"); let data = temp.path().join("gemini.jsonl"); let queries = temp.path().join("gemini.gq"); @@ -920,13 +969,13 @@ query vector_search($q: String) { "#, ); - output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo)); - output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo)); + output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph)); + output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph)); let result = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&repo) + .arg(&graph) .arg("--query") .arg(&queries) .arg("--name") @@ -949,12 +998,14 @@ query vector_search($q: String) { // surface is the same engine path the unit test already covers. 
#[test] -fn local_cli_policy_tooling_is_end_to_end_while_local_writes_stay_unenforced() { - let repo = SystemRepo::loaded(); - let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo)); - repo.write_config("policy.yaml", POLICY_E2E_YAML); - repo.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML); - let mutation_file = insert_person_query(&repo, "system-local-policy-change.gq"); +fn local_cli_policy_tooling_is_end_to_end() { + // Sanity check for the read-only policy CLI surfaces. These don't + // mutate the graph — they just parse and evaluate the policy file — + // so they don't depend on PR #4's engine-side enforcement. + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML); let validate = output_success( cli() @@ -984,8 +1035,34 @@ fn local_cli_policy_tooling_is_end_to_end_while_local_writes_stay_unenforced() { let explain_stdout = stdout_string(&explain); assert!(explain_stdout.contains("decision: deny")); assert!(explain_stdout.contains("branch: main")); +} - let local_change = parse_stdout_json(&output_success( +#[test] +fn local_cli_change_enforces_engine_layer_policy() { + // Asserts MR-722 PR #4: when `policy.file` is configured in + // `omnigraph.yaml`, the CLI loads PolicyEngine into Omnigraph and + // every direct-engine write hits `enforce(action, scope, actor)` — + // identical to what the HTTP server gets, regardless of transport. + // + // Three cases, each discriminating: + // + // 1. Policy installed, no actor source (no `cli.actor` in config, + // no `--as` flag) → engine-layer footgun guard fires; CLI exits + // non-zero with a "no actor" message. Silent bypass is the bug + // PR #4 prevents. + // 2. 
Policy installed, `--as act-bruno`, change on main → Cedar + // denies (bruno can change unprotected branches; main is + // protected). CLI exits non-zero with a "denied" message. + // 3. Policy installed, `--as act-ragnor`, change on main → + // Cedar permits (admins-write rule). Write succeeds and the + // inserted row is readable. + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let mutation_file = insert_person_query(&graph, "system-local-policy-change.gq"); + + // Case 1: policy configured, no actor threaded → footgun guard. + let no_actor = output_failure( cli() .arg("change") .arg("--config") @@ -993,24 +1070,456 @@ fn local_cli_policy_tooling_is_end_to_end_while_local_writes_stay_unenforced() { .arg("--query") .arg(&mutation_file) .arg("--params") - .arg(r#"{"name":"PolicyLocal","age":44}"#) + .arg(r#"{"name":"NoActorPerson","age":1}"#) + .arg("--json"), + ); + let no_actor_stderr = String::from_utf8_lossy(&no_actor.stderr); + assert!( + no_actor_stderr.contains("no actor"), + "expected 'no actor' footgun message, got stderr: {no_actor_stderr}" + ); + + // Case 2: `--as act-bruno` against protected main → denied. + let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("change") + .arg("--config") + .arg(&config) + .arg("--query") + .arg(&mutation_file) + .arg("--params") + .arg(r#"{"name":"BrunoOnMain","age":2}"#) + .arg("--json"), + ); + let denied_stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + denied_stderr.contains("denied"), + "expected 'denied' message for bruno/main, got stderr: {denied_stderr}" + ); + + // Case 3: `--as act-ragnor` against main → permitted by admins-write. 
+ let allowed = parse_stdout_json(&output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("change") + .arg("--config") + .arg(&config) + .arg("--query") + .arg(&mutation_file) + .arg("--params") + .arg(r#"{"name":"RagnorOnMain","age":3}"#) .arg("--json"), )); - assert_eq!(local_change["branch"], "main"); - assert_eq!(local_change["affected_nodes"], 1); + assert_eq!(allowed["branch"], "main"); + assert_eq!(allowed["affected_nodes"], 1); + assert_eq!(allowed["actor_id"], "act-ragnor"); + // Verify the row landed — proves the write actually committed, not + // just that enforce returned Ok and silently dropped the work. let verify = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") .arg("get_person") .arg("--params") - .arg(r#"{"name":"PolicyLocal"}"#) + .arg(r#"{"name":"RagnorOnMain"}"#) .arg("--json"), )); assert_eq!(verify["row_count"], 1); - assert_eq!(verify["rows"][0]["p.name"], "PolicyLocal"); + assert_eq!(verify["rows"][0]["p.name"], "RagnorOnMain"); +} + +// ─── MR-722 PR A: CLI×writer matrix ─────────────────────────────────────── +// +// The change writer is covered above by `local_cli_change_enforces_engine_layer_policy`. +// These tests extend the engine-layer-policy assertion to the other 6 +// writers, asserting each `omnigraph --as ` invocation +// reaches the corresponding `_as` method and Cedar evaluates correctly. +// One denied case (`--as act-bruno`) + one allowed case (`--as act-ragnor` +// via the `admins-*` rules) per writer; the no-actor footgun is already +// proved by the change-writer test and applies identically to every +// other `_as` variant. 
+ +#[test] +fn local_cli_load_enforces_engine_layer_policy() { + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let data = graph.write_jsonl( + "system-local-policy-load.jsonl", + r#"{"type":"Person","data":{"name":"LoadPolicy","age":11}}"#, + ); + + // act-bruno: change-on-protected is denied (team-write-unprotected only). + let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("load") + .arg("--config") + .arg(&config) + .arg("--data") + .arg(&data) + .arg("--json"), + ); + let stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + stderr.contains("denied"), + "expected 'denied' for bruno/main load, got: {stderr}" + ); + + // act-ragnor: admins-write rule permits change anywhere. + let allowed = parse_stdout_json(&output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("load") + .arg("--config") + .arg(&config) + .arg("--data") + .arg(&data) + .arg("--json"), + )); + assert_eq!(allowed["branch"], "main"); + assert!(allowed["nodes_loaded"].as_u64().unwrap() >= 1); +} + +#[test] +fn local_cli_ingest_enforces_engine_layer_policy() { + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let data = graph.write_jsonl( + "system-local-policy-ingest.jsonl", + r#"{"type":"Person","data":{"name":"IngestPolicy","age":12}}"#, + ); + + // act-bruno: ingest into a new branch requires both BranchCreate and + // Change. Bruno has change-unprotected only, and the implicit + // branch_create fires first when the target branch doesn't exist. + // Either gate is enough to deny — assert denial without pinning + // which one fires first. 
+ let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("ingest") + .arg("--config") + .arg(&config) + .arg("--data") + .arg(&data) + .arg("--branch") + .arg("policy-ingest-feature") + .arg("--json"), + ); + let stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + stderr.contains("denied"), + "expected 'denied' for bruno ingest, got: {stderr}" + ); + + // act-ragnor: admins-write covers Change, admins-branch-ops covers + // BranchCreate. Both fire as ingest creates the branch + loads. + let allowed = parse_stdout_json(&output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("ingest") + .arg("--config") + .arg(&config) + .arg("--data") + .arg(&data) + .arg("--branch") + .arg("policy-ingest-feature") + .arg("--json"), + )); + assert_eq!(allowed["branch"], "policy-ingest-feature"); + assert_eq!(allowed["branch_created"], true); +} + +#[test] +fn local_cli_schema_apply_enforces_engine_layer_policy() { + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + + // Additive: add a nullable property; SDK-compatible with the fixture + // schema. Uses the schema-apply scope (TargetBranch("main")). 
+ let new_schema = std::fs::read_to_string(fixture("test.pg")) + .unwrap() + .replace( + " age: I32?\n}", + " age: I32?\n nickname: String?\n}", + ); + let schema_path = graph.path().join("policy-additive.pg"); + std::fs::write(&schema_path, &new_schema).unwrap(); + + let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("schema") + .arg("apply") + .arg("--config") + .arg(&config) + .arg("--schema") + .arg(&schema_path) + .arg("--json"), + ); + let stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + stderr.contains("denied"), + "expected 'denied' for bruno schema apply, got: {stderr}" + ); + + let allowed = parse_stdout_json(&output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("schema") + .arg("apply") + .arg("--config") + .arg(&config) + .arg("--schema") + .arg(&schema_path) + .arg("--json"), + )); + assert_eq!(allowed["applied"], true); +} + +#[test] +fn local_cli_branch_create_enforces_engine_layer_policy() { + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + + let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("branch") + .arg("create") + .arg("--config") + .arg(&config) + .arg("--from") + .arg("main") + .arg("bruno-feature"), + ); + let stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + stderr.contains("denied"), + "expected 'denied' for bruno branch create, got: {stderr}" + ); + + output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("branch") + .arg("create") + .arg("--config") + .arg(&config) + .arg("--from") + .arg("main") + .arg("ragnor-feature"), + ); +} + +#[test] +fn local_cli_branch_delete_enforces_engine_layer_policy() { + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + + // Pre-create 
the branch as ragnor so there's something to delete. + output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("branch") + .arg("create") + .arg("--config") + .arg(&config) + .arg("--from") + .arg("main") + .arg("doomed"), + ); + + let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("branch") + .arg("delete") + .arg("--config") + .arg(&config) + .arg("doomed"), + ); + let stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + stderr.contains("denied"), + "expected 'denied' for bruno branch delete, got: {stderr}" + ); + + output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("branch") + .arg("delete") + .arg("--config") + .arg(&config) + .arg("doomed"), + ); +} + +#[test] +fn local_cli_branch_merge_enforces_engine_layer_policy() { + let graph = SystemGraph::loaded(); + let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph)); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + + // Pre-create a feature branch as ragnor (admins-branch-ops covers it). + output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("branch") + .arg("create") + .arg("--config") + .arg(&config) + .arg("--from") + .arg("main") + .arg("merge-feature"), + ); + + let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("branch") + .arg("merge") + .arg("--config") + .arg(&config) + .arg("merge-feature") + .arg("--into") + .arg("main"), + ); + let stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + stderr.contains("denied"), + "expected 'denied' for bruno branch merge, got: {stderr}" + ); + + output_success( + cli() + .arg("--as") + .arg("act-ragnor") + .arg("branch") + .arg("merge") + .arg("--config") + .arg(&config) + .arg("merge-feature") + .arg("--into") + .arg("main"), + ); +} + +// ─── MR-722 PR A: cli.actor config-only precedence ──────────────────────── +// +// The change-writer test above uses `--as` directly. 
These two tests +// pin the precedence rule that `main.rs::resolve_cli_actor` implements: +// `--as` flag > `cli.actor` from `omnigraph.yaml` > None. + +fn local_policy_config_with_actor(graph: &SystemGraph, actor: &str) -> String { + // Mirrors `local_policy_config` but adds `cli.actor` so the + // config-only precedence path is exercised. The `cli:` block + // already has `graph` and `branch`; appending `actor` here. + format!( + "\ +project: + name: policy-e2e-local +graphs: + local: + uri: {} +cli: + graph: local + branch: main + actor: {} +query: + roots: + - . +policy: + file: ./policy.yaml +", + yaml_string(&graph.path().to_string_lossy()), + actor, + ) +} + +#[test] +fn local_cli_actor_from_config_used_when_no_flag() { + // cli.actor: act-ragnor in omnigraph.yaml, no --as flag → change + // permitted via admins-write rule. Proves the config-only path + // works; previously the only proof was structural. + let graph = SystemGraph::loaded(); + let config = graph.write_config( + "omnigraph-policy.yaml", + &local_policy_config_with_actor(&graph, "act-ragnor"), + ); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let mutation_file = insert_person_query(&graph, "system-local-cli-actor.gq"); + + let allowed = parse_stdout_json(&output_success( + cli() + .arg("change") + .arg("--config") + .arg(&config) + .arg("--query") + .arg(&mutation_file) + .arg("--params") + .arg(r#"{"name":"ConfigActorEve","age":18}"#) + .arg("--json"), + )); + assert_eq!(allowed["affected_nodes"], 1); + assert_eq!(allowed["actor_id"], "act-ragnor"); +} + +#[test] +fn local_cli_actor_flag_overrides_config_actor() { + // cli.actor: act-ragnor in config + --as act-bruno on CLI → change + // denied. Flag wins per the precedence rule. Without this test, a + // future change that reverses precedence would ride through silently. 
+ let graph = SystemGraph::loaded(); + let config = graph.write_config( + "omnigraph-policy.yaml", + &local_policy_config_with_actor(&graph, "act-ragnor"), + ); + graph.write_config("policy.yaml", POLICY_E2E_YAML); + let mutation_file = insert_person_query(&graph, "system-local-cli-actor-override.gq"); + + let denied = output_failure( + cli() + .arg("--as") + .arg("act-bruno") + .arg("change") + .arg("--config") + .arg(&config) + .arg("--query") + .arg(&mutation_file) + .arg("--params") + .arg(r#"{"name":"OverrideEve","age":19}"#) + .arg("--json"), + ); + let stderr = String::from_utf8_lossy(&denied.stderr); + assert!( + stderr.contains("denied"), + "expected 'denied' when --as overrides config to bruno, got: {stderr}" + ); } diff --git a/crates/omnigraph-cli/tests/system_remote.rs b/crates/omnigraph-cli/tests/system_remote.rs index 15f3a6f..c86e32e 100644 --- a/crates/omnigraph-cli/tests/system_remote.rs +++ b/crates/omnigraph-cli/tests/system_remote.rs @@ -37,11 +37,22 @@ rules: target_branch_scope: protected "#; +const GRAPH_LIST_SERVER_POLICY_YAML: &str = r#" +version: 1 +groups: + admins: [act-admin] +rules: + - id: admins-can-list-graphs + allow: + actors: { group: admins } + actions: [graph_list] +"#; + fn yaml_string(value: &str) -> String { format!("'{}'", value.replace('\'', "''")) } -fn remote_policy_server_config(repo: &SystemRepo) -> String { +fn remote_policy_server_config(graph: &SystemGraph) -> String { format!( "\ project: @@ -54,7 +65,7 @@ server: policy: file: ./policy.yaml ", - yaml_string(&repo.path().to_string_lossy()) + yaml_string(&graph.path().to_string_lossy()) ) } @@ -81,10 +92,10 @@ auth: #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_server_and_cli_end_to_end_flow() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let mutation_file = repo.write_query( + let graph = 
SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let mutation_file = graph.write_query( "system-remote-change.gq", r#" query insert_person($name: String, $age: I32) { @@ -105,7 +116,7 @@ query insert_person($name: String, $age: I32) { assert_eq!(health["status"], "ok"); let local_snapshot = parse_stdout_json(&output_success( - cli().arg("snapshot").arg(repo.path()).arg("--json"), + cli().arg("snapshot").arg(graph.path()).arg("--json"), )); let snapshot = parse_stdout_json(&output_success( cli() @@ -120,7 +131,7 @@ query insert_person($name: String, $age: I32) { let local_read = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -180,7 +191,7 @@ query insert_person($name: String, $age: I32) { let local_verify = parse_stdout_json(&output_success( cli() .arg("read") - .arg(repo.path()) + .arg(graph.path()) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -192,6 +203,67 @@ query insert_person($name: String, $age: I32) { assert_eq!(local_verify["row_count"], 1); assert_eq!(local_verify["rows"][0]["p.name"], "Mina"); + // CLI `-e` over the HTTP transport (--config points at remote server). + // Confirms inline source survives the remote-execution path identically + // to file-based queries, and exercises `POST /query` end-to-end via the + // change-then-read round trip we just established. 
+ let inline_remote_read = parse_stdout_json(&output_success( + cli() + .arg("read") + .arg("--config") + .arg(&config) + .arg("-e") + .arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }") + .arg("--params") + .arg(r#"{"name":"Mina"}"#) + .arg("--json"), + )); + assert_eq!(inline_remote_read["row_count"], 1); + assert_eq!(inline_remote_read["rows"][0]["p.name"], "Mina"); + + let inline_remote_change = parse_stdout_json(&output_success( + cli() + .arg("change") + .arg("--config") + .arg(&config) + .arg("--query-string") + .arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }") + .arg("--params") + .arg(r#"{"name":"Inline","age":42}"#) + .arg("--json"), + )); + assert_eq!(inline_remote_change["affected_nodes"], 1); + + // `POST /query` happy path directly: a hand-rolled HTTP body using the + // new clean field names. + let http_query = client + .post(format!("{}/query", server.base_url)) + .json(&json!({ + "branch": "main", + "query": "query find($name: String) { match { $p: Person { name: $name } } return { $p.name } }", + "params": { "name": "Inline" } + })) + .send() + .unwrap() + .error_for_status() + .unwrap() + .json::() + .unwrap(); + assert_eq!(http_query["row_count"], 1); + assert_eq!(http_query["rows"][0]["p.name"], "Inline"); + + // `POST /query` rejects mutations with 400. + let http_query_mutation = client + .post(format!("{}/query", server.base_url)) + .json(&json!({ + "branch": "main", + "query": "query bad($name: String, $age: I32) { insert Person { name: $name, age: $age } }", + "params": { "name": "Nope", "age": 1 } + })) + .send() + .unwrap(); + assert_eq!(http_query_mutation.status(), reqwest::StatusCode::BAD_REQUEST); + // `run publish` / `run list` removed. Direct-to-target writes // already landed via the change call above; the commit graph is now // the audit surface (verified separately by `commit list`). 
@@ -199,11 +271,11 @@ query insert_person($name: String, $age: I32) { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] -fn remote_schema_apply_via_cli_updates_repo() { - let repo = SystemRepo::initialized(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let next_schema = repo.write_file( +fn remote_schema_apply_via_cli_updates_graph() { + let graph = SystemGraph::initialized(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let next_schema = graph.write_file( "next.pg", &fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -225,7 +297,7 @@ fn remote_schema_apply_via_cli_updates_repo() { let db = tokio::runtime::Runtime::new() .unwrap() - .block_on(Omnigraph::open(repo.path().to_string_lossy().as_ref())) + .block_on(Omnigraph::open(graph.path().to_string_lossy().as_ref())) .unwrap(); assert!( db.catalog().node_types["Person"] @@ -237,10 +309,10 @@ fn remote_schema_apply_via_cli_updates_repo() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_schema_apply_rejects_unsupported_plan() { - let repo = SystemRepo::initialized(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let breaking_schema = repo.write_file( + let graph = SystemGraph::initialized(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let breaking_schema = graph.write_file( "breaking.pg", &fs::read_to_string(fixture("test.pg")) .unwrap() @@ -263,7 +335,7 @@ fn remote_schema_apply_rejects_unsupported_plan() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_schema_apply_rejects_when_non_main_branch_exists() { - let repo = 
SystemRepo::initialized(); + let graph = SystemGraph::initialized(); output_success( cli() .arg("branch") @@ -271,12 +343,12 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() { .arg("--from") .arg("main") .arg("--uri") - .arg(repo.path()) + .arg(graph.path()) .arg("feature"), ); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let next_schema = repo.write_file( + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let next_schema = graph.write_file( "next.pg", &fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -294,16 +366,16 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() { .arg(&next_schema), ); let stderr = String::from_utf8_lossy(&output.stderr); - assert!(stderr.contains("schema apply requires a repo with only main")); + assert!(stderr.contains("schema apply requires a graph with only main")); } #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_read_preserves_projection_order_in_json_and_csv() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let ordered_query = repo.write_query( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let ordered_query = graph.write_query( "ordered-remote.gq", r#" query ordered_person($name: String) { @@ -358,10 +430,10 @@ query ordered_person($name: String) { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_branch_create_list_merge_flow() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let 
mutation_file = repo.write_query( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let mutation_file = graph.write_query( "system-remote-branch-change.gq", r#" query insert_person($name: String, $age: I32) { @@ -455,9 +527,9 @@ query insert_person($name: String, $age: I32) { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_branch_delete_removes_branch() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); parse_stdout_json(&output_success( cli() @@ -496,10 +568,10 @@ fn remote_branch_delete_removes_branch() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_export_round_trips_full_branch_graph() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let mutation_file = repo.write_query( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let mutation_file = graph.write_query( "system-remote-export-change.gq", r#" query insert_person($name: String, $age: I32) { @@ -563,8 +635,8 @@ query add_friend($from: String, $to: String) { .arg("feature") .arg("--jsonl"), )); - let export_path = repo.write_jsonl("system-remote-exported.jsonl", &exported); - let imported_repo = repo + let export_path = graph.write_jsonl("system-remote-exported.jsonl", &exported); + let imported_graph = graph .path() .parent() .unwrap() @@ -575,18 +647,18 @@ query add_friend($from: String, $to: String) { 
.arg("init") .arg("--schema") .arg(fixture("test.pg")) - .arg(&imported_repo), + .arg(&imported_graph), ); output_success( cli() .arg("load") .arg("--data") .arg(&export_path) - .arg(&imported_repo), + .arg(&imported_graph), ); let snapshot = parse_stdout_json(&output_success( - cli().arg("snapshot").arg(&imported_repo).arg("--json"), + cli().arg("snapshot").arg(&imported_graph).arg("--json"), )); assert_eq!( snapshot["tables"] @@ -610,7 +682,7 @@ query add_friend($from: String, $to: String) { let eve = parse_stdout_json(&output_success( cli() .arg("read") - .arg(&imported_repo) + .arg(&imported_graph) .arg("--query") .arg(fixture("test.gq")) .arg("--name") @@ -626,10 +698,10 @@ query add_friend($from: String, $to: String) { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_ingest_creates_review_branch_and_keeps_it_readable() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); - let ingest_data = repo.write_jsonl( + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let ingest_data = graph.write_jsonl( "system-remote-ingest.jsonl", r#"{"type":"Person","data":{"name":"Zoe","age":33}} {"type":"Person","data":{"name":"Bob","age":26}}"#, @@ -686,9 +758,9 @@ fn remote_ingest_creates_review_branch_and_keeps_it_readable() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_ingest_reuses_existing_branch_and_merges_updates() { - let repo = SystemRepo::loaded(); - let server = repo.spawn_server(); - let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); + let graph = SystemGraph::loaded(); + let server = graph.spawn_server(); + let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url)); output_success( cli() @@ 
-701,7 +773,7 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() { .arg("feature-ingest"), ); - let ingest_data = repo.write_jsonl( + let ingest_data = graph.write_jsonl( "system-remote-ingest-merge.jsonl", r#"{"type":"Person","data":{"name":"Bob","age":26}} {"type":"Person","data":{"name":"Zoe","age":33}}"#, @@ -767,23 +839,23 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() { #[test] #[ignore = "requires loopback socket permissions in sandboxed runners"] fn remote_policy_enforces_branch_first_cli_workflow() { - let repo = SystemRepo::loaded(); + let graph = SystemGraph::loaded(); let server_config = - repo.write_config("server-policy.yaml", &remote_policy_server_config(&repo)); - repo.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML); - let server = repo.spawn_server_with_config_env( + graph.write_config("server-policy.yaml", &remote_policy_server_config(&graph)); + graph.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML); + let server = graph.spawn_server_with_config_env( &server_config, &[( "OMNIGRAPH_SERVER_BEARER_TOKENS_JSON", r#"{"act-bruno":"team-token","act-ragnor":"admin-token"}"#, )], ); - let client_config = repo.write_config( + let client_config = graph.write_config( "omnigraph-policy.yaml", &remote_policy_client_config(&server.base_url), ); - repo.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n"); - let mutation_file = repo.write_query( + graph.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n"); + let mutation_file = graph.write_query( "system-remote-policy-change.gq", r#" query insert_person($name: String, $age: I32) { @@ -888,3 +960,112 @@ query insert_person($name: String, $age: I32) { assert_eq!(verify["row_count"], 1); assert_eq!(verify["rows"][0]["p.name"], "PolicyRemote"); } + +// ─── MR-668 PR 8 — omnigraph graphs list end-to-end ──────────────────────── + +/// Multi-graph server + CLI `omnigraph graphs list` end-to-end. +/// +/// Steps: +/// 1. 
Init a graph `alpha` on disk and write an `omnigraph.yaml` +/// whose `graphs:` map references it. +/// 2. Spawn the server with `--config` pointing at that file. +/// 3. `omnigraph graphs list` — expect to see `alpha`. +/// +/// Ignored by default — spawning servers needs loopback socket +/// permissions some sandboxes lack. +#[test] +#[ignore = "requires loopback socket permissions in sandboxed runners"] +fn graphs_list_against_multi_graph_server() { + let cfg_dir = tempfile::tempdir().unwrap(); + let schema_path = fixture("test.pg"); + + // Init `alpha` on disk. + let alpha_uri = cfg_dir.path().join("alpha.omni"); + tokio::runtime::Runtime::new().unwrap().block_on(async { + Omnigraph::init( + alpha_uri.to_str().unwrap(), + &fs::read_to_string(&schema_path).unwrap(), + ) + .await + .unwrap(); + }); + + fs::write( + cfg_dir.path().join("server-policy.yaml"), + GRAPH_LIST_SERVER_POLICY_YAML, + ) + .unwrap(); + + // Server config with `graphs:` map and no `server.graph` selector + // — multi mode (rule 4 of the inference matrix). `GET /graphs` is a + // server-scoped action, so the success path needs an explicit server + // policy and bearer token. + let server_config_path = cfg_dir.path().join("omnigraph.yaml"); + fs::write( + &server_config_path, + format!( + "\ +server: + policy: + file: ./server-policy.yaml +graphs: + alpha: + uri: {} +", + yaml_string(&alpha_uri.to_string_lossy()) + ), + ) + .unwrap(); + + let server = spawn_server_with_config_env( + &server_config_path, + &[( + "OMNIGRAPH_SERVER_BEARER_TOKENS_JSON", + r#"{"act-admin":"admin-token"}"#, + )], + ); + + // Client config — the CLI's `--target dev` resolves to `server.base_url`.
+ let client_config_path = cfg_dir.path().join("client.yaml"); + fs::write( + &client_config_path, + format!( + "\ +graphs: + dev: + uri: {} + bearer_token_env: GRAPH_LIST_TOKEN +cli: + graph: dev +auth: + env_file: ./.env.omni +", + yaml_string(&server.base_url) + ), + ) + .unwrap(); + fs::write( + cfg_dir.path().join(".env.omni"), + "GRAPH_LIST_TOKEN=admin-token\n", + ) + .unwrap(); + + // `graphs list` lists `alpha`. + let payload = parse_stdout_json(&output_success( + cli() + .arg("graphs") + .arg("list") + .arg("--config") + .arg(&client_config_path) + .arg("--json"), + )); + let ids: Vec<&str> = payload["graphs"] + .as_array() + .unwrap() + .iter() + .map(|g| g["graph_id"].as_str().unwrap()) + .collect(); + assert_eq!(ids, vec!["alpha"]); + + drop(server); +} diff --git a/crates/omnigraph-compiler/Cargo.toml b/crates/omnigraph-compiler/Cargo.toml index 7bb8df0..229b862 100644 --- a/crates/omnigraph-compiler/Cargo.toml +++ b/crates/omnigraph-compiler/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-compiler" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "Schema/query compiler for Omnigraph. Zero Lance dependency." license = "MIT" diff --git a/crates/omnigraph-compiler/src/catalog/schema_plan.rs b/crates/omnigraph-compiler/src/catalog/schema_plan.rs index 835a1be..a9e26b2 100644 --- a/crates/omnigraph-compiler/src/catalog/schema_plan.rs +++ b/crates/omnigraph-compiler/src/catalog/schema_plan.rs @@ -16,6 +16,29 @@ pub enum SchemaTypeKind { Edge, } +/// How a drop step interacts with data. +/// +/// - **`Soft`** — catalog tombstone only. The type / property is hidden +/// from queries but the underlying Lance column / dataset is retained +/// on disk. Reversible via `omnigraph schema unhide` (forthcoming). +/// Tier: `safe`. +/// - **`Hard`** — actual data removal. The Lance column is rewritten +/// without the property, or the Lance dataset is dropped. Irreversible +/// short of branch / snapshot restore. 
Tier: `destructive`; requires +/// `--allow-data-loss` to apply. +/// +/// The planner emits `Soft` by default; `--allow-data-loss` on the apply +/// CLI promotes drops to `Hard`. This is the dimension orthogonal to +/// `SafetyTier` from the schema-lint chassis (`crate::lint`): tier +/// describes the rule's class; mode describes the operator's intent for +/// data treatment. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DropMode { + Soft, + Hard, +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct SchemaMigrationPlan { pub supported: bool, @@ -62,6 +85,28 @@ pub enum SchemaMigrationStep { property_name: String, annotations: Vec, }, + /// Remove a node or edge type. Soft mode tombstones in the catalog + /// and retains data on disk; Hard mode drops the Lance dataset and + /// requires `--allow-data-loss`. + /// + /// Emitted by the planner in `Soft` mode when a type is removed from + /// the desired schema (see `docs/schema-lint-v1-plan.md`). + DropType { + type_kind: SchemaTypeKind, + name: String, + mode: DropMode, + }, + /// Remove a property from an existing type. Soft mode tombstones + /// the property in the catalog and retains the Lance column; Hard + /// mode rewrites the column out and requires `--allow-data-loss`. + /// + /// Emitted by the planner in `Soft` mode when a property is removed. + DropProperty { + type_kind: SchemaTypeKind, + type_name: String, + property_name: String, + mode: DropMode, + }, UnsupportedChange { entity: String, reason: String, @@ -93,6 +138,22 @@ impl SchemaMigrationStep { _ => None, } } + + /// If this step carries a schema-lint code, return the full + /// catalog entry — including family, safety tier, and default + /// severity. Used by renderers that want to display richer + /// context than just the code string (e.g. `omnigraph schema + /// plan` annotating each line with its tier).
+ /// + /// Returns `None` for steps that carry no code (the 12 of 17 + /// `UnsupportedChange` paths still untagged in v0, plus every + /// non-`UnsupportedChange` variant). + pub fn diagnostic(&self) -> Option<&'static crate::lint::DiagnosticCode> { + match self { + Self::UnsupportedChange { code: Some(c), .. } => crate::lint::lookup(c), + _ => None, + } + } } pub fn plan_schema_migration( @@ -261,13 +322,18 @@ fn plan_nodes( .iter() .filter(|node| !consumed.contains(&node.name)) { - steps.push(SchemaMigrationStep::UnsupportedChange { - entity: format!("node:{}", leftover.name), - reason: format!( - "removing node type '{}' is not supported in schema migration v1", - leftover.name - ), - code: Some(crate::lint::codes::OG_DS_102.code.to_string()), + // Node type removed from the desired schema: emit + // DropType { Node, Soft } per docs/dev/schema-lint-v1-plan.md + // commit #4. Soft = remove the table's entry from the current + // __manifest version; data files retained; previous manifest + // versions still reference the table, so Lance time travel + // restores it until cleanup_old_versions ages out the older + // __manifest entries. Hard mode (immediate dataset deletion) + // lands in commit #5 gated by --allow-data-loss. + steps.push(SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Node, + name: leftover.name.clone(), + mode: DropMode::Soft, }); } @@ -379,13 +445,15 @@ fn plan_edges( .iter() .filter(|edge| !consumed.contains(&edge.name)) { - steps.push(SchemaMigrationStep::UnsupportedChange { - entity: format!("edge:{}", leftover.name), - reason: format!( - "removing edge type '{}' is not supported in schema migration v1", - leftover.name - ), - code: Some(crate::lint::codes::OG_DS_103.code.to_string()), + // Edge type removed from the desired schema: emit + // DropType { Edge, Soft } per docs/dev/schema-lint-v1-plan.md + // commit #4. 
Same Soft mechanics as node-type drops — manifest + // entry tombstoned, data files retained, reversible via Lance + // time travel until cleanup. + steps.push(SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Edge, + name: leftover.name.clone(), + mode: DropMode::Soft, }); } } @@ -499,18 +567,22 @@ fn plan_properties( .iter() .filter(|property| !consumed.contains(&property.name)) { - steps.push(SchemaMigrationStep::UnsupportedChange { - entity: format!( - "{}:{}.{}", - schema_type_kind_key(type_kind), - type_name, - leftover.name - ), - reason: format!( - "removing property '{}.{}' is not supported in schema migration v1", - type_name, leftover.name - ), - code: Some(crate::lint::codes::OG_DS_104.code.to_string()), + // Property removed from the desired schema: emit + // DropProperty { Soft } per docs/schema-lint-v1-plan.md + // commit #3. The Soft mode reuses the existing + // stage_overwrite rewrite path — batch_for_schema_apply_rewrite + // iterates target_schema.fields(), so the dropped column is + // naturally projected away. The prior Lance version retains + // the column until cleanup_old_versions runs, matching the + // OG-DS-104 destructive-tier expectation that data remains + // recoverable via time travel until cleanup. Hard mode (with + // immediate compact_files + cleanup_old_versions) lands in + // commit #5, gated by --allow-data-loss. + steps.push(SchemaMigrationStep::DropProperty { + type_kind, + type_name: type_name.to_string(), + property_name: leftover.name.clone(), + mode: DropMode::Soft, }); } @@ -863,6 +935,139 @@ node Account @rename_from("User") { })); } + #[test] + fn plan_emits_soft_drop_for_removed_nullable_property() { + // Removing a property from the desired schema emits + // DropProperty { Soft } (schema-lint v1 chassis commit #3, + // MR-694). The plan is `supported = true` — the apply path + // handles soft drop via the existing stage_overwrite rewrite + // projection. 
Verified at the integration level by + // `apply_schema_drops_a_nullable_property_softly_preserves_prior_version` + // in `crates/omnigraph/tests/schema_apply.rs`. + let accepted = build_schema_ir( + &parse_schema( + r#" +node Person { + name: String @key + age: I32? +} +"#, + ) + .unwrap(), + ) + .unwrap(); + let desired = build_schema_ir( + &parse_schema( + r#" +node Person { + name: String @key +} +"#, + ) + .unwrap(), + ) + .unwrap(); + + let plan = plan_schema_migration(&accepted, &desired).unwrap(); + assert!( + plan.supported, + "drop-property plan must be supported: {plan:?}" + ); + assert!( + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Node, + type_name, + property_name, + mode: DropMode::Soft, + .. + } if type_name == "Person" && property_name == "age" + )), + "expected DropProperty {{ Soft }} step in plan: {plan:?}", + ); + // Negative: no UnsupportedChange anywhere in the plan. + assert!( + !plan + .steps + .iter() + .any(|step| matches!(step, UnsupportedChange { .. })), + "soft drop must not emit UnsupportedChange: {plan:?}", + ); + } + + #[test] + fn plan_emits_soft_drop_for_removed_node_and_edge_types() { + // Removing a node type + the edge type that references it + // emits two DropType { Soft } steps (chassis v1 commit #4, + // MR-694). The plan is `supported = true` — apply tombstones + // both manifest entries. Time-travel reversibility is verified + // at the integration level by + // `apply_schema_drops_node_and_referencing_edge_softly` + // in `crates/omnigraph/tests/schema_apply.rs`. 
+ let accepted = build_schema_ir( + &parse_schema( + r#" +node Person { + name: String @key +} + +node Company { + name: String @key +} + +edge WorksAt: Person -> Company +"#, + ) + .unwrap(), + ) + .unwrap(); + let desired = build_schema_ir( + &parse_schema( + r#" +node Person { + name: String @key +} +"#, + ) + .unwrap(), + ) + .unwrap(); + + let plan = plan_schema_migration(&accepted, &desired).unwrap(); + assert!(plan.supported, "drop-type plan must be supported: {plan:?}"); + assert!( + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Node, + name, + mode: DropMode::Soft, + } if name == "Company" + )), + "expected DropType {{ Node, Company, Soft }} in plan: {plan:?}", + ); + assert!( + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Edge, + name, + mode: DropMode::Soft, + } if name == "WorksAt" + )), + "expected DropType {{ Edge, WorksAt, Soft }} in plan: {plan:?}", + ); + // Negative: no UnsupportedChange anywhere in the plan. + assert!( + !plan + .steps + .iter() + .any(|step| matches!(step, UnsupportedChange { .. })), + "soft type drop must not emit UnsupportedChange: {plan:?}", + ); + } + #[test] fn plan_rejects_required_property_addition() { let accepted = build_schema_ir( @@ -935,4 +1140,55 @@ node Person @description("new") { }], })); } + + #[test] + fn drop_steps_round_trip_through_serde() { + // The planner emits DropType / DropProperty in `Soft` mode for + // removed types and properties; `Hard` is covered here too + // because the serde shape needs to be stable from day one. A future + // SchemaIR JSON containing one of these must deserialize + // back to the same value. This test pins the wire format + // so a v0 schema-ir consumer never sees a surprise variant + // shape after v1 ships.
+ let steps = vec![ + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Node, + name: "Person".to_string(), + mode: DropMode::Soft, + }, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Edge, + name: "Knows".to_string(), + mode: DropMode::Hard, + }, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Node, + type_name: "Person".to_string(), + property_name: "age".to_string(), + mode: DropMode::Soft, + }, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Interface, + type_name: "Named".to_string(), + property_name: "alias".to_string(), + mode: DropMode::Hard, + }, + ]; + + for step in steps { + let json = serde_json::to_string(&step).expect("serialize"); + let round_trip: SchemaMigrationStep = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(step, round_trip, "round-trip mismatch on {json}"); + } + } + + #[test] + fn drop_mode_serde_uses_snake_case() { + // External tools may write SchemaIR JSON by hand. Pin the + // wire form so we don't silently break them later. 
+ assert_eq!(serde_json::to_string(&DropMode::Soft).unwrap(), "\"soft\""); + assert_eq!(serde_json::to_string(&DropMode::Hard).unwrap(), "\"hard\""); + let soft: DropMode = serde_json::from_str("\"soft\"").unwrap(); + assert_eq!(soft, DropMode::Soft); + } } diff --git a/crates/omnigraph-compiler/src/ir/lower.rs b/crates/omnigraph-compiler/src/ir/lower.rs index c130d18..6999d69 100644 --- a/crates/omnigraph-compiler/src/ir/lower.rs +++ b/crates/omnigraph-compiler/src/ir/lower.rs @@ -271,9 +271,7 @@ fn lower_clauses( .traversals .iter() .find(|rt| { - rt.src == traversal.src - && rt.dst == traversal.dst - && rt.edge_type == edge.name + rt.src == traversal.src && rt.dst == traversal.dst && rt.edge_type == edge.name }) .map(|rt| rt.direction) .unwrap_or(Direction::Out); diff --git a/crates/omnigraph-compiler/src/ir/lower_tests.rs b/crates/omnigraph-compiler/src/ir/lower_tests.rs index 50ce93a..7aa140e 100644 --- a/crates/omnigraph-compiler/src/ir/lower_tests.rs +++ b/crates/omnigraph-compiler/src/ir/lower_tests.rs @@ -205,12 +205,8 @@ insert Knows { from: $name, to: $friend } let ir = lower_mutation_query(&qf.queries[0]).unwrap(); assert_eq!(ir.ops.len(), 2); - assert!( - matches!(&ir.ops[0], MutationOpIR::Insert { type_name, .. } if type_name == "Person") - ); - assert!( - matches!(&ir.ops[1], MutationOpIR::Insert { type_name, .. } if type_name == "Knows") - ); + assert!(matches!(&ir.ops[0], MutationOpIR::Insert { type_name, .. } if type_name == "Person")); + assert!(matches!(&ir.ops[1], MutationOpIR::Insert { type_name, .. } if type_name == "Knows")); } /// Destination binding is deferred: NodeScan + Expand + Filter (no cross-join). 
diff --git a/crates/omnigraph-compiler/src/lib.rs b/crates/omnigraph-compiler/src/lib.rs index 102b479..ba1aba2 100644 --- a/crates/omnigraph-compiler/src/lib.rs +++ b/crates/omnigraph-compiler/src/lib.rs @@ -16,11 +16,11 @@ pub use catalog::schema_ir::{ schema_ir_pretty_json, }; pub use catalog::schema_plan::{ - SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, plan_schema_migration, + DropMode, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, plan_schema_migration, }; -pub use lint::{DiagnosticCode, Family, SafetyTier, Severity}; pub use ir::ParamMap; pub use ir::lower::{lower_mutation_query, lower_query}; +pub use lint::{DiagnosticCode, Family, SafetyTier, Severity}; pub use query::ast::Literal; pub use query::lint::{ QueryLintFinding, QueryLintOutput, QueryLintQueryKind, QueryLintQueryResult, diff --git a/crates/omnigraph-compiler/src/lint/codes.rs b/crates/omnigraph-compiler/src/lint/codes.rs index e53bf31..ba870cf 100644 --- a/crates/omnigraph-compiler/src/lint/codes.rs +++ b/crates/omnigraph-compiler/src/lint/codes.rs @@ -116,7 +116,13 @@ pub const ALL_CODES: &[DiagnosticCode] = &[ ]; /// Codes actually emitted by the planner in v0 (i.e. not reserved). -pub const EMITTED_IN_V0: &[&str] = &["OG-DS-102", "OG-DS-103", "OG-DS-104", "OG-MF-103", "OG-MF-106"]; +pub const EMITTED_IN_V0: &[&str] = &[ + "OG-DS-102", + "OG-DS-103", + "OG-DS-104", + "OG-MF-103", + "OG-MF-106", +]; /// Look up a code by its string identifier. 
pub fn lookup(code: &str) -> Option<&'static DiagnosticCode> { diff --git a/crates/omnigraph-compiler/src/lint/mod.rs b/crates/omnigraph-compiler/src/lint/mod.rs index 79e9986..5c6c47d 100644 --- a/crates/omnigraph-compiler/src/lint/mod.rs +++ b/crates/omnigraph-compiler/src/lint/mod.rs @@ -24,5 +24,5 @@ pub mod codes; pub mod diagnostic; -pub use codes::{lookup, DiagnosticCode, ALL_CODES}; +pub use codes::{ALL_CODES, DiagnosticCode, lookup}; pub use diagnostic::{Family, SafetyTier, Severity}; diff --git a/crates/omnigraph-compiler/src/query/lint.rs b/crates/omnigraph-compiler/src/query/lint.rs index 38ae6ee..5f56774 100644 --- a/crates/omnigraph-compiler/src/query/lint.rs +++ b/crates/omnigraph-compiler/src/query/lint.rs @@ -38,7 +38,7 @@ pub enum QueryLintQueryKind { #[serde(rename_all = "lowercase")] pub enum QueryLintSchemaSourceKind { File, - Repo, + Graph, } #[derive(Debug, Clone, PartialEq, Eq, Serialize)] @@ -59,9 +59,9 @@ impl QueryLintSchemaSource { } } - pub fn repo(uri: impl Into) -> Self { + pub fn graph(uri: impl Into) -> Self { Self { - kind: QueryLintSchemaSourceKind::Repo, + kind: QueryLintSchemaSourceKind::Graph, path: None, uri: Some(uri.into()), } diff --git a/crates/omnigraph-compiler/src/query/parser.rs b/crates/omnigraph-compiler/src/query/parser.rs index 20fedb8..4ba8476 100644 --- a/crates/omnigraph-compiler/src/query/parser.rs +++ b/crates/omnigraph-compiler/src/query/parser.rs @@ -137,12 +137,11 @@ fn parse_query_decl(pair: pest::iterators::Pair) -> Result { Rule::mutation_body => { for mutation_pair in body.into_inner() { if let Rule::mutation_stmt = mutation_pair.as_rule() { - let stmt = - mutation_pair.into_inner().next().ok_or_else(|| { - NanoError::Parse( - "mutation statement cannot be empty".to_string(), - ) - })?; + let stmt = mutation_pair.into_inner().next().ok_or_else(|| { + NanoError::Parse( + "mutation statement cannot be empty".to_string(), + ) + })?; mutations.push(parse_mutation_stmt(stmt)?); } } diff --git 
a/crates/omnigraph-compiler/src/schema/parser_tests.rs b/crates/omnigraph-compiler/src/schema/parser_tests.rs index 9b96a4e..2302cfb 100644 --- a/crates/omnigraph-compiler/src/schema/parser_tests.rs +++ b/crates/omnigraph-compiler/src/schema/parser_tests.rs @@ -271,9 +271,9 @@ age: I32? match &schema.declarations[0] { SchemaDecl::Node(n) => { assert!( - n.constraints.iter().any( - |c| matches!(c, Constraint::Range { property, .. } if property == "age") - ) + n.constraints + .iter() + .any(|c| matches!(c, Constraint::Range { property, .. } if property == "age")) ); } _ => panic!("expected Node"), diff --git a/crates/omnigraph-policy/Cargo.toml b/crates/omnigraph-policy/Cargo.toml new file mode 100644 index 0000000..dacda35 --- /dev/null +++ b/crates/omnigraph-policy/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "omnigraph-policy" +version = "0.6.0" +edition = "2024" +description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum." 
+license = "MIT" +repository = "https://github.com/ModernRelay/omnigraph" +homepage = "https://github.com/ModernRelay/omnigraph" +documentation = "https://docs.rs/omnigraph-policy" + +[dependencies] +cedar-policy = { workspace = true } +clap = { workspace = true } +color-eyre = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +serde_yaml = { workspace = true } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/crates/omnigraph-policy/src/lib.rs b/crates/omnigraph-policy/src/lib.rs new file mode 100644 index 0000000..6459fcd --- /dev/null +++ b/crates/omnigraph-policy/src/lib.rs @@ -0,0 +1,1492 @@ +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::fmt; +use std::fs; +use std::path::Path; +use std::str::FromStr; + +use cedar_policy::{ + Authorizer, Context, Decision, Entities, Entity, EntityId, EntityTypeName, EntityUid, Policy, + PolicyId, PolicySet, Request, Schema, ValidationMode, Validator, +}; +use clap::ValueEnum; +use color_eyre::eyre::{Result, bail, eyre}; +use serde::{Deserialize, Serialize}; +use serde_json::json; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, ValueEnum)] +#[serde(rename_all = "snake_case")] +pub enum PolicyAction { + Read, + Export, + Change, + SchemaApply, + BranchCreate, + BranchDelete, + BranchMerge, + /// Reserved for **policy-management** surfaces. Per MR-724 Option A, + /// this gates operator actions like hot-reloading policy / tokens + /// (MR-726), querying the audit log (MR-732), and listing / + /// approving pending two-person-rule requests (MR-734). None of + /// those endpoints exist yet, so today no engine or HTTP code + /// calls `enforce(Admin, ...)`. 
The variant is kept in the enum so + /// the action vocabulary is complete from chassis day one — when + /// the first consumer surface ships, it can just call + /// `enforce(Admin, ResourceScope::Graph, actor)` without needing + /// to add the enum variant + update policy.yaml schemas + redeploy. + /// + /// Operators can write Cedar rules referencing `admin` today; they + /// won't fire (no call site) but they're load-bearing for the + /// future shape. Avoid writing such rules until the first consumer + /// endpoint ships to prevent confusion. + Admin, + /// MR-668: management action that operates on the server's graph + /// registry, not on a single graph's contents. The Cedar `appliesTo` + /// declaration binds it to `resource: Server` instead of the + /// per-graph `resource: Graph`. Operators authorize a group with: + /// ```yaml + /// rules: + /// - id: admins-can-list-graphs + /// allow: + /// actors: { group: admins } + /// actions: [graph_list] + /// ``` + /// `branch_scope` and `target_branch_scope` are NOT supported for + /// this action — there's no branch context at the server level. + /// Runtime `graph_create` / `graph_delete` are intentionally omitted + /// from v0.6.0; operators add and remove graphs by editing + /// `omnigraph.yaml` and restarting. 
+ GraphList, +} + +impl PolicyAction { + pub fn as_str(self) -> &'static str { + match self { + Self::Read => "read", + Self::Export => "export", + Self::Change => "change", + Self::SchemaApply => "schema_apply", + Self::BranchCreate => "branch_create", + Self::BranchDelete => "branch_delete", + Self::BranchMerge => "branch_merge", + Self::Admin => "admin", + Self::GraphList => "graph_list", + } + } + + fn uses_branch_scope(self) -> bool { + matches!(self, Self::Read | Self::Export | Self::Change) + } + + fn uses_target_branch_scope(self) -> bool { + matches!( + self, + Self::BranchCreate | Self::SchemaApply | Self::BranchDelete | Self::BranchMerge + ) + } + + /// Which Cedar resource entity governs this action. + /// Per-graph actions (Read, Change, …) apply to `Omnigraph::Graph::""`. + /// Server-scoped management actions (GraphList) apply to + /// `Omnigraph::Server::"root"`. `Admin` is reserved without a current + /// call site; classified as per-graph until MR-724 picks a shape. + pub fn resource_kind(self) -> PolicyResourceKind { + match self { + Self::GraphList => PolicyResourceKind::Server, + Self::Read + | Self::Export + | Self::Change + | Self::SchemaApply + | Self::BranchCreate + | Self::BranchDelete + | Self::BranchMerge + | Self::Admin => PolicyResourceKind::Graph, + } + } +} + +/// Which Cedar entity an action's policies apply to. Internal to +/// `omnigraph-policy` — drives the `compile_policy_source` template +/// and the request-time resource UID construction. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum PolicyResourceKind { + /// `Omnigraph::Graph::""` — per-graph actions. + Graph, + /// `Omnigraph::Server::"root"` — management actions. + Server, +} + +/// Which kind of policy file the caller is loading. 
Drives the +/// load-time validation that catches a "wrong action in wrong file" +/// mistake — a graph policy with `graph_list` rules, or a server +/// policy with `read` rules, both compile silently as Cedar but +/// never match any actual request. Typing the loader makes the +/// mistake a load-time error. +/// +/// Pairs with [`PolicyAction::resource_kind`]: every action's resource +/// kind must match the engine kind it's loaded under. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum PolicyEngineKind { + /// Engine is loaded for a single graph; only actions whose + /// `resource_kind()` is `PolicyResourceKind::Graph` are allowed. + Graph, + /// Engine is loaded for server-level management endpoints; only + /// actions whose `resource_kind()` is `PolicyResourceKind::Server` + /// are allowed. + Server, +} + +impl fmt::Display for PolicyAction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +impl FromStr for PolicyAction { + type Err = color_eyre::eyre::Error; + + fn from_str(value: &str) -> Result { + match value.trim() { + "read" => Ok(Self::Read), + "export" => Ok(Self::Export), + "change" => Ok(Self::Change), + "schema_apply" => Ok(Self::SchemaApply), + "branch_create" => Ok(Self::BranchCreate), + "branch_delete" => Ok(Self::BranchDelete), + "branch_merge" => Ok(Self::BranchMerge), + "admin" => Ok(Self::Admin), + "graph_list" => Ok(Self::GraphList), + other => bail!("unknown policy action '{other}'"), + } + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PolicyBranchScope { + Any, + Protected, + Unprotected, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PolicyActorSelector { + pub group: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PolicyAllowRule { + pub actors: PolicyActorSelector, + pub actions: Vec, + pub branch_scope: Option, + pub target_branch_scope: Option, +} + 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PolicyRule { + pub id: String, + pub allow: PolicyAllowRule, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PolicyConfig { + pub version: u32, + #[serde(default)] + pub groups: BTreeMap>, + #[serde(default)] + pub protected_branches: Vec, + #[serde(default)] + pub rules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PolicyTestConfig { + pub version: u32, + #[serde(default)] + pub cases: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PolicyTestCase { + pub id: String, + pub actor: String, + pub action: PolicyAction, + pub branch: Option, + pub target_branch: Option, + pub expect: PolicyExpectation, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PolicyExpectation { + Allow, + Deny, +} + +/// What a caller wants to do, sans identity. Actor identity flows +/// through a separate `actor_id: &str` parameter on +/// [`PolicyEngine::authorize`] / [`PolicyChecker::check`] — encoding +/// the architectural invariant that actor identity is server-authoritative +/// and must not be supplied by the same code path that supplies the +/// requested action. In the HTTP layer, the bearer-token middleware +/// resolves the actor and passes it independently; clients cannot +/// smuggle identity inside this struct. 
+#[derive(Debug, Clone)] +pub struct PolicyRequest { + pub action: PolicyAction, + pub branch: Option, + pub target_branch: Option, +} + +#[derive(Debug, Clone)] +pub struct PolicyDecision { + pub allowed: bool, + pub matched_rule_id: Option, + pub message: String, +} + +pub struct PolicyCompiler; + +#[derive(Clone)] +pub struct PolicyEngine { + graph_id: String, + protected_branches: BTreeSet, + known_actors: BTreeSet, + schema: Schema, + entities: Entities, + policies: PolicySet, + policy_to_rule: HashMap, +} + +impl PolicyConfig { + pub fn load(path: &Path) -> Result { + let config: Self = serde_yaml::from_str(&fs::read_to_string(path)?)?; + config.validate()?; + Ok(config) + } + + pub fn validate(&self) -> Result<()> { + if self.version != 1 { + bail!("policy version must be 1"); + } + + for (group, members) in &self.groups { + if group.trim().is_empty() { + bail!("policy group names must not be blank"); + } + if members.is_empty() { + bail!("policy group '{group}' must not be empty"); + } + for actor in members { + if actor.trim().is_empty() { + bail!("policy group '{group}' contains a blank actor id"); + } + } + } + + for branch in &self.protected_branches { + if branch.trim().is_empty() { + bail!("protected branch names must not be blank"); + } + } + + let mut seen_rule_ids = HashSet::new(); + for rule in &self.rules { + if rule.id.trim().is_empty() { + bail!("policy rule ids must not be blank"); + } + if !seen_rule_ids.insert(rule.id.clone()) { + bail!("duplicate policy rule id '{}'", rule.id); + } + if rule.allow.actors.group.trim().is_empty() { + bail!("policy rule '{}' must reference a non-blank group", rule.id); + } + if !self.groups.contains_key(rule.allow.actors.group.as_str()) { + bail!( + "policy rule '{}' references unknown group '{}'", + rule.id, + rule.allow.actors.group + ); + } + if rule.allow.actions.is_empty() { + bail!("policy rule '{}' must include at least one action", rule.id); + } + if rule.allow.branch_scope.is_some() && 
rule.allow.target_branch_scope.is_some() { + bail!( + "policy rule '{}' may specify branch_scope or target_branch_scope, not both", + rule.id + ); + } + if let Some(_) = rule.allow.branch_scope { + for action in &rule.allow.actions { + if !action.uses_branch_scope() { + bail!( + "policy rule '{}' uses branch_scope with unsupported action '{}'", + rule.id, + action + ); + } + } + } + if let Some(_) = rule.allow.target_branch_scope { + for action in &rule.allow.actions { + if !action.uses_target_branch_scope() { + bail!( + "policy rule '{}' uses target_branch_scope with unsupported action '{}'", + rule.id, + action + ); + } + } + } + // MR-668: server-scoped actions have no branch context and + // must not be mixed with per-graph actions in the same + // rule (each rule generates one Cedar `permit` referencing + // a specific resource kind). + let mut server_scoped = false; + let mut graph_scoped = false; + for action in &rule.allow.actions { + match action.resource_kind() { + PolicyResourceKind::Server => server_scoped = true, + PolicyResourceKind::Graph => graph_scoped = true, + } + } + if server_scoped && graph_scoped { + bail!( + "policy rule '{}' mixes the server-scoped action `graph_list` \ + with per-graph actions; split into separate rules", + rule.id + ); + } + if server_scoped + && (rule.allow.branch_scope.is_some() || rule.allow.target_branch_scope.is_some()) + { + bail!( + "policy rule '{}' uses branch_scope/target_branch_scope with a \ + server-scoped action; server-scoped actions have no branch context", + rule.id + ); + } + } + + Ok(()) + } +} + +impl PolicyTestConfig { + pub fn load(path: &Path) -> Result { + let config: Self = serde_yaml::from_str(&fs::read_to_string(path)?)?; + if config.version != 1 { + bail!("policy test version must be 1"); + } + let mut seen = HashSet::new(); + for case in &config.cases { + if case.id.trim().is_empty() { + bail!("policy test case ids must not be blank"); + } + if !seen.insert(case.id.clone()) { + 
bail!("duplicate policy test case id '{}'", case.id); + } + if case.actor.trim().is_empty() { + bail!("policy test case '{}' must not use a blank actor", case.id); + } + } + Ok(config) + } +} + +impl PolicyCompiler { + pub fn compile(config: &PolicyConfig, graph_id: &str) -> Result { + config.validate()?; + let (schema, schema_warnings) = Schema::from_cedarschema_str(policy_schema_source())?; + let schema_warnings = schema_warnings + .map(|warning| warning.to_string()) + .collect::>(); + if !schema_warnings.is_empty() { + bail!("policy schema warnings:\n{}", schema_warnings.join("\n")); + } + let entities = compile_entities(config, graph_id, &schema)?; + let (policies, policy_to_rule) = compile_policies(config, graph_id)?; + let validator = Validator::new(schema.clone()); + let validation = validator.validate(&policies, ValidationMode::Strict); + let errors = validation + .validation_errors() + .map(|err| err.to_string()) + .collect::>(); + if !errors.is_empty() { + bail!("policy validation failed:\n{}", errors.join("\n")); + } + + let known_actors = config + .groups + .values() + .flat_map(|members| members.iter().cloned()) + .collect(); + Ok(PolicyEngine { + graph_id: graph_id.to_string(), + protected_branches: config.protected_branches.iter().cloned().collect(), + known_actors, + schema, + entities, + policies, + policy_to_rule, + }) + } +} + +impl PolicyEngine { + /// Load a per-graph policy file. Rejects rules whose actions are + /// server-scoped (e.g. `graph_list`) — those belong in a server + /// policy file, not a per-graph one. + /// + /// `graph_id` is the label of the graph this engine governs; + /// becomes the Cedar `Omnigraph::Graph::""` resource + /// for every per-graph action evaluated against this engine. 
+ pub fn load_graph(path: &Path, graph_id: &str) -> Result { + let config = PolicyConfig::load(path)?; + validate_kind_alignment(&config, PolicyEngineKind::Graph)?; + PolicyCompiler::compile(&config, graph_id) + } + + /// Load a server-level policy file. Rejects rules whose actions + /// are per-graph (e.g. `read`, `change`) — those belong in a + /// per-graph policy file, not the server one. Takes no `graph_id`: + /// server-scoped actions resolve against the singleton + /// `Omnigraph::Server::"root"` entity, never a Graph. + pub fn load_server(path: &Path) -> Result { + let config = PolicyConfig::load(path)?; + validate_kind_alignment(&config, PolicyEngineKind::Server)?; + // The Graph entity created by the compiler is never referenced + // by a server-scoped rule, so the label below is purely a + // placeholder. Use the canonical SERVER_RESOURCE_ID so any + // future inspection of an unreachable Graph entity at least + // points at the right concept. + PolicyCompiler::compile(&config, SERVER_RESOURCE_ID) + } + + /// Evaluate a request. `actor_id` is supplied as a separate + /// argument (not inside `PolicyRequest`) so the type system enforces + /// the "server-authoritative actor identity" invariant — clients + /// supplying a `PolicyRequest` cannot smuggle identity through the + /// same struct that carries the requested action. + pub fn authorize(&self, actor_id: &str, request: &PolicyRequest) -> Result { + if !self.known_actors.contains(actor_id) { + return Ok(self.deny( + None, + format!( + "policy denied action '{}' for unknown actor '{}'", + request.action, actor_id + ), + )); + } + + let principal = entity_uid("Actor", actor_id)?; + let action = entity_uid("Action", request.action.as_str())?; + // Pick the resource entity based on the action's `resource_kind`. + // Server-scoped actions (`graph_list`) bind to + // `Omnigraph::Server::"root"`; per-graph actions bind to + // `Omnigraph::Graph::""`. 
+ let resource = match request.action.resource_kind() { + PolicyResourceKind::Server => entity_uid("Server", SERVER_RESOURCE_ID)?, + PolicyResourceKind::Graph => entity_uid("Graph", &self.graph_id)?, + }; + let context_value = json!({ + "has_branch": request.branch.is_some(), + "branch": request.branch.clone().unwrap_or_default(), + "has_target_branch": request.target_branch.is_some(), + "target_branch": request.target_branch.clone().unwrap_or_default(), + "branch_is_protected": request.branch.as_ref().is_some_and(|branch| self.protected_branches.contains(branch)), + "target_branch_is_protected": request.target_branch.as_ref().is_some_and(|branch| self.protected_branches.contains(branch)), + }); + let context = Context::from_json_value(context_value, Some((&self.schema, &action)))?; + let cedar_request = Request::new(principal, action, resource, context, Some(&self.schema))?; + let response = + Authorizer::new().is_authorized(&cedar_request, &self.policies, &self.entities); + let errors = response + .diagnostics() + .errors() + .map(|err| err.to_string()) + .collect::>(); + if !errors.is_empty() { + bail!("policy evaluation failed:\n{}", errors.join("\n")); + } + + let matched_rule_id = response + .diagnostics() + .reason() + .filter_map(|policy_id| { + let key: &str = policy_id.as_ref(); + self.policy_to_rule.get(key).cloned() + }) + .min(); + + Ok(match response.decision() { + Decision::Allow => PolicyDecision { + allowed: true, + matched_rule_id: matched_rule_id.clone(), + message: format!( + "policy allowed action '{}' for actor '{}'", + request.action, actor_id + ), + }, + Decision::Deny => { + let message = format!( + "policy denied action '{}'{}{} for actor '{}'", + request.action, + request + .branch + .as_deref() + .map(|branch| format!(" on branch '{}'", branch)) + .unwrap_or_default(), + request + .target_branch + .as_deref() + .map(|branch| format!(" targeting branch '{}'", branch)) + .unwrap_or_default(), + actor_id + ); + self.deny(matched_rule_id, 
message) + } + }) + } + + pub fn run_tests(&self, tests: &PolicyTestConfig) -> Result<()> { + if tests.version != 1 { + bail!("policy test version must be 1"); + } + let mut failures = Vec::new(); + for case in &tests.cases { + let decision = self.authorize( + &case.actor, + &PolicyRequest { + action: case.action, + branch: case.branch.clone(), + target_branch: case.target_branch.clone(), + }, + )?; + let expected_allowed = matches!(case.expect, PolicyExpectation::Allow); + if decision.allowed != expected_allowed { + failures.push(format!( + "{}: expected {:?} but got {}", + case.id, + case.expect, + if decision.allowed { "allow" } else { "deny" } + )); + } + } + if failures.is_empty() { + Ok(()) + } else { + bail!("policy tests failed:\n{}", failures.join("\n")) + } + } + + pub fn known_actor_count(&self) -> usize { + self.known_actors.len() + } + + fn deny(&self, matched_rule_id: Option, message: String) -> PolicyDecision { + PolicyDecision { + allowed: false, + matched_rule_id, + message, + } + } +} + +/// Reject any rule whose actions don't match the engine kind +/// being loaded. Closes the "wrong action in wrong file silently +/// no-ops" class — `graph_list` in a per-graph file or `read` in +/// a server file fails at load time instead of compiling cleanly +/// and never matching a request. 
+fn validate_kind_alignment(config: &PolicyConfig, kind: PolicyEngineKind) -> Result<()> { + let required = match kind { + PolicyEngineKind::Graph => PolicyResourceKind::Graph, + PolicyEngineKind::Server => PolicyResourceKind::Server, + }; + for rule in &config.rules { + for action in &rule.allow.actions { + if action.resource_kind() != required { + let (got, expected_file) = match action.resource_kind() { + PolicyResourceKind::Server => ("server-scoped", "server policy file"), + PolicyResourceKind::Graph => ("per-graph", "per-graph policy file"), + }; + bail!( + "policy rule '{}' uses {} action '{}' in a {:?} policy file; \ + move it to a {}", + rule.id, + got, + action, + kind, + expected_file + ); + } + } + } + Ok(()) +} + +fn compile_entities(config: &PolicyConfig, graph_id: &str, schema: &Schema) -> Result { + let mut group_entities = Vec::new(); + for group in config.groups.keys() { + group_entities.push(Entity::new( + entity_uid("Group", group)?, + HashMap::new(), + HashSet::::new(), + )?); + } + + let mut actor_groups: BTreeMap> = BTreeMap::new(); + for (group, members) in &config.groups { + for actor in members { + actor_groups + .entry(actor.clone()) + .or_default() + .insert(group.clone()); + } + } + + let mut actor_entities = Vec::new(); + for (actor, groups) in actor_groups { + let parents = groups + .iter() + .map(|group| entity_uid("Group", group)) + .collect::>>()?; + actor_entities.push(Entity::new( + entity_uid("Actor", &actor)?, + HashMap::new(), + parents, + )?); + } + + let graph_entity = Entity::new( + entity_uid("Graph", graph_id)?, + HashMap::new(), + HashSet::::new(), + )?; + + let mut entities = Vec::new(); + entities.extend(group_entities); + entities.extend(actor_entities); + entities.push(graph_entity); + + // MR-668: include the `Omnigraph::Server::"root"` entity + // whenever any rule references a server-scoped action. Cedar's + // schema validator will otherwise reject the policy. 
Keeping this + // conditional (rather than always-on) avoids polluting test + // assertions for graph-only policies. + let any_server_scoped = config.rules.iter().any(|rule| { + rule.allow + .actions + .iter() + .any(|action| action.resource_kind() == PolicyResourceKind::Server) + }); + if any_server_scoped { + entities.push(Entity::new( + entity_uid("Server", SERVER_RESOURCE_ID)?, + HashMap::new(), + HashSet::::new(), + )?); + } + + Ok(Entities::from_entities(entities, Some(schema))?) +} + +fn compile_policies( + config: &PolicyConfig, + graph_id: &str, +) -> Result<(PolicySet, HashMap)> { + let mut policies = Vec::new(); + let mut policy_to_rule = HashMap::new(); + + for rule in &config.rules { + for action in &rule.allow.actions { + let policy_id = PolicyId::new(format!("{}:{}", rule.id, action.as_str())); + let source = compile_policy_source(rule, action, graph_id); + let policy = Policy::parse(Some(policy_id.clone()), source.as_str())?; + policy_to_rule.insert(policy_id.to_string(), rule.id.clone()); + policies.push(policy); + } + } + + Ok((PolicySet::from_policies(policies)?, policy_to_rule)) +} + +fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, graph_id: &str) -> String { + let mut conditions = Vec::new(); + if let Some(scope) = rule.allow.branch_scope { + conditions.push(branch_scope_condition(scope)); + } + if let Some(scope) = rule.allow.target_branch_scope { + conditions.push(target_branch_scope_condition(scope)); + } + + let when = if conditions.is_empty() { + String::new() + } else { + format!("\nwhen {{ {} }}", conditions.join(" && ")) + }; + + // MR-668: emit the resource literal that matches the action's + // `resource_kind`. Per-graph actions reference the engine's + // `Omnigraph::Graph::""` instance; server-scoped + // actions reference the singleton `Omnigraph::Server::"root"`. 
+ let resource_literal = match action.resource_kind() { + PolicyResourceKind::Graph => { + format!("Omnigraph::Graph::{}", cedar_literal(graph_id)) + } + PolicyResourceKind::Server => { + format!("Omnigraph::Server::{}", cedar_literal(SERVER_RESOURCE_ID)) + } + }; + + format!( + r#"permit ( + principal in Omnigraph::Group::{group}, + action == Omnigraph::Action::{action}, + resource == {resource_literal} +){when};"#, + group = cedar_literal(&rule.allow.actors.group), + action = cedar_literal(action.as_str()), + when = when, + resource_literal = resource_literal, + ) +} + +fn branch_scope_condition(scope: PolicyBranchScope) -> String { + match scope { + PolicyBranchScope::Any => "true".to_string(), + PolicyBranchScope::Protected => { + "context.has_branch && context.branch_is_protected".to_string() + } + PolicyBranchScope::Unprotected => { + "context.has_branch && context.branch_is_protected == false".to_string() + } + } +} + +fn target_branch_scope_condition(scope: PolicyBranchScope) -> String { + match scope { + PolicyBranchScope::Any => "true".to_string(), + PolicyBranchScope::Protected => { + "context.has_target_branch && context.target_branch_is_protected".to_string() + } + PolicyBranchScope::Unprotected => { + "context.has_target_branch && context.target_branch_is_protected == false".to_string() + } + } +} + +fn policy_schema_source() -> &'static str { + // MR-668: `entity Server;` plus the `graph_list` action that + // binds to it. Per-graph actions stay bound to `Graph`. + // The Cedar schema string lives here (not on a fixture file) so any + // omnigraph-policy build picks up the new vocabulary in lock-step + // with the Rust code. 
+ r#" +namespace Omnigraph { + type RequestContext = { + has_branch: Bool, + branch: String, + has_target_branch: Bool, + target_branch: String, + branch_is_protected: Bool, + target_branch_is_protected: Bool, + }; + + entity Actor in [Group]; + entity Group; + entity Graph; + entity Server; + + action "read" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "export" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "change" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "schema_apply" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "branch_create" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "branch_delete" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "branch_merge" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + action "admin" appliesTo { principal: Actor, resource: Graph, context: RequestContext }; + + action "graph_list" appliesTo { principal: Actor, resource: Server, context: RequestContext }; +} +"# +} + +/// Canonical id of the `Omnigraph::Server` Cedar entity. There's only one +/// (the running server); the id is fixed at `"root"` so Cedar rules can +/// reference it unambiguously: `resource == Omnigraph::Server::"root"`. 
+const SERVER_RESOURCE_ID: &str = "root"; + +fn entity_uid(entity_type: &str, id: &str) -> Result { + let typename = EntityTypeName::from_str(&format!("Omnigraph::{entity_type}"))?; + let entity_id = EntityId::from_str(id).map_err(|err| eyre!(err.to_string()))?; + Ok(EntityUid::from_type_name_and_id(typename, entity_id)) +} + +fn cedar_literal(value: &str) -> String { + serde_json::to_string(value).expect("string literal should serialize") +} + +impl PolicyRequest { + pub fn action(&self) -> PolicyAction { + self.action + } + + pub fn branch(&self) -> Option<&str> { + self.branch.as_deref() + } + + pub fn target_branch(&self) -> Option<&str> { + self.target_branch.as_deref() + } +} + +// ─── PolicyChecker trait + ResourceScope (MR-722 chassis core) ─────────────── +// +// The trait below is the engine-layer integration point for policy +// enforcement. `Omnigraph::enforce()` calls `check()` at the head of +// every mutating method; consumers in the engine crate hold an +// `Arc` and don't reach into Cedar internals. +// +// Two enforcement layers compose via this trait — different methods, +// same Cedar policies: +// +// * **Engine-layer (this trait — `check`)** — coarse gate at operation +// entry. Answers "can this actor invoke this action on this scope at all?" +// * **Query-layer (MR-725 — will add `predicate_for`)** — fine gate +// inside the query planner. Answers "for the rows/types touched, which +// can the actor see/modify?" Cedar predicates compile to DataFusion +// `Expr` and push into the scan. +// +// The two layers have non-overlapping responsibilities and must not +// drift. `ResourceScope` deliberately stays at branch granularity; +// per-type and per-row scope live in MR-725 via the (future) +// `predicate_for` method. Do not add `Type(TypeRef)` or `Row(predicate)` +// variants to `ResourceScope` — that's the boundary the chassis design +// pins (see MR-722 design refinements comment, 2026-05-17). + +/// Resource scope for a policy decision. 
Branch-grained on purpose — +/// per-type / per-row granularity is owned by the query-layer (MR-725). +/// +/// The variants map to today's `(branch, target_branch)` pair convention +/// in [`PolicyRequest`]. Each writer in the engine picks the variant +/// that matches how the existing HTTP-layer Cedar policies were +/// written, so the engine-layer enforce() call and the HTTP-layer +/// authorize_request() call evaluate the same decision. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum ResourceScope { + /// Action applies to the graph as a whole (no branch context). + /// Used by graph-level ops if any ever go through enforcement. + /// Maps to `(branch: None, target_branch: None)`. + Graph, + /// Action operates on a single branch — reading from it, writing + /// to it, mutating it. Maps to `(branch: Some(X), target_branch: None)`. + /// Used by Read, Export, Change. + Branch(String), + /// Action targets a branch as its destination/effect. The action + /// modifies this branch (SchemaApply applies the new schema to it) + /// or removes it (BranchDelete). Maps to + /// `(branch: None, target_branch: Some(X))`. + /// Used by SchemaApply, BranchDelete. + TargetBranch(String), + /// Action transitions between two branches. `source` is the + /// branch being read-from / merged-from / forked-from; `target` + /// is the destination. Maps to + /// `(branch: Some(source), target_branch: Some(target))`. + /// Used by BranchCreate (from→new), BranchMerge (source→target). + BranchTransition { source: String, target: String }, +} + +impl ResourceScope { + /// Lower the scope into the (branch, target_branch) pair carried + /// by today's [`PolicyRequest`]. The mapping preserves the + /// HTTP-layer's existing scope conventions so Cedar policies don't + /// have to be rewritten when engine-layer enforcement is enabled. 
+ pub fn to_branch_pair(&self) -> (Option<&str>, Option<&str>) { + match self { + ResourceScope::Graph => (None, None), + ResourceScope::Branch(branch) => (Some(branch.as_str()), None), + ResourceScope::TargetBranch(target) => (None, Some(target.as_str())), + ResourceScope::BranchTransition { source, target } => { + (Some(source.as_str()), Some(target.as_str())) + } + } + } +} + +/// Engine-layer policy enforcement error. `Denied` is the normal "policy +/// said no" path; `Internal` covers evaluation failures (malformed rule, +/// Cedar internal error, etc.). +#[derive(Debug, Clone)] +pub enum PolicyError { + /// Policy evaluated successfully and denied the action. + Denied(String), + /// Policy evaluation itself failed (not a denial — a bug or + /// configuration error). + Internal(String), +} + +impl fmt::Display for PolicyError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PolicyError::Denied(msg) => write!(f, "policy denied: {msg}"), + PolicyError::Internal(msg) => write!(f, "policy evaluation failed: {msg}"), + } + } +} + +impl std::error::Error for PolicyError {} + +/// Engine-layer policy enforcement trait. Implemented by `PolicyEngine` +/// (Cedar-backed) and any mock checker used in tests. +/// +/// MR-725 will extend this trait with a query-layer pushdown method — +/// roughly `fn predicate_for(&self, type_ref: &TypeRef, actor: &str) -> +/// Option`. Engine and query-layer enforcement back to +/// the same Cedar policies but consume different methods. Don't conflate +/// them by overloading `check`. +pub trait PolicyChecker: Send + Sync { + /// Engine-layer gate. Called at the head of every mutating engine + /// method. `Ok(())` allows the action; `Err(PolicyError::Denied)` + /// denies; `Err(PolicyError::Internal)` reports an evaluation bug. 
+ fn check( + &self, + action: PolicyAction, + scope: &ResourceScope, + actor: &str, + ) -> Result<(), PolicyError>; +} + +impl PolicyChecker for PolicyEngine { + fn check( + &self, + action: PolicyAction, + scope: &ResourceScope, + actor: &str, + ) -> Result<(), PolicyError> { + let (branch, target_branch) = scope.to_branch_pair(); + let request = PolicyRequest { + action, + branch: branch.map(|s| s.to_string()), + target_branch: target_branch.map(|s| s.to_string()), + }; + let decision = self + .authorize(actor, &request) + .map_err(|e| PolicyError::Internal(e.to_string()))?; + if decision.allowed { + Ok(()) + } else { + Err(PolicyError::Denied(decision.message)) + } + } +} + +#[cfg(test)] +mod tests { + use super::{ + PolicyAction, PolicyCompiler, PolicyConfig, PolicyEngine, PolicyExpectation, PolicyRequest, + PolicyTestCase, PolicyTestConfig, + }; + + #[test] + fn rejects_duplicate_rule_ids() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + team: [act-andrew] +rules: + - id: same + allow: + actors: { group: team } + actions: [read] + branch_scope: any + - id: same + allow: + actors: { group: team } + actions: [export] + branch_scope: any +"#, + ) + .unwrap(); + + let err = policy.validate().unwrap_err(); + assert!(err.to_string().contains("duplicate policy rule id")); + } + + #[test] + fn rejects_unknown_group_references() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + team: [act-andrew] +rules: + - id: bad + allow: + actors: { group: admins } + actions: [read] + branch_scope: any +"#, + ) + .unwrap(); + + let err = policy.validate().unwrap_err(); + assert!(err.to_string().contains("references unknown group")); + } + + #[test] + fn rejects_invalid_scope_action_combinations() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + team: [act-andrew] +rules: + - id: bad + allow: + actors: { group: team } + actions: [branch_merge] + branch_scope: protected +"#, + ) + 
.unwrap(); + + let err = policy.validate().unwrap_err(); + assert!(err.to_string().contains("unsupported action")); + } + + #[test] + fn compiles_and_authorizes_branch_and_target_rules() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + team: [act-andrew, act-bruno] + admins: [act-andrew] +protected_branches: [main] +rules: + - id: team-read + allow: + actors: { group: team } + actions: [read, export] + branch_scope: any + - id: team-write + allow: + actors: { group: team } + actions: [change] + branch_scope: unprotected + - id: admins-promote + allow: + actors: { group: admins } + actions: [branch_delete, branch_merge] + target_branch_scope: protected +"#, + ) + .unwrap(); + + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); + let allow = engine + .authorize( + "act-bruno", + &PolicyRequest { + action: PolicyAction::Change, + branch: Some("feature".to_string()), + target_branch: None, + }, + ) + .unwrap(); + assert!(allow.allowed); + assert_eq!(allow.matched_rule_id.as_deref(), Some("team-write")); + + let deny = engine + .authorize( + "act-bruno", + &PolicyRequest { + action: PolicyAction::BranchDelete, + branch: None, + target_branch: Some("main".to_string()), + }, + ) + .unwrap(); + assert!(!deny.allowed); + + let admin = engine + .authorize( + "act-andrew", + &PolicyRequest { + action: PolicyAction::BranchDelete, + branch: None, + target_branch: Some("main".to_string()), + }, + ) + .unwrap(); + assert!(admin.allowed); + assert_eq!(admin.matched_rule_id.as_deref(), Some("admins-promote")); + } + + #[test] + fn policy_tests_enforce_expected_outcomes() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + team: [act-andrew] +protected_branches: [main] +rules: + - id: team-read + allow: + actors: { group: team } + actions: [read] + branch_scope: any +"#, + ) + .unwrap(); + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); + let tests = PolicyTestConfig { + version: 1, + 
cases: vec![ + PolicyTestCase { + id: "allow-read".to_string(), + actor: "act-andrew".to_string(), + action: PolicyAction::Read, + branch: Some("main".to_string()), + target_branch: None, + expect: PolicyExpectation::Allow, + }, + PolicyTestCase { + id: "deny-change".to_string(), + actor: "act-andrew".to_string(), + action: PolicyAction::Change, + branch: Some("main".to_string()), + target_branch: None, + expect: PolicyExpectation::Deny, + }, + ], + }; + + engine.run_tests(&tests).unwrap(); + } + + #[test] + fn schema_apply_uses_target_branch_scope() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + admins: [act-ragnor] +protected_branches: [main] +rules: + - id: admins-schema-apply + allow: + actors: { group: admins } + actions: [schema_apply] + target_branch_scope: protected +"#, + ) + .unwrap(); + + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); + let allow = engine + .authorize( + "act-ragnor", + &PolicyRequest { + action: PolicyAction::SchemaApply, + branch: None, + target_branch: Some("main".to_string()), + }, + ) + .unwrap(); + assert!(allow.allowed); + + let deny = engine + .authorize( + "act-ragnor", + &PolicyRequest { + action: PolicyAction::SchemaApply, + branch: None, + target_branch: Some("feature".to_string()), + }, + ) + .unwrap(); + assert!(!deny.allowed); + } + + // ─── MR-668 — server-scoped action (graph_list) ─ + + #[test] + fn graph_list_action_authorizes_against_server_resource() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + admins: [act-andrew] + viewers: [act-bruno] +rules: + - id: admins-list-graphs + allow: + actors: { group: admins } + actions: [graph_list] +"#, + ) + .unwrap(); + + // The graph_label passed at compile time is irrelevant for + // server-scoped actions — they resolve against + // `Omnigraph::Server::"root"` regardless. We pass a sentinel + // so it's obvious the value isn't used. 
+ let engine = PolicyCompiler::compile(&policy, "ignored").unwrap(); + + let allow = engine + .authorize( + "act-andrew", + &PolicyRequest { + action: PolicyAction::GraphList, + branch: None, + target_branch: None, + }, + ) + .unwrap(); + assert!(allow.allowed); + assert_eq!(allow.matched_rule_id.as_deref(), Some("admins-list-graphs")); + + // Different actor, same policy → deny. + let deny = engine + .authorize( + "act-bruno", + &PolicyRequest { + action: PolicyAction::GraphList, + branch: None, + target_branch: None, + }, + ) + .unwrap(); + assert!(!deny.allowed); + } + + #[test] + fn server_scoped_rule_cannot_use_branch_scope() { + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + admins: [act-andrew] +rules: + - id: bad-branch-scope-on-graph-list + allow: + actors: { group: admins } + actions: [graph_list] + branch_scope: any +"#, + ) + .unwrap(); + let err = policy.validate().unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("branch_scope") || msg.contains("server-scoped"), + "expected branch_scope rejection for server-scoped action; got: {msg}" + ); + } + + #[test] + fn rule_mixing_server_and_per_graph_actions_is_rejected() { + // A single rule must reference exactly one resource kind. + // `graph_list` (Server) + `read` (Graph) in one allow block + // is invalid — operators must split the rule. 
+ let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + admins: [act-andrew] +rules: + - id: mixed-resource-kinds + allow: + actors: { group: admins } + actions: [graph_list, read] +"#, + ) + .unwrap(); + let err = policy.validate().unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("server-scoped") || msg.contains("split into separate rules"), + "expected mix-resource-kinds rejection; got: {msg}" + ); + } + + #[test] + fn per_graph_rules_continue_to_work_alongside_server_rules() { + // Decision 6 contract: existing operator policies (which only + // reference per-graph actions) keep compiling and authorizing + // as before, even when the compiled-in schema now declares + // `Server` + `graph_*` actions. This pins the "Cedar refactor + // is operator-invisible" promise. + let policy: PolicyConfig = serde_yaml::from_str( + r#" +version: 1 +groups: + team: [act-andrew] +protected_branches: [main] +rules: + - id: team-read + allow: + actors: { group: team } + actions: [read, export] + branch_scope: any +"#, + ) + .unwrap(); + let engine = PolicyCompiler::compile(&policy, "graph").unwrap(); + let allow = engine + .authorize( + "act-andrew", + &PolicyRequest { + action: PolicyAction::Read, + branch: Some("main".to_string()), + target_branch: None, + }, + ) + .unwrap(); + assert!(allow.allowed); + assert_eq!(allow.matched_rule_id.as_deref(), Some("team-read")); + } + + // ─── MR-668 follow-up — load_graph / load_server kind alignment ─ + + /// A per-graph policy file containing a `graph_list` rule fails + /// at load time. Pre-fix, the file compiled cleanly and the rule + /// silently never matched (per-graph engine never gets a + /// `graph_list` check). Closes the "wrong action, wrong file, + /// silent no-op" class. 
+ #[test] + fn load_graph_rejects_server_scoped_action() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("bad-graph-policy.yaml"); + std::fs::write( + &path, + r#" +version: 1 +groups: + admins: [act-andrew] +rules: + - id: misplaced-graph-list + allow: + actors: { group: admins } + actions: [graph_list] +"#, + ) + .unwrap(); + let err = match PolicyEngine::load_graph(&path, "g1") { + Ok(_) => panic!("expected server-scoped action in per-graph file to be rejected"), + Err(e) => e, + }; + let msg = err.to_string(); + assert!( + msg.contains("server-scoped") && msg.contains("graph_list"), + "expected server-scoped-in-graph-file rejection, got: {msg}" + ); + } + + /// A server policy file containing a `read` rule fails at load + /// time. Pre-fix, the file compiled cleanly and the rule silently + /// never matched (server engine never gets a `read` check). + #[test] + fn load_server_rejects_per_graph_action() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("bad-server-policy.yaml"); + std::fs::write( + &path, + r#" +version: 1 +groups: + team: [act-andrew] +rules: + - id: misplaced-read + allow: + actors: { group: team } + actions: [read] + branch_scope: any +"#, + ) + .unwrap(); + let err = match PolicyEngine::load_server(&path) { + Ok(_) => panic!("expected per-graph action in server file to be rejected"), + Err(e) => e, + }; + let msg = err.to_string(); + assert!( + msg.contains("per-graph") && msg.contains("read"), + "expected per-graph-in-server-file rejection, got: {msg}" + ); + } + + /// Positive case: a properly-shaped per-graph policy loads via + /// `load_graph` and authorizes as expected. Verifies the + /// kind-alignment check is permissive when the file is correct. 
+ #[test] + fn load_graph_accepts_per_graph_only_policy() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("ok-graph-policy.yaml"); + std::fs::write( + &path, + r#" +version: 1 +groups: + team: [act-andrew] +rules: + - id: team-read + allow: + actors: { group: team } + actions: [read] + branch_scope: any +"#, + ) + .unwrap(); + let engine = PolicyEngine::load_graph(&path, "g1").unwrap(); + let decision = engine + .authorize( + "act-andrew", + &PolicyRequest { + action: PolicyAction::Read, + branch: Some("main".to_string()), + target_branch: None, + }, + ) + .unwrap(); + assert!(decision.allowed); + } + + /// Positive case: a properly-shaped server policy loads via + /// `load_server` and authorizes the `graph_list` action. + #[test] + fn load_server_accepts_server_only_policy() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("ok-server-policy.yaml"); + std::fs::write( + &path, + r#" +version: 1 +groups: + admins: [act-andrew] +rules: + - id: admins-list-graphs + allow: + actors: { group: admins } + actions: [graph_list] +"#, + ) + .unwrap(); + let engine = PolicyEngine::load_server(&path).unwrap(); + let decision = engine + .authorize( + "act-andrew", + &PolicyRequest { + action: PolicyAction::GraphList, + branch: None, + target_branch: None, + }, + ) + .unwrap(); + assert!(decision.allowed); + } +} diff --git a/crates/omnigraph-server/Cargo.toml b/crates/omnigraph-server/Cargo.toml index 9070c97..e9a0e46 100644 --- a/crates/omnigraph-server/Cargo.toml +++ b/crates/omnigraph-server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-server" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "HTTP server for the Omnigraph graph database." 
license = "MIT" @@ -19,8 +19,9 @@ default = [] aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"] [dependencies] -omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" } -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } +omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" } axum = { workspace = true } clap = { workspace = true } color-eyre = { workspace = true } @@ -32,12 +33,14 @@ tracing = { workspace = true } tracing-subscriber = { workspace = true } tower-http = { workspace = true } utoipa = { workspace = true } -cedar-policy = { workspace = true } futures = { workspace = true } sha2 = { workspace = true } subtle = { workspace = true } async-trait = { workspace = true } +arc-swap = { workspace = true } dashmap = "6" +regex = { workspace = true } +thiserror = { workspace = true } aws-config = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio", "credentials-process", "sso"] } aws-sdk-secretsmanager = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio"] } @@ -45,4 +48,5 @@ aws-sdk-secretsmanager = { version = "1", optional = true, default-features = fa tempfile = { workspace = true } tower = { workspace = true } serial_test = "3" +lance = { workspace = true } lance-index = { workspace = true } diff --git a/crates/omnigraph-server/examples/bench_actor_isolation.rs b/crates/omnigraph-server/examples/bench_actor_isolation.rs index 1eca032..5a708e0 100644 --- a/crates/omnigraph-server/examples/bench_actor_isolation.rs +++ b/crates/omnigraph-server/examples/bench_actor_isolation.rs @@ -199,8 +199,8 @@ async fn drive_light_actor( let mut other = 0usize; for op_idx in 0..ops { let request_body = ChangeRequest { - query_source: "query 
insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}".to_string(), - query_name: Some("insert_person".to_string()), + query: "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}".to_string(), + name: Some("insert_person".to_string()), params: Some(serde_json::json!({ "name": format!("light-{actor_idx}-{op_idx}"), "age": op_idx as i32, @@ -259,10 +259,10 @@ async fn main() { } let temp = tempfile::tempdir().expect("tempdir"); - let repo = temp.path().join("bench.omni"); - Omnigraph::init(repo.to_str().unwrap(), SCHEMA) + let graph = temp.path().join("bench.omni"); + Omnigraph::init(graph.to_str().unwrap(), SCHEMA) .await - .expect("init repo"); + .expect("init graph"); // Build bearer tokens: one for the heavy actor + one per light actor. let mut tokens: Vec<(String, String)> = @@ -270,21 +270,17 @@ async fn main() { for i in 0..args.light_actors { tokens.push((format!("act-light-{i}"), format!("light-token-{i}"))); } - let db = Omnigraph::open(repo.to_str().unwrap()) + let db = Omnigraph::open(graph.to_str().unwrap()) .await - .expect("open repo"); + .expect("open graph"); // Construct a custom WorkloadController with the requested caps and // pass it through `AppState::new_with_workload`. Avoids the // `unsafe { std::env::set_var(...) }` antipattern that violates // `setenv`'s thread-safety precondition once the multi-thread tokio // runtime is up. 
let workload = WorkloadController::new(args.inflight_cap, args.byte_cap); - let state = AppState::new_with_workload( - repo.to_string_lossy().to_string(), - db, - tokens, - workload, - ); + let state = + AppState::new_with_workload(graph.to_string_lossy().to_string(), db, tokens, workload); let app = build_app(state); eprintln!( diff --git a/crates/omnigraph-server/examples/bench_concurrent_http.rs b/crates/omnigraph-server/examples/bench_concurrent_http.rs index 11505e7..6a8411a 100644 --- a/crates/omnigraph-server/examples/bench_concurrent_http.rs +++ b/crates/omnigraph-server/examples/bench_concurrent_http.rs @@ -121,8 +121,8 @@ async fn drive_actor( for op_idx in 0..ops { let table_idx = pick_table(actor_idx, op_idx, mode, num_tables); let request_body = ChangeRequest { - query_source: build_query_source(table_idx), - query_name: Some("insert_item".to_string()), + query: build_query_source(table_idx), + name: Some("insert_item".to_string()), params: Some(serde_json::json!({ "name": format!("a{actor_idx}_o{op_idx}"), "value": op_idx as i32, @@ -152,7 +152,9 @@ async fn drive_actor( errors += 1; // Drain body for logging on the first few failures. 
if errors <= 3 { - let body = to_bytes(response.into_body(), 64 * 1024).await.unwrap_or_default(); + let body = to_bytes(response.into_body(), 64 * 1024) + .await + .unwrap_or_default(); eprintln!( "actor {actor_idx} op {op_idx} status {status} body {}", String::from_utf8_lossy(&body) @@ -173,13 +175,13 @@ async fn main() { } let temp = tempfile::tempdir().expect("tempdir"); - let repo = temp.path().join("bench.omni"); + let graph = temp.path().join("bench.omni"); let schema = build_schema(args.tables); - Omnigraph::init(repo.to_str().unwrap(), &schema) + Omnigraph::init(graph.to_str().unwrap(), &schema) .await - .expect("init repo"); + .expect("init graph"); - let state = AppState::open(repo.to_string_lossy().to_string()) + let state = AppState::open(graph.to_string_lossy().to_string()) .await .expect("open AppState"); let app = build_app(state); diff --git a/crates/omnigraph-server/src/api.rs b/crates/omnigraph-server/src/api.rs index 89534f5..2c818ae 100644 --- a/crates/omnigraph-server/src/api.rs +++ b/crates/omnigraph-server/src/api.rs @@ -235,7 +235,9 @@ pub struct CommitListOutput { pub struct ReadRequest { /// GQ query source. May declare one or more named queries; pick one with /// `query_name` if there is more than one. - #[schema(example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}")] + #[schema( + example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}" + )] pub query_source: String, /// Name of the query to run when `query_source` declares multiple. Optional /// when only one query is declared. @@ -248,26 +250,70 @@ pub struct ReadRequest { pub snapshot: Option, } +/// Inline read-query request for `POST /query`. +/// +/// Friendlier-named alternative to [`ReadRequest`] for ad-hoc reads and +/// AI-agent integration. 
Mutations are rejected with 400 — use `POST +/// /mutate` (or its deprecated alias `POST /change`) for write queries. +/// Field names are deliberately short (`query`, `name`) to match the GQ +/// keyword and the CLI `-e` flag. #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -pub struct ChangeRequest { - /// GQ mutation source containing `insert`, `update`, or `delete` statements. - /// May declare multiple named mutations; pick one with `query_name`. - #[schema(example = "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}")] - pub query_source: String, - /// Name of the mutation to run when `query_source` declares multiple. - pub query_name: Option, - /// JSON object whose keys match the mutation's declared parameters. +pub struct QueryRequest { + /// GQ read-query source. May declare one or more named queries; pick one + /// with `name` when more than one is declared. Mutations + /// (`insert`/`update`/`delete`) get 400 — use `POST /mutate` (or its + /// deprecated alias `POST /change`) instead. + #[schema(example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}")] + pub query: String, + /// Name of the query to run when `query` declares multiple. Optional when + /// only one query is declared. + pub name: Option, + /// JSON object whose keys match the query's declared parameters. pub params: Option, - /// Target branch. Defaults to `main`. + /// Branch to read from. Mutually exclusive with `snapshot`. Defaults to `main`. pub branch: Option, + /// Snapshot id to read from. Mutually exclusive with `branch`. + pub snapshot: Option, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ChangeRequest { + /// GQ mutation source containing `insert`, `update`, or `delete` statements. + /// May declare multiple named mutations; pick one with `name`. + /// + /// Accepts the legacy field name `query_source` as a deserialization alias. 
+ #[schema( + example = "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}" + )] + #[serde(alias = "query_source")] + pub query: String, + /// Name of the mutation to run when `query` declares multiple. + /// + /// Accepts the legacy field name `query_name` as a deserialization alias. + #[serde(default, alias = "query_name")] + pub name: Option, + /// JSON object whose keys match the mutation's declared parameters. + #[serde(default)] + pub params: Option, + /// Target branch. Defaults to `main`. + #[serde(default)] + pub branch: Option, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] pub struct SchemaApplyRequest { /// Project schema in `.pg` source form. The diff against the current /// schema produces the migration steps that will be applied. - #[schema(example = "node Person {\n name: String @key\n age: I32?\n}\n\nedge Knows: Person -> Person")] + #[schema( + example = "node Person {\n name: String @key\n age: I32?\n}\n\nedge Knows: Person -> Person" + )] pub schema_source: String, + /// When true, promote every `DropMode::Soft` step in the plan to + /// `DropMode::Hard`, making the prior column data unreachable + /// after the apply. Matches the CLI's `--allow-data-loss` flag. + /// Defaults to `false` (drops remain reversible via time travel). + #[serde(default)] + pub allow_data_loss: bool, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] @@ -297,7 +343,9 @@ pub struct IngestRequest { pub mode: Option, /// NDJSON payload: one record per line, each shaped /// `{"type": "", "data": {...}}`. 
- #[schema(example = "{\"type\": \"Person\", \"data\": {\"name\": \"Alice\", \"age\": 30}}\n{\"type\": \"Person\", \"data\": {\"name\": \"Bob\", \"age\": 25}}")] + #[schema( + example = "{\"type\": \"Person\", \"data\": {\"name\": \"Alice\", \"age\": 30}}\n{\"type\": \"Person\", \"data\": {\"name\": \"Bob\", \"age\": 25}}" + )] pub data: String, } @@ -338,6 +386,11 @@ pub enum ErrorCode { Forbidden, BadRequest, NotFound, + /// 405 Method Not Allowed — the route exists but the active server + /// mode doesn't serve this method (e.g. `GET /graphs` in single-graph + /// mode). Distinct from 404 so clients can tell "wrong context" from + /// "no such resource." + MethodNotAllowed, Conflict, /// 429 Too Many Requests — per-actor admission cap exceeded. /// Clients should respect the `Retry-After` header. @@ -461,3 +514,23 @@ pub fn read_target_output(target: &ReadTarget) -> ReadTargetOutput { }, } } + +// ─── MR-668 — management endpoint shapes ────────────────────────────────── + +/// One entry in the response from `GET /graphs`. Cluster operators +/// consume this list to discover which graphs the server is currently +/// serving. The shape is intentionally minimal — `graph_id` and `uri` +/// are the only fields a routing client needs. +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct GraphInfo { + pub graph_id: String, + pub uri: String, +} + +/// Response from `GET /graphs`. Lists every graph registered with the +/// server in alphabetical order by `graph_id` (sorted server-side so +/// clients get deterministic output across requests). 
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct GraphListResponse { + pub graphs: Vec, +} diff --git a/crates/omnigraph-server/src/auth.rs b/crates/omnigraph-server/src/auth.rs index 80b6ed5..4f05228 100644 --- a/crates/omnigraph-server/src/auth.rs +++ b/crates/omnigraph-server/src/auth.rs @@ -119,7 +119,10 @@ pub(crate) fn parse_json_secret_payload(payload: &str) -> Result) -> Result { - let config = - aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; + let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await; let client = aws_sdk_secretsmanager::Client::new(&config); Ok(Self { client, @@ -200,8 +202,8 @@ pub use aws::SecretsManagerTokenSource; #[cfg(test)] mod tests { use super::*; - use std::env; use serial_test::serial; + use std::env; fn clear_env() { unsafe { @@ -232,7 +234,10 @@ mod tests { unsafe { env::remove_var("OMNIGRAPH_SERVER_BEARER_TOKEN"); } - assert_eq!(tokens, vec![("default".to_string(), "some-token".to_string())]); + assert_eq!( + tokens, + vec![("default".to_string(), "some-token".to_string())] + ); } #[tokio::test] diff --git a/crates/omnigraph-server/src/config.rs b/crates/omnigraph-server/src/config.rs index 035ad74..87737d0 100644 --- a/crates/omnigraph-server/src/config.rs +++ b/crates/omnigraph-server/src/config.rs @@ -6,6 +6,7 @@ use std::path::{Path, PathBuf}; use clap::ValueEnum; use color_eyre::eyre::{Result, bail}; use serde::{Deserialize, Serialize}; + pub const DEFAULT_CONFIG_FILE: &str = "omnigraph.yaml"; #[derive(Debug, Clone, Default, Serialize, Deserialize)] @@ -17,6 +18,12 @@ pub struct ProjectConfig { pub struct TargetConfig { pub uri: String, pub bearer_token_env: Option, + /// Per-graph Cedar policy file (MR-668). In single-graph mode this + /// field is unused — the top-level `policy.file` applies. In + /// multi-graph mode, each `graphs..policy.file` governs that + /// graph's HTTP-layer Cedar enforcement. 
+ #[serde(default)] + pub policy: PolicySettings, } #[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Serialize, Deserialize, ValueEnum)] @@ -46,6 +53,12 @@ pub struct CliDefaults { pub output_format: Option, pub table_max_column_width: Option, pub table_cell_layout: Option, + /// Default actor identity for CLI direct-engine writes (MR-722). + /// Used when `policy.file` is configured and the operator hasn't + /// passed `--as ` on the command line. With policy configured + /// and neither this nor `--as` set, the engine-layer footgun guard + /// fires (no silent bypass). + pub actor: Option, } #[derive(Debug, Clone, Default, Serialize, Deserialize)] @@ -53,6 +66,12 @@ pub struct ServerDefaults { #[serde(rename = "graph")] pub graph: Option, pub bind: Option, + /// Server-level Cedar policy (MR-668). Governs management endpoints + /// — currently `GET /graphs`; future runtime add/remove endpoints + /// will plug in here too. In single-graph mode this is unused — the + /// top-level `policy.file` covers the single graph. + #[serde(default)] + pub policy: PolicySettings, } #[derive(Debug, Clone, Default, Serialize, Deserialize)] @@ -74,7 +93,16 @@ pub struct PolicySettings { #[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum AliasCommand { + /// Read alias (canonical: `query`). The legacy spelling `read` is + /// kept as the variant name for back-compat with serialized configs + /// and external SDK callers; `query` is accepted on the wire via the + /// serde alias. + #[serde(alias = "query")] Read, + /// Mutation alias (canonical: `mutate`). The legacy spelling `change` + /// is kept as the variant name for back-compat; `mutate` is accepted + /// on the wire via the serde alias. 
+ #[serde(alias = "mutate")] Change, } @@ -191,23 +219,46 @@ impl OmnigraphConfig { } pub fn resolve_auth_env_file(&self) -> Option { - let path = self.auth.env_file.as_deref()?; - let path = Path::new(path); - Some(if path.is_absolute() { - path.to_path_buf() - } else { - self.base_dir.join(path) - }) + self.auth + .env_file + .as_deref() + .map(|path| self.resolve_config_path(path)) } pub fn resolve_policy_file(&self) -> Option { - let path = self.policy.file.as_deref()?; - let path = Path::new(path); - Some(if path.is_absolute() { - path.to_path_buf() - } else { - self.base_dir.join(path) - }) + self.policy + .file + .as_deref() + .map(|path| self.resolve_config_path(path)) + } + + /// Resolve the per-graph policy file path for the named target, + /// relative to the config file's `base_dir`. Returns `None` if the + /// target is unknown or no per-graph `policy.file` is set. + pub fn resolve_target_policy_file(&self, target_name: &str) -> Option { + let target = self.graphs.get(target_name)?; + target + .policy + .file + .as_deref() + .map(|path| self.resolve_config_path(path)) + } + + /// Resolve the server-level policy file path (used by management + /// endpoints). Returns `None` if `server.policy.file` is not set. + pub fn resolve_server_policy_file(&self) -> Option { + self.server + .policy + .file + .as_deref() + .map(|path| self.resolve_config_path(path)) + } + + /// Resolve a raw config-supplied URI (which may be relative) to its + /// absolute form. URIs containing `://` are passed through as-is; + /// relative paths are joined with the config file's `base_dir`. 
+ pub fn resolve_uri_value(&self, value: &str) -> String { + self.resolve_config_uri(value) } pub fn resolve_policy_tests_file(&self) -> Option { @@ -276,6 +327,15 @@ impl OmnigraphConfig { self.base_dir.join(path).to_string_lossy().to_string() } } + + fn resolve_config_path(&self, value: &str) -> PathBuf { + let path = Path::new(value); + if path.is_absolute() { + path.to_path_buf() + } else { + self.base_dir.join(path) + } + } } pub fn default_config_path() -> PathBuf { diff --git a/crates/omnigraph-server/src/graph_id.rs b/crates/omnigraph-server/src/graph_id.rs new file mode 100644 index 0000000..ffccd2a --- /dev/null +++ b/crates/omnigraph-server/src/graph_id.rs @@ -0,0 +1,254 @@ +//! `GraphId` — registry-level identity for a graph in multi-graph mode (MR-668). +//! +//! Validation lives in `GraphId::try_from(String)`; nothing else can construct a +//! `GraphId`. The newtype prevents `graph_id` strings from escaping the storage +//! root via path traversal or colliding with engine-reserved filenames. +//! +//! Regex: `^[a-zA-Z0-9-]{1,64}$` +//! +//! The engine reserves every filename starting with `_` at the graph root +//! (`_schema.pg`, `_schema.ir.json`, `__schema_state.json`, `__manifest/`, +//! `__recovery/`, etc.). Disallowing leading underscores at the regex level +//! means a `graph_id` can never collide with engine-managed files. Path +//! traversal (`..`, `/`) is unrepresentable. +//! +//! `policies` is additionally reserved as a future-proofing measure for a +//! potential `/graphs/policies/...` cluster route. + +use std::fmt; +use std::sync::OnceLock; + +use color_eyre::eyre::{Result, bail}; +use regex::Regex; +use serde::{Deserialize, Serialize}; + +/// Maximum length of a `GraphId` value. +pub const GRAPH_ID_MAX_LEN: usize = 64; + +/// Validated registry-level identity for a graph. +/// +/// Constructed only via `GraphId::try_from(String)` or +/// `GraphId::try_from(&str)`. The inner `String` is private to enforce the +/// validation contract. 
+#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)] +#[serde(transparent)] +pub struct GraphId(String); + +impl GraphId { + /// View the validated identifier as `&str`. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for GraphId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +impl AsRef for GraphId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl TryFrom for GraphId { + type Error = color_eyre::eyre::Error; + + fn try_from(value: String) -> Result { + validate(value.as_str())?; + Ok(Self(value)) + } +} + +impl TryFrom<&str> for GraphId { + type Error = color_eyre::eyre::Error; + + fn try_from(value: &str) -> Result { + validate(value)?; + Ok(Self(value.to_string())) + } +} + +// Custom Deserialize that re-runs validation. Otherwise a serde-derived impl +// would accept any String, defeating the newtype's guarantee. +impl<'de> Deserialize<'de> for GraphId { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Self::try_from(s).map_err(serde::de::Error::custom) + } +} + +fn validate(value: &str) -> Result<()> { + if value.is_empty() { + bail!("graph_id must not be empty"); + } + if value.len() > GRAPH_ID_MAX_LEN { + bail!( + "graph_id '{}' is {} chars; max {}", + value, + value.len(), + GRAPH_ID_MAX_LEN + ); + } + if !regex().is_match(value) { + bail!( + "graph_id '{}' must match ^[a-zA-Z0-9-]{{1,64}}$ — \ + no underscores (engine reserves them), no path separators, no unicode", + value + ); + } + if is_reserved(value) { + bail!( + "graph_id '{}' is reserved (would collide with engine-managed names or \ + future cluster routes)", + value + ); + } + Ok(()) +} + +fn regex() -> &'static Regex { + static RE: OnceLock = OnceLock::new(); + RE.get_or_init(|| Regex::new(r"^[a-zA-Z0-9-]{1,64}$").expect("regex literal")) +} + +/// Reserved `graph_id` values that the regex alone wouldn't catch. 
+/// The leading-underscore rule already excludes every engine-managed +/// filename pattern (`_schema.pg`, `__manifest`, etc.); the regex +/// `^[a-zA-Z0-9-]{1,64}$` (see `regex()`) additionally rejects every +/// dot-containing name structurally — `openapi.json` and friends +/// never reach this check. +/// +/// This list only needs to cover route-prefix collisions and +/// top-level endpoint names whose spellings DO satisfy the regex +/// (no dots, no underscores). +fn is_reserved(value: &str) -> bool { + matches!(value, "policies" | "healthz" | "openapi" | "graphs") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn accepts_simple_alphanumeric_ids() { + for ok in ["alpha", "beta", "tenant-001", "A", "g", "X-9-z"] { + GraphId::try_from(ok).unwrap_or_else(|_| panic!("expected accept: {ok}")); + } + } + + #[test] + fn accepts_64_char_max() { + let max = "a".repeat(64); + GraphId::try_from(max.as_str()).unwrap(); + } + + #[test] + fn rejects_empty() { + assert!(GraphId::try_from("").is_err()); + } + + #[test] + fn rejects_over_64_chars() { + let too_long = "a".repeat(65); + assert!(GraphId::try_from(too_long.as_str()).is_err()); + } + + #[test] + fn rejects_leading_underscore() { + // Engine reserves every `_*` filename at the graph root. + assert!(GraphId::try_from("_internal").is_err()); + assert!(GraphId::try_from("__manifest").is_err()); + } + + #[test] + fn rejects_underscores_anywhere() { + // The regex doesn't allow `_` at all — keeps the disallow-leading-`_` + // rule cheap to enforce. If the rule changes later, we'd need to + // distinguish "starts with `_`" from "contains `_`". 
+ assert!(GraphId::try_from("tenant_alpha").is_err()); + } + + #[test] + fn rejects_path_separators() { + for bad in ["alpha/beta", "../etc", "..", "alpha\\beta"] { + assert!(GraphId::try_from(bad).is_err(), "expected reject: {bad}"); + } + } + + #[test] + fn rejects_unicode() { + assert!(GraphId::try_from("αlpha").is_err()); + assert!(GraphId::try_from("graph-✨").is_err()); + } + + #[test] + fn rejects_whitespace() { + assert!(GraphId::try_from(" alpha").is_err()); + assert!(GraphId::try_from("alpha ").is_err()); + assert!(GraphId::try_from("alpha beta").is_err()); + assert!(GraphId::try_from("\talpha").is_err()); + } + + #[test] + fn rejects_dots() { + // Reserves the "extension"-shaped ids that look like filenames. + assert!(GraphId::try_from(".").is_err()); + assert!(GraphId::try_from("alpha.beta").is_err()); + assert!(GraphId::try_from("alpha.").is_err()); + } + + #[test] + fn rejects_reserved_route_names() { + // Names that satisfy the regex but are still reserved because + // they'd collide with top-level route prefixes / endpoint names. + // Dot-containing names (e.g. `openapi.json`) are rejected by the + // regex, not this list — `rejects_dots` above covers them. + for bad in ["policies", "healthz", "openapi", "graphs"] { + assert!( + GraphId::try_from(bad).is_err(), + "expected reject (reserved): {bad}" + ); + } + } + + #[test] + fn display_returns_inner_string() { + let id = GraphId::try_from("alpha").unwrap(); + assert_eq!(format!("{id}"), "alpha"); + assert_eq!(id.as_str(), "alpha"); + } + + #[test] + fn serialize_round_trips_via_json() { + let id = GraphId::try_from("tenant-007").unwrap(); + let json = serde_json::to_string(&id).unwrap(); + assert_eq!(json, "\"tenant-007\""); + let back: GraphId = serde_json::from_str(&json).unwrap(); + assert_eq!(back, id); + } + + #[test] + fn deserialize_runs_validation() { + // Hostile payload must not produce a GraphId. 
+        // The turbofish names the target type; without it `from_str` has
+        // nothing to infer the output type from.
+        let bad = serde_json::from_str::<GraphId>("\"_evil\"");
+        assert!(bad.is_err());
+        let bad = serde_json::from_str::<GraphId>("\"../../etc\"");
+        assert!(bad.is_err());
+    }
+
+    #[test]
+    fn hash_equality_works_for_use_as_map_key() {
+        use std::collections::HashMap;
+        let a = GraphId::try_from("alpha").unwrap();
+        let b = GraphId::try_from("alpha").unwrap();
+        let mut m = HashMap::new();
+        m.insert(a, 1u32);
+        assert_eq!(m.get(&b), Some(&1));
+    }
+}
diff --git a/crates/omnigraph-server/src/identity.rs b/crates/omnigraph-server/src/identity.rs
new file mode 100644
index 0000000..250640d
--- /dev/null
+++ b/crates/omnigraph-server/src/identity.rs
@@ -0,0 +1,308 @@
+//! Identity types for the multi-graph server (MR-668) + forward-compatible
+//! shapes for Cloud mode (RFC 0003) and OAuth provider (RFC 0004).
+//!
+//! Per decision 13 in the implementation plan: ship the type shapes that
+//! Cloud mode will consume, without committing to any trait shape
+//! (`TokenVerifier` stays draft in RFC 0001). Every Cluster-mode call site
+//! constructs these types with their Cluster-mode-specific values:
+//!
+//! - `tenant_id: None` (Cloud will set `Some(...)` from the OAuth `org_id` claim)
+//! - `scopes: vec![Scope::Full]` (Cloud will populate from the OAuth `scope` claim)
+//! - `source: AuthSource::Static` (Cloud / OIDC will set `AuthSource::Oidc`)
+//!
+//! The enums use `#[non_exhaustive]` so RFC 0001 step 1 / RFC 0004 can
+//! add variants without breaking exhaustive matches in callers.
+
+use std::fmt;
+use std::sync::Arc;
+use std::sync::OnceLock;
+
+use color_eyre::eyre::{Result, bail};
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+
+use crate::graph_id::GraphId;
+
+/// Maximum length of a `TenantId` value.
+pub const TENANT_ID_MAX_LEN: usize = 64;
+
+/// Cloud-mode tenant identifier. Validated with the same regex as
+/// `GraphId` so the two interchange syntactically.
+///
+/// `None` in Cluster mode; Cloud mode (RFC 0003) sets `Some(...)` from
+/// the OAuth `org_id` claim. Constructed only via `try_from` so callers
+/// cannot bypass validation.
+#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)]
+#[serde(transparent)]
+pub struct TenantId(String);
+
+impl TenantId {
+    /// Borrow the validated identifier as a string slice.
+    pub fn as_str(&self) -> &str {
+        &self.0
+    }
+}
+
+impl fmt::Display for TenantId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.0)
+    }
+}
+
+impl AsRef<str> for TenantId {
+    fn as_ref(&self) -> &str {
+        &self.0
+    }
+}
+
+impl TryFrom<String> for TenantId {
+    type Error = color_eyre::eyre::Error;
+
+    /// Validates `value` and, on success, takes ownership of it without copying.
+    fn try_from(value: String) -> Result<Self, Self::Error> {
+        validate_tenant_id(value.as_str())?;
+        Ok(Self(value))
+    }
+}
+
+impl TryFrom<&str> for TenantId {
+    type Error = color_eyre::eyre::Error;
+
+    /// Validates `value` and, on success, stores an owned copy.
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        validate_tenant_id(value)?;
+        Ok(Self(value.to_string()))
+    }
+}
+
+// Hand-written (not derived) so deserialization goes through `try_from`
+// and therefore through `validate_tenant_id`.
+impl<'de> Deserialize<'de> for TenantId {
+    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        Self::try_from(s).map_err(serde::de::Error::custom)
+    }
+}
+
+/// Checks `value` against the `TenantId` rules, in order: non-empty, at most
+/// `TENANT_ID_MAX_LEN` long, and matching `^[a-zA-Z0-9-]{1,64}$`.
+///
+/// Returns `Ok(())` when every rule holds, otherwise an error naming the
+/// first rule that failed.
+fn validate_tenant_id(value: &str) -> Result<()> {
+    if value.is_empty() {
+        bail!("tenant_id must not be empty");
+    }
+    // NOTE(review): `str::len` counts bytes, while the message says "chars".
+    // The two agree for any value the regex below would accept (ASCII only).
+    if value.len() > TENANT_ID_MAX_LEN {
+        bail!(
+            "tenant_id '{}' is {} chars; max {}",
+            value,
+            value.len(),
+            TENANT_ID_MAX_LEN
+        );
+    }
+    if !tenant_id_regex().is_match(value) {
+        bail!("tenant_id '{}' must match ^[a-zA-Z0-9-]{{1,64}}$", value);
+    }
+    Ok(())
+}
+
+/// Returns the tenant-id pattern, compiled once on first use and cached in a
+/// function-local `OnceLock` for the rest of the process.
+fn tenant_id_regex() -> &'static Regex {
+    static RE: OnceLock<Regex> = OnceLock::new();
+    RE.get_or_init(|| Regex::new(r"^[a-zA-Z0-9-]{1,64}$").expect("regex literal"))
+}
+
+/// Registry HashMap key. Cluster mode populates `tenant_id: None`;
+/// Cloud mode (RFC 0003) populates `tenant_id: Some(...)`.
+///
+/// The `Option<TenantId>` field is the **single forward-compatibility seam**
+/// between Cluster and Cloud modes.
Every handler reaches the engine via +/// `state.registry.get(&key)` — the key shape stays stable, so handlers +/// don't get re-touched when Cloud mode lands. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct GraphKey { + pub tenant_id: Option, + pub graph_id: GraphId, +} + +impl GraphKey { + /// Cluster-mode constructor (`tenant_id: None`). + pub fn cluster(graph_id: GraphId) -> Self { + Self { + tenant_id: None, + graph_id, + } + } + + /// Cloud-mode constructor — reserved for RFC 0003; included here so + /// the seam is visible even though no Cluster-mode code path calls it. + pub fn cloud(tenant_id: TenantId, graph_id: GraphId) -> Self { + Self { + tenant_id: Some(tenant_id), + graph_id, + } + } +} + +impl fmt::Display for GraphKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.tenant_id { + Some(t) => write!(f, "{}/{}", t, self.graph_id), + None => write!(f, "{}", self.graph_id), + } + } +} + +/// Authorization scope. Cluster mode: every authenticated actor gets +/// `Scope::Full`. Cloud mode (RFC 0004) adds OAuth-style scopes via the +/// dashboard-configured `graph:read`, `graph:write`, `graph:admin`, +/// `graph:*` set; those become additional variants here. +/// +/// `#[non_exhaustive]` so RFC 0004 can extend without breaking matches. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +#[non_exhaustive] +pub enum Scope { + /// Full access. The Cluster-mode default — every authenticated actor + /// has unrestricted access subject to Cedar policy. + Full, +} + +/// How the actor was authenticated. Cluster mode: every actor authenticates +/// via the existing SHA-256 hash compare against a static token set, so +/// `AuthSource::Static`. RFC 0001 step 1 adds `AuthSource::Oidc` when the +/// `OidcJwtVerifier` ships. +/// +/// `#[non_exhaustive]` so RFC 0001 can extend without breaking matches. 
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+#[non_exhaustive]
+pub enum AuthSource {
+    /// Authenticated via the static bearer-token hash table.
+    Static,
+}
+
+/// Server-resolved actor identity. Replaces the previous
+/// `AuthenticatedActor(Arc<str>)` from `lib.rs`.
+///
+/// The fields are populated by `authenticate_bearer_token` after a successful
+/// constant-time hash match. **Clients cannot set any of these fields directly**
+/// — this is the MR-731 invariant. See `authorize_request` in `lib.rs` for the
+/// chokepoint that overwrites any client-supplied actor identity.
+///
+/// Cluster mode constructs this with `tenant_id: None`, `scopes: vec![Scope::Full]`,
+/// `source: AuthSource::Static` via the convenience constructor below.
+#[derive(Debug, Clone)]
+pub struct ResolvedActor {
+    /// Actor identifier; exposed as `&str` through `actor_id_str`.
+    pub actor_id: Arc<str>,
+    /// `None` when built by `cluster_static`.
+    pub tenant_id: Option<TenantId>,
+    /// `[Scope::Full]` when built by `cluster_static`.
+    pub scopes: Vec<Scope>,
+    pub source: AuthSource,
+}
+
+impl ResolvedActor {
+    /// Cluster-mode constructor — Static auth, no tenant, Full scope.
+    /// Used by `authenticate_bearer_token` after a successful hash match.
+    pub fn cluster_static(actor_id: Arc<str>) -> Self {
+        Self {
+            actor_id,
+            tenant_id: None,
+            scopes: vec![Scope::Full],
+            source: AuthSource::Static,
+        }
+    }
+
+    /// View the actor identifier as `&str`. Stable across the Cluster/Cloud
+    /// boundary — Cedar always sees this value as the principal.
+    pub fn actor_id_str(&self) -> &str {
+        &self.actor_id
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn tenant_id_accepts_simple_values() {
+        for ok in ["alpha", "tenant-001", "X", "01HZWA0KT0H0V0V0V0V0V0V0V0"] {
+            TenantId::try_from(ok).unwrap_or_else(|_| panic!("expected accept: {ok}"));
+        }
+    }
+
+    #[test]
+    fn tenant_id_rejects_empty_and_over_max() {
+        assert!(TenantId::try_from("").is_err());
+        let too_long = "a".repeat(65);
+        assert!(TenantId::try_from(too_long.as_str()).is_err());
+    }
+
+    #[test]
+    fn tenant_id_rejects_path_traversal() {
+        assert!(TenantId::try_from("../etc").is_err());
+        assert!(TenantId::try_from("alpha/beta").is_err());
+    }
+
+    #[test]
+    fn tenant_id_deserialize_runs_validation() {
+        // The annotation names the target type for `from_str`; the error
+        // type is left for inference.
+        let bad: Result<TenantId, _> = serde_json::from_str("\"../evil\"");
+        assert!(bad.is_err());
+    }
+
+    #[test]
+    fn graph_key_cluster_constructor_sets_no_tenant() {
+        let id = GraphId::try_from("alpha").unwrap();
+        let key = GraphKey::cluster(id.clone());
+        assert!(key.tenant_id.is_none());
+        assert_eq!(key.graph_id, id);
+    }
+
+    #[test]
+    fn graph_key_cloud_constructor_sets_tenant() {
+        let tenant = TenantId::try_from("acme").unwrap();
+        let id = GraphId::try_from("alpha").unwrap();
+        let key = GraphKey::cloud(tenant.clone(), id.clone());
+        assert_eq!(key.tenant_id.as_ref(), Some(&tenant));
+        assert_eq!(key.graph_id, id);
+    }
+
+    #[test]
+    fn graph_key_displays_with_or_without_tenant() {
+        let id = GraphId::try_from("alpha").unwrap();
+        let cluster_key = GraphKey::cluster(id.clone());
+        assert_eq!(format!("{cluster_key}"), "alpha");
+
+        let tenant = TenantId::try_from("acme").unwrap();
+        let cloud_key = GraphKey::cloud(tenant, id);
+        assert_eq!(format!("{cloud_key}"), "acme/alpha");
+    }
+
+    #[test]
+    fn graph_key_is_hashable_for_map_use() {
+        use std::collections::HashMap;
+        let a = GraphKey::cluster(GraphId::try_from("alpha").unwrap());
+        let b = GraphKey::cluster(GraphId::try_from("alpha").unwrap());
+        let mut m: HashMap<GraphKey, u32> = HashMap::new();
+        m.insert(a, 1);
+        assert_eq!(m.get(&b), Some(&1));
+    }
+
+    #[test]
+    fn graph_key_distinguishes_tenants() {
+        let id = GraphId::try_from("alpha").unwrap();
+        let t1 = TenantId::try_from("acme").unwrap();
+        let t2 = TenantId::try_from("globex").unwrap();
+        let k1 = GraphKey::cloud(t1, id.clone());
+        let k2 = GraphKey::cloud(t2, id);
+        assert_ne!(k1, k2);
+    }
+
+    #[test]
+    fn resolved_actor_cluster_defaults() {
+        // `Arc::<str>::from` builds the `Arc<str>` that `cluster_static` takes.
+        let actor = ResolvedActor::cluster_static(Arc::<str>::from("act-alice"));
+        assert_eq!(actor.actor_id_str(), "act-alice");
+        assert!(actor.tenant_id.is_none());
+        assert_eq!(actor.scopes, vec![Scope::Full]);
+        assert_eq!(actor.source, AuthSource::Static);
+    }
+
+    #[test]
+    fn scope_and_auth_source_are_non_exhaustive() {
+        // Regression: keep the `#[non_exhaustive]` annotation. If someone
+        // removes it, this test still passes (matches are still legal); it's
+        // the cross-crate compile that catches it. Document the contract here.
+        let _scope = Scope::Full;
+        let _src = AuthSource::Static;
+    }
+}
diff --git a/crates/omnigraph-server/src/lib.rs b/crates/omnigraph-server/src/lib.rs
index 5b63eb0..ad41f9d 100644
--- a/crates/omnigraph-server/src/lib.rs
+++ b/crates/omnigraph-server/src/lib.rs
@@ -1,9 +1,16 @@
 pub mod api;
 pub mod auth;
 pub mod config;
+pub mod graph_id;
+pub mod identity;
 pub mod policy;
+pub mod registry;
 pub mod workload;
 
+pub use graph_id::GraphId;
+pub use identity::{AuthSource, GraphKey, ResolvedActor, Scope, TenantId};
+pub use registry::{GraphHandle, GraphRegistry, InsertError, RegistryLookup, RegistrySnapshot};
+
 use std::collections::{HashMap, HashSet};
 use std::fs;
 use std::io;
@@ -14,15 +21,17 @@ use std::sync::Arc;
 use api::{
     BranchCreateOutput, BranchCreateRequest, BranchDeleteOutput, BranchListOutput,
     BranchMergeOutput, BranchMergeRequest, ChangeOutput, ChangeRequest, CommitListOutput,
-    CommitListQuery, ErrorCode, ErrorOutput, ExportRequest, HealthOutput, IngestOutput,
-    IngestRequest, ReadOutput, ReadRequest, SchemaApplyOutput,
SchemaApplyRequest, SchemaOutput, - SnapshotQuery, ingest_output, schema_apply_output, snapshot_payload, + CommitListQuery, ErrorCode, ErrorOutput, ExportRequest, GraphInfo, GraphListResponse, + HealthOutput, IngestOutput, IngestRequest, QueryRequest, ReadOutput, ReadRequest, + SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotQuery, ingest_output, + schema_apply_output, snapshot_payload, }; +pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source}; use axum::body::{Body, Bytes}; use axum::extract::DefaultBodyLimit; -use axum::extract::{Extension, Path, Query, Request, State}; +use axum::extract::{Extension, OriginalUri, Path, Query, Request, State}; use axum::http::StatusCode; -use axum::http::header::{AUTHORIZATION, CONTENT_TYPE}; +use axum::http::header::{AUTHORIZATION, CONTENT_TYPE, HeaderName, HeaderValue}; use axum::middleware::{self, Next}; use axum::response::{IntoResponse, Response}; use axum::routing::{delete, get, post}; @@ -36,23 +45,26 @@ pub use config::{ use futures::stream; use omnigraph::db::{Omnigraph, ReadTarget}; use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError}; +use omnigraph::storage::normalize_root_uri; use omnigraph_compiler::json_params_to_param_map; use omnigraph_compiler::query::parser::parse_query; use omnigraph_compiler::{JsonParamMode, ParamMap}; -pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source}; pub use policy::{ PolicyAction, PolicyCompiler, PolicyConfig, PolicyDecision, PolicyEngine, PolicyExpectation, - PolicyRequest, PolicyTestConfig, + PolicyRequest, PolicyResourceKind, PolicyTestConfig, }; +use serde::Deserialize; use serde_json::Value; use sha2::{Digest, Sha256}; use subtle::ConstantTimeEq; use tokio::net::TcpListener; use tokio::sync::mpsc; use tower_http::trace::TraceLayer; -use tracing::{error, info}; +use tracing::{error, info, warn}; use tracing_subscriber::EnvFilter; use utoipa::OpenApi; +use 
utoipa::openapi::path::{Parameter, ParameterIn}; +use utoipa::openapi::schema::{Object, Type}; use utoipa::openapi::security::{Http, HttpAuthScheme, SecurityScheme}; type BearerTokenHash = [u8; 32]; @@ -72,10 +84,15 @@ fn hash_bearer_token(token: &str) -> BearerTokenHash { ), paths( server_health, + server_graphs_list, server_snapshot, - server_read, + // deprecated; the #[deprecated] attribute on the handler + // surfaces as `deprecated: true` on the OpenAPI operation. + #[allow(deprecated)] server_read, + server_query, server_export, - server_change, + #[allow(deprecated)] server_change, + server_mutate, server_schema_apply, server_schema_get, server_ingest, @@ -111,29 +128,119 @@ const SERVER_SOURCE_VERSION: Option<&str> = option_env!("OMNIGRAPH_SOURCE_VERSIO #[derive(Debug, Clone)] pub struct ServerConfig { - pub uri: String, + /// Server topology + the graphs to open at startup. Single-mode + /// invocations (`omnigraph-server ` or `--target `) + /// produce `ServerConfigMode::Single`; multi-mode invocations + /// (`--config omnigraph.yaml` with a non-empty `graphs:` map and + /// no single-mode selector) produce `ServerConfigMode::Multi`. + pub mode: ServerConfigMode, pub bind: String, + /// Operator opt-in for fully-unauthenticated dev mode (MR-723). + /// When neither bearer tokens nor a policy file are configured, + /// `serve()` refuses to start unless this is true (set via + /// `--unauthenticated` or `OMNIGRAPH_UNAUTHENTICATED=1`). The + /// motivation is that "no tokens + no policy" looks like protection + /// (no Cedar errors at boot) but is actually fully open — operators + /// who set up auth and forgot the policy file would otherwise ship + /// the illusion of protection. + pub allow_unauthenticated: bool, +} + +/// What `load_server_settings` produces after applying the four-rule +/// mode inference matrix (MR-668 decision 2). +#[derive(Debug, Clone)] +pub enum ServerConfigMode { + /// Legacy invocation — one graph at the given URI. 
Either: + /// * `omnigraph-server ` (CLI positional), or + /// * `omnigraph-server --target --config omnigraph.yaml`, or + /// * `omnigraph-server --config omnigraph.yaml` with `server.graph` + /// set to a named target. + Single { + uri: String, + /// Top-level `policy.file` (single-graph Cedar policy). + policy_file: Option, + }, + /// Multi-graph invocation — `--config omnigraph.yaml` with a + /// non-empty `graphs:` map and no single-mode selector. + Multi { + /// Per-graph startup configs, sorted by graph id (BTreeMap + /// iteration order). The parallel-open loop iterates this. + graphs: Vec, + /// Path to the config file the server was started from. Kept on + /// the mode so future runtime mutation (deferred — see release + /// notes) can locate the source of truth without re-parsing CLI + /// args. + config_path: PathBuf, + /// `server.policy.file` (server-level Cedar policy for the + /// management endpoints). Wired into `GET /graphs` authorization. + server_policy_file: Option, + }, +} + +/// One graph's startup-time configuration: id, opened URI, optional +/// per-graph policy file path. Constructed by `load_server_settings` +/// in multi mode; consumed by `serve`'s parallel open loop. +#[derive(Debug, Clone)] +pub struct GraphStartupConfig { + pub graph_id: String, + pub uri: String, pub policy_file: Option, } +/// Runtime routing for the server. Single mode = legacy +/// `omnigraph-server ` invocation, one graph, flat HTTP routes. +/// Multi mode = `--config omnigraph.yaml` with a non-empty `graphs:` +/// map, N graphs, cluster routes (`/graphs/{graph_id}/...`). Mode is +/// determined at startup by `load_server_settings`. +/// +/// In single mode the handle lives here directly — there is no +/// registry, no sentinel key, no walk-and-assert. In multi mode the +/// registry carries N handles and the middleware dispatches on the +/// URL's `{graph_id}` segment. 
+/// +/// Both modes share the same handler bodies — the routing middleware +/// (`resolve_graph_handle`) injects `Arc` as a request +/// extension so handlers never see the routing discriminator. +#[derive(Clone)] +pub enum GraphRouting { + /// Single-graph deployment: one handle, flat routes (`/snapshot`, + /// `/read`, …). The `handle.uri` field carries the URI the engine + /// was opened from. Backward compatible with v0.6.0 deployments. + Single { handle: Arc }, + /// Multi-graph deployment: many handles, cluster routes + /// (`/graphs/{graph_id}/...`). `config_path` is the `omnigraph.yaml` + /// the server reads at startup; preserved here so future runtime + /// mutation (deferred) can find the source of truth without + /// re-parsing CLI args. The server treats the file as + /// operator-owned and never writes it. + Multi { + registry: Arc, + config_path: Option, + }, +} + #[derive(Clone)] pub struct AppState { - uri: String, - /// PR 2 (MR-686): the engine is now `Arc` — no global - /// write lock. Concurrent handlers call `&self` engine APIs - /// directly. Per-(table, branch) write queues inside the engine - /// serialize same-key writers; per-actor admission control on - /// `workload` isolates noisy actors. - engine: Arc, - /// Per-actor admission control. See `workload::WorkloadController`. + /// Runtime routing — the single source of truth for where each + /// request's graph lives. Single mode holds the handle directly; + /// multi mode holds the registry + config path. Both arms are + /// the same shape from a handler's perspective: middleware + /// extracts an `Arc` and injects it as a request + /// extension. + routing: GraphRouting, + /// Per-actor admission control. Process-wide (not per-graph) — + /// see MR-668 decision Q6. workload: Arc, bearer_tokens: Arc<[(BearerTokenHash, Arc)]>, - policy_engine: Option>, + /// Server-level Cedar policy. 
Used by management endpoints (`POST + /// /graphs`, `GET /graphs`) which act on the registry resource, + /// not on a per-graph resource. Loaded from `server.policy.file` + /// in `omnigraph.yaml`. `None` outside multi mode and when no + /// server policy is configured. Per-graph policies live on each + /// `GraphHandle.policy`. + server_policy: Option>, } -#[derive(Debug, Clone)] -struct AuthenticatedActor(Arc); - struct ExportStreamWriter { sender: mpsc::UnboundedSender>, } @@ -151,12 +258,6 @@ impl Write for ExportStreamWriter { } } -impl AuthenticatedActor { - fn as_str(&self) -> &str { - &self.0 - } -} - #[derive(Debug)] pub struct ApiError { status: StatusCode, @@ -167,8 +268,34 @@ pub struct ApiError { } impl AppState { + /// Canonical single-mode constructor. Every other `new_*` / `open_*` + /// helper is a thin convenience wrapper around this one. Builds the + /// engine + per-graph policy through `build_single_mode`, which + /// applies `Omnigraph::with_policy` so HTTP-layer and engine-layer + /// policy can never diverge — there is no "policy installed on HTTP + /// but not on engine" representable state (closes the prior + /// `with_policy_engine` footgun that reused the engine `Arc` + /// without re-applying `with_policy`). 
+ pub fn new_single( + uri: String, + db: Omnigraph, + bearer_tokens: Vec<(String, String)>, + policy_engine: Option, + workload: workload::WorkloadController, + ) -> Self { + let bearer_tokens = hash_bearer_tokens(bearer_tokens); + let per_graph_policy = policy_engine.map(Arc::new); + Self::build_single_mode(uri, db, bearer_tokens, per_graph_policy, Arc::new(workload)) + } + pub fn new(uri: String, db: Omnigraph) -> Self { - Self::new_with_bearer_tokens(uri, db, Vec::new()) + Self::new_single( + uri, + db, + Vec::new(), + None, + workload::WorkloadController::from_env(), + ) } pub fn new_with_bearer_token(uri: String, db: Omnigraph, bearer_token: Option) -> Self { @@ -184,7 +311,13 @@ impl AppState { db: Omnigraph, bearer_tokens: Vec<(String, String)>, ) -> Self { - Self::new_with_bearer_tokens_and_policy(uri, db, bearer_tokens, None) + Self::new_single( + uri, + db, + bearer_tokens, + None, + workload::WorkloadController::from_env(), + ) } pub fn new_with_bearer_tokens_and_policy( @@ -193,40 +326,27 @@ impl AppState { bearer_tokens: Vec<(String, String)>, policy_engine: Option, ) -> Self { - let bearer_tokens: Vec<(BearerTokenHash, Arc)> = bearer_tokens - .into_iter() - .map(|(actor, token)| (hash_bearer_token(&token), Arc::::from(actor))) - .collect(); - Self { + Self::new_single( uri, - engine: Arc::new(db), - workload: Arc::new(workload::WorkloadController::from_env()), - bearer_tokens: Arc::from(bearer_tokens), - policy_engine: policy_engine.map(Arc::new), - } + db, + bearer_tokens, + policy_engine, + workload::WorkloadController::from_env(), + ) } /// Construct with a caller-provided [`workload::WorkloadController`]. /// Tests and benches use this to override per-actor caps without - /// mutating global env vars (which is unsafe in Rust 2024 once the - /// async runtime is up — `setenv` isn't thread-safe). + /// mutating global env vars (unsafe in Rust 2024 once the async + /// runtime is up — `setenv` isn't thread-safe). 
For tests that also + /// need a custom `PolicyEngine`, use [`new_single`] directly. pub fn new_with_workload( uri: String, db: Omnigraph, bearer_tokens: Vec<(String, String)>, workload: workload::WorkloadController, ) -> Self { - let bearer_tokens: Vec<(BearerTokenHash, Arc)> = bearer_tokens - .into_iter() - .map(|(actor, token)| (hash_bearer_token(&token), Arc::::from(actor))) - .collect(); - Self { - uri, - engine: Arc::new(db), - workload: Arc::new(workload), - bearer_tokens: Arc::from(bearer_tokens), - policy_engine: None, - } + Self::new_single(uri, db, bearer_tokens, None, workload) } pub async fn open(uri: impl Into) -> Result { @@ -248,7 +368,7 @@ impl AppState { uri: impl Into, bearer_tokens: Vec<(String, String)>, ) -> Result { - let uri = uri.into(); + let uri = normalize_root_uri(&uri.into()).wrap_err("normalize graph URI")?; let db = Omnigraph::open(&uri).await?; Ok(Self::new_with_bearer_tokens(uri, db, bearer_tokens)) } @@ -258,15 +378,17 @@ impl AppState { bearer_tokens: Vec<(String, String)>, policy_file: Option<&PathBuf>, ) -> Result { - let uri = uri.into(); + // The "policy requires tokens" invariant is enforced once by + // `classify_server_runtime_state` in `serve()`, before either + // single-mode or multi-mode construction is reached. By the + // time we get here, the (policy, no-tokens) combination has + // already been rejected — no second bail needed. 
+ let uri = normalize_root_uri(&uri.into()).wrap_err("normalize graph URI")?; let db = Omnigraph::open(&uri).await?; let policy_engine = match policy_file { - Some(path) => Some(PolicyEngine::load(path, &uri)?), + Some(path) => Some(PolicyEngine::load_graph(path, &uri)?), None => None, }; - if policy_engine.is_some() && bearer_tokens.is_empty() { - bail!("policy requires at least one configured bearer token actor"); - } Ok(Self::new_with_bearer_tokens_and_policy( uri, db, @@ -275,15 +397,109 @@ impl AppState { )) } - pub fn uri(&self) -> &str { - &self.uri + /// Single-mode shared construction: wraps the bare engine + per-graph + /// policy in a `GraphHandle` carried directly by `GraphRouting::Single`. + /// Per-graph policy enforcement on the engine (MR-722) is re-applied + /// via `Omnigraph::with_policy` so HTTP and engine layers can never + /// diverge. + fn build_single_mode( + uri: String, + db: Omnigraph, + bearer_tokens: Arc<[(BearerTokenHash, Arc)]>, + policy_engine: Option>, + workload: Arc, + ) -> Self { + // Engine-layer policy gate (MR-722). With a per-graph policy + // installed, every `_as` writer on `Omnigraph` calls into the + // PolicyChecker. HTTP-layer `authorize_request` is the first + // gate; engine-layer is the redundant-but-correct backstop. + let db = if let Some(policy) = policy_engine.as_ref() { + let checker = Arc::clone(policy) as Arc; + db.with_policy(checker) + } else { + db + }; + // `GraphHandle.key` is required by the struct, but in single + // mode it is never a registry key (there's no registry) and + // never compared against user input (routes are flat, no + // `{graph_id}` parameter). The label appears only in tracing + // output from `resolve_graph_handle`. The literal below is a + // log label, not a routing key — when the future cluster + // catalog ships, single mode may carry the catalog-assigned + // id here instead. 
+ let uri = normalize_root_uri(&uri).unwrap_or(uri); + let key = GraphKey::cluster( + GraphId::try_from("default").expect("'default' is a valid GraphId log label"), + ); + let handle = Arc::new(GraphHandle { + key, + uri, + engine: Arc::new(db), + policy: policy_engine, + }); + Self { + routing: GraphRouting::Single { handle }, + workload, + bearer_tokens, + server_policy: None, + } + } + + /// Multi-mode constructor — used by the startup loop. Operators + /// reach this by invoking `omnigraph-server --config omnigraph.yaml` + /// with a non-empty `graphs:` map. + /// + /// Caller supplies the already-opened `GraphHandle`s and (optionally) + /// the path to the source config file. `server_policy` is loaded + /// from `server.policy.file` if configured. + pub fn new_multi( + handles: Vec>, + bearer_tokens: Vec<(String, String)>, + server_policy: Option, + workload: workload::WorkloadController, + config_path: Option, + ) -> std::result::Result { + let bearer_tokens = hash_bearer_tokens(bearer_tokens); + let registry = Arc::new(GraphRegistry::from_handles(handles)?); + Ok(Self { + routing: GraphRouting::Multi { + registry, + config_path, + }, + workload: Arc::new(workload), + bearer_tokens, + server_policy: server_policy.map(Arc::new), + }) + } + + /// Runtime routing accessor. Handlers don't typically inspect this — + /// they extract `Arc` via the routing middleware — but + /// `build_app` matches on it to decide flat vs nested route + /// mounting, and a handful of management endpoints (`GET /graphs`, + /// the OpenAPI cluster rewrite) match on the discriminant. + pub fn routing(&self) -> &GraphRouting { + &self.routing } fn requires_bearer_auth(&self) -> bool { - !self.bearer_tokens.is_empty() || self.policy_engine.is_some() + if !self.bearer_tokens.is_empty() { + return true; + } + if self.server_policy.is_some() { + return true; + } + // Any per-graph policy also requires auth — otherwise the + // policy gate would receive unauthenticated requests. 
Reading + // from `routing` is O(1) in both arms: single mode is a direct + // `handle.policy.is_some()` check, multi mode reads the + // cached `any_per_graph_policy` flag on the registry snapshot. + match &self.routing { + GraphRouting::Single { handle } => handle.policy.is_some(), + GraphRouting::Multi { registry, .. } => registry.snapshot_ref().any_per_graph_policy, + } } - fn authenticate_bearer_token(&self, provided_token: &str) -> Option> { + fn authenticate_bearer_token(&self, provided_token: &str) -> Option { // Hash the incoming token and compare against every stored digest in // constant time. Iterate all entries unconditionally so total work — // and therefore response timing — doesn't depend on which slot matches. @@ -294,12 +510,16 @@ impl AppState { matched = Some(Arc::clone(actor)); } } - matched + matched.map(ResolvedActor::cluster_static) } +} - fn policy_engine(&self) -> Option<&PolicyEngine> { - self.policy_engine.as_deref() - } +fn hash_bearer_tokens(bearer_tokens: Vec<(String, String)>) -> Arc<[(BearerTokenHash, Arc)]> { + let tokens: Vec<(BearerTokenHash, Arc)> = bearer_tokens + .into_iter() + .map(|(actor, token)| (hash_bearer_token(&token), Arc::::from(actor))) + .collect(); + Arc::from(tokens) } impl ApiError { @@ -343,6 +563,20 @@ impl ApiError { } } + /// HTTP 405 Method Not Allowed. Used when the route is mounted but + /// the active server mode doesn't serve it (`GET /graphs` in + /// single-graph mode returns this instead of 404 so clients can + /// distinguish "wrong context" from "no such resource"). 
+ pub fn method_not_allowed(message: impl Into) -> Self { + Self { + status: StatusCode::METHOD_NOT_ALLOWED, + code: ErrorCode::MethodNotAllowed, + message: message.into(), + merge_conflicts: Vec::new(), + manifest_conflict: None, + } + } + pub fn conflict(message: impl Into) -> Self { Self { status: StatusCode::CONFLICT, @@ -398,10 +632,7 @@ impl ApiError { } } - fn manifest_version_conflict( - message: String, - details: api::ManifestConflictOutput, - ) -> Self { + fn manifest_version_conflict(message: String, details: api::ManifestConflictOutput) -> Self { Self { status: StatusCode::CONFLICT, code: ErrorCode::Conflict, @@ -443,6 +674,19 @@ impl ApiError { ), OmniError::Lance(message) => Self::internal(format!("storage: {message}")), OmniError::Io(err) => Self::internal(format!("io: {err}")), + // Engine-layer policy enforcement (MR-722). All denials and + // evaluation failures surface here as 403. The HTTP-layer + // `authorize_request` already distinguishes 401 (missing + // bearer) from 403 (policy denial), so by the time the + // engine gate fires, the bearer is valid — any failure from + // the engine is a policy outcome, not an auth one. + OmniError::Policy(message) => Self::forbidden(message), + // `Omnigraph::init` against an existing graph URI in strict + // mode. Not currently HTTP-reachable (POST /graphs was + // pulled), but mapping is wired so the variant has a + // single canonical translation when a future runtime + // create endpoint lands. + err @ OmniError::AlreadyInitialized { .. 
} => Self::conflict(err.to_string()), } } } @@ -511,26 +755,200 @@ pub fn load_server_settings( cli_uri: Option, cli_target: Option, cli_bind: Option, + cli_allow_unauthenticated: bool, ) -> Result { let config = load_config(config_path)?; - let uri = - config.resolve_target_uri(cli_uri, cli_target.as_deref(), config.server_graph_name())?; let bind = cli_bind.unwrap_or_else(|| config.server_bind().to_string()); - let policy_file = config.resolve_policy_file(); + // Either `--unauthenticated` or `OMNIGRAPH_UNAUTHENTICATED=1` flips + // this. Treat any non-empty, non-"0"/"false" string as truthy — + // standard 12-factor "any value is true" reading of the env var. + let env_unauth = std::env::var("OMNIGRAPH_UNAUTHENTICATED") + .ok() + .map(|v| { + let trimmed = v.trim(); + !trimmed.is_empty() && trimmed != "0" && !trimmed.eq_ignore_ascii_case("false") + }) + .unwrap_or(false); + let allow_unauthenticated = cli_allow_unauthenticated || env_unauth; + + // MR-668 decision 2 — four-rule mode inference matrix. + // + // 1. CLI `` positional → Single (URI = the value) + // 2. CLI `--target ` → Single (URI = graphs..uri) + // 3. `server.graph` in config → Single (URI = graphs..uri) + // 4. `--config` + non-empty `graphs:` + no single-mode selector + // → Multi (every entry in `graphs:`) + // 5. otherwise → error with migration hint + // + // Rules 1-3 are mutually compatible (CLI URI wins over `--target` + // wins over `server.graph`), reusing the existing + // `resolve_target_uri` precedence. + let has_cli_uri = cli_uri.is_some(); + let has_cli_target = cli_target.is_some(); + let has_server_graph = config.server_graph_name().is_some(); + let has_graphs_map = !config.graphs.is_empty(); + let has_explicit_config = config_path.is_some(); + + let mode = if has_cli_uri || has_cli_target || has_server_graph { + // Rules 1, 2, or 3 → Single mode. 
+ let raw_uri = config.resolve_target_uri( + cli_uri, + cli_target.as_deref(), + config.server_graph_name(), + )?; + let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| { + format!("normalize single-graph URI '{raw_uri}' from server settings") + })?; + let policy_file = config.resolve_policy_file(); + ServerConfigMode::Single { uri, policy_file } + } else if has_explicit_config && has_graphs_map { + if config.resolve_policy_file().is_some() { + bail!( + "top-level `policy.file` is single-graph/CLI-local policy only; \ + in multi-graph mode move per-graph rules to \ + `graphs..policy.file` and move `graph_list` rules to \ + `server.policy.file`." + ); + } + // Rule 4 → Multi mode. Build a startup config per graph. + let mut graphs = Vec::with_capacity(config.graphs.len()); + for (name, target) in &config.graphs { + // Validate the graph id can construct a `GraphId` newtype. + // Doing this here (not at registry insert) so a malformed + // omnigraph.yaml fails at startup with a clear error. + GraphId::try_from(name.clone()).map_err(|err| { + color_eyre::eyre::eyre!("invalid graph id '{name}' in omnigraph.yaml: {err}") + })?; + let raw_uri = config.resolve_uri_value(&target.uri); + let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| { + format!("normalize URI '{raw_uri}' for graph '{name}' in omnigraph.yaml") + })?; + graphs.push(GraphStartupConfig { + graph_id: name.clone(), + uri, + policy_file: config.resolve_target_policy_file(name), + }); + } + let config_path = config_path + .cloned() + .expect("has_explicit_config implies config_path is Some"); + let server_policy_file = config.resolve_server_policy_file(); + ServerConfigMode::Multi { + graphs, + config_path, + server_policy_file, + } + } else { + // Rule 5 → error with migration hint. 
+ bail!( + "no graph to serve: pass a URI (`omnigraph-server `), select a target \ + (`--target --config omnigraph.yaml`), set `server.graph: ` in \ + omnigraph.yaml, or for multi-graph mode add a `graphs:` map to the config \ + file referenced by `--config`." + ); + }; Ok(ServerConfig { - uri, + mode, bind, - policy_file, + allow_unauthenticated, }) } +/// Whether the loaded config will run the server in multi-graph mode. +/// Useful for the test that constructs `ServerConfig` directly. +pub fn server_config_is_multi(config: &ServerConfig) -> bool { + matches!(config.mode, ServerConfigMode::Multi { .. }) +} + +/// MR-723 server runtime state, classified from the three-state matrix +/// of (bearer tokens configured) × (policy file configured) at startup. +/// +/// * **Open** — neither tokens nor policy; requires explicit +/// `allow_unauthenticated`. Effectively a "trust the network" dev +/// mode. `serve()` refuses to start in this shape without the flag, +/// so the only way to reach this state at runtime is via deliberate +/// operator opt-in. +/// * **DefaultDeny** — tokens configured but no policy file. The +/// server requires a valid bearer token; once authenticated, every +/// action except `Read` is denied with 403. Closes the "tokens but +/// forgot the policy file" trap. +/// * **PolicyEnabled** — policy file configured and at least one +/// bearer token configured. Cedar evaluates every authenticated +/// request. Policy without tokens is rejected at startup — +/// such a server would 401 every request, which is bug-shaped +/// rather than feature-shaped (operators wanting "deny all +/// unauthenticated traffic" should configure tokens plus a +/// deny-all policy to get meaningful 403s with policy-decision +/// logging instead). +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum ServerRuntimeState { + Open, + DefaultDeny, + PolicyEnabled, +} + +/// Compute the [`ServerRuntimeState`] from the configured inputs. 
+/// Pulled out as a pure function so the matrix is unit-testable +/// without standing up the full server. +/// +/// The classifier is the **single source of truth** for "should we +/// start?" — both `serve()`'s single-mode and multi-mode branches +/// call this before constructing their `AppState`. Adding a startup +/// invariant here means both modes enforce it automatically; the +/// alternative (per-constructor `bail!`) drifts the moment a third +/// mode is added. +pub fn classify_server_runtime_state( + has_tokens: bool, + has_policy: bool, + allow_unauthenticated: bool, +) -> Result { + match (has_tokens, has_policy, allow_unauthenticated) { + (false, false, false) => bail!( + "server has no bearer tokens and no policy file configured. This is a fully \ + open server — pass `--unauthenticated` (or set OMNIGRAPH_UNAUTHENTICATED=1) \ + if you actually want that, otherwise configure bearer tokens (see \ + docs/user/server.md) and/or `policy.file` in omnigraph.yaml." + ), + (false, false, true) => Ok(ServerRuntimeState::Open), + (true, false, _) => Ok(ServerRuntimeState::DefaultDeny), + (false, true, _) => bail!( + "policy file is configured but no bearer tokens — every request would 401 \ + because no token can ever match. Configure at least one bearer token (see \ + docs/user/server.md), or remove the policy file. To deny all unauthenticated \ + traffic deliberately, configure tokens plus a deny-all Cedar rule — that \ + produces meaningful 403s with policy-decision logging instead of silent 401s." + ), + (true, true, _) => Ok(ServerRuntimeState::PolicyEnabled), + } +} + pub fn build_app(state: AppState) -> Router { - let protected = Router::new() + // The per-graph protected routes, identical in single + multi mode. + // Two middleware layers wrap them (outer first, inner last): + // 1. `require_bearer_auth` — extracts the bearer token and injects + // `ResolvedActor` (or rejects 401). + // 2. 
`resolve_graph_handle` — injects `Arc` based on + // the active mode (single: the only handle; multi: lookup by + // `{graph_id}` in the URI path). + let per_graph_protected = Router::new() .route("/snapshot", get(server_snapshot)) .route("/export", post(server_export)) - .route("/read", post(server_read)) - .route("/change", post(server_change)) + // /read and /change are kept indefinitely for back-compat; + // their handlers carry #[deprecated] so the OpenAPI operation is + // flagged and their responses include RFC 9745 Deprecation + + // RFC 8288 Link headers. Suppress the call-site warning for the + // route registration itself. + .route("/read", post({ + #[allow(deprecated)] + server_read + })) + .route("/query", post(server_query)) + .route("/change", post({ + #[allow(deprecated)] + server_change + })) + .route("/mutate", post(server_mutate)) .route("/schema", get(server_schema_get)) .route("/schema/apply", post(server_schema_apply)) .route( @@ -545,11 +963,42 @@ pub fn build_app(state: AppState) -> Router { .route("/branches/merge", post(server_branch_merge)) .route("/commits", get(server_commit_list)) .route("/commits/{commit_id}", get(server_commit_show)) + .route_layer(middleware::from_fn_with_state( + state.clone(), + resolve_graph_handle, + )) .route_layer(middleware::from_fn_with_state( state.clone(), require_bearer_auth, )); + // Management endpoints (`GET /graphs`) live alongside the per-graph + // router. They go through bearer auth but NOT through + // `resolve_graph_handle` — they operate on the registry directly. + // The endpoint is mounted in both modes; in single mode the handler + // returns 405 so clients see "resource exists, wrong context" + // rather than 404 "no such resource." + // + // Runtime add/remove (`POST /graphs`, `DELETE /graphs/{id}`) is not + // exposed in v0.6.0 — operators add graphs by editing + // `omnigraph.yaml` and restarting. 
+ let management = Router::new() + .route("/graphs", get(server_graphs_list)) + .route_layer(middleware::from_fn_with_state( + state.clone(), + require_bearer_auth, + )); + + // Mount the protected routes differently per mode: + // * Single → flat routes (legacy: `/snapshot`, `/read`, etc.) + // * Multi → nested under `/graphs/{graph_id}/...` + let protected: Router = match state.routing() { + GraphRouting::Single { .. } => per_graph_protected.merge(management), + GraphRouting::Multi { .. } => Router::new() + .nest("/graphs/{graph_id}", per_graph_protected) + .merge(management), + }; + Router::new() .route("/healthz", get(server_health)) .route("/openapi.json", get(server_openapi)) @@ -562,20 +1011,144 @@ pub fn build_app(state: AppState) -> Router { pub async fn serve(config: ServerConfig) -> Result<()> { let token_source = resolve_token_source().await?; info!(source = token_source.name(), "loaded bearer token source"); - let state = AppState::open_with_bearer_tokens_and_policy( - config.uri.clone(), - token_source.load().await?, - config.policy_file.as_ref(), - ) - .await?; - let listener = TcpListener::bind(&config.bind).await?; - info!(uri = %config.uri, bind = %config.bind, "serving omnigraph"); + let tokens = token_source.load().await?; + + // For runtime-state classification, "any policy configured" means + // either the top-level/single-mode policy file OR a server-level + // policy OR any per-graph policy file. Mirrors the + // `requires_bearer_auth` semantics on AppState. + let has_policy_configured = match &config.mode { + ServerConfigMode::Single { policy_file, .. } => policy_file.is_some(), + ServerConfigMode::Multi { + graphs, + server_policy_file, + .. 
+ } => server_policy_file.is_some() || graphs.iter().any(|g| g.policy_file.is_some()), + }; + let runtime_state = classify_server_runtime_state( + !tokens.is_empty(), + has_policy_configured, + config.allow_unauthenticated, + )?; + match runtime_state { + ServerRuntimeState::Open => warn!( + "running with --unauthenticated: no bearer tokens, no policy file, all \ + requests permitted. This is for local dev only — do not expose to a \ + network you don't fully trust." + ), + ServerRuntimeState::DefaultDeny => warn!( + "bearer tokens are configured but no policy file is set — running in \ + default-deny mode (only `read` actions are permitted for authenticated \ + actors). Configure `policy.file` in omnigraph.yaml to enable Cedar rules." + ), + ServerRuntimeState::PolicyEnabled => {} + } + + let bind = config.bind.clone(); + let state = match config.mode { + ServerConfigMode::Single { uri, policy_file } => { + let uri_for_log = uri.clone(); + info!(uri = %uri_for_log, bind = %bind, mode = "single", "serving omnigraph"); + AppState::open_with_bearer_tokens_and_policy(uri, tokens, policy_file.as_ref()).await? + } + ServerConfigMode::Multi { + graphs, + config_path, + server_policy_file, + } => { + info!( + bind = %bind, + mode = "multi", + graph_count = graphs.len(), + config = %config_path.display(), + "serving omnigraph" + ); + open_multi_graph_state(graphs, tokens, server_policy_file.as_ref(), config_path).await? + } + }; + + let listener = TcpListener::bind(&bind).await?; axum::serve(listener, build_app(state)) .with_graceful_shutdown(shutdown_signal()) .await?; Ok(()) } +/// Parallel open of every graph in the startup config, with bounded +/// concurrency (`buffer_unordered(4)`). Fail-fast — the first open error +/// aborts startup; other in-flight opens are dropped (their `Omnigraph` +/// instances close cleanly via Arc drop). +/// +/// The bound 4 is a rule-of-thumb for I/O-bound work. 
At N ≤ 10 this +/// trades startup latency for a small amount of concurrent S3 / Lance +/// open pressure. +async fn open_multi_graph_state( + graphs: Vec, + tokens: Vec<(String, String)>, + server_policy_file: Option<&PathBuf>, + config_path: PathBuf, +) -> Result { + use futures::{StreamExt, TryStreamExt}; + + if graphs.is_empty() { + bail!("multi-graph mode requires at least one graph in the `graphs:` map"); + } + + // Server-level policy (loaded once, applies to management endpoints). + // The placeholder graph_id `"server"` is the sentinel the Cedar + // resource-model refactor maps to the singleton + // `Omnigraph::Server::"root"` entity at evaluation time. + let server_policy = match server_policy_file { + Some(path) => Some(PolicyEngine::load_server(path)?), + None => None, + }; + + // `try_collect` propagates the first error eagerly, dropping every + // in-flight open. `buffer_unordered + collect::<Vec<_>>` would drain + // the stream before checking errors — incorrect for the docstring's + // "fail-fast" claim and wasteful on S3-backed graphs. + let handles: Vec> = futures::stream::iter(graphs.into_iter()) + .map(|cfg| async move { open_single_graph(cfg).await }) + .buffer_unordered(4) + .try_collect() + .await?; + + let workload = workload::WorkloadController::from_env(); + let state = AppState::new_multi(handles, tokens, server_policy, workload, Some(config_path)) + .map_err(|err| color_eyre::eyre::eyre!("multi-graph registry: {err}"))?; + Ok(state) +} + +/// Open one graph and wrap it in a `GraphHandle`. Used at startup by +/// `open_multi_graph_state`. 
+async fn open_single_graph(cfg: GraphStartupConfig) -> Result> { + let graph_id = GraphId::try_from(cfg.graph_id.clone()) + .map_err(|err| color_eyre::eyre::eyre!("graph id '{}': {err}", cfg.graph_id))?; + let uri = normalize_root_uri(&cfg.uri) + .wrap_err_with(|| format!("normalize URI for graph '{}'", cfg.graph_id))?; + + let db = Omnigraph::open(&uri) + .await + .map_err(|err| color_eyre::eyre::eyre!("open graph '{}' at {}: {err}", graph_id, uri))?; + + let (policy_arc, db) = match &cfg.policy_file { + Some(path) => { + let policy = PolicyEngine::load_graph(path, graph_id.as_str())?; + let policy_arc: Arc = Arc::new(policy); + let checker = Arc::clone(&policy_arc) as Arc; + (Some(policy_arc), db.with_policy(checker)) + } + None => (None, db), + }; + + Ok(Arc::new(GraphHandle { + key: GraphKey::cluster(graph_id), + uri, + engine: Arc::new(db), + policy: policy_arc, + })) +} + async fn shutdown_signal() { if let Err(err) = tokio::signal::ctrl_c().await { error!(error = %err, "failed to install ctrl-c handler"); @@ -606,14 +1179,176 @@ async fn server_health() -> Json { }) } +#[utoipa::path( + get, + path = "/graphs", + tag = "management", + operation_id = "listGraphs", + responses( + (status = 200, description = "List of registered graphs", body = GraphListResponse), + (status = 401, description = "Unauthorized", body = ErrorOutput), + (status = 403, description = "Forbidden", body = ErrorOutput), + (status = 405, description = "Method not allowed (single-graph mode)", body = ErrorOutput), + ), + security(("bearer_token" = [])), +)] +/// List every graph currently registered with this server (MR-668). +/// +/// Multi-graph mode only. In single mode, the route returns 405 — there's +/// no registry to enumerate. Cedar-gated by the server-level policy via +/// the `graph_list` action against `Omnigraph::Server::"root"`. +/// +/// Order: alphabetical by `graph_id` (server-sorted so clients see +/// deterministic output across requests). 
+async fn server_graphs_list( + State(state): State, + actor: Option>, +) -> std::result::Result, ApiError> { + // 405 in single mode — there's no registry to enumerate, and the + // legacy URL surface didn't expose this endpoint. + let registry = match state.routing() { + GraphRouting::Single { .. } => { + return Err(ApiError::method_not_allowed( + "GET /graphs is only available in multi-graph mode", + )); + } + GraphRouting::Multi { registry, .. } => registry, + }; + + // Server-level Cedar gate. `state.server_policy` is loaded from + // `server.policy.file` in `omnigraph.yaml` at startup. When no + // server policy is configured, `authorize_request` rejects + // server-scoped actions such as `GraphList` with 403, whether or + // not an actor is present, so the registry stays closed in every + // runtime state (including `--unauthenticated`) — which + // is the right default (don't leak the registry until the operator + // explicitly authorizes it). + authorize_request( + actor.as_ref().map(|Extension(actor)| actor), + state.server_policy.as_deref(), + PolicyRequest { + action: PolicyAction::GraphList, + branch: None, + target_branch: None, + }, + )?; + + let mut graphs: Vec = registry + .list() + .into_iter() + .map(|handle| GraphInfo { + graph_id: handle.key.graph_id.as_str().to_string(), + uri: handle.uri.clone(), + }) + .collect(); + graphs.sort_by(|a, b| a.graph_id.cmp(&b.graph_id)); + Ok(Json(GraphListResponse { graphs })) +} + async fn server_openapi(State(state): State) -> Json { let mut doc = ApiDoc::openapi(); if !state.requires_bearer_auth() { strip_security(&mut doc); } + // MR-668: in multi mode, the protected routes live under + // `/graphs/{graph_id}/...`. Rewrite the doc so the spec matches + // the routes the router actually serves. Always-flat paths + // (`/healthz`, `/graphs`) stay flat in both modes. + if matches!(state.routing(), GraphRouting::Multi { .. 
}) { + nest_paths_under_cluster_prefix(&mut doc); + } Json(doc) } +/// Path prefix used to namespace per-graph routes in multi mode. +/// Kept in sync with the `Router::nest(...)` invocation in `build_app`. +const CLUSTER_PATH_PREFIX: &str = "/graphs/{graph_id}"; + +/// Operation-id prefix applied to every cloned cluster operation. +/// Decision 7 in the implementation plan — keeps operation IDs unique +/// across the spec when both flat and nested variants ever appear in +/// the same generation pass. +const CLUSTER_OPERATION_ID_PREFIX: &str = "cluster_"; + +/// Paths that stay flat in every server mode (public or server-level, +/// no per-graph dependency). Update this list when adding new +/// always-flat endpoints. `/graphs` is the management enumeration — +/// it lives at the root in both single mode (405) and multi mode, and +/// must never be rewritten to `/graphs/{graph_id}/graphs`. +const ALWAYS_FLAT_PATHS: &[&str] = &["/healthz", "/graphs"]; + +/// In multi-mode `server_openapi`, every protected path-item is +/// reattached under the cluster prefix. Operation IDs gain the +/// `cluster_` prefix so SDK generators don't collide if/when both +/// surfaces are merged. Every rewritten operation also declares the +/// required `{graph_id}` path parameter so the served OpenAPI document +/// remains internally valid. +/// +/// Removing the flat protected paths matches the runtime router — +/// in multi mode, requests to `/snapshot` etc. return 404, so the +/// spec must agree. 
+fn nest_paths_under_cluster_prefix(doc: &mut utoipa::openapi::OpenApi) { + let original = std::mem::take(&mut doc.paths.paths); + let mut rewritten = std::collections::BTreeMap::new(); + for (path, mut item) in original { + if ALWAYS_FLAT_PATHS.contains(&path.as_str()) { + rewritten.insert(path, item); + continue; + } + rename_operation_ids(&mut item, CLUSTER_OPERATION_ID_PREFIX); + add_cluster_graph_id_parameter(&mut item); + let new_path = format!("{CLUSTER_PATH_PREFIX}{path}"); + rewritten.insert(new_path, item); + } + doc.paths.paths = rewritten; +} + +fn add_cluster_graph_id_parameter(item: &mut utoipa::openapi::PathItem) { + for op in path_item_operations_mut(item) { + let parameters = op.parameters.get_or_insert_with(Vec::new); + let has_graph_id = parameters + .iter() + .any(|param| param.name == "graph_id" && param.parameter_in == ParameterIn::Path); + if !has_graph_id { + parameters.insert(0, graph_id_path_parameter()); + } + } +} + +fn graph_id_path_parameter() -> Parameter { + let mut parameter = Parameter::new("graph_id"); + parameter.parameter_in = ParameterIn::Path; + parameter.description = Some("Graph id to route the request to.".to_string()); + parameter.schema = Some(Object::with_type(Type::String).into()); + parameter +} + +/// Prefix every operation_id in this PathItem with `prefix`. 
+fn rename_operation_ids(item: &mut utoipa::openapi::PathItem, prefix: &str) { + for op in path_item_operations_mut(item) { + if let Some(id) = op.operation_id.as_deref() { + op.operation_id = Some(format!("{prefix}{id}")); + } + } +} + +fn path_item_operations_mut( + item: &mut utoipa::openapi::PathItem, +) -> impl Iterator { + [ + item.get.as_mut(), + item.post.as_mut(), + item.put.as_mut(), + item.delete.as_mut(), + item.options.as_mut(), + item.head.as_mut(), + item.patch.as_mut(), + item.trace.as_mut(), + ] + .into_iter() + .flatten() +} + fn strip_security(doc: &mut utoipa::openapi::OpenApi) { if let Some(components) = doc.components.as_mut() { components.security_schemes.clear(); @@ -661,11 +1396,77 @@ async fn require_bearer_auth( let Some(actor) = state.authenticate_bearer_token(provided_token) else { return Err(ApiError::unauthorized("invalid bearer token")); }; - request.extensions_mut().insert(AuthenticatedActor(actor)); + request.extensions_mut().insert(actor); Ok(next.run(request).await) } +/// Routing middleware (MR-668). Resolves the active graph for the +/// request and injects `Arc` as an extension so handlers can +/// extract it via `Extension>`. +/// +/// **Single mode**: the routing field holds the single handle directly. +/// Routes are flat; every request resolves to that handle, regardless +/// of the URI path. No registry walk, no sentinel key, no +/// programmer-error guard. +/// +/// **Multi mode**: routes are nested under `/graphs/{graph_id}/...`. The +/// middleware extracts `{graph_id}` from the URI path and looks it up in +/// the registry. Returns 404 if the graph is not registered. +/// +/// The middleware fires AFTER `require_bearer_auth`, so the actor is +/// already in the request extensions (or auth was off entirely). 
+async fn resolve_graph_handle( + State(state): State, + mut request: Request, + next: Next, +) -> std::result::Result { + let handle = match &state.routing { + GraphRouting::Single { handle } => Arc::clone(handle), + GraphRouting::Multi { registry, .. } => { + // `Router::nest("/graphs/{graph_id}", inner)` rewrites + // `request.uri().path()` to the inner suffix (e.g. `/snapshot`). + // The pre-rewrite URI is preserved in the `OriginalUri` + // request extension by axum's router; we read from there to + // extract `{graph_id}`. Fall back to the current URI only if + // the extension is missing, which shouldn't happen for + // nested routes but is safe defensive code. + let original_path: String = request + .extensions() + .get::() + .map(|OriginalUri(uri)| uri.path().to_string()) + .unwrap_or_else(|| request.uri().path().to_string()); + let graph_id_str = original_path + .strip_prefix("/graphs/") + .and_then(|rest| rest.split('/').next()) + .filter(|s| !s.is_empty()) + .ok_or_else(|| { + ApiError::bad_request( + "cluster route missing /graphs/{graph_id} prefix".to_string(), + ) + })?; + let graph_id = GraphId::try_from(graph_id_str.to_string()) + .map_err(|err| ApiError::bad_request(err.to_string()))?; + let key = GraphKey::cluster(graph_id.clone()); + match registry.get(&key) { + RegistryLookup::Ready(handle) => handle, + RegistryLookup::Gone => { + return Err(ApiError::not_found(format!("graph '{graph_id}' not found"))); + } + } + } + }; + + // Per-request observability. `Span::current().record` would silently + // no-op here because no upstream `#[tracing::instrument(...)]` macro + // declares a `graph_id` field; emit an explicit event instead so the + // routing decision actually lands in logs. 
+ info!(graph_id = %handle.key.graph_id, "graph routed"); + + request.extensions_mut().insert(handle); + Ok(next.run(request).await) +} + fn log_policy_decision(actor_id: &str, request: &PolicyRequest, decision: &PolicyDecision) { info!( actor_id = actor_id, @@ -678,25 +1479,86 @@ fn log_policy_decision(actor_id: &str, request: &PolicyRequest, decision: &Polic ); } +/// HTTP-layer Cedar policy gate. Two sources of the policy engine: +/// * Per-graph handler — passes `handle.policy.as_deref()` so the +/// graph's Cedar rules govern read/change/branch_*/schema_apply. +/// * Management handler — passes `state.server_policy.as_deref()` so +/// server-level Cedar rules govern `graph_list` (the only shipped +/// server-scoped action; runtime `graph_create` / `graph_delete` +/// are deferred until a managed cluster catalog lands). +/// +/// The MR-731 invariant lives inside this function: actor identity is +/// supplied as a separate argument from the resolved bearer match. The +/// `PolicyRequest` struct itself does not carry identity (the field was +/// dropped from the type), so handlers cannot smuggle it through the +/// request. See `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers` +/// at `tests/server.rs`. fn authorize_request( - state: &AppState, - actor: Option<&AuthenticatedActor>, - mut request: PolicyRequest, + actor: Option<&ResolvedActor>, + policy: Option<&PolicyEngine>, + request: PolicyRequest, ) -> std::result::Result<(), ApiError> { - let Some(engine) = state.policy_engine() else { + let Some(engine) = policy else { + // No PolicyEngine installed. Three runtime states can reach this: + // + // * **Open mode** (`--unauthenticated`): no tokens, no policy. + // Per-graph operations are open by operator opt-in (they + // accepted "trust the network" for graph data). + // * **DefaultDeny mode**: tokens configured but no policy. The + // request went through bearer auth, so `actor` is Some. 
Only + // per-graph `Read` is permitted; other per-graph actions + // return 403. Closes the "configured auth but forgot the + // policy file" trap from MR-723. + // * Either of the above with a **server-scoped** action + // (`graph_list`, future `graph_create`/`graph_delete`). + // + // Server-scoped actions are always denied here, regardless of + // mode or actor presence. The management surface leaks server + // topology (graph IDs + URIs that may contain S3 bucket paths + // or internal hostnames) — operators who opted into Open mode + // accepted exposure of graph DATA, not exposure of server + // topology. Closing the management surface by default in every + // runtime state means the docstring contract on + // `server_graphs_list` ("don't leak the registry until the + // operator explicitly authorizes it") holds uniformly; the + // operator's only path to enabling it is configuring an + // explicit `server.policy.file` in omnigraph.yaml. + if request.action.resource_kind() == PolicyResourceKind::Server { + return Err(ApiError::forbidden( + "server-scoped actions require an explicit `server.policy.file` \ + configured in omnigraph.yaml — the management surface is closed \ + by default in every runtime state, including --unauthenticated, \ + so that server topology is never exposed without operator opt-in.", + )); + } + if actor.is_some() && request.action != PolicyAction::Read { + return Err(ApiError::forbidden( + "server runs in default-deny mode (bearer tokens configured but no \ + policy file). Only `read` actions are permitted; configure \ + `policy.file` in omnigraph.yaml to enable other actions.", + )); + } return Ok(()); }; let Some(actor) = actor else { return Err(ApiError::unauthorized("missing bearer token")); }; - // Authoritative actor_id is the authenticated session, not whatever the - // handler put in the request. Prevents an empty-string default at any - // call site from ever reaching the engine as a policy subject. 
- request.actor_id = actor.as_str().to_string(); + // SECURITY INVARIANT (MR-731): actor identity is supplied to the + // policy engine here as a separate argument, sourced from the + // bearer-token match resolved by `require_bearer_auth`. The + // `PolicyRequest` struct itself no longer carries `actor_id` (it + // was dropped from the type), so handlers cannot smuggle identity + // through the request body and there is no overwrite step that + // could be skipped. The principle is codified in + // `docs/dev/invariants.md` Hard Invariant 11 ("clients cannot set + // actor identity directly") and pinned by the regression test + // `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers` + // in `crates/omnigraph-server/tests/server.rs`. + let actor_id = actor.actor_id.as_ref(); let decision = engine - .authorize(&request) + .authorize(actor_id, &request) .map_err(|err| ApiError::internal(format!("policy: {err}")))?; - log_policy_decision(actor.as_str(), &request, &decision); + log_policy_decision(actor_id, &request, &decision); if decision.allowed { Ok(()) } else { @@ -723,26 +1585,22 @@ fn authorize_request( /// count) for every table on the branch. Defaults to `main` when `branch` is /// omitted. Read-only. async fn server_snapshot( - State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Query(query): Query, ) -> std::result::Result, ApiError> { let branch = query.branch.unwrap_or_else(|| "main".to_string()); authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), action: PolicyAction::Read, branch: Some(branch.clone()), target_branch: None, }, )?; let snapshot = { - let db = &state.engine; + let db = &handle.engine; db.snapshot_of(ReadTarget::branch(branch.as_str())) .await .map_err(ApiError::from_omni)? 
@@ -750,6 +1608,21 @@ async fn server_snapshot( Ok(Json(snapshot_payload(&branch, &snapshot))) } +/// Header values that flag a response as coming from a deprecated route +/// (RFC 9745 / RFC 8288) and point at the canonical successor. +fn deprecation_headers(successor_link: &'static str) -> [(HeaderName, HeaderValue); 2] { + [ + ( + HeaderName::from_static("deprecation"), + HeaderValue::from_static("true"), + ), + ( + HeaderName::from_static("link"), + HeaderValue::from_static(successor_link), + ), + ] +} + #[utoipa::path( post, path = "/read", @@ -757,73 +1630,84 @@ async fn server_snapshot( operation_id = "read", request_body = ReadRequest, responses( - (status = 200, description = "Query results", body = ReadOutput), + (status = 200, description = "Query results (response includes `Deprecation: true` + `Link: ; rel=\"successor-version\"`)", body = ReadOutput), (status = 400, description = "Bad request", body = ErrorOutput), (status = 401, description = "Unauthorized", body = ErrorOutput), (status = 403, description = "Forbidden", body = ErrorOutput), ), security(("bearer_token" = [])), )] -/// Execute a GQ read query. +#[deprecated(note = "use POST /query instead; /read is kept indefinitely for byte-stable back-compat")] +/// **Deprecated** — use [`POST /query`](#tag/queries/operation/query) instead. /// -/// Runs the query in `query_source` against either a branch or a frozen -/// snapshot (mutually exclusive). When `query_source` defines multiple named -/// queries, pick one with `query_name`. `params` is a JSON object whose keys -/// match the parameters declared by the query. Returns rows as a JSON array -/// plus a `columns` list. Read-only. +/// Execute a GQ read query. Behavior is unchanged from prior releases; the +/// route is kept indefinitely for byte-stable back-compat. New integrations +/// should target `POST /query`, which has clean field names (`query` / +/// `name`) and a 400-on-mutation guard. 
Responses from this route include +/// `Deprecation: true` and `Link: ; rel="successor-version"` +/// headers per RFC 9745 / RFC 8288 so SDKs and proxies can surface the +/// signal. async fn server_read( - State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Json(request): Json, -) -> std::result::Result, ApiError> { - if request.branch.is_some() && request.snapshot.is_some() { - return Err(ApiError::bad_request( - "read request may specify branch or snapshot, not both", - )); - } - - let target = read_target_from_request(request.branch, request.snapshot); - let policy_branch = match &target { - ReadTarget::Branch(branch) => Some(branch.clone()), - ReadTarget::Snapshot(_) if state.policy_engine().is_some() && actor.is_some() => { - let db = &state.engine; - db.resolved_branch_of(target.clone()) - .await - .map(|branch| branch.or_else(|| Some("main".to_string()))) - .map_err(ApiError::from_omni)? - } - ReadTarget::Snapshot(_) => None, - }; - authorize_request( - &state, +) -> std::result::Result<([(HeaderName, HeaderValue); 2], Json), ApiError> { + let (selected_name, target, result) = run_query( + handle, actor.as_ref().map(|Extension(actor)| actor), - PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), - action: PolicyAction::Read, - branch: policy_branch, - target_branch: None, - }, - )?; - let (selected_name, query_params) = - select_named_query(&request.query_source, request.query_name.as_deref()) - .map_err(|err| ApiError::bad_request(err.to_string()))?; - let params = query_params_from_json(&query_params, request.params.as_ref()) - .map_err(|err| ApiError::bad_request(err.to_string()))?; + &request.query_source, + request.query_name.as_deref(), + request.params.as_ref(), + request.branch, + request.snapshot, + false, // /read predates the D2 rule; legacy callers may submit mutating queries here + ) + .await?; + Ok(( + deprecation_headers("; 
rel=\"successor-version\""), + Json(api::read_output(selected_name, &target, result)), + )) +} - let result = { - let db = &state.engine; - db.query( - target.clone(), - &request.query_source, - &selected_name, - ¶ms, - ) - .await - .map_err(ApiError::from_omni)? - }; +#[utoipa::path( + post, + path = "/query", + tag = "queries", + operation_id = "query", + request_body = QueryRequest, + responses( + (status = 200, description = "Query results", body = ReadOutput), + (status = 400, description = "Bad request - also returned when the query body contains mutations; use POST /mutate (or its deprecated alias POST /change) for write queries", body = ErrorOutput), + (status = 401, description = "Unauthorized", body = ErrorOutput), + (status = 403, description = "Forbidden", body = ErrorOutput), + ), + security(("bearer_token" = [])), +)] +/// Execute an inline read query (friendlier-named alternative to `POST /read`). +/// +/// Designed for ad-hoc exploration and AI-agent tool-use: short field +/// names (`query`, `name`) match the CLI `-e` flag and the GQ `query` +/// keyword. Mutations (`insert`/`update`/`delete`) are rejected with 400 +/// -- use `POST /mutate` (or its deprecated alias `POST /change`) for +/// write queries. Otherwise behaves identically to `POST /read`: same +/// target semantics (branch xor snapshot), same Cedar action (Read), +/// same response shape. +async fn server_query( + Extension(handle): Extension>, + actor: Option>, + Json(request): Json, +) -> std::result::Result, ApiError> { + let (selected_name, target, result) = run_query( + handle, + actor.as_ref().map(|Extension(actor)| actor), + &request.query, + request.name.as_deref(), + request.params.as_ref(), + request.branch, + request.snapshot, + true, // /query is read-only; reject mutations + ) + .await?; Ok(Json(api::read_output(selected_name, &target, result))) } @@ -848,25 +1732,21 @@ async fn server_read( /// streams the entire branch. 
Suitable for large exports — the response is /// streamed, not buffered. Read-only. async fn server_export( - State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Json(request): Json, ) -> std::result::Result { let branch = request.branch.unwrap_or_else(|| "main".to_string()); authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), action: PolicyAction::Export, branch: Some(branch.clone()), target_branch: None, }, )?; - let engine = Arc::clone(&state.engine); + let engine = Arc::clone(&handle.engine); let type_names = request.type_names.clone(); let table_keys = request.table_keys.clone(); let (tx, rx) = mpsc::unbounded_channel::>(); @@ -892,12 +1772,195 @@ async fn server_export( .into_response()) } +/// Shared implementation behind `POST /mutate` (canonical) and +/// `POST /change` (deprecated alias). Returns the bare `ChangeOutput`; +/// each route handler wraps it (the alias also attaches Deprecation +/// headers). +/// Shared backend for `/mutate` (canonical) and `/change` (deprecated alias). +/// +/// Decoupled from `ChangeRequest` so MR-969's `/queries/{name}` stored-query +/// handler can call this directly with registry-supplied fields without +/// rebuilding the request body. Today's HTTP handlers unpack the request and +/// call here; the registry would do the same. 
+async fn run_mutate( + state: AppState, + handle: Arc, + actor: Option<&ResolvedActor>, + query: &str, + name: Option<&str>, + params_json: Option<&Value>, + branch: String, +) -> std::result::Result { + let actor_arc = actor + .map(|a| Arc::clone(&a.actor_id)) + .unwrap_or_else(|| Arc::::from("anonymous")); + let actor_id = actor.map(|a| a.actor_id.as_ref()); + authorize_request( + actor, + handle.policy.as_deref(), + PolicyRequest { + action: PolicyAction::Change, + branch: Some(branch.clone()), + target_branch: None, + }, + )?; + // Per-actor admission: bound concurrent in-flight mutations and + // estimated bytes per actor. Cedar runs FIRST so denied requests + // don't consume admission slots. Estimate uses the request body + // size as a coarse proxy; engine memory pressure can run higher. + let est_bytes = query.len() as u64 + + params_json + .map(|p| p.to_string().len() as u64) + .unwrap_or(0); + let _admission = state + .workload + .try_admit(&actor_arc, est_bytes) + .map_err(ApiError::from_workload_reject)?; + let (selected_name, query_params) = + select_named_query(query, name).map_err(|err| ApiError::bad_request(err.to_string()))?; + let params = query_params_from_json(&query_params, params_json) + .map_err(|err| ApiError::bad_request(err.to_string()))?; + + let result = { + let db = &handle.engine; + db.mutate_as(&branch, query, &selected_name, ¶ms, actor_id) + .await + .map_err(ApiError::from_omni)? + }; + Ok(ChangeOutput { + branch, + query_name: selected_name, + affected_nodes: result.affected_nodes, + affected_edges: result.affected_edges, + actor_id: actor_id.map(str::to_string), + }) +} + +/// Shared backend for `/query` (canonical) and `/read` (deprecated alias). +/// +/// Mirrors [`run_mutate`]'s decoupled shape so MR-969's stored-query handler +/// can call here with registry-supplied fields. Rejects inline source that +/// contains mutations (D2 rule); callers wanting writes go through +/// [`run_mutate`] instead. 
+/// +/// Intentionally does **not** take [`AppState`] (unlike [`run_mutate`]): +/// reads are not admission-gated today, so there is no `state.workload` +/// consumer. The signature grows the parameter when Phase 1 (MR-976) adds +/// the request envelope's `expect: { max_rows_scanned: N }` budget, or +/// MR-969 extends per-actor admission to stored-read invocations. +async fn run_query( + handle: Arc, + actor: Option<&ResolvedActor>, + query: &str, + name: Option<&str>, + params_json: Option<&Value>, + branch: Option, + snapshot: Option, + reject_mutations: bool, +) -> std::result::Result<(String, ReadTarget, omnigraph_compiler::result::QueryResult), ApiError> { + if branch.is_some() && snapshot.is_some() { + return Err(ApiError::bad_request( + "request may specify branch or snapshot, not both", + )); + } + + let target = read_target_from_request(branch, snapshot); + let policy_branch = match &target { + ReadTarget::Branch(branch) => Some(branch.clone()), + ReadTarget::Snapshot(_) if handle.policy.is_some() && actor.is_some() => { + let db = &handle.engine; + db.resolved_branch_of(target.clone()) + .await + .map(|branch| branch.or_else(|| Some("main".to_string()))) + .map_err(ApiError::from_omni)? 
+ } + ReadTarget::Snapshot(_) => None, + }; + authorize_request( + actor, + handle.policy.as_deref(), + PolicyRequest { + action: PolicyAction::Read, + branch: policy_branch, + target_branch: None, + }, + )?; + let query_decl = + select_named_query_decl(query, name).map_err(|err| ApiError::bad_request(err.to_string()))?; + if reject_mutations && !query_decl.mutations.is_empty() { + return Err(ApiError::bad_request(format!( + "query '{}' contains mutations (insert/update/delete); use POST /mutate for write queries", + query_decl.name + ))); + } + let selected_name = query_decl.name.clone(); + let params = query_params_from_json(&query_decl.params, params_json) + .map_err(|err| ApiError::bad_request(err.to_string()))?; + + let result = { + let db = &handle.engine; + db.query(target.clone(), query, &selected_name, ¶ms) + .await + .map_err(ApiError::from_omni)? + }; + Ok((selected_name, target, result)) +} + #[utoipa::path( post, path = "/change", tag = "mutations", operation_id = "change", request_body = ChangeRequest, + responses( + (status = 200, description = "Mutation results (response includes `Deprecation: true` + `Link: ; rel=\"successor-version\"`)", body = ChangeOutput), + (status = 400, description = "Bad request", body = ErrorOutput), + (status = 401, description = "Unauthorized", body = ErrorOutput), + (status = 403, description = "Forbidden", body = ErrorOutput), + (status = 409, description = "Merge conflict", body = ErrorOutput), + (status = 429, description = "Per-actor admission cap exceeded; honor `Retry-After` header", body = ErrorOutput), + ), + security(("bearer_token" = [])), +)] +#[deprecated(note = "use POST /mutate instead; /change is kept indefinitely for back-compat")] +/// **Deprecated** — use [`POST /mutate`](#tag/mutations/operation/mutate) instead. +/// +/// Apply a GQ mutation to a branch. Behavior is unchanged; the route is +/// kept indefinitely for back-compat. 
New integrations should target +/// `POST /mutate`, which has identical semantics and a name that pairs +/// cleanly with `POST /query`. Responses from this route include +/// `Deprecation: true` and `Link: ; rel="successor-version"` +/// headers per RFC 9745 / RFC 8288 so SDKs and proxies can surface the +/// signal. +async fn server_change( + State(state): State, + Extension(handle): Extension>, + actor: Option>, + Json(request): Json, +) -> std::result::Result<([(HeaderName, HeaderValue); 2], Json), ApiError> { + let branch = request.branch.unwrap_or_else(|| "main".to_string()); + let output = run_mutate( + state, + handle, + actor.as_ref().map(|Extension(actor)| actor), + &request.query, + request.name.as_deref(), + request.params.as_ref(), + branch, + ) + .await?; + Ok(( + deprecation_headers("; rel=\"successor-version\""), + Json(output), + )) +} + +#[utoipa::path( + post, + path = "/mutate", + tag = "mutations", + operation_id = "mutate", + request_body = ChangeRequest, responses( (status = 200, description = "Mutation results", body = ChangeOutput), (status = 400, description = "Bad request", body = ErrorOutput), @@ -908,72 +1971,34 @@ async fn server_export( ), security(("bearer_token" = [])), )] -/// Apply a GQ mutation to a branch. +/// Apply a GQ mutation to a branch (canonical mutation endpoint). /// /// Writes to the named `branch` (defaults to `main`). Mutations are atomic /// per call and produce a new commit. Returns counts of nodes and edges /// affected. **Destructive**: on success the branch is updated; rejected /// mutations may still acquire locks briefly. Returns 409 on merge conflict. -async fn server_change( +/// +/// Pairs with `POST /query` (read-only). The legacy `POST /change` route +/// has identical semantics and is kept as a deprecated alias. 
+async fn server_mutate( State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Json(request): Json, ) -> std::result::Result, ApiError> { let branch = request.branch.unwrap_or_else(|| "main".to_string()); - let actor_arc = actor - .as_ref() - .map(|Extension(actor)| Arc::clone(&actor.0)) - .unwrap_or_else(|| Arc::::from("anonymous")); - let actor_id = actor.as_ref().map(|Extension(actor)| actor.as_str()); - authorize_request( - &state, - actor.as_ref().map(|Extension(actor)| actor), - PolicyRequest { - actor_id: actor_id.map(str::to_string).unwrap_or_default(), - action: PolicyAction::Change, - branch: Some(branch.clone()), - target_branch: None, - }, - )?; - // Per-actor admission: bound concurrent in-flight mutations and - // estimated bytes per actor. Cedar runs FIRST so denied requests - // don't consume admission slots. Estimate uses the request body - // size as a coarse proxy; engine memory pressure can run higher. - let est_bytes = request.query_source.len() as u64 - + request - .params - .as_ref() - .map(|p| p.to_string().len() as u64) - .unwrap_or(0); - let _admission = state - .workload - .try_admit(&actor_arc, est_bytes) - .map_err(ApiError::from_workload_reject)?; - let (selected_name, query_params) = - select_named_query(&request.query_source, request.query_name.as_deref()) - .map_err(|err| ApiError::bad_request(err.to_string()))?; - let params = query_params_from_json(&query_params, request.params.as_ref()) - .map_err(|err| ApiError::bad_request(err.to_string()))?; - - let result = { - let db = &state.engine; - db.mutate_as( - &branch, - &request.query_source, - &selected_name, - ¶ms, - actor_id, + Ok(Json( + run_mutate( + state, + handle, + actor.as_ref().map(|Extension(actor)| actor), + &request.query, + request.name.as_deref(), + request.params.as_ref(), + branch, ) - .await - .map_err(ApiError::from_omni)? 
- }; - Ok(Json(ChangeOutput { - branch, - query_name: selected_name, - affected_nodes: result.affected_nodes, - affected_edges: result.affected_edges, - actor_id: actor_id.map(str::to_string), - })) + .await?, + )) } #[utoipa::path( @@ -994,24 +2019,20 @@ async fn server_change( /// Useful for clients that want to introspect available types and tables /// before constructing GQ queries. Read-only. async fn server_schema_get( - State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, ) -> std::result::Result, ApiError> { authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), action: PolicyAction::Read, branch: None, target_branch: None, }, )?; let schema_source = { - let db = &state.engine; + let db = &handle.engine; db.schema_source().to_string() }; Ok(Json(SchemaOutput { schema_source })) @@ -1040,19 +2061,21 @@ async fn server_schema_get( /// false the diff was unsupported and no changes were made. 
async fn server_schema_apply( State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Json(request): Json, ) -> std::result::Result, ApiError> { let actor_arc = actor .as_ref() - .map(|Extension(actor)| Arc::clone(&actor.0)) + .map(|Extension(actor)| Arc::clone(&actor.actor_id)) .unwrap_or_else(|| Arc::::from("anonymous")); - let actor_id = actor.as_ref().map(|Extension(actor)| actor.as_str()); + let actor_id = actor + .as_ref() + .map(|Extension(actor)| actor.actor_id.as_ref()); authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor_id.map(str::to_string).unwrap_or_default(), action: PolicyAction::SchemaApply, branch: None, target_branch: Some("main".to_string()), @@ -1064,12 +2087,24 @@ async fn server_schema_apply( .try_admit(&actor_arc, est_bytes) .map_err(ApiError::from_workload_reject)?; let result = { - let db = &state.engine; - db.apply_schema(&request.schema_source) - .await - .map_err(ApiError::from_omni)? + let db = &handle.engine; + // Engine-layer policy enforcement (MR-722): pass the resolved + // actor through so apply_schema_as can call enforce() with the + // authoritative identity. With a policy installed in AppState, + // engine-side enforcement re-checks the same decision the + // HTTP-layer authorize_request just made above. PR #3 collapses + // the redundancy. + db.apply_schema_as( + &request.schema_source, + omnigraph::db::SchemaApplyOptions { + allow_data_loss: request.allow_data_loss, + }, + actor_id, + ) + .await + .map_err(ApiError::from_omni)? }; - Ok(Json(schema_apply_output(state.uri(), result))) + Ok(Json(schema_apply_output(handle.uri.as_str(), result))) } #[utoipa::path( @@ -1096,7 +2131,8 @@ async fn server_schema_apply( /// `overwrite` or when ingest produces conflicting writes. 
async fn server_ingest( State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Json(request): Json, ) -> std::result::Result, ApiError> { let branch = request.branch.unwrap_or_else(|| "main".to_string()); @@ -1104,12 +2140,14 @@ async fn server_ingest( let mode = request.mode.unwrap_or(omnigraph::loader::LoadMode::Merge); let actor_arc = actor .as_ref() - .map(|Extension(actor)| Arc::clone(&actor.0)) + .map(|Extension(actor)| Arc::clone(&actor.actor_id)) .unwrap_or_else(|| Arc::::from("anonymous")); - let actor_id = actor.as_ref().map(|Extension(actor)| actor.as_str()); + let actor_id = actor + .as_ref() + .map(|Extension(actor)| actor.actor_id.as_ref()); let branch_exists = { - let db = &state.engine; + let db = &handle.engine; db.branch_list() .await .map_err(ApiError::from_omni)? @@ -1119,10 +2157,9 @@ async fn server_ingest( if !branch_exists { authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor_id.map(str::to_string).unwrap_or_default(), action: PolicyAction::BranchCreate, branch: Some(from.clone()), target_branch: Some(branch.clone()), @@ -1130,10 +2167,9 @@ async fn server_ingest( )?; } authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor_id.map(str::to_string).unwrap_or_default(), action: PolicyAction::Change, branch: Some(branch.clone()), target_branch: None, @@ -1146,14 +2182,14 @@ async fn server_ingest( .map_err(ApiError::from_workload_reject)?; let result = { - let db = &state.engine; + let db = &handle.engine; db.ingest_as(&branch, Some(&from), &request.data, mode, actor_id) .await .map_err(ApiError::from_omni)? }; Ok(Json(ingest_output( - state.uri(), + handle.uri.as_str(), &result, actor_id.map(str::to_string), ))) @@ -1175,24 +2211,20 @@ async fn server_ingest( /// /// Returns branch names sorted alphabetically. Read-only. 
async fn server_branch_list( - State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, ) -> std::result::Result, ApiError> { authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), action: PolicyAction::Read, branch: None, target_branch: None, }, )?; let mut branches = { - let db = &state.engine; + let db = &handle.engine; db.branch_list().await.map_err(ApiError::from_omni)? }; branches.sort(); @@ -1222,22 +2254,19 @@ async fn server_branch_list( /// already exists. async fn server_branch_create( State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Json(request): Json, ) -> std::result::Result, ApiError> { let from = request.from.unwrap_or_else(|| "main".to_string()); let actor_arc = actor .as_ref() - .map(|Extension(actor)| Arc::clone(&actor.0)) + .map(|Extension(actor)| Arc::clone(&actor.actor_id)) .unwrap_or_else(|| Arc::::from("anonymous")); authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), action: PolicyAction::BranchCreate, branch: Some(from.clone()), target_branch: Some(request.name.clone()), @@ -1251,19 +2280,37 @@ async fn server_branch_create( .try_admit(&actor_arc, 256) .map_err(ApiError::from_workload_reject)?; { - let db = &state.engine; - db.branch_create_from(ReadTarget::branch(&from), &request.name) - .await - .map_err(ApiError::from_omni)?; + let db = &handle.engine; + db.branch_create_from_as( + ReadTarget::branch(&from), + &request.name, + actor.as_ref().map(|Extension(a)| a.actor_id.as_ref()), + ) + .await + .map_err(ApiError::from_omni)?; } Ok(Json(BranchCreateOutput { - uri: state.uri().to_string(), + uri: handle.uri.clone(), from, 
name: request.name, - actor_id: actor.map(|Extension(actor)| actor.as_str().to_string()), + actor_id: actor.map(|Extension(actor)| actor.actor_id.as_ref().to_string()), })) } +/// Path-param shape for [`server_branch_delete`]. Named-field +/// deserialization (rather than `Path` or `Path<(String,)>`) +/// keeps the extractor stable across single-mode flat routes and +/// multi-mode nested routes: the `{branch}` capture is picked by +/// name and any other captures in scope (e.g. `{graph_id}` in +/// multi-mode) are ignored without breaking deserialization. +/// +/// Closes the "handler path-extractor type is positional and breaks +/// when route nesting changes" class. +#[derive(Deserialize)] +struct BranchPath { + branch: String, +} + #[utoipa::path( delete, path = "/branches/{branch}", @@ -1288,19 +2335,21 @@ async fn server_branch_create( /// exist. async fn server_branch_delete( State(state): State, - actor: Option>, - Path(branch): Path, + Extension(handle): Extension>, + actor: Option>, + Path(BranchPath { branch }): Path, ) -> std::result::Result, ApiError> { let actor_arc = actor .as_ref() - .map(|Extension(actor)| Arc::clone(&actor.0)) + .map(|Extension(actor)| Arc::clone(&actor.actor_id)) .unwrap_or_else(|| Arc::::from("anonymous")); - let actor_id = actor.as_ref().map(|Extension(actor)| actor.as_str()); + let actor_id = actor + .as_ref() + .map(|Extension(actor)| actor.actor_id.as_ref()); authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor_id.map(str::to_string).unwrap_or_default(), action: PolicyAction::BranchDelete, branch: None, target_branch: Some(branch.clone()), @@ -1312,13 +2361,13 @@ async fn server_branch_delete( .try_admit(&actor_arc, 256) .map_err(ApiError::from_workload_reject)?; { - let db = &state.engine; - db.branch_delete(&branch) + let db = &handle.engine; + db.branch_delete_as(&branch, actor_id) .await .map_err(ApiError::from_omni)?; } 
Ok(Json(BranchDeleteOutput { - uri: state.uri().to_string(), + uri: handle.uri.clone(), name: branch, actor_id: actor_id.map(str::to_string), })) @@ -1348,20 +2397,22 @@ async fn server_branch_delete( /// unchanged in that case. **Destructive** to `target` on success. async fn server_branch_merge( State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Json(request): Json, ) -> std::result::Result, ApiError> { let target = request.target.unwrap_or_else(|| "main".to_string()); let actor_arc = actor .as_ref() - .map(|Extension(actor)| Arc::clone(&actor.0)) + .map(|Extension(actor)| Arc::clone(&actor.actor_id)) .unwrap_or_else(|| Arc::::from("anonymous")); - let actor_id = actor.as_ref().map(|Extension(actor)| actor.as_str()); + let actor_id = actor + .as_ref() + .map(|Extension(actor)| actor.actor_id.as_ref()); authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor_id.map(str::to_string).unwrap_or_default(), action: PolicyAction::BranchMerge, branch: Some(request.source.clone()), target_branch: Some(target.clone()), @@ -1375,7 +2426,7 @@ async fn server_branch_merge( .try_admit(&actor_arc, 256) .map_err(ApiError::from_workload_reject)?; let outcome = { - let db = &state.engine; + let db = &handle.engine; db.branch_merge_as(&request.source, &target, actor_id) .await .map_err(ApiError::from_omni)? @@ -1406,25 +2457,21 @@ async fn server_branch_merge( /// Filter by `branch` to get the commits on a single branch (most recent /// first); omit to list across all branches. Read-only. 
async fn server_commit_list( - State(state): State, - actor: Option>, + Extension(handle): Extension>, + actor: Option>, Query(query): Query, ) -> std::result::Result, ApiError> { authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), action: PolicyAction::Read, branch: query.branch.clone(), target_branch: None, }, )?; let commits = { - let db = &state.engine; + let db = &handle.engine; db.list_commits(query.branch.as_deref()) .await .map_err(ApiError::from_omni)? @@ -1434,6 +2481,13 @@ async fn server_commit_list( })) } +/// Path-param shape for [`server_commit_show`]. See [`BranchPath`] +/// for the design rationale — same pattern, different field name. +#[derive(Deserialize)] +struct CommitPath { + commit_id: String, +} + #[utoipa::path( get, path = "/commits/{commit_id}", @@ -1450,30 +2504,27 @@ async fn server_commit_list( ), security(("bearer_token" = [])), )] + /// Get a single commit. /// /// Returns the commit's manifest version, parent commit(s), and creation /// metadata. Read-only. async fn server_commit_show( - State(state): State, - actor: Option>, - Path(commit_id): Path, + Extension(handle): Extension>, + actor: Option>, + Path(CommitPath { commit_id }): Path, ) -> std::result::Result, ApiError> { authorize_request( - &state, actor.as_ref().map(|Extension(actor)| actor), + handle.policy.as_deref(), PolicyRequest { - actor_id: actor - .as_ref() - .map(|Extension(actor)| actor.as_str().to_string()) - .unwrap_or_default(), action: PolicyAction::Read, branch: None, target_branch: None, }, )?; let commit = { - let db = &state.engine; + let db = &handle.engine; db.get_commit(&commit_id) .await .map_err(ApiError::from_omni)? 
@@ -1489,10 +2540,10 @@ fn read_target_from_request(branch: Option, snapshot: Option) -> } } -fn select_named_query( +fn select_named_query_decl( query_source: &str, requested_name: Option<&str>, -) -> Result<(String, Vec)> { +) -> Result { let parsed = parse_query(query_source)?; let query = if let Some(name) = requested_name { parsed @@ -1505,7 +2556,14 @@ fn select_named_query( } else { bail!("query file contains multiple queries; pass --name"); }; + Ok(query) +} +fn select_named_query( + query_source: &str, + requested_name: Option<&str>, +) -> Result<(String, Vec)> { + let query = select_named_query_decl(query_source, requested_name)?; Ok((query.name, query.params)) } @@ -1591,9 +2649,11 @@ fn server_bearer_tokens_from_env() -> Result> { #[cfg(test)] mod tests { use super::{ - hash_bearer_token, load_server_settings, normalize_bearer_token, parse_bearer_tokens_json, - server_bearer_tokens_from_env, + GraphStartupConfig, ServerConfig, ServerConfigMode, ServerRuntimeState, + classify_server_runtime_state, hash_bearer_token, load_server_settings, + normalize_bearer_token, parse_bearer_tokens_json, serve, server_bearer_tokens_from_env, }; + use serial_test::serial; use std::env; use std::fs; use tempfile::tempdir; @@ -1645,8 +2705,11 @@ server: ) .unwrap(); - let settings = load_server_settings(Some(&config), None, None, None).unwrap(); - assert_eq!(settings.uri, "/tmp/demo.omni"); + let settings = load_server_settings(Some(&config), None, None, None, false).unwrap(); + match &settings.mode { + ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/demo.omni"), + ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"), + } assert_eq!(settings.bind, "0.0.0.0:9090"); } @@ -1672,9 +2735,13 @@ server: Some("/tmp/override.omni".to_string()), None, Some("0.0.0.0:9999".to_string()), + false, ) .unwrap(); - assert_eq!(settings.uri, "/tmp/override.omni"); + match &settings.mode { + ServerConfigMode::Single { uri, .. 
} => assert_eq!(uri, "/tmp/override.omni"), + ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"), + } assert_eq!(settings.bind, "0.0.0.0:9999"); } @@ -1698,14 +2765,256 @@ server: .unwrap(); let settings = - load_server_settings(Some(&config), None, Some("dev".to_string()), None).unwrap(); - assert_eq!(settings.uri, "http://127.0.0.1:8080"); + load_server_settings(Some(&config), None, Some("dev".to_string()), None, false) + .unwrap(); + match &settings.mode { + ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "http://127.0.0.1:8080"), + ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"), + } } #[test] fn server_settings_require_uri_from_cli_or_config() { - let error = load_server_settings(None, None, None, None).unwrap_err(); - assert!(error.to_string().contains("URI must be provided")); + let error = load_server_settings(None, None, None, None, false).unwrap_err(); + assert!( + error.to_string().contains("no graph to serve"), + "expected mode-inference error, got: {error}", + ); + } + + #[test] + fn classify_open_requires_explicit_unauthenticated_flag() { + // State 1: no tokens, no policy, no flag → refuse to start. + let error = classify_server_runtime_state(false, false, false).unwrap_err(); + let msg = error.to_string(); + assert!( + msg.contains("--unauthenticated"), + "expected refusal message mentioning --unauthenticated, got: {msg}" + ); + + // Same matrix cell but with the flag set → Open mode permitted. + assert_eq!( + classify_server_runtime_state(false, false, true).unwrap(), + ServerRuntimeState::Open + ); + } + + #[test] + fn classify_tokens_without_policy_is_default_deny() { + // State 2: tokens configured, no policy → DefaultDeny regardless + // of the flag (the flag opts into the fully-open dev mode; it + // doesn't downgrade default-deny back to open). 
+ assert_eq!( + classify_server_runtime_state(true, false, false).unwrap(), + ServerRuntimeState::DefaultDeny + ); + assert_eq!( + classify_server_runtime_state(true, false, true).unwrap(), + ServerRuntimeState::DefaultDeny + ); + } + + #[tokio::test] + #[serial] + async fn serve_refuses_to_start_with_policy_but_no_tokens_multi_mode() { + // Bug 2 from the bot-review pass: multi-mode startup was missing + // the "policy requires tokens" check that single-mode enforces. + // After centralizing the check in `classify_server_runtime_state`, + // both modes get the same enforcement. This test guards the + // multi-mode propagation path. + // + // Sibling test below pins single mode. Together they pin that + // the classifier is called from both branches of `serve()`. + let _guard = EnvGuard::set(&[ + ("OMNIGRAPH_SERVER_BEARER_TOKEN", None), + ("OMNIGRAPH_SERVER_BEARER_TOKENS_FILE", None), + ("OMNIGRAPH_SERVER_BEARER_TOKENS_JSON", None), + ("OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET", None), + ("OMNIGRAPH_UNAUTHENTICATED", None), + ]); + let temp = tempdir().unwrap(); + // The classifier reads `has_policy_configured` from the config + // shape (does the Option contain a path?), not from file + // existence, so we can hand it a path without writing a real + // policy file — the bail fires before policy load. 
+ let policy_path = temp.path().join("server-policy.yaml"); + let config = ServerConfig { + mode: ServerConfigMode::Multi { + graphs: vec![GraphStartupConfig { + graph_id: "alpha".to_string(), + uri: temp + .path() + .join("alpha.omni") + .to_string_lossy() + .into_owned(), + policy_file: None, + }], + config_path: temp.path().join("omnigraph.yaml"), + server_policy_file: Some(policy_path), + }, + bind: "127.0.0.1:0".to_string(), + allow_unauthenticated: false, + }; + let result = serve(config).await; + let err = result + .expect_err("serve should refuse to start in multi mode with policy but no tokens"); + let msg = format!("{:?}", err); + assert!( + msg.contains("policy file is configured but no bearer tokens"), + "expected policy-without-tokens rejection in multi mode, got: {msg}", + ); + } + + #[tokio::test] + #[serial] + async fn serve_refuses_to_start_in_state_1_without_unauthenticated() { + // MR-723 PR A: pin the integration boundary that the classifier + // is actually called by `serve()` before any side-effecting + // work (Lance dataset open, TcpListener::bind). The classifier + // itself is unit-tested above; this test guards the propagation + // path from `classify_server_runtime_state` through serve's + // `?` so a future refactor that drops the call returns red. + // + // Marked `#[serial]` because we have to clear all bearer-token + // env vars, and another test in this module setting any of them + // concurrently would corrupt the read inside `resolve_token_source`. + let _guard = EnvGuard::set(&[ + ("OMNIGRAPH_SERVER_BEARER_TOKEN", None), + ("OMNIGRAPH_SERVER_BEARER_TOKENS_FILE", None), + ("OMNIGRAPH_SERVER_BEARER_TOKENS_JSON", None), + ("OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET", None), + ("OMNIGRAPH_UNAUTHENTICATED", None), + ]); + let temp = tempdir().unwrap(); + // Graph path doesn't need to exist — classifier fires before + // `AppState::open_with_bearer_tokens_and_policy`. 
+ let config = ServerConfig { + mode: ServerConfigMode::Single { + uri: temp + .path() + .join("graph.omni") + .to_string_lossy() + .into_owned(), + policy_file: None, + }, + bind: "127.0.0.1:0".to_string(), + allow_unauthenticated: false, + }; + let result = serve(config).await; + let err = + result.expect_err("serve should refuse to start in State 1 without --unauthenticated"); + let msg = format!("{:?}", err); + assert!( + msg.contains("no bearer tokens") || msg.contains("policy file"), + "expected refusal message naming the misconfiguration, got: {msg}", + ); + } + + #[test] + #[serial] + fn unauthenticated_env_var_classification() { + // MR-723 PR A: closes the gap where the env-var read path inside + // `load_server_settings` was structurally implemented but not + // exercised by any test. Three properties to pin, all in one + // sequential test because `cargo test` runs the mod test suite + // in parallel and `OMNIGRAPH_UNAUTHENTICATED` is process-global + // — interleaving with another test that sets the same env var + // (concurrent classifier tests, even the bearer-token suite + // sharing `EnvGuard`) corrupts the read. Sequential within one + // test fn is the simplest race-free shape. + let temp = tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +graphs: + local: + uri: /tmp/demo-unauth.omni +server: + graph: local +"#, + ) + .unwrap(); + + // Truthy values flip Open mode on, even with CLI flag off. + for value in ["1", "true", "yes", "TRUE", "anything"] { + let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", Some(value))]); + let settings = load_server_settings(Some(&config_path), None, None, None, false) + .expect("settings load should succeed"); + assert!( + settings.allow_unauthenticated, + "OMNIGRAPH_UNAUTHENTICATED={value:?} should enable Open mode", + ); + } + + // Falsy values keep refusal behavior, even with CLI flag off. 
+ for value in ["0", "false", "FALSE", ""] { + let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", Some(value))]); + let settings = load_server_settings(Some(&config_path), None, None, None, false) + .expect("settings load should succeed"); + assert!( + !settings.allow_unauthenticated, + "OMNIGRAPH_UNAUTHENTICATED={value:?} should NOT enable Open mode", + ); + } + + // Unset env var: also false. + let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", None)]); + let settings = load_server_settings(Some(&config_path), None, None, None, false) + .expect("settings load should succeed"); + assert!( + !settings.allow_unauthenticated, + "OMNIGRAPH_UNAUTHENTICATED unset should NOT enable Open mode", + ); + drop(_guard); + + // CLI flag wins even when env is falsy — `serve()` honors the + // OR of both inputs. + let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", Some("0"))]); + let settings = load_server_settings(Some(&config_path), None, None, None, true) + .expect("settings load should succeed"); + assert!( + settings.allow_unauthenticated, + "--unauthenticated CLI flag should win even when env is falsy", + ); + } + + #[test] + fn classify_policy_enabled_requires_tokens() { + // State 3: tokens + policy → PolicyEnabled, regardless of the + // `allow_unauthenticated` flag (Cedar evaluates the bearer, + // the flag is moot once tokens exist). + assert_eq!( + classify_server_runtime_state(true, true, false).unwrap(), + ServerRuntimeState::PolicyEnabled + ); + assert_eq!( + classify_server_runtime_state(true, true, true).unwrap(), + ServerRuntimeState::PolicyEnabled + ); + } + + #[test] + fn classify_policy_without_tokens_is_rejected() { + // Closes the "policy installed but no tokens → silent 401 on + // every request" footgun. The same shape that single-mode + // `open_with_bearer_tokens_and_policy` used to bail on + // privately is now rejected by the classifier so both single + // and multi mode get the same enforcement from one source of + // truth. 
+ for allow_unauthenticated in [false, true] { + let err = + classify_server_runtime_state(false, true, allow_unauthenticated).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("policy file is configured but no bearer tokens"), + "expected policy-without-tokens rejection message; got: {msg}" + ); + assert!( + msg.contains("every request would 401"), + "rejection message must name the failure mode; got: {msg}" + ); + } } #[test] @@ -1762,6 +3071,7 @@ server: } #[test] + #[serial] fn server_bearer_tokens_from_env_reads_legacy_token_and_token_file() { let temp = tempdir().unwrap(); let tokens_path = temp.path().join("tokens.json"); diff --git a/crates/omnigraph-server/src/main.rs b/crates/omnigraph-server/src/main.rs index 0b43105..4e1c256 100644 --- a/crates/omnigraph-server/src/main.rs +++ b/crates/omnigraph-server/src/main.rs @@ -8,7 +8,7 @@ use omnigraph_server::{ServerConfig, init_tracing, load_server_settings, serve}; #[command(name = "omnigraph-server")] #[command(about = "HTTP server for the Omnigraph graph database")] struct Cli { - /// Repo URI + /// Graph URI uri: Option, #[arg(long)] target: Option, @@ -16,6 +16,12 @@ struct Cli { config: Option, #[arg(long)] bind: Option, + /// Run without bearer tokens and without a policy file (MR-723). + /// Required when neither is configured — otherwise the server + /// refuses to start to prevent shipping the illusion of protection. + /// Equivalent to setting `OMNIGRAPH_UNAUTHENTICATED=1`. 
+ #[arg(long)] + unauthenticated: bool, } #[tokio::main] @@ -24,7 +30,12 @@ async fn main() -> Result<()> { init_tracing(); let cli = Cli::parse(); - let settings: ServerConfig = - load_server_settings(cli.config.as_ref(), cli.uri, cli.target, cli.bind)?; + let settings: ServerConfig = load_server_settings( + cli.config.as_ref(), + cli.uri, + cli.target, + cli.bind, + cli.unauthenticated, + )?; serve(settings).await } diff --git a/crates/omnigraph-server/src/policy.rs b/crates/omnigraph-server/src/policy.rs index 4cf6412..518bb48 100644 --- a/crates/omnigraph-server/src/policy.rs +++ b/crates/omnigraph-server/src/policy.rs @@ -1,844 +1,8 @@ -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; -use std::fmt; -use std::fs; -use std::path::Path; -use std::str::FromStr; - -use cedar_policy::{ - Authorizer, Context, Decision, Entities, Entity, EntityId, EntityTypeName, EntityUid, Policy, - PolicyId, PolicySet, Request, Schema, ValidationMode, Validator, -}; -use clap::ValueEnum; -use color_eyre::eyre::{Result, bail, eyre}; -use serde::{Deserialize, Serialize}; -use serde_json::json; - -#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize, ValueEnum)] -#[serde(rename_all = "snake_case")] -pub enum PolicyAction { - Read, - Export, - Change, - SchemaApply, - BranchCreate, - BranchDelete, - BranchMerge, - Admin, -} - -impl PolicyAction { - pub fn as_str(self) -> &'static str { - match self { - Self::Read => "read", - Self::Export => "export", - Self::Change => "change", - Self::SchemaApply => "schema_apply", - Self::BranchCreate => "branch_create", - Self::BranchDelete => "branch_delete", - Self::BranchMerge => "branch_merge", - Self::Admin => "admin", - } - } - - fn uses_branch_scope(self) -> bool { - matches!(self, Self::Read | Self::Export | Self::Change) - } - - fn uses_target_branch_scope(self) -> bool { - matches!( - self, - Self::BranchCreate | Self::SchemaApply | Self::BranchDelete | Self::BranchMerge - ) - } -} - -impl fmt::Display for 
PolicyAction { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.as_str()) - } -} - -impl FromStr for PolicyAction { - type Err = color_eyre::eyre::Error; - - fn from_str(value: &str) -> Result { - match value.trim() { - "read" => Ok(Self::Read), - "export" => Ok(Self::Export), - "change" => Ok(Self::Change), - "schema_apply" => Ok(Self::SchemaApply), - "branch_create" => Ok(Self::BranchCreate), - "branch_delete" => Ok(Self::BranchDelete), - "branch_merge" => Ok(Self::BranchMerge), - "admin" => Ok(Self::Admin), - other => bail!("unknown policy action '{other}'"), - } - } -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum PolicyBranchScope { - Any, - Protected, - Unprotected, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PolicyActorSelector { - pub group: String, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PolicyAllowRule { - pub actors: PolicyActorSelector, - pub actions: Vec, - pub branch_scope: Option, - pub target_branch_scope: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PolicyRule { - pub id: String, - pub allow: PolicyAllowRule, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PolicyConfig { - pub version: u32, - #[serde(default)] - pub groups: BTreeMap>, - #[serde(default)] - pub protected_branches: Vec, - #[serde(default)] - pub rules: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PolicyTestConfig { - pub version: u32, - #[serde(default)] - pub cases: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PolicyTestCase { - pub id: String, - pub actor: String, - pub action: PolicyAction, - pub branch: Option, - pub target_branch: Option, - pub expect: PolicyExpectation, -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum PolicyExpectation { - Allow, - Deny, -} - 
-#[derive(Debug, Clone)] -pub struct PolicyRequest { - pub actor_id: String, - pub action: PolicyAction, - pub branch: Option, - pub target_branch: Option, -} - -#[derive(Debug, Clone)] -pub struct PolicyDecision { - pub allowed: bool, - pub matched_rule_id: Option, - pub message: String, -} - -pub struct PolicyCompiler; - -#[derive(Clone)] -pub struct PolicyEngine { - repo_id: String, - protected_branches: BTreeSet, - known_actors: BTreeSet, - schema: Schema, - entities: Entities, - policies: PolicySet, - policy_to_rule: HashMap, -} - -impl PolicyConfig { - pub fn load(path: &Path) -> Result { - let config: Self = serde_yaml::from_str(&fs::read_to_string(path)?)?; - config.validate()?; - Ok(config) - } - - pub fn validate(&self) -> Result<()> { - if self.version != 1 { - bail!("policy version must be 1"); - } - - for (group, members) in &self.groups { - if group.trim().is_empty() { - bail!("policy group names must not be blank"); - } - if members.is_empty() { - bail!("policy group '{group}' must not be empty"); - } - for actor in members { - if actor.trim().is_empty() { - bail!("policy group '{group}' contains a blank actor id"); - } - } - } - - for branch in &self.protected_branches { - if branch.trim().is_empty() { - bail!("protected branch names must not be blank"); - } - } - - let mut seen_rule_ids = HashSet::new(); - for rule in &self.rules { - if rule.id.trim().is_empty() { - bail!("policy rule ids must not be blank"); - } - if !seen_rule_ids.insert(rule.id.clone()) { - bail!("duplicate policy rule id '{}'", rule.id); - } - if rule.allow.actors.group.trim().is_empty() { - bail!("policy rule '{}' must reference a non-blank group", rule.id); - } - if !self.groups.contains_key(rule.allow.actors.group.as_str()) { - bail!( - "policy rule '{}' references unknown group '{}'", - rule.id, - rule.allow.actors.group - ); - } - if rule.allow.actions.is_empty() { - bail!("policy rule '{}' must include at least one action", rule.id); - } - if 
rule.allow.branch_scope.is_some() && rule.allow.target_branch_scope.is_some() { - bail!( - "policy rule '{}' may specify branch_scope or target_branch_scope, not both", - rule.id - ); - } - if let Some(_) = rule.allow.branch_scope { - for action in &rule.allow.actions { - if !action.uses_branch_scope() { - bail!( - "policy rule '{}' uses branch_scope with unsupported action '{}'", - rule.id, - action - ); - } - } - } - if let Some(_) = rule.allow.target_branch_scope { - for action in &rule.allow.actions { - if !action.uses_target_branch_scope() { - bail!( - "policy rule '{}' uses target_branch_scope with unsupported action '{}'", - rule.id, - action - ); - } - } - } - } - - Ok(()) - } -} - -impl PolicyTestConfig { - pub fn load(path: &Path) -> Result { - let config: Self = serde_yaml::from_str(&fs::read_to_string(path)?)?; - if config.version != 1 { - bail!("policy test version must be 1"); - } - let mut seen = HashSet::new(); - for case in &config.cases { - if case.id.trim().is_empty() { - bail!("policy test case ids must not be blank"); - } - if !seen.insert(case.id.clone()) { - bail!("duplicate policy test case id '{}'", case.id); - } - if case.actor.trim().is_empty() { - bail!("policy test case '{}' must not use a blank actor", case.id); - } - } - Ok(config) - } -} - -impl PolicyCompiler { - pub fn compile(config: &PolicyConfig, repo_id: &str) -> Result { - config.validate()?; - let (schema, schema_warnings) = Schema::from_cedarschema_str(policy_schema_source())?; - let schema_warnings = schema_warnings - .map(|warning| warning.to_string()) - .collect::>(); - if !schema_warnings.is_empty() { - bail!("policy schema warnings:\n{}", schema_warnings.join("\n")); - } - let entities = compile_entities(config, repo_id, &schema)?; - let (policies, policy_to_rule) = compile_policies(config, repo_id)?; - let validator = Validator::new(schema.clone()); - let validation = validator.validate(&policies, ValidationMode::Strict); - let errors = validation - 
.validation_errors() - .map(|err| err.to_string()) - .collect::>(); - if !errors.is_empty() { - bail!("policy validation failed:\n{}", errors.join("\n")); - } - - let known_actors = config - .groups - .values() - .flat_map(|members| members.iter().cloned()) - .collect(); - Ok(PolicyEngine { - repo_id: repo_id.to_string(), - protected_branches: config.protected_branches.iter().cloned().collect(), - known_actors, - schema, - entities, - policies, - policy_to_rule, - }) - } -} - -impl PolicyEngine { - pub fn load(path: &Path, repo_id: &str) -> Result { - let config = PolicyConfig::load(path)?; - PolicyCompiler::compile(&config, repo_id) - } - - pub fn authorize(&self, request: &PolicyRequest) -> Result { - if !self.known_actors.contains(request.actor_id.as_str()) { - return Ok(self.deny( - request, - None, - format!( - "policy denied action '{}' for unknown actor '{}'", - request.action, request.actor_id - ), - )); - } - - let principal = entity_uid("Actor", &request.actor_id)?; - let action = entity_uid("Action", request.action.as_str())?; - let resource = entity_uid("Repo", &self.repo_id)?; - let context_value = json!({ - "has_branch": request.branch.is_some(), - "branch": request.branch.clone().unwrap_or_default(), - "has_target_branch": request.target_branch.is_some(), - "target_branch": request.target_branch.clone().unwrap_or_default(), - "branch_is_protected": request.branch.as_ref().is_some_and(|branch| self.protected_branches.contains(branch)), - "target_branch_is_protected": request.target_branch.as_ref().is_some_and(|branch| self.protected_branches.contains(branch)), - }); - let context = Context::from_json_value(context_value, Some((&self.schema, &action)))?; - let cedar_request = Request::new(principal, action, resource, context, Some(&self.schema))?; - let response = - Authorizer::new().is_authorized(&cedar_request, &self.policies, &self.entities); - let errors = response - .diagnostics() - .errors() - .map(|err| err.to_string()) - .collect::>(); - if 
!errors.is_empty() { - bail!("policy evaluation failed:\n{}", errors.join("\n")); - } - - let matched_rule_id = response - .diagnostics() - .reason() - .filter_map(|policy_id| { - let key: &str = policy_id.as_ref(); - self.policy_to_rule.get(key).cloned() - }) - .min(); - - Ok(match response.decision() { - Decision::Allow => PolicyDecision { - allowed: true, - matched_rule_id: matched_rule_id.clone(), - message: format!( - "policy allowed action '{}' for actor '{}'", - request.action, request.actor_id - ), - }, - Decision::Deny => { - let message = format!( - "policy denied action '{}'{}{} for actor '{}'", - request.action, - request - .branch - .as_deref() - .map(|branch| format!(" on branch '{}'", branch)) - .unwrap_or_default(), - request - .target_branch - .as_deref() - .map(|branch| format!(" targeting branch '{}'", branch)) - .unwrap_or_default(), - request.actor_id - ); - self.deny(request, matched_rule_id, message) - } - }) - } - - pub fn validate_request(&self, request: &PolicyRequest) -> Result<()> { - let _ = self.authorize(request)?; - Ok(()) - } - - pub fn run_tests(&self, tests: &PolicyTestConfig) -> Result<()> { - if tests.version != 1 { - bail!("policy test version must be 1"); - } - let mut failures = Vec::new(); - for case in &tests.cases { - let decision = self.authorize(&PolicyRequest { - actor_id: case.actor.clone(), - action: case.action, - branch: case.branch.clone(), - target_branch: case.target_branch.clone(), - })?; - let expected_allowed = matches!(case.expect, PolicyExpectation::Allow); - if decision.allowed != expected_allowed { - failures.push(format!( - "{}: expected {:?} but got {}", - case.id, - case.expect, - if decision.allowed { "allow" } else { "deny" } - )); - } - } - if failures.is_empty() { - Ok(()) - } else { - bail!("policy tests failed:\n{}", failures.join("\n")) - } - } - - pub fn known_actor_count(&self) -> usize { - self.known_actors.len() - } - - fn deny( - &self, - _request: &PolicyRequest, - matched_rule_id: Option, 
- message: String, - ) -> PolicyDecision { - PolicyDecision { - allowed: false, - matched_rule_id, - message, - } - } -} - -fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Result { - let mut group_entities = Vec::new(); - for group in config.groups.keys() { - group_entities.push(Entity::new( - entity_uid("Group", group)?, - HashMap::new(), - HashSet::::new(), - )?); - } - - let mut actor_groups: BTreeMap> = BTreeMap::new(); - for (group, members) in &config.groups { - for actor in members { - actor_groups - .entry(actor.clone()) - .or_default() - .insert(group.clone()); - } - } - - let mut actor_entities = Vec::new(); - for (actor, groups) in actor_groups { - let parents = groups - .iter() - .map(|group| entity_uid("Group", group)) - .collect::>>()?; - actor_entities.push(Entity::new( - entity_uid("Actor", &actor)?, - HashMap::new(), - parents, - )?); - } - - let repo_entity = Entity::new( - entity_uid("Repo", repo_id)?, - HashMap::new(), - HashSet::::new(), - )?; - - let mut entities = Vec::new(); - entities.extend(group_entities); - entities.extend(actor_entities); - entities.push(repo_entity); - Ok(Entities::from_entities(entities, Some(schema))?) 
-} - -fn compile_policies( - config: &PolicyConfig, - repo_id: &str, -) -> Result<(PolicySet, HashMap)> { - let mut policies = Vec::new(); - let mut policy_to_rule = HashMap::new(); - - for rule in &config.rules { - for action in &rule.allow.actions { - let policy_id = PolicyId::new(format!("{}:{}", rule.id, action.as_str())); - let source = compile_policy_source(rule, action, repo_id); - let policy = Policy::parse(Some(policy_id.clone()), source.as_str())?; - policy_to_rule.insert(policy_id.to_string(), rule.id.clone()); - policies.push(policy); - } - } - - Ok((PolicySet::from_policies(policies)?, policy_to_rule)) -} - -fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str) -> String { - let mut conditions = Vec::new(); - if let Some(scope) = rule.allow.branch_scope { - conditions.push(branch_scope_condition(scope)); - } - if let Some(scope) = rule.allow.target_branch_scope { - conditions.push(target_branch_scope_condition(scope)); - } - - let when = if conditions.is_empty() { - String::new() - } else { - format!("\nwhen {{ {} }}", conditions.join(" && ")) - }; - - format!( - r#"permit ( - principal in Omnigraph::Group::{group}, - action == Omnigraph::Action::{action}, - resource == Omnigraph::Repo::{repo} -){when};"#, - group = cedar_literal(&rule.allow.actors.group), - action = cedar_literal(action.as_str()), - repo = cedar_literal(repo_id), - when = when, - ) -} - -fn branch_scope_condition(scope: PolicyBranchScope) -> String { - match scope { - PolicyBranchScope::Any => "true".to_string(), - PolicyBranchScope::Protected => { - "context.has_branch && context.branch_is_protected".to_string() - } - PolicyBranchScope::Unprotected => { - "context.has_branch && context.branch_is_protected == false".to_string() - } - } -} - -fn target_branch_scope_condition(scope: PolicyBranchScope) -> String { - match scope { - PolicyBranchScope::Any => "true".to_string(), - PolicyBranchScope::Protected => { - "context.has_target_branch && 
context.target_branch_is_protected".to_string() - } - PolicyBranchScope::Unprotected => { - "context.has_target_branch && context.target_branch_is_protected == false".to_string() - } - } -} - -fn policy_schema_source() -> &'static str { - r#" -namespace Omnigraph { - type RequestContext = { - has_branch: Bool, - branch: String, - has_target_branch: Bool, - target_branch: String, - branch_is_protected: Bool, - target_branch_is_protected: Bool, - }; - - entity Actor in [Group]; - entity Group; - entity Repo; - - action "read" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "export" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "change" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "schema_apply" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "branch_create" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "branch_delete" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "branch_merge" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; - action "admin" appliesTo { principal: Actor, resource: Repo, context: RequestContext }; -} -"# -} - -fn entity_uid(entity_type: &str, id: &str) -> Result { - let typename = EntityTypeName::from_str(&format!("Omnigraph::{entity_type}"))?; - let entity_id = EntityId::from_str(id).map_err(|err| eyre!(err.to_string()))?; - Ok(EntityUid::from_type_name_and_id(typename, entity_id)) -} - -fn cedar_literal(value: &str) -> String { - serde_json::to_string(value).expect("string literal should serialize") -} - -impl PolicyRequest { - pub fn actor_id(&self) -> &str { - &self.actor_id - } - - pub fn action(&self) -> PolicyAction { - self.action - } - - pub fn branch(&self) -> Option<&str> { - self.branch.as_deref() - } - - pub fn target_branch(&self) -> Option<&str> { - self.target_branch.as_deref() - } -} - -#[cfg(test)] 
-mod tests { - use super::{ - PolicyAction, PolicyCompiler, PolicyConfig, PolicyExpectation, PolicyRequest, - PolicyTestCase, PolicyTestConfig, - }; - - #[test] - fn rejects_duplicate_rule_ids() { - let policy: PolicyConfig = serde_yaml::from_str( - r#" -version: 1 -groups: - team: [act-andrew] -rules: - - id: same - allow: - actors: { group: team } - actions: [read] - branch_scope: any - - id: same - allow: - actors: { group: team } - actions: [export] - branch_scope: any -"#, - ) - .unwrap(); - - let err = policy.validate().unwrap_err(); - assert!(err.to_string().contains("duplicate policy rule id")); - } - - #[test] - fn rejects_unknown_group_references() { - let policy: PolicyConfig = serde_yaml::from_str( - r#" -version: 1 -groups: - team: [act-andrew] -rules: - - id: bad - allow: - actors: { group: admins } - actions: [read] - branch_scope: any -"#, - ) - .unwrap(); - - let err = policy.validate().unwrap_err(); - assert!(err.to_string().contains("references unknown group")); - } - - #[test] - fn rejects_invalid_scope_action_combinations() { - let policy: PolicyConfig = serde_yaml::from_str( - r#" -version: 1 -groups: - team: [act-andrew] -rules: - - id: bad - allow: - actors: { group: team } - actions: [branch_merge] - branch_scope: protected -"#, - ) - .unwrap(); - - let err = policy.validate().unwrap_err(); - assert!(err.to_string().contains("unsupported action")); - } - - #[test] - fn compiles_and_authorizes_branch_and_target_rules() { - let policy: PolicyConfig = serde_yaml::from_str( - r#" -version: 1 -groups: - team: [act-andrew, act-bruno] - admins: [act-andrew] -protected_branches: [main] -rules: - - id: team-read - allow: - actors: { group: team } - actions: [read, export] - branch_scope: any - - id: team-write - allow: - actors: { group: team } - actions: [change] - branch_scope: unprotected - - id: admins-promote - allow: - actors: { group: admins } - actions: [branch_delete, branch_merge] - target_branch_scope: protected -"#, - ) - .unwrap(); - - 
let engine = PolicyCompiler::compile(&policy, "repo").unwrap(); - let allow = engine - .authorize(&PolicyRequest { - actor_id: "act-bruno".to_string(), - action: PolicyAction::Change, - branch: Some("feature".to_string()), - target_branch: None, - }) - .unwrap(); - assert!(allow.allowed); - assert_eq!(allow.matched_rule_id.as_deref(), Some("team-write")); - - let deny = engine - .authorize(&PolicyRequest { - actor_id: "act-bruno".to_string(), - action: PolicyAction::BranchDelete, - branch: None, - target_branch: Some("main".to_string()), - }) - .unwrap(); - assert!(!deny.allowed); - - let admin = engine - .authorize(&PolicyRequest { - actor_id: "act-andrew".to_string(), - action: PolicyAction::BranchDelete, - branch: None, - target_branch: Some("main".to_string()), - }) - .unwrap(); - assert!(admin.allowed); - assert_eq!(admin.matched_rule_id.as_deref(), Some("admins-promote")); - } - - #[test] - fn policy_tests_enforce_expected_outcomes() { - let policy: PolicyConfig = serde_yaml::from_str( - r#" -version: 1 -groups: - team: [act-andrew] -protected_branches: [main] -rules: - - id: team-read - allow: - actors: { group: team } - actions: [read] - branch_scope: any -"#, - ) - .unwrap(); - let engine = PolicyCompiler::compile(&policy, "repo").unwrap(); - let tests = PolicyTestConfig { - version: 1, - cases: vec![ - PolicyTestCase { - id: "allow-read".to_string(), - actor: "act-andrew".to_string(), - action: PolicyAction::Read, - branch: Some("main".to_string()), - target_branch: None, - expect: PolicyExpectation::Allow, - }, - PolicyTestCase { - id: "deny-change".to_string(), - actor: "act-andrew".to_string(), - action: PolicyAction::Change, - branch: Some("main".to_string()), - target_branch: None, - expect: PolicyExpectation::Deny, - }, - ], - }; - - engine.run_tests(&tests).unwrap(); - } - - #[test] - fn schema_apply_uses_target_branch_scope() { - let policy: PolicyConfig = serde_yaml::from_str( - r#" -version: 1 -groups: - admins: [act-ragnor] -protected_branches: 
[main] -rules: - - id: admins-schema-apply - allow: - actors: { group: admins } - actions: [schema_apply] - target_branch_scope: protected -"#, - ) - .unwrap(); - - let engine = PolicyCompiler::compile(&policy, "repo").unwrap(); - let allow = engine - .authorize(&PolicyRequest { - actor_id: "act-ragnor".to_string(), - action: PolicyAction::SchemaApply, - branch: None, - target_branch: Some("main".to_string()), - }) - .unwrap(); - assert!(allow.allowed); - - let deny = engine - .authorize(&PolicyRequest { - actor_id: "act-ragnor".to_string(), - action: PolicyAction::SchemaApply, - branch: None, - target_branch: Some("feature".to_string()), - }) - .unwrap(); - assert!(!deny.allowed); - } -} +// Module shim: PolicyEngine moved to the omnigraph-policy workspace crate +// (MR-722 chassis core). The re-exports below preserve the existing +// `omnigraph_server::policy::*` paths so call sites (CLI, tests, +// downstream consumers) don't have to change in one go. Direct callers +// should migrate to `omnigraph_policy::*` over time; this shim can +// be removed once that migration completes. + +pub use omnigraph_policy::*; diff --git a/crates/omnigraph-server/src/registry.rs b/crates/omnigraph-server/src/registry.rs new file mode 100644 index 0000000..5897ad1 --- /dev/null +++ b/crates/omnigraph-server/src/registry.rs @@ -0,0 +1,558 @@ +//! `GraphRegistry` — the multi-graph routing substrate (MR-668). +//! +//! Holds the open `Arc` for every graph the server is currently +//! serving. Lock-free reads via `ArcSwap`; mutations +//! serialize through `mutate: Mutex<()>` for read-modify-write atomicity. +//! +//! **Deletion is deferred** in v0.6.0 (MR-668 scope cut). The registry has +//! no `tombstones` field, no `RegistryLookup::Tombstoned` variant, no +//! `tombstone()` / `clear_tombstone()` methods. When `DELETE /graphs/{id}` +//! lands in a follow-up release, those return without breaking caller +//! signatures (`Gone` is the closest semantic — the graph is no longer +//! 
in the registry). +//! +//! Engine instance survival across registry mutations: +//! a request that grabbed `Arc` before a registry swap keeps +//! the engine alive via its own `Arc` clone (see `server_export` at +//! `lib.rs:1019-1033` for the spawn-and-clone pattern). The engine drops +//! when the last `Arc` clone drops, regardless of the +//! registry's current state. + +use std::collections::HashMap; +use std::sync::Arc; + +use arc_swap::ArcSwap; +use omnigraph::db::Omnigraph; +use omnigraph::storage::normalize_root_uri; +#[cfg(test)] +use tokio::sync::Mutex; + +use crate::identity::GraphKey; +use crate::policy::PolicyEngine; + +/// Open handle for a single graph in the registry. Cheap to clone (`Arc`-wrapped +/// engine + policy). Cluster-mode handlers extract this via +/// `Extension>` injected by the routing middleware. +pub struct GraphHandle { + /// Registry key. In Cluster mode `key.tenant_id` is always `None`. + pub key: GraphKey, + /// The URI the engine was opened from (`s3://...` or local path). + /// Stable for the engine's lifetime; surfaced in responses like + /// `BranchCreateOutput.uri`. + pub uri: String, + /// Engine. Reads/writes go directly through `&self` methods on + /// `Omnigraph` (no `RwLock` — MR-686 preserved). + pub engine: Arc, + /// Per-graph Cedar policy. `None` means "no policy gate on engine-layer + /// `_as` writers"; the HTTP-layer `require_bearer_auth` middleware still + /// runs regardless. + pub policy: Option>, +} + +/// Immutable snapshot of the registry's current state. Replaced atomically +/// via `ArcSwap`; readers see a consistent view of all graphs without locking. +/// +/// Derived state (`any_per_graph_policy`) is computed at snapshot +/// construction so request-time middleware doesn't have to walk the +/// graph map every call. Construct only via [`RegistrySnapshot::new`] +/// (or `Default`) so the field stays in sync with `graphs`. 
+pub struct RegistrySnapshot { + pub graphs: HashMap>, + /// `true` iff any registered graph has a per-graph policy installed. + /// Used by `AppState::requires_bearer_auth` to decide whether the + /// auth middleware should challenge a request — a per-graph policy + /// implies bearer auth is required even when no server-level tokens + /// or policy are configured. + pub any_per_graph_policy: bool, +} + +impl RegistrySnapshot { + /// Build a snapshot from a graph map, deriving cached fields. + /// The only construction path — direct struct-literal use elsewhere + /// would let derived state drift from `graphs`. + pub fn new(graphs: HashMap>) -> Self { + let any_per_graph_policy = graphs.values().any(|h| h.policy.is_some()); + Self { + graphs, + any_per_graph_policy, + } + } +} + +impl Default for RegistrySnapshot { + fn default() -> Self { + Self::new(HashMap::new()) + } +} + +/// Result of a registry lookup. Two-valued — `Tombstoned` deferred with DELETE. +pub enum RegistryLookup { + /// Graph is open and ready to serve. + Ready(Arc), + /// Graph is not in the registry (never existed, or was unregistered in a + /// future release). Handlers respond with 404. + Gone, +} + +/// Why an `insert` was rejected. +#[derive(Debug, thiserror::Error)] +pub enum InsertError { + /// Another handle already exists for this `GraphKey`. Maps to HTTP 409. + #[error("graph '{0}' is already registered")] + DuplicateKey(GraphKey), + /// Another handle is open against this URI. Two graphs sharing a URI + /// would commit through the same Lance manifest and corrupt each other. + /// Maps to HTTP 409. + #[error("URI '{0}' is already registered as another graph")] + DuplicateUri(String), + /// A handle carried an invalid graph URI. Maps to startup failure. + #[error("URI '{uri}' is invalid: {message}")] + InvalidUri { uri: String, message: String }, +} + +pub struct GraphRegistry { + snapshot: ArcSwap, + /// Serializes runtime mutations through [`GraphRegistry::insert`]. 
+ /// Gated with `insert` because they share a single contract — if + /// the consumer goes away, so does the lock. Re-introducing one + /// requires re-introducing the other. + #[cfg(test)] + mutate: Mutex<()>, +} + +impl GraphRegistry { + /// Empty registry. Used as a placeholder before startup populates it. + pub fn new() -> Self { + Self { + snapshot: ArcSwap::from_pointee(RegistrySnapshot::default()), + #[cfg(test)] + mutate: Mutex::new(()), + } + } + + /// Build a registry from a startup-time list of open handles. + /// Rejects duplicate `GraphKey`s and duplicate URIs. + pub fn from_handles(handles: Vec>) -> Result { + let mut graphs: HashMap> = HashMap::with_capacity(handles.len()); + let mut seen_uris: HashMap = HashMap::with_capacity(handles.len()); + for handle in handles { + let (canonical_uri, handle) = canonicalize_handle_uri(handle)?; + if graphs.contains_key(&handle.key) { + return Err(InsertError::DuplicateKey(handle.key.clone())); + } + if seen_uris.contains_key(&canonical_uri) { + return Err(InsertError::DuplicateUri(handle.uri.clone())); + } + seen_uris.insert(canonical_uri, handle.key.clone()); + graphs.insert(handle.key.clone(), handle); + } + Ok(Self { + snapshot: ArcSwap::from_pointee(RegistrySnapshot::new(graphs)), + #[cfg(test)] + mutate: Mutex::new(()), + }) + } + + /// Lock-free snapshot read. Callers that need derived state cached + /// on the snapshot (e.g. `any_per_graph_policy`) go through here; + /// callers that only need values of `graphs` should use [`list`] + /// or [`get`]. + pub fn snapshot_ref(&self) -> arc_swap::Guard> { + self.snapshot.load() + } + + /// Lock-free read. Returns `Ready` if the graph is in the current snapshot, + /// `Gone` otherwise. 
+ pub fn get(&self, key: &GraphKey) -> RegistryLookup { + let snapshot = self.snapshot.load(); + match snapshot.graphs.get(key) { + Some(handle) => RegistryLookup::Ready(Arc::clone(handle)), + None => RegistryLookup::Gone, + } + } + + /// Snapshot the full set of currently-registered handles. Ordering + /// matches the underlying `HashMap` iteration (intentionally + /// non-deterministic — callers that need a stable order sort by + /// `handle.key.graph_id`). + pub fn list(&self) -> Vec> { + let snapshot = self.snapshot.load(); + snapshot.graphs.values().cloned().collect() + } + + /// Number of registered graphs (excluding any future tombstones). + pub fn len(&self) -> usize { + self.snapshot.load().graphs.len() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Add a new handle. Async because the mutex is `tokio::sync::Mutex` + /// (a future managed-catalog flow may hold it across `.await` points + /// during atomic registry mutations). Rejects duplicate `GraphKey` + /// and duplicate `uri`. + /// + /// **Test-only surface.** No production code reaches this — startup + /// uses `from_handles`, and runtime add/remove is deferred. The + /// race-contract tests below pin the mutex linearization point so + /// that when a real consumer ships (managed cluster catalog), the + /// concurrency contract is already proven. Ungate by removing + /// `#[cfg(test)]` once that consumer is in scope. + /// + /// Race semantics (pinned by `concurrent_insert_same_key_exactly_one_succeeds`): + /// under N concurrent calls with the same key, exactly one returns + /// `Ok(())` and the rest return `Err(InsertError::DuplicateKey(_))`. 
+ #[cfg(test)] + pub async fn insert(&self, handle: Arc) -> Result<(), InsertError> { + let _guard = self.mutate.lock().await; + let current = self.snapshot.load(); + let (canonical_uri, handle) = canonicalize_handle_uri(handle)?; + if current.graphs.contains_key(&handle.key) { + return Err(InsertError::DuplicateKey(handle.key.clone())); + } + for existing in current.graphs.values() { + let existing_uri = + normalize_root_uri(&existing.uri).map_err(|err| InsertError::InvalidUri { + uri: existing.uri.clone(), + message: err.to_string(), + })?; + if existing_uri == canonical_uri { + return Err(InsertError::DuplicateUri(handle.uri.clone())); + } + } + let mut new_graphs = current.graphs.clone(); + new_graphs.insert(handle.key.clone(), handle); + self.snapshot + .store(Arc::new(RegistrySnapshot::new(new_graphs))); + Ok(()) + } +} + +fn canonicalize_handle_uri( + handle: Arc, +) -> Result<(String, Arc), InsertError> { + let canonical_uri = normalize_root_uri(&handle.uri).map_err(|err| InsertError::InvalidUri { + uri: handle.uri.clone(), + message: err.to_string(), + })?; + if canonical_uri == handle.uri { + return Ok((canonical_uri, handle)); + } + let canonical_handle = Arc::new(GraphHandle { + key: handle.key.clone(), + uri: canonical_uri.clone(), + engine: Arc::clone(&handle.engine), + policy: handle.policy.clone(), + }); + Ok((canonical_uri, canonical_handle)) +} + +impl Default for GraphRegistry { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use tempfile::TempDir; + + use super::*; + use crate::graph_id::GraphId; + + const TEST_SCHEMA: &str = "node Person { name: String @key }\n"; + + async fn build_handle(graph_id: &str, dir: &Path) -> Arc { + let graph_uri = dir.join(graph_id).to_str().unwrap().to_string(); + let engine = Omnigraph::init(&graph_uri, TEST_SCHEMA) + .await + .expect("init engine for registry test"); + Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from(graph_id).unwrap()), + 
uri: graph_uri, + engine: Arc::new(engine), + policy: None, + }) + } + + #[tokio::test] + async fn new_registry_is_empty() { + let registry = GraphRegistry::new(); + assert!(registry.is_empty()); + assert_eq!(registry.len(), 0); + assert!(registry.list().is_empty()); + } + + #[tokio::test] + async fn insert_then_get_returns_ready() { + let dir = TempDir::new().unwrap(); + let registry = GraphRegistry::new(); + let handle = build_handle("alpha", dir.path()).await; + registry.insert(Arc::clone(&handle)).await.unwrap(); + + match registry.get(&handle.key) { + RegistryLookup::Ready(found) => { + assert!(Arc::ptr_eq(&found, &handle)); + } + RegistryLookup::Gone => panic!("expected Ready, got Gone"), + } + } + + #[tokio::test] + async fn get_nonexistent_returns_gone() { + let registry = GraphRegistry::new(); + let key = GraphKey::cluster(GraphId::try_from("ghost").unwrap()); + match registry.get(&key) { + RegistryLookup::Gone => {} + RegistryLookup::Ready(_) => panic!("expected Gone"), + } + } + + #[tokio::test] + async fn insert_duplicate_key_returns_error() { + let dir = TempDir::new().unwrap(); + let registry = GraphRegistry::new(); + let h1 = build_handle("alpha", dir.path()).await; + // Same key, different URI sub-path (build_handle uses graph_id as subdir). + let dir2 = TempDir::new().unwrap(); + let h2 = build_handle("alpha", dir2.path()).await; + registry.insert(h1).await.unwrap(); + + match registry.insert(h2).await { + Err(InsertError::DuplicateKey(_)) => {} + other => panic!("expected DuplicateKey, got {other:?}"), + } + } + + #[tokio::test] + async fn insert_duplicate_uri_returns_error() { + let dir = TempDir::new().unwrap(); + // Two handles with the same URI but different keys. 
+ let shared_uri = dir.path().join("shared").to_str().unwrap().to_string(); + let engine = Omnigraph::init(&shared_uri, TEST_SCHEMA).await.unwrap(); + let engine = Arc::new(engine); + let h1 = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()), + uri: shared_uri.clone(), + engine: Arc::clone(&engine), + policy: None, + }); + let h2 = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("beta").unwrap()), + uri: shared_uri, + engine, + policy: None, + }); + + let registry = GraphRegistry::new(); + registry.insert(h1).await.unwrap(); + match registry.insert(h2).await { + Err(InsertError::DuplicateUri(_)) => {} + other => panic!("expected DuplicateUri, got {other:?}"), + } + } + + #[tokio::test] + async fn list_returns_all_inserted_handles() { + let dir = TempDir::new().unwrap(); + let registry = GraphRegistry::new(); + for name in ["alpha", "beta", "gamma"] { + let h = build_handle(name, dir.path()).await; + registry.insert(h).await.unwrap(); + } + assert_eq!(registry.len(), 3); + let mut ids: Vec<_> = registry + .list() + .into_iter() + .map(|h| h.key.graph_id.as_str().to_string()) + .collect(); + ids.sort(); + assert_eq!(ids, vec!["alpha", "beta", "gamma"]); + } + + #[tokio::test] + async fn from_handles_bulk_init_succeeds() { + let dir = TempDir::new().unwrap(); + let handles = vec![ + build_handle("alpha", dir.path()).await, + build_handle("beta", dir.path()).await, + ]; + let registry = GraphRegistry::from_handles(handles).unwrap(); + assert_eq!(registry.len(), 2); + } + + #[tokio::test] + async fn from_handles_rejects_duplicate_keys() { + let dir1 = TempDir::new().unwrap(); + let dir2 = TempDir::new().unwrap(); + let h1 = build_handle("alpha", dir1.path()).await; + let h2 = build_handle("alpha", dir2.path()).await; + let err = match GraphRegistry::from_handles(vec![h1, h2]) { + Ok(_) => panic!("expected DuplicateKey, got Ok"), + Err(err) => err, + }; + assert!( + matches!(err, InsertError::DuplicateKey(_)), + 
"expected DuplicateKey, got {err}", + ); + } + + #[tokio::test] + async fn from_handles_rejects_duplicate_uris() { + let dir = TempDir::new().unwrap(); + let shared_uri = dir.path().join("shared").to_str().unwrap().to_string(); + let engine = Arc::new(Omnigraph::init(&shared_uri, TEST_SCHEMA).await.unwrap()); + let h1 = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()), + uri: shared_uri.clone(), + engine: Arc::clone(&engine), + policy: None, + }); + let h2 = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("beta").unwrap()), + uri: shared_uri, + engine, + policy: None, + }); + let err = match GraphRegistry::from_handles(vec![h1, h2]) { + Ok(_) => panic!("expected DuplicateUri, got Ok"), + Err(err) => err, + }; + assert!( + matches!(err, InsertError::DuplicateUri(_)), + "expected DuplicateUri, got {err}", + ); + } + + /// Race test modeled on `actor_admission_race_does_not_exceed_cap` + /// at `tests/server.rs:3596+`. Spawn N concurrent inserts with the + /// same `GraphKey` (each constructing its own `GraphHandle` against + /// its own tempdir). Exactly one must succeed; the others must + /// return `DuplicateKey`. No `unwrap` panic: the `Mutex<()>` + + /// in-mutex re-check is the linearization point. + #[tokio::test(flavor = "multi_thread")] + async fn concurrent_insert_same_key_exactly_one_succeeds() { + const N: usize = 8; + + let registry = Arc::new(GraphRegistry::new()); + // Pre-create N handles (each in its own tempdir; same key). 
+ let mut handles = Vec::with_capacity(N); + let mut dirs = Vec::with_capacity(N); + for _ in 0..N { + let d = TempDir::new().unwrap(); + handles.push(build_handle("contested", d.path()).await); + dirs.push(d); + } + + let barrier = Arc::new(tokio::sync::Barrier::new(N)); + let mut tasks = Vec::with_capacity(N); + for handle in handles { + let registry = Arc::clone(®istry); + let barrier = Arc::clone(&barrier); + tasks.push(tokio::spawn(async move { + barrier.wait().await; + registry.insert(handle).await + })); + } + + let mut ok_count = 0usize; + let mut dup_count = 0usize; + for t in tasks { + match t.await.unwrap() { + Ok(()) => ok_count += 1, + Err(InsertError::DuplicateKey(_)) => dup_count += 1, + Err(other) => panic!("unexpected error: {other:?}"), + } + } + assert_eq!(ok_count, 1, "exactly one insert must succeed"); + assert_eq!(dup_count, N - 1, "the rest must return DuplicateKey"); + assert_eq!(registry.len(), 1); + + // Drop the dirs at the end (preserves engines until tasks finish). + drop(dirs); + } + + /// Concurrent inserts with **distinct** keys all succeed. + /// Linearizability over the mutex still serializes them. + #[tokio::test(flavor = "multi_thread")] + async fn concurrent_insert_distinct_keys_all_succeed() { + const N: usize = 8; + + let registry = Arc::new(GraphRegistry::new()); + // Pre-create N handles with distinct ids, each in its own tempdir. 
+ let mut handles = Vec::with_capacity(N); + let mut dirs = Vec::with_capacity(N); + for i in 0..N { + let d = TempDir::new().unwrap(); + handles.push(build_handle(&format!("graph-{i}"), d.path()).await); + dirs.push(d); + } + + let barrier = Arc::new(tokio::sync::Barrier::new(N)); + let mut tasks = Vec::with_capacity(N); + for handle in handles { + let registry = Arc::clone(®istry); + let barrier = Arc::clone(&barrier); + tasks.push(tokio::spawn(async move { + barrier.wait().await; + registry.insert(handle).await + })); + } + for t in tasks { + t.await.unwrap().unwrap(); + } + assert_eq!(registry.len(), N); + drop(dirs); + } + + /// Concurrent reads during a write must always see a consistent + /// snapshot (no torn state). With `ArcSwap`, the read either sees + /// the old snapshot or the new one — never both, never neither. + #[tokio::test(flavor = "multi_thread")] + async fn concurrent_reads_during_inserts_see_consistent_snapshots() { + let dir = TempDir::new().unwrap(); + let registry = Arc::new(GraphRegistry::new()); + + // Spawn a writer that inserts graph-0..graph-9 sequentially. + const N_WRITES: usize = 10; + let writer_registry = Arc::clone(®istry); + let writer_dir = dir.path().to_path_buf(); + let writer = tokio::spawn(async move { + for i in 0..N_WRITES { + let h = build_handle(&format!("graph-{i}"), &writer_dir).await; + writer_registry.insert(h).await.unwrap(); + } + }); + + // Reader loop: repeatedly snapshot the registry until the writer + // finishes. Every snapshot's len must be in [0, N_WRITES], and + // for every key g in the snapshot, get(g) must return Ready. 
+ let reader_registry = Arc::clone(®istry); + let reader = tokio::spawn(async move { + for _ in 0..200 { + let snap = reader_registry.list(); + assert!(snap.len() <= N_WRITES); + for handle in &snap { + match reader_registry.get(&handle.key) { + RegistryLookup::Ready(found) => { + assert!(Arc::ptr_eq(&found, handle)); + } + RegistryLookup::Gone => panic!( + "snapshot listed key {} but get() returned Gone", + handle.key.graph_id + ), + } + } + tokio::task::yield_now().await; + } + }); + + writer.await.unwrap(); + reader.await.unwrap(); + assert_eq!(registry.len(), N_WRITES); + } +} diff --git a/crates/omnigraph-server/src/workload.rs b/crates/omnigraph-server/src/workload.rs index efc7068..4e84532 100644 --- a/crates/omnigraph-server/src/workload.rs +++ b/crates/omnigraph-server/src/workload.rs @@ -270,12 +270,13 @@ mod tests { let err = controller .try_admit(&actor, 100) .expect_err("third should reject on count"); - assert!(matches!(err, RejectReason::InFlightCountExceeded { cap: 2 })); + assert!(matches!( + err, + RejectReason::InFlightCountExceeded { cap: 2 } + )); drop(g1); // After drop, a new admit succeeds again. - let _g3 = controller - .try_admit(&actor, 100) - .expect("admit after drop"); + let _g3 = controller.try_admit(&actor, 100).expect("admit after drop"); } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] @@ -356,7 +357,9 @@ mod tests { let bob: Arc = "bob".into(); let _ga = controller.try_admit(&alice, 100).expect("alice ok"); // Alice over count cap, Bob unaffected. - let err = controller.try_admit(&alice, 100).expect_err("alice rejected"); + let err = controller + .try_admit(&alice, 100) + .expect_err("alice rejected"); assert!(matches!(err, RejectReason::InFlightCountExceeded { .. 
})); let _gb = controller.try_admit(&bob, 100).expect("bob ok"); } diff --git a/crates/omnigraph-server/tests/openapi.rs b/crates/omnigraph-server/tests/openapi.rs index 86a124d..a2542db 100644 --- a/crates/omnigraph-server/tests/openapi.rs +++ b/crates/omnigraph-server/tests/openapi.rs @@ -19,42 +19,42 @@ fn fixture(name: &str) -> PathBuf { .join(name) } -fn repo_path(root: &Path) -> PathBuf { +fn graph_path(root: &Path) -> PathBuf { root.join("openapi_test.omni") } -async fn init_loaded_repo() -> tempfile::TempDir { +async fn init_loaded_graph() -> tempfile::TempDir { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); - fs::create_dir_all(&repo).unwrap(); + let graph = graph_path(temp.path()); + fs::create_dir_all(&graph).unwrap(); let schema = fs::read_to_string(fixture("test.pg")).unwrap(); let data = fs::read_to_string(fixture("test.jsonl")).unwrap(); - Omnigraph::init(repo.to_str().unwrap(), &schema) + Omnigraph::init(graph.to_str().unwrap(), &schema) .await .unwrap(); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); load_jsonl(&mut db, &data, LoadMode::Overwrite) .await .unwrap(); temp } -async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) +async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); (temp, app) } -async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); +async fn app_for_loaded_graph_with_auth(token: &str) -> 
(tempfile::TempDir, Router) { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let state = AppState::new_with_bearer_token( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), db, Some(token.to_string()), ); @@ -84,7 +84,7 @@ fn openapi_json() -> Value { #[tokio::test] async fn openapi_endpoint_returns_200_with_valid_json() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -97,7 +97,7 @@ async fn openapi_endpoint_returns_200_with_valid_json() { #[tokio::test] async fn openapi_endpoint_returns_openapi_31_version() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -113,11 +113,11 @@ async fn openapi_endpoint_returns_openapi_31_version() { #[tokio::test] async fn openapi_endpoint_does_not_require_auth() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let state = AppState::new_with_bearer_token( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), db, Some("secret-token".to_string()), ); @@ -129,7 +129,11 @@ async fn openapi_endpoint_does_not_require_auth() { .body(Body::empty()) .unwrap(); let (status, _) = json_response(&app, request).await; - assert_eq!(status, StatusCode::OK, "/openapi.json should not require auth"); + assert_eq!( + status, + StatusCode::OK, + "/openapi.json should not require auth" + ); } // --------------------------------------------------------------------------- @@ -157,10 +161,13 @@ fn 
openapi_info_contains_version() { const EXPECTED_PATHS: &[&str] = &[ "/healthz", + "/graphs", "/snapshot", "/read", + "/query", "/export", "/change", + "/mutate", "/schema", "/schema/apply", "/ingest", @@ -227,6 +234,64 @@ fn openapi_change_is_post() { assert!(doc["paths"]["/change"]["post"].is_object()); } +#[test] +fn openapi_mutate_is_post() { + let doc = openapi_json(); + assert!(doc["paths"]["/mutate"]["post"].is_object()); +} + +// Deprecation flagging — `/read` and `/change` are kept indefinitely for +// back-compat but are flagged so OpenAPI codegens (typescript-fetch, +// openapi-generator, oapi-codegen, etc.) emit @deprecated on the generated +// SDK methods. The canonical successors `/query` and `/mutate` are not +// flagged. See `deprecation_headers` in `omnigraph-server/src/lib.rs` for +// the matching runtime signal (RFC 9745 + RFC 8288 headers). +#[test] +fn openapi_read_is_deprecated() { + let doc = openapi_json(); + assert_eq!( + doc["paths"]["/read"]["post"]["deprecated"], + serde_json::Value::Bool(true), + "/read must be flagged deprecated in OpenAPI; use /query instead" + ); +} + +#[test] +fn openapi_change_is_deprecated() { + let doc = openapi_json(); + assert_eq!( + doc["paths"]["/change"]["post"]["deprecated"], + serde_json::Value::Bool(true), + "/change must be flagged deprecated in OpenAPI; use /mutate instead" + ); +} + +#[test] +fn openapi_query_is_not_deprecated() { + let doc = openapi_json(); + let deprecated = doc["paths"]["/query"]["post"] + .get("deprecated") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + assert!( + !deprecated, + "/query is the canonical read endpoint and must not be deprecated" + ); +} + +#[test] +fn openapi_mutate_is_not_deprecated() { + let doc = openapi_json(); + let deprecated = doc["paths"]["/mutate"]["post"] + .get("deprecated") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + assert!( + !deprecated, + "/mutate is the canonical mutation endpoint and must not be deprecated" + ); 
+} + #[test] fn openapi_ingest_is_post() { let doc = openapi_json(); @@ -278,6 +343,7 @@ const EXPECTED_SCHEMAS: &[&str] = &[ "BranchMergeRequest", "ChangeOutput", "ChangeRequest", + "QueryRequest", "CommitListOutput", "CommitOutput", "ErrorCode", @@ -368,13 +434,65 @@ fn read_output_schema_has_expected_fields() { #[test] fn change_request_schema_has_expected_fields() { + // Canonical field names on the wire are now `query` and `name`. The + // schema descriptions document `query_source` and `query_name` as + // legacy deserialization aliases for backward compatibility. let doc = openapi_json(); let schema = &doc["components"]["schemas"]["ChangeRequest"]; let props = schema["properties"].as_object().unwrap(); - assert!(props.contains_key("query_source")); - assert!(props.contains_key("query_name")); + assert!(props.contains_key("query")); + assert!(props.contains_key("name")); assert!(props.contains_key("params")); assert!(props.contains_key("branch")); + let query_desc = schema["properties"]["query"]["description"] + .as_str() + .unwrap_or_default(); + assert!( + query_desc.contains("query_source"), + "expected `query` description to mention the legacy `query_source` alias, got: {query_desc}" + ); +} + +#[test] +fn query_request_schema_has_expected_fields() { + let doc = openapi_json(); + let schema = &doc["components"]["schemas"]["QueryRequest"]; + let props = schema["properties"].as_object().unwrap(); + assert!(props.contains_key("query")); + assert!(props.contains_key("name")); + assert!(props.contains_key("params")); + assert!(props.contains_key("branch")); + assert!(props.contains_key("snapshot")); +} + +#[test] +fn query_request_query_is_required() { + let doc = openapi_json(); + let schema = &doc["components"]["schemas"]["QueryRequest"]; + let required: Vec<&str> = schema["required"] + .as_array() + .unwrap() + .iter() + .map(|v| v.as_str().unwrap()) + .collect(); + assert!(required.contains(&"query")); +} + +#[test] +fn openapi_query_is_post() { + let doc 
= openapi_json(); + assert!(doc["paths"]["/query"]["post"].is_object()); +} + +#[test] +fn query_endpoint_documents_mutation_400() { + let doc = openapi_json(); + let four_hundred = &doc["paths"]["/query"]["post"]["responses"]["400"]; + let description = four_hundred["description"].as_str().unwrap_or_default(); + assert!( + description.contains("mutations") || description.contains("POST /mutate"), + "expected /query 400 response to mention mutation rejection, got: {description}" + ); } #[test] @@ -626,10 +744,13 @@ fn branch_delete_has_branch_path_parameter() { let params = doc["paths"]["/branches/{branch}"]["delete"]["parameters"] .as_array() .unwrap(); - let has_branch = params.iter().any(|p| { - p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path") - }); - assert!(has_branch, "DELETE /branches/{{branch}} must have 'branch' path parameter"); + let has_branch = params + .iter() + .any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path")); + assert!( + has_branch, + "DELETE /branches/{{branch}} must have 'branch' path parameter" + ); } #[test] @@ -638,10 +759,13 @@ fn commit_show_has_commit_id_path_parameter() { let params = doc["paths"]["/commits/{commit_id}"]["get"]["parameters"] .as_array() .unwrap(); - let has_commit_id = params.iter().any(|p| { - p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path") - }); - assert!(has_commit_id, "GET /commits/{{commit_id}} must have 'commit_id' path parameter"); + let has_commit_id = params + .iter() + .any(|p| p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path")); + assert!( + has_commit_id, + "GET /commits/{{commit_id}} must have 'commit_id' path parameter" + ); } #[test] @@ -650,10 +774,13 @@ fn snapshot_has_branch_query_parameter() { let params = doc["paths"]["/snapshot"]["get"]["parameters"] .as_array() .unwrap(); - let has_branch = params.iter().any(|p| { - p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query") - 
}); - assert!(has_branch, "GET /snapshot must have 'branch' query parameter"); + let has_branch = params + .iter() + .any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")); + assert!( + has_branch, + "GET /snapshot must have 'branch' query parameter" + ); } #[test] @@ -662,10 +789,13 @@ fn commits_has_branch_query_parameter() { let params = doc["paths"]["/commits"]["get"]["parameters"] .as_array() .unwrap(); - let has_branch = params.iter().any(|p| { - p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query") - }); - assert!(has_branch, "GET /commits must have 'branch' query parameter"); + let has_branch = params + .iter() + .any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")); + assert!( + has_branch, + "GET /commits must have 'branch' query parameter" + ); } // --------------------------------------------------------------------------- @@ -741,8 +871,7 @@ fn error_responses_reference_error_output_schema() { ]; for (path, method, status) in paths_with_errors { - let content = - &doc["paths"][path][method]["responses"][status]["content"]; + let content = &doc["paths"][path][method]["responses"][status]["content"]; let schema = &content["application/json"]["schema"]; let ref_path = schema["$ref"].as_str().unwrap(); assert!( @@ -804,7 +933,7 @@ fn openapi_spec_round_trips_through_json() { #[tokio::test] async fn open_mode_spec_has_no_security_schemes() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -820,7 +949,7 @@ async fn open_mode_spec_has_no_security_schemes() { #[tokio::test] async fn open_mode_spec_has_no_operation_security() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -841,7 +970,7 @@ async fn 
open_mode_spec_has_no_operation_security() { #[tokio::test] async fn auth_mode_spec_includes_bearer_token_security_scheme() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -855,7 +984,7 @@ async fn auth_mode_spec_includes_bearer_token_security_scheme() { #[tokio::test] async fn auth_mode_spec_has_security_on_protected_operations() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -886,7 +1015,7 @@ async fn auth_mode_spec_has_security_on_protected_operations() { #[tokio::test] async fn auth_mode_spec_matches_static_generation() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -902,7 +1031,7 @@ async fn auth_mode_spec_matches_static_generation() { #[tokio::test] async fn auth_mode_healthz_still_has_no_security() { - let (_temp, app) = app_for_loaded_repo_with_auth("secret").await; + let (_temp, app) = app_for_loaded_graph_with_auth("secret").await; let request = Request::builder() .method(Method::GET) .uri("/openapi.json") @@ -918,8 +1047,7 @@ async fn auth_mode_healthz_still_has_no_security() { #[test] fn openapi_spec_is_up_to_date() { - let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../../openapi.json"); + let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../openapi.json"); let generated = serde_json::to_string_pretty(&openapi_doc()).unwrap() + "\n"; @@ -943,3 +1071,289 @@ fn openapi_spec_is_up_to_date() { "openapi.json is out of date. 
Run: OMNIGRAPH_UPDATE_OPENAPI=1 cargo test -p omnigraph-server --test openapi openapi_spec_is_up_to_date" ); } + +// --------------------------------------------------------------------------- +// MR-668 — multi-mode OpenAPI cluster filter +// --------------------------------------------------------------------------- +// +// In multi-graph mode, `/openapi.json` reports cluster routes +// (`/graphs/{graph_id}/...`) instead of the legacy flat routes. The +// only flat path that survives is `/healthz`. Operation IDs gain a +// `cluster_` prefix so SDK generators have stable, unique ids. +// +// These tests exercise the request-time `server_openapi` handler via +// `oneshot`, not the static `ApiDoc::openapi()` — the rewrite happens +// only on the served document. + +const EXPECTED_CLUSTER_PATHS: &[&str] = &[ + "/graphs/{graph_id}/snapshot", + "/graphs/{graph_id}/read", + "/graphs/{graph_id}/export", + "/graphs/{graph_id}/change", + "/graphs/{graph_id}/schema", + "/graphs/{graph_id}/schema/apply", + "/graphs/{graph_id}/ingest", + "/graphs/{graph_id}/branches", + "/graphs/{graph_id}/branches/{branch}", + "/graphs/{graph_id}/branches/merge", + "/graphs/{graph_id}/commits", + "/graphs/{graph_id}/commits/{commit_id}", +]; + +async fn app_for_multi_mode(graph_ids: &[&str]) -> (Vec, Router) { + use std::sync::Arc; + + use omnigraph_server::{GraphHandle, GraphId, GraphKey}; + + let mut dirs = Vec::with_capacity(graph_ids.len()); + let mut handles = Vec::with_capacity(graph_ids.len()); + for id in graph_ids { + let dir = tempfile::tempdir().unwrap(); + let graph_uri = dir.path().join(id).to_str().unwrap().to_string(); + let schema = fs::read_to_string(fixture("test.pg")).unwrap(); + let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap(); + handles.push(Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from(*id).unwrap()), + uri: graph_uri, + engine: Arc::new(engine), + policy: None, + })); + dirs.push(dir); + } + let workload = 
omnigraph_server::workload::WorkloadController::from_env(); + let state = AppState::new_multi(handles, Vec::new(), None, workload, None).unwrap(); + let app = build_app(state); + (dirs, app) +} + +#[tokio::test] +async fn multi_mode_openapi_lists_cluster_paths() { + let (_dirs, app) = app_for_multi_mode(&["alpha"]).await; + let request = Request::builder() + .method(Method::GET) + .uri("/openapi.json") + .body(Body::empty()) + .unwrap(); + let (status, json) = json_response(&app, request).await; + assert_eq!(status, StatusCode::OK); + let paths = json["paths"].as_object().expect("paths must be an object"); + let path_keys: HashSet<&str> = paths.keys().map(|k| k.as_str()).collect(); + for expected in EXPECTED_CLUSTER_PATHS { + assert!( + path_keys.contains(expected), + "missing cluster path in multi-mode spec: {expected}. \ + Found: {path_keys:?}" + ); + } +} + +#[tokio::test] +async fn multi_mode_openapi_drops_flat_protected_paths() { + let (_dirs, app) = app_for_multi_mode(&["alpha"]).await; + let request = Request::builder() + .method(Method::GET) + .uri("/openapi.json") + .body(Body::empty()) + .unwrap(); + let (_, json) = json_response(&app, request).await; + let paths = json["paths"].as_object().unwrap(); + // None of the legacy flat protected paths should appear in multi mode. 
+ let flat_protected = [ + "/snapshot", + "/read", + "/export", + "/change", + "/schema", + "/schema/apply", + "/ingest", + "/branches", + "/branches/{branch}", + "/branches/merge", + "/commits", + "/commits/{commit_id}", + ]; + for flat in flat_protected { + assert!( + !paths.contains_key(flat), + "flat path {flat} must not appear in multi-mode spec; \ + cluster routes are the only protected surface" + ); + } +} + +#[tokio::test] +async fn multi_mode_openapi_keeps_management_paths_flat() { + let (_dirs, app) = app_for_multi_mode(&["alpha"]).await; + let request = Request::builder() + .method(Method::GET) + .uri("/openapi.json") + .body(Body::empty()) + .unwrap(); + let (_, json) = json_response(&app, request).await; + let paths = json["paths"].as_object().unwrap(); + for flat in ["/healthz", "/graphs"] { + assert!( + paths.contains_key(flat), + "{flat} must remain flat in multi mode" + ); + let nested = format!("/graphs/{{graph_id}}{flat}"); + assert!( + !paths.contains_key(&nested), + "{flat} must NOT be cluster-prefixed to {nested}" + ); + } +} + +#[tokio::test] +async fn multi_mode_openapi_prefixes_operation_ids_with_cluster() { + let (_dirs, app) = app_for_multi_mode(&["alpha"]).await; + let request = Request::builder() + .method(Method::GET) + .uri("/openapi.json") + .body(Body::empty()) + .unwrap(); + let (_, json) = json_response(&app, request).await; + // Every cluster path operation must have a `cluster_` operation_id. + // Flat-mounted paths (healthz, management /graphs) keep their + // original operation_ids — they're not per-graph. 
+ let paths = json["paths"].as_object().unwrap(); + let mut checked = 0; + for (path, item) in paths { + if path == "/healthz" || path == "/graphs" { + continue; + } + for method in ["get", "post", "put", "delete", "patch"] { + if let Some(op) = item.get(method).filter(|v| v.is_object()) { + if let Some(id) = op["operationId"].as_str() { + assert!( + id.starts_with("cluster_"), + "operation_id at {path}.{method} must start with `cluster_`, got `{id}`" + ); + checked += 1; + } + } + } + } + assert!( + checked >= EXPECTED_CLUSTER_PATHS.len(), + "expected at least {} cluster operation_ids; checked {checked}", + EXPECTED_CLUSTER_PATHS.len() + ); +} + +#[tokio::test] +async fn multi_mode_openapi_declares_graph_id_path_parameter() { + let (_dirs, app) = app_for_multi_mode(&["alpha"]).await; + let request = Request::builder() + .method(Method::GET) + .uri("/openapi.json") + .body(Body::empty()) + .unwrap(); + let (_, json) = json_response(&app, request).await; + let paths = json["paths"].as_object().unwrap(); + + for expected_path in EXPECTED_CLUSTER_PATHS { + let item = paths + .get(*expected_path) + .unwrap_or_else(|| panic!("missing cluster path {expected_path}")); + for method in ["get", "post", "put", "delete", "patch"] { + let Some(operation) = item.get(method).filter(|value| value.is_object()) else { + continue; + }; + let parameters = operation["parameters"] + .as_array() + .unwrap_or_else(|| panic!("{expected_path}.{method} missing parameters")); + let graph_id = parameters + .iter() + .find(|param| param["name"] == "graph_id" && param["in"] == "path") + .unwrap_or_else(|| { + panic!("{expected_path}.{method} missing graph_id path parameter") + }); + assert_eq!( + graph_id["required"].as_bool(), + Some(true), + "{expected_path}.{method} graph_id parameter must be required" + ); + assert_eq!( + graph_id["schema"]["type"].as_str(), + Some("string"), + "{expected_path}.{method} graph_id parameter must be string typed" + ); + } + } + + for flat in ["/healthz", 
"/graphs"] { + let item = paths.get(flat).unwrap(); + for method in ["get", "post", "put", "delete", "patch"] { + if let Some(operation) = item.get(method).filter(|value| value.is_object()) { + let has_graph_id = operation["parameters"] + .as_array() + .map(|params| { + params + .iter() + .any(|param| param["name"] == "graph_id" && param["in"] == "path") + }) + .unwrap_or(false); + assert!( + !has_graph_id, + "{flat}.{method} must not declare graph_id; it remains flat" + ); + } + } + } +} + +#[tokio::test] +async fn multi_mode_operation_ids_are_unique() { + // Sanity check: the cluster_ prefix prevents collision with flat ids + // (which don't appear in multi mode, but the contract is "unique + // across the spec"). Verify every operation_id in the multi-mode + // spec is unique. + let (_dirs, app) = app_for_multi_mode(&["alpha"]).await; + let request = Request::builder() + .method(Method::GET) + .uri("/openapi.json") + .body(Body::empty()) + .unwrap(); + let (_, json) = json_response(&app, request).await; + let paths = json["paths"].as_object().unwrap(); + let mut seen_ids: HashSet = HashSet::new(); + for (_, item) in paths { + for method in ["get", "post", "put", "delete", "patch"] { + if let Some(op) = item.get(method).filter(|v| v.is_object()) { + if let Some(id) = op["operationId"].as_str() { + assert!( + seen_ids.insert(id.to_string()), + "duplicate operation_id `{id}` in multi-mode spec" + ); + } + } + } + } +} + +#[tokio::test] +async fn single_mode_openapi_unchanged_by_cluster_filter() { + // Regression: single mode still emits the legacy flat surface. 
+ let (_temp, app) = app_for_loaded_graph().await; + let request = Request::builder() + .method(Method::GET) + .uri("/openapi.json") + .body(Body::empty()) + .unwrap(); + let (_, json) = json_response(&app, request).await; + let paths = json["paths"].as_object().unwrap(); + let path_keys: HashSet<&str> = paths.keys().map(|k| k.as_str()).collect(); + for expected in EXPECTED_PATHS { + assert!( + path_keys.contains(expected), + "single mode must still emit flat path: {expected}" + ); + } + for cluster in EXPECTED_CLUSTER_PATHS { + assert!( + !path_keys.contains(cluster), + "single mode must NOT emit cluster path: {cluster}" + ); + } +} diff --git a/crates/omnigraph-server/tests/server.rs b/crates/omnigraph-server/tests/server.rs index 03f4aa7..3ace80e 100644 --- a/crates/omnigraph-server/tests/server.rs +++ b/crates/omnigraph-server/tests/server.rs @@ -5,13 +5,16 @@ use std::sync::Arc; use axum::Router; use axum::body::{Body, to_bytes}; +use axum::http::header::AUTHORIZATION; use axum::http::{Method, Request, StatusCode}; -use lance_index::traits::DatasetIndexExt; -use omnigraph::db::{Omnigraph, ReadTarget}; +use lance::index::DatasetIndexExt; +use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions}; +use omnigraph::error::OmniError; use omnigraph::loader::{LoadMode, load_jsonl}; +use omnigraph_policy::{PolicyChecker, PolicyEngine}; use omnigraph_server::api::{ BranchCreateRequest, BranchMergeRequest, ChangeRequest, ErrorOutput, ExportRequest, - IngestRequest, ReadRequest, SchemaApplyRequest, SchemaOutput, + IngestRequest, QueryRequest, ReadRequest, SchemaApplyRequest, SchemaOutput, }; use omnigraph_server::{AppState, build_app}; use serde_json::{Value, json}; @@ -102,39 +105,39 @@ fn fixture(name: &str) -> PathBuf { .join(name) } -async fn init_loaded_repo() -> tempfile::TempDir { - init_repo_with_schema_and_data( +async fn init_loaded_graph() -> tempfile::TempDir { + init_graph_with_schema_and_data( &fs::read_to_string(fixture("test.pg")).unwrap(), 
&fs::read_to_string(fixture("test.jsonl")).unwrap(), ) .await } -async fn init_repo_with_schema_and_data(schema: &str, data: &str) -> tempfile::TempDir { +async fn init_graph_with_schema_and_data(schema: &str, data: &str) -> tempfile::TempDir { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); - fs::create_dir_all(&repo).unwrap(); - Omnigraph::init(repo.to_str().unwrap(), schema) + let graph = graph_path(temp.path()); + fs::create_dir_all(&graph).unwrap(); + Omnigraph::init(graph.to_str().unwrap(), schema) .await .unwrap(); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); load_jsonl(&mut db, data, LoadMode::Overwrite) .await .unwrap(); temp } -async fn init_repo_with_schema(schema: &str) -> tempfile::TempDir { +async fn init_graph_with_schema(schema: &str) -> tempfile::TempDir { let temp = tempfile::tempdir().unwrap(); - let repo = repo_path(temp.path()); - fs::create_dir_all(&repo).unwrap(); - Omnigraph::init(repo.to_str().unwrap(), schema) + let graph = graph_path(temp.path()); + fs::create_dir_all(&graph).unwrap(); + Omnigraph::init(graph.to_str().unwrap(), schema) .await .unwrap(); temp } -fn repo_path(root: &Path) -> PathBuf { +fn graph_path(root: &Path) -> PathBuf { root.join("server.omni") } @@ -144,8 +147,8 @@ fn drifted_test_schema() -> String { .replace("age: I32?", "age: I64?") } -async fn manifest_dataset_version(repo: &Path) -> u64 { - Omnigraph::open(repo.to_string_lossy().as_ref()) +async fn manifest_dataset_version(graph: &Path) -> u64 { + Omnigraph::open(graph.to_string_lossy().as_ref()) .await .unwrap() .snapshot_of(ReadTarget::branch("main")) @@ -154,7 +157,7 @@ async fn manifest_dataset_version(repo: &Path) -> u64 { .version() } -fn s3_test_repo_uri(suite: &str) -> Option { +fn s3_test_graph_uri(suite: &str) -> Option { let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?; let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX") 
.ok() @@ -167,54 +170,77 @@ fn s3_test_repo_uri(suite: &str) -> Option { Some(format!("s3://{}/{}/{}/{}", bucket, prefix, suite, unique)) } -async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) +async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); (temp, build_app(state)) } -async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); - let state = AppState::new_with_bearer_token( - repo.to_string_lossy().to_string(), - db, - Some(token.to_string()), - ); - (temp, build_app(state)) +/// Build a permit-all policy YAML that grants every action used by the +/// HTTP-layer tests to the listed actor names. MR-723 default-deny +/// closed the "tokens but no policy" loophole; helpers that used to +/// represent "auth without policy" now install this permit-all policy +/// so test cases retain their pre-MR-723 semantics ("auth required, +/// every action permitted") without conflicting with the new state +/// matrix. Tests that specifically need the State-2 deny path use +/// `app_for_graph_with_auth_tokens_only` instead. 
+fn permit_all_policy_yaml(actors: &[&str]) -> String { + let members = actors + .iter() + .map(|a| format!("\"{a}\"")) + .collect::>() + .join(", "); + format!( + r#" +version: 1 +groups: + permitted: [{members}] +protected_branches: [main] +rules: + - id: permit-data + allow: + actors: {{ group: permitted }} + actions: [read, change, export] + branch_scope: any + - id: permit-protected-target-actions + allow: + actors: {{ group: permitted }} + actions: [schema_apply, branch_create, branch_delete, branch_merge] + target_branch_scope: any +"# + ) } -async fn app_for_loaded_repo_with_auth_tokens( - tokens: &[(&str, &str)], -) -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); - let state = AppState::new_with_bearer_tokens( - repo.to_string_lossy().to_string(), - db, - tokens - .iter() - .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) - .collect(), - ); - (temp, build_app(state)) -} - -async fn app_for_loaded_repo_with_auth_tokens_and_policy( - tokens: &[(&str, &str)], - policy: &str, -) -> (tempfile::TempDir, Router) { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); +async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) { + // `AppState::new_with_bearer_token(token)` maps the token to actor "default"; + // permit-all policy needs to include that actor. 
+ let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); - fs::write(&policy_path, policy).unwrap(); + fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), + vec![("default".to_string(), token.to_string())], + Some(&policy_path), + ) + .await + .unwrap(); + (temp, build_app(state)) +} + +async fn app_for_loaded_graph_with_auth_tokens( + tokens: &[(&str, &str)], +) -> (tempfile::TempDir, Router) { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let policy_path = temp.path().join("policy.yaml"); + let actors: Vec<&str> = tokens.iter().map(|(actor, _)| *actor).collect(); + fs::write(&policy_path, permit_all_policy_yaml(&actors)).unwrap(); + let state = AppState::open_with_bearer_tokens_and_policy( + graph.to_string_lossy().to_string(), tokens .iter() .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) @@ -226,17 +252,16 @@ async fn app_for_loaded_repo_with_auth_tokens_and_policy( (temp, build_app(state)) } -async fn app_for_repo_with_auth_tokens_and_policy( - schema: &str, +async fn app_for_loaded_graph_with_auth_tokens_and_policy( tokens: &[(&str, &str)], policy: &str, ) -> (tempfile::TempDir, Router) { - let temp = init_repo_with_schema(schema).await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, policy).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), tokens .iter() .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) @@ -248,6 +273,51 @@ async fn app_for_repo_with_auth_tokens_and_policy( (temp, build_app(state)) } +async fn 
app_for_graph_with_auth_tokens_and_policy( + schema: &str, + tokens: &[(&str, &str)], + policy: &str, +) -> (tempfile::TempDir, Router) { + let temp = init_graph_with_schema(schema).await; + let graph = graph_path(temp.path()); + let policy_path = temp.path().join("policy.yaml"); + fs::write(&policy_path, policy).unwrap(); + let state = AppState::open_with_bearer_tokens_and_policy( + graph.to_string_lossy().to_string(), + tokens + .iter() + .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) + .collect(), + Some(&policy_path), + ) + .await + .unwrap(); + (temp, build_app(state)) +} + +/// MR-723 default-deny mode: bearer tokens configured, no policy file. +/// Exercises ServerRuntimeState::DefaultDeny — authenticated requests +/// for Read succeed, every other action is rejected with 403 from +/// `authorize_request`'s state-2 branch. +async fn app_for_graph_with_auth_tokens_only( + schema: &str, + tokens: &[(&str, &str)], +) -> (tempfile::TempDir, Router) { + let temp = init_graph_with_schema(schema).await; + let graph = graph_path(temp.path()); + let state = AppState::open_with_bearer_tokens_and_policy( + graph.to_string_lossy().to_string(), + tokens + .iter() + .map(|(actor, token)| ((*actor).to_string(), (*token).to_string())) + .collect(), + None, + ) + .await + .unwrap(); + (temp, build_app(state)) +} + fn additive_schema_with_nickname() -> String { fs::read_to_string(fixture("test.pg")).unwrap().replace( " age: I32?\n}", @@ -255,6 +325,31 @@ fn additive_schema_with_nickname() -> String { ) } +fn schema_without_age() -> String { + // Drop the nullable `age` column from the test schema. Used by the + // HTTP soft/hard drop tests below. + fs::read_to_string(fixture("test.pg")) + .unwrap() + .replace(" age: I32?\n", "") +} + +fn schema_without_company() -> String { + // Drop the `Company` node type and the edge referencing it. Used + // by the HTTP DropType test below. 
Hand-crafted (no template + // string replace) because the fixture interleaves the type and + // its edge. + r#"node Person { + name: String @key + age: I32? +} + +edge Knows: Person -> Person { + since: Date? +} +"# + .to_string() +} + fn renamed_person_schema() -> String { fs::read_to_string(fixture("test.pg")) .unwrap() @@ -293,8 +388,8 @@ async fn json_response(app: &Router, request: Request) -> (StatusCode, Val } #[tokio::test] -async fn schema_apply_route_updates_repo_for_authorized_admin() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( +async fn schema_apply_route_updates_graph_for_authorized_admin() { + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -310,6 +405,7 @@ async fn schema_apply_route_updates_repo_for_authorized_admin() { .body(Body::from( serde_json::to_vec(&SchemaApplyRequest { schema_source: schema, + ..Default::default() }) .unwrap(), )) @@ -318,8 +414,8 @@ async fn schema_apply_route_updates_repo_for_authorized_admin() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let repo = repo_path(temp.path()); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let graph = graph_path(temp.path()); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); assert!( reopened.catalog().node_types["Person"] .properties @@ -329,7 +425,7 @@ async fn schema_apply_route_updates_repo_for_authorized_admin() { #[tokio::test] async fn schema_apply_route_requires_schema_apply_policy_permission() { - let (_temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], POLICY_YAML, @@ -344,6 +440,7 @@ async fn schema_apply_route_requires_schema_apply_policy_permission() { .body(Body::from( 
serde_json::to_vec(&SchemaApplyRequest { schema_source: additive_schema_with_nickname(), + ..Default::default() }) .unwrap(), )) @@ -359,7 +456,7 @@ async fn schema_apply_route_requires_schema_apply_policy_permission() { #[tokio::test] async fn schema_apply_route_requires_bearer_token_when_policy_enabled() { - let (_temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -373,6 +470,7 @@ async fn schema_apply_route_requires_bearer_token_when_policy_enabled() { .body(Body::from( serde_json::to_vec(&SchemaApplyRequest { schema_source: additive_schema_with_nickname(), + ..Default::default() }) .unwrap(), )) @@ -388,7 +486,7 @@ async fn schema_apply_route_requires_bearer_token_when_policy_enabled() { #[tokio::test] async fn schema_apply_route_can_rename_type() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -403,6 +501,7 @@ async fn schema_apply_route_can_rename_type() { .body(Body::from( serde_json::to_vec(&SchemaApplyRequest { schema_source: renamed_person_schema(), + ..Default::default() }) .unwrap(), )) @@ -411,8 +510,8 @@ async fn schema_apply_route_can_rename_type() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let repo = repo_path(temp.path()); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let graph = graph_path(temp.path()); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = reopened .snapshot_of(ReadTarget::branch("main")) .await @@ -423,7 +522,7 @@ async fn schema_apply_route_can_rename_type() { #[tokio::test] async fn schema_apply_route_can_rename_property() { - let (temp, app) = 
app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -438,6 +537,7 @@ async fn schema_apply_route_can_rename_property() { .body(Body::from( serde_json::to_vec(&SchemaApplyRequest { schema_source: renamed_age_schema(), + ..Default::default() }) .unwrap(), )) @@ -446,8 +546,8 @@ async fn schema_apply_route_can_rename_property() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let repo = repo_path(temp.path()); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let graph = graph_path(temp.path()); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let person = &reopened.catalog().node_types["Person"]; assert!(person.properties.contains_key("years")); assert!(!person.properties.contains_key("age")); @@ -455,15 +555,15 @@ async fn schema_apply_route_can_rename_property() { #[tokio::test] async fn schema_apply_route_can_add_index() { - let (temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); let before_index_count = { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); let dataset = snapshot.open("node:Person").await.unwrap(); dataset.load_indices().await.unwrap().len() @@ -477,6 +577,7 @@ async fn schema_apply_route_can_add_index() { .body(Body::from( serde_json::to_vec(&SchemaApplyRequest { schema_source: indexed_name_schema(), + ..Default::default() }) .unwrap(), )) @@ -485,7 +586,7 @@ async fn 
schema_apply_route_can_add_index() { assert_eq!(status, StatusCode::OK); assert_eq!(payload["applied"], true); - let reopened = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = reopened .snapshot_of(ReadTarget::branch("main")) .await @@ -497,7 +598,7 @@ async fn schema_apply_route_can_add_index() { #[tokio::test] async fn schema_apply_route_rejects_unsupported_plan() { - let (_temp, app) = app_for_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_graph_with_auth_tokens_and_policy( &fs::read_to_string(fixture("test.pg")).unwrap(), &[("act-ragnor", "admin-token")], SCHEMA_APPLY_POLICY_YAML, @@ -512,6 +613,7 @@ async fn schema_apply_route_rejects_unsupported_plan() { .body(Body::from( serde_json::to_vec(&SchemaApplyRequest { schema_source: unsupported_schema_change(), + ..Default::default() }) .unwrap(), )) @@ -527,16 +629,16 @@ async fn schema_apply_route_rejects_unsupported_plan() { #[tokio::test] async fn schema_apply_route_rejects_when_non_main_branch_exists() { - let temp = init_repo_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_graph_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create("feature").await.unwrap(); drop(db); let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, SCHEMA_APPLY_POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-ragnor".to_string(), "admin-token".to_string())], Some(&policy_path), ) @@ -552,6 +654,7 @@ async fn schema_apply_route_rejects_when_non_main_branch_exists() { .body(Body::from( 
serde_json::to_vec(&SchemaApplyRequest { schema_source: additive_schema_with_nickname(), + ..Default::default() }) .unwrap(), )) @@ -651,7 +754,7 @@ fn mock_embedding(input: &str, dim: usize) -> Vec { #[tokio::test(flavor = "multi_thread")] async fn healthz_succeeds_after_startup() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let (status, body) = json_response( &app, Request::builder() @@ -673,9 +776,9 @@ async fn healthz_succeeds_after_startup() { #[tokio::test(flavor = "multi_thread")] async fn schema_drift_returns_conflict_for_snapshot_read_and_change() { - let (temp, app) = app_for_loaded_repo().await; - let repo = repo_path(temp.path()); - fs::write(repo.join("_schema.pg"), drifted_test_schema()).unwrap(); + let (temp, app) = app_for_loaded_graph().await; + let graph = graph_path(temp.path()); + fs::write(graph.join("_schema.pg"), drifted_test_schema()).unwrap(); let (snapshot_status, snapshot_body) = json_response( &app, @@ -728,8 +831,8 @@ async fn schema_drift_returns_conflict_for_snapshot_read_and_change() { ); let change = ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": "Mina", "age": 28 })), branch: Some("main".to_string()), }; @@ -758,7 +861,7 @@ async fn schema_drift_returns_conflict_for_snapshot_read_and_change() { #[tokio::test(flavor = "multi_thread")] async fn protected_routes_require_bearer_token() { - let (_temp, app) = app_for_loaded_repo_with_auth("demo-token").await; + let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await; let (status, body) = json_response( &app, Request::builder() @@ -779,7 +882,7 @@ async fn protected_routes_require_bearer_token() { #[tokio::test(flavor = "multi_thread")] async fn protected_routes_accept_valid_bearer_token_while_healthz_stays_open() { - let (_temp, app) = 
app_for_loaded_repo_with_auth("demo-token").await; + let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await; let health = app .clone() @@ -812,9 +915,9 @@ async fn protected_routes_accept_valid_bearer_token_while_healthz_stays_open() { #[tokio::test(flavor = "multi_thread")] async fn export_route_returns_jsonl_for_branch_snapshot() { let token = "demo-token"; - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -831,11 +934,20 @@ async fn export_route_returns_jsonl_for_branch_snapshot() { .unwrap(); drop(db); - let state = AppState::new_with_bearer_token( - repo.to_string_lossy().to_string(), - Omnigraph::open(repo.to_str().unwrap()).await.unwrap(), - Some(token.to_string()), - ); + // MR-723: tokens-without-policy is now default-deny. Install a + // permit-all policy alongside the bearer token so /export + // (action=Export) passes Cedar evaluation. The test is exercising + // export semantics, not policy — the policy is just enough to clear + // the State 3 path. 
+ let policy_path = temp.path().join("policy.yaml"); + fs::write(&policy_path, permit_all_policy_yaml(&["default"])).unwrap(); + let state = AppState::open_with_bearer_tokens_and_policy( + graph.to_string_lossy().to_string(), + vec![("default".to_string(), token.to_string())], + Some(&policy_path), + ) + .await + .unwrap(); let app = build_app(state); let response = app @@ -871,9 +983,11 @@ async fn export_route_returns_jsonl_for_branch_snapshot() { #[tokio::test(flavor = "multi_thread")] async fn protected_routes_accept_any_configured_team_bearer_token() { - let (_temp, app) = - app_for_loaded_repo_with_auth_tokens(&[("team-01", "token-one"), ("team-02", "token-two")]) - .await; + let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[ + ("team-01", "token-one"), + ("team-02", "token-two"), + ]) + .await; let (status, body) = json_response( &app, @@ -897,8 +1011,8 @@ async fn protected_routes_accept_any_configured_team_bearer_token() { /// the policy outcome. #[tokio::test(flavor = "multi_thread")] async fn bearer_token_resolves_to_correct_actor_for_policy_decisions() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); fs::write( &policy_path, @@ -918,7 +1032,7 @@ rules: ) .unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![ ("act-a".to_string(), "token-a".to_string()), ("act-b".to_string(), "token-b".to_string()), @@ -975,9 +1089,135 @@ rules: assert_eq!(bad_status, StatusCode::UNAUTHORIZED); } +/// Regression test for MR-731: actor identity comes from the matched +/// bearer token, never from a client-supplied request header. A future +/// "convenience" PR that lets clients override `actor_id` to spoof +/// another identity must break this test. 
The principle is named in +/// `docs/dev/invariants.md` Hard Invariant 11 and at the actor-resolution +/// site in `omnigraph-server/src/lib.rs::authorize_request`. +/// +/// Two assertions in one fixture: +/// 1. Spoof-up: bearer for a *denied* actor + X-Actor-Id naming an +/// *allowed* actor — policy still denies (proves the spoof header +/// doesn't promote the request). +/// 2. Spoof-down: bearer for an *allowed* actor + X-Actor-Id naming a +/// *denied* actor — policy still allows (proves the server-resolved +/// identity wins; the spoof can't trick the request into a denial +/// either, which would otherwise be a confusing UX trap). +/// +/// Cross-reference: MR-777 covers boundary cases like actor-id +/// *collision* (two distinct tokens minting the same actor_id) and +/// malformed bearer header parsing. See `auth_boundary_case_coverage` +/// suite when it lands; the two tests together pin the full bearer-token +/// → actor identity contract. +#[tokio::test(flavor = "multi_thread")] +async fn actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers() { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let policy_path = temp.path().join("policy.yaml"); + // Same readers/writers split as + // `bearer_token_resolves_to_correct_actor_for_policy_decisions` — + // `act-a` can read main, `act-b` cannot. The asymmetry is what + // makes the spoof-up/spoof-down distinction observable. 
+ fs::write( + &policy_path, + r#" +version: 1 +groups: + readers: [act-a] + writers: [act-b] +protected_branches: [main] +rules: + - id: readers-only + allow: + actors: { group: readers } + actions: [read] + branch_scope: any +"#, + ) + .unwrap(); + let state = AppState::open_with_bearer_tokens_and_policy( + graph.to_string_lossy().to_string(), + vec![ + ("act-a".to_string(), "token-a".to_string()), + ("act-b".to_string(), "token-b".to_string()), + ], + Some(&policy_path), + ) + .await + .unwrap(); + let app = build_app(state); + + // (1) Spoof-up: bearer for act-b (denied) + X-Actor-Id: act-a (allowed). + // If the server were trusting the header, this would succeed as + // act-a. The contract is: the bearer wins. Expect 403 because + // act-b can't read. + let (spoof_up_status, spoof_up_body) = json_response( + &app, + Request::builder() + .uri("/snapshot?branch=main") + .method(Method::GET) + .header("authorization", "Bearer token-b") + .header("x-actor-id", "act-a") + .body(Body::empty()) + .unwrap(), + ) + .await; + let spoof_up_error: ErrorOutput = serde_json::from_value(spoof_up_body).unwrap(); + assert_eq!( + spoof_up_status, + StatusCode::FORBIDDEN, + "X-Actor-Id must not promote a denied bearer to an allowed actor", + ); + assert_eq!( + spoof_up_error.code, + Some(omnigraph_server::api::ErrorCode::Forbidden), + ); + + // (2) Spoof-down: bearer for act-a (allowed) + X-Actor-Id: act-b (denied). + // If the server were trusting the header, this would fail as act-b. + // The contract is: the bearer wins. Expect 200 because act-a can read. 
+ let (spoof_down_status, _) = json_response( + &app, + Request::builder() + .uri("/snapshot?branch=main") + .method(Method::GET) + .header("authorization", "Bearer token-a") + .header("x-actor-id", "act-b") + .body(Body::empty()) + .unwrap(), + ) + .await; + assert_eq!( + spoof_down_status, + StatusCode::OK, + "X-Actor-Id must not demote an allowed bearer to a denied actor", + ); + + // (3) Empty-string spoof attempt: an X-Actor-Id of "" must not + // leak through as the policy subject. Same expectation as (1): + // bearer for act-b is denied regardless of what the header tries. + let (empty_spoof_status, _) = json_response( + &app, + Request::builder() + .uri("/snapshot?branch=main") + .method(Method::GET) + .header("authorization", "Bearer token-b") + .header("x-actor-id", "") + .body(Body::empty()) + .unwrap(), + ) + .await; + assert_eq!( + empty_spoof_status, + StatusCode::FORBIDDEN, + "empty X-Actor-Id must not clear the resolved actor", + ); +} + #[tokio::test(flavor = "multi_thread")] async fn policy_allows_read_but_distinguishes_401_from_403() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-bruno", "team-token"), ("act-ragnor", "admin-token")], POLICY_YAML, ) @@ -1053,16 +1293,16 @@ async fn policy_allows_read_but_distinguishes_401_from_403() { #[tokio::test(flavor = "multi_thread")] async fn policy_uses_resolved_branch_for_snapshot_reads() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let snapshot_id = { - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.resolve_snapshot("main").await.unwrap().to_string() }; let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_PROTECTED_READ_YAML).unwrap(); let state = 
AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1100,9 +1340,9 @@ async fn policy_uses_resolved_branch_for_snapshot_reads() { #[tokio::test(flavor = "multi_thread")] async fn snapshot_route_returns_manifest_dataset_version() { - let (temp, app) = app_for_loaded_repo().await; - let repo = repo_path(temp.path()); - let expected_manifest_version = manifest_dataset_version(&repo).await; + let (temp, app) = app_for_loaded_graph().await; + let graph = graph_path(temp.path()); + let expected_manifest_version = manifest_dataset_version(&graph).await; let (snapshot_status, snapshot_body) = json_response( &app, @@ -1125,7 +1365,7 @@ async fn snapshot_route_returns_manifest_dataset_version() { #[tokio::test(flavor = "multi_thread")] async fn schema_route_returns_current_source() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let (status, body) = json_response( &app, Request::builder() @@ -1143,7 +1383,7 @@ async fn schema_route_returns_current_source() { #[tokio::test(flavor = "multi_thread")] async fn schema_route_requires_bearer_token_when_auth_configured() { - let (_temp, app) = app_for_loaded_repo_with_auth("demo-token").await; + let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await; let (missing_status, missing_body) = json_response( &app, @@ -1178,13 +1418,13 @@ async fn schema_route_requires_bearer_token_when_auth_configured() { #[tokio::test(flavor = "multi_thread")] async fn schema_route_denied_when_actor_lacks_read_permission() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); let policy_path = temp.path().join("policy.yaml"); // Policy grants branch_create only — no read action for act-bruno. 
fs::write(&policy_path, INGEST_CREATE_ONLY_POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1212,9 +1452,9 @@ async fn schema_route_denied_when_actor_lacks_read_permission() { #[tokio::test(flavor = "multi_thread")] async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1223,7 +1463,7 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1232,8 +1472,8 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() let app = build_app(state); let main_change = ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": "Mina", "age": 28 })), branch: Some("main".to_string()), }; @@ -1256,8 +1496,8 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() ); let feature_change = ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: 
Some("insert_person".to_string()), params: Some(json!({ "name": "Mina", "age": 28 })), branch: Some("feature".to_string()), }; @@ -1279,9 +1519,9 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch() #[tokio::test(flavor = "multi_thread")] async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1297,7 +1537,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() { let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![ ("act-bruno".to_string(), "team-token".to_string()), ("act-ragnor".to_string(), "admin-token".to_string()), @@ -1349,11 +1589,11 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() { async fn authenticated_change_stamps_actor_on_commits() { // With the Run state machine removed, actor_id is recorded // directly on the commit graph (no intermediate run record). 
- let (_temp, app) = app_for_loaded_repo_with_auth_tokens(&[("act-andrew", "token-one")]).await; + let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await; let change = ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": "Mina", "age": 28 })), branch: Some("main".to_string()), }; @@ -1392,8 +1632,8 @@ async fn authenticated_change_stamps_actor_on_commits() { #[tokio::test(flavor = "multi_thread")] async fn ingest_creates_branch_returns_metadata_and_stamps_actor() { - let (temp, app) = app_for_loaded_repo_with_auth_tokens(&[("act-andrew", "token-one")]).await; - let repo = repo_path(temp.path()); + let (temp, app) = app_for_loaded_graph_with_auth_tokens(&[("act-andrew", "token-one")]).await; + let graph = graph_path(temp.path()); let ingest = IngestRequest { branch: Some("feature-ingest".to_string()), from: Some("main".to_string()), @@ -1423,7 +1663,7 @@ async fn ingest_creates_branch_returns_metadata_and_stamps_actor() { assert_eq!(body["tables"][0]["table_key"], "node:Person"); assert_eq!(body["tables"][0]["rows_loaded"], 2); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let snapshot = db .snapshot_of(ReadTarget::branch("feature-ingest")) .await @@ -1442,10 +1682,10 @@ async fn ingest_creates_branch_returns_metadata_and_stamps_actor() { #[tokio::test(flavor = "multi_thread")] async fn ingest_existing_branch_skips_branch_create_policy_check() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); { - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); 
db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1453,7 +1693,7 @@ async fn ingest_existing_branch_skips_branch_create_policy_check() { let policy_path = temp.path().join("policy.yaml"); fs::write(&policy_path, POLICY_YAML).unwrap(); let state = AppState::open_with_bearer_tokens_and_policy( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), vec![("act-bruno".to_string(), "team-token".to_string())], Some(&policy_path), ) @@ -1486,7 +1726,7 @@ async fn ingest_existing_branch_skips_branch_create_policy_check() { #[tokio::test(flavor = "multi_thread")] async fn ingest_denies_missing_branch_without_branch_create_permission() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-bruno", "team-token")], POLICY_YAML, ) @@ -1519,7 +1759,7 @@ async fn ingest_denies_missing_branch_without_branch_create_permission() { #[tokio::test(flavor = "multi_thread")] async fn ingest_denies_when_actor_lacks_change_permission() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (_temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-bruno", "team-token")], INGEST_CREATE_ONLY_POLICY_YAML, ) @@ -1552,7 +1792,7 @@ async fn ingest_denies_when_actor_lacks_change_permission() { #[tokio::test(flavor = "multi_thread")] async fn ingest_rejects_payloads_over_32_mib() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let oversize = IngestRequest { branch: Some("feature".to_string()), from: Some("main".to_string()), @@ -1577,7 +1817,7 @@ async fn ingest_rejects_payloads_over_32_mib() { #[tokio::test(flavor = "multi_thread")] async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() { - let (_temp, app) = app_for_loaded_repo_with_auth_tokens(&[ + let (_temp, app) = app_for_loaded_graph_with_auth_tokens(&[ ("act-andrew", "token-one"), 
("act-ragnor", "token-two"), ]) @@ -1601,8 +1841,8 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() { assert_eq!(create_status, StatusCode::OK); let change = ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": "Zoe", "age": 33 })), branch: Some("feature".to_string()), }; @@ -1658,9 +1898,9 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() { #[tokio::test(flavor = "multi_thread")] async fn branch_merge_conflict_response_includes_structured_conflicts() { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1696,7 +1936,7 @@ async fn branch_merge_conflict_response_includes_structured_conflicts() { .unwrap(); drop(db); - let state = AppState::open(repo.to_string_lossy().to_string()) + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -1728,11 +1968,11 @@ async fn branch_merge_conflict_response_includes_structured_conflicts() { #[tokio::test(flavor = "multi_thread")] async fn repeated_read_after_change_sees_updated_state_from_same_app() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let change = ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": "Mina", "age": 28 })), branch: Some("main".to_string()), }; @@ -1771,9 +2011,268 @@ async fn 
repeated_read_after_change_sees_updated_state_from_same_app() { assert_eq!(read_body["rows"][0]["p.name"], "Mina"); } +#[tokio::test(flavor = "multi_thread")] +async fn query_endpoint_runs_inline_read() { + let (_temp, app) = app_for_loaded_graph().await; + + let query = QueryRequest { + query: fs::read_to_string(fixture("test.gq")).unwrap(), + name: Some("get_person".to_string()), + params: Some(json!({ "name": "Alice" })), + branch: Some("main".to_string()), + snapshot: None, + }; + let (status, body) = json_response( + &app, + Request::builder() + .uri("/query") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&query).unwrap())) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(body["query_name"], "get_person"); + assert_eq!(body["row_count"], 1); + assert_eq!(body["rows"][0]["p.name"], "Alice"); +} + +#[tokio::test(flavor = "multi_thread")] +async fn query_endpoint_rejects_mutation_with_400() { + let (_temp, app) = app_for_loaded_graph().await; + + let query = QueryRequest { + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), + params: Some(json!({ "name": "Should", "age": 1 })), + branch: Some("main".to_string()), + snapshot: None, + }; + let (status, body) = json_response( + &app, + Request::builder() + .uri("/query") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&query).unwrap())) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::BAD_REQUEST); + let err = body["error"].as_str().unwrap_or_default(); + assert!( + err.contains("contains mutations") && err.contains("POST /mutate"), + "expected mutation-rejection message pointing at canonical /mutate, got: {err}" + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn mutate_endpoint_runs_inline_mutation() { + // Canonical mutation endpoint. Pairs with `/query` on the read side. 
+ // Same wire shape as `/change`, no deprecation signal. + let (_temp, app) = app_for_loaded_graph().await; + + let request = json!({ + "query": MUTATION_QUERIES, + "name": "insert_person", + "params": { "name": "Mutie", "age": 30 }, + "branch": "main", + }); + let response = app + .clone() + .oneshot( + Request::builder() + .uri("/mutate") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&request).unwrap())) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::OK); + // Canonical route is NOT deprecated; no Deprecation header expected. + assert!( + response.headers().get("deprecation").is_none(), + "POST /mutate must not advertise itself as deprecated" + ); + let body_bytes = to_bytes(response.into_body(), usize::MAX).await.unwrap(); + let body: Value = serde_json::from_slice(&body_bytes).unwrap(); + assert_eq!(body["affected_nodes"], 1); + assert_eq!(body["query_name"], "insert_person"); + assert_eq!(body["branch"], "main"); +} + +#[tokio::test(flavor = "multi_thread")] +async fn change_endpoint_emits_deprecation_headers() { + // `/change` is kept indefinitely for back-compat but flagged at runtime + // per RFC 9745 (`Deprecation: true`) + RFC 8288 (`Link: </mutate>; + // rel="successor-version"`). The OpenAPI side is covered by + // `openapi_change_is_deprecated` in tests/openapi.rs. 
+ let (_temp, app) = app_for_loaded_graph().await; + + let request = json!({ + "query": MUTATION_QUERIES, + "name": "insert_person", + "params": { "name": "Legacyer", "age": 33 }, + "branch": "main", + }); + let response = app + .clone() + .oneshot( + Request::builder() + .uri("/change") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&request).unwrap())) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!( + response + .headers() + .get("deprecation") + .and_then(|v| v.to_str().ok()), + Some("true"), + "POST /change must advertise `Deprecation: true` (RFC 9745)" + ); + assert_eq!( + response.headers().get("link").and_then(|v| v.to_str().ok()), + Some("; rel=\"successor-version\""), + "POST /change must point at /mutate via `Link` rel=successor-version (RFC 8288)" + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn read_endpoint_emits_deprecation_headers() { + // `/read` is kept indefinitely for byte-stable back-compat but flagged + // at runtime per RFC 9745 + RFC 8288. Successor is `/query`. 
+ let (_temp, app) = app_for_loaded_graph().await; + + let request = ReadRequest { + query_source: fs::read_to_string(fixture("test.gq")).unwrap(), + query_name: Some("get_person".to_string()), + params: Some(json!({ "name": "Alice" })), + branch: Some("main".to_string()), + snapshot: None, + }; + let response = app + .clone() + .oneshot( + Request::builder() + .uri("/read") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&request).unwrap())) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!( + response + .headers() + .get("deprecation") + .and_then(|v| v.to_str().ok()), + Some("true"), + "POST /read must advertise `Deprecation: true` (RFC 9745)" + ); + assert_eq!( + response.headers().get("link").and_then(|v| v.to_str().ok()), + Some("; rel=\"successor-version\""), + "POST /read must point at /query via `Link` rel=successor-version (RFC 8288)" + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn query_endpoint_does_not_emit_deprecation_headers() { + // Sanity check the inverse: the canonical `/query` endpoint must not + // carry deprecation signaling, so SDK codegens don't propagate a + // bogus `@deprecated` marker. 
+ let (_temp, app) = app_for_loaded_graph().await; + + let request = QueryRequest { + query: fs::read_to_string(fixture("test.gq")).unwrap(), + name: Some("get_person".to_string()), + params: Some(json!({ "name": "Alice" })), + branch: Some("main".to_string()), + snapshot: None, + }; + let response = app + .clone() + .oneshot( + Request::builder() + .uri("/query") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&request).unwrap())) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::OK); + assert!( + response.headers().get("deprecation").is_none(), + "POST /query is canonical and must not advertise itself as deprecated" + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn change_endpoint_accepts_legacy_field_names() { + // The canonical wire field names on /change are `query` and `name`, but + // serde aliases keep the legacy `query_source`/`query_name` payload + // shape working for clients that haven't migrated yet. Pin both shapes. 
+ let (_temp, app) = app_for_loaded_graph().await; + + let legacy_body = json!({ + "query_source": MUTATION_QUERIES, + "query_name": "insert_person", + "params": { "name": "Legacy", "age": 21 }, + "branch": "main", + }); + let (status, body) = json_response( + &app, + Request::builder() + .uri("/change") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&legacy_body).unwrap())) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(body["affected_nodes"], 1); + + let canonical_body = json!({ + "query": MUTATION_QUERIES, + "name": "insert_person", + "params": { "name": "Canonical", "age": 22 }, + "branch": "main", + }); + let (status, body) = json_response( + &app, + Request::builder() + .uri("/change") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&canonical_body).unwrap())) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(body["affected_nodes"], 1); +} + #[tokio::test(flavor = "multi_thread")] async fn remote_branch_list_create_merge_flow_works() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let (list_status, list_body) = json_response( &app, @@ -1818,8 +2317,8 @@ async fn remote_branch_list_create_merge_flow_works() { assert_eq!(list_body["branches"], json!(["feature", "main"])); let change = ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": "Zoe", "age": 33 })), branch: Some("feature".to_string()), }; @@ -1900,7 +2399,7 @@ async fn remote_branch_list_create_merge_flow_works() { #[tokio::test(flavor = "multi_thread")] async fn remote_branch_delete_flow_works() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let 
create = BranchCreateRequest { from: Some("main".to_string()), @@ -1945,14 +2444,14 @@ async fn remote_branch_delete_flow_works() { #[tokio::test(flavor = "multi_thread")] async fn branch_delete_denies_without_policy_permission() { - let (temp, app) = app_for_loaded_repo_with_auth_tokens_and_policy( + let (temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy( &[("act-andrew", "token-admin"), ("act-bruno", "token-team")], POLICY_YAML, ) .await; - let repo = repo_path(temp.path()); + let graph = graph_path(temp.path()); - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.branch_create_from(ReadTarget::branch("main"), "feature") .await .unwrap(); @@ -1978,8 +2477,8 @@ async fn branch_delete_denies_without_policy_permission() { } #[tokio::test(flavor = "multi_thread")] -async fn server_opens_s3_repo_directly_and_serves_snapshot_and_read() { - let Some(uri) = s3_test_repo_uri("server") else { +async fn server_opens_s3_graph_directly_and_serves_snapshot_and_read() { + let Some(uri) = s3_test_graph_uri("server") else { eprintln!("skipping s3 server test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -2077,9 +2576,9 @@ query vector_search_string($q: String) { ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")), ("GEMINI_API_KEY", None), ]); - let temp = init_repo_with_schema_and_data(EMBED_SCHEMA, &data).await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_graph_with_schema_and_data(EMBED_SCHEMA, &data).await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -2113,20 +2612,20 @@ async fn change_conflict_returns_manifest_conflict_409() { // a structured `manifest_conflict` body — `table_key`, `expected`, // and `actual` — so clients can detect-and-retry without parsing // the message. 
- let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); // Build the server first so its handle pins the pre-mutation manifest // version. Then advance the manifest from outside the server. The // server's next /change call will capture stale `expected_versions` // (from its still-pinned snapshot) and the publisher's CAS rejects. - let state = AppState::open(repo.to_string_lossy().to_string()) + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); { - let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); db.mutate( "main", MUTATION_QUERIES, @@ -2152,8 +2651,8 @@ async fn change_conflict_returns_manifest_conflict_409() { .header("content-type", "application/json") .body(Body::from( serde_json::to_vec(&ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("set_age".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("set_age".to_string()), params: Some(json!({ "name": "Alice", "age": 33 })), branch: Some("main".to_string()), }) @@ -2196,9 +2695,9 @@ async fn change_concurrent_inserts_same_key_serialize_without_409() { // node type and asserts: every request returns 200 (no 409), // and the final row count equals the seed count + N (every // staged batch actually committed). 
- let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -2212,8 +2711,8 @@ async fn change_concurrent_inserts_same_key_serialize_without_409() { let app = app.clone(); handles.push(tokio::spawn(async move { let body = serde_json::to_vec(&ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": format!("racer-{i}"), "age": i as i32 })), branch: Some("main".to_string()), }) @@ -2309,9 +2808,9 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() { // Lance error variant. The drift check fires at the right architectural // layer (engine boundary, under the queue) and respects the existing // `MutationOpKind` policy. 
- let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -2325,8 +2824,8 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() { let target_age = 100 + i as i32; handles.push(tokio::spawn(async move { let body = serde_json::to_vec(&ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("set_age".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("set_age".to_string()), params: Some(json!({ "name": "Alice", "age": target_age })), branch: Some("main".to_string()), }) @@ -2350,10 +2849,7 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() { } let statuses: Vec = results.iter().map(|(s, _)| *s).collect(); - let ok_count = statuses - .iter() - .filter(|s| **s == StatusCode::OK) - .count(); + let ok_count = statuses.iter().filter(|s| **s == StatusCode::OK).count(); let conflict_count = statuses .iter() .filter(|s| **s == StatusCode::CONFLICT) @@ -2383,7 +2879,8 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() { statuses ); assert_eq!( - ok_count, 1, + ok_count, + 1, "expected exactly one update to commit and N-1 to receive 409 manifest_conflict \ (op-kind-aware drift check rejects stale-V0 staged datasets at commit_all entry). \ Got {} OK + {} 409 + {} other. \ @@ -2440,8 +2937,8 @@ mod matrix { impl Harness { pub async fn new() -> Self { - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); // Build the WorkloadController explicitly with defaults rather // than letting `AppState::open` call // `WorkloadController::from_env()`. 
The admission-gate test @@ -2454,20 +2951,16 @@ mod matrix { // 429 instead of the expected 200. Constructing the // controller here with explicit defaults makes cells // independent of any env mutation other tests perform. - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); - let workload = - omnigraph_server::workload::WorkloadController::with_defaults(); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + let workload = omnigraph_server::workload::WorkloadController::with_defaults(); let state = AppState::new_with_workload( - repo.to_string_lossy().to_string(), + graph.to_string_lossy().to_string(), db, Vec::new(), workload, ); let app = build_app(state); - Self { - _temp: temp, - app, - } + Self { _temp: temp, app } } pub async fn create_branch(&self, from: &str, name: &str) { @@ -2500,8 +2993,8 @@ mod matrix { pub async fn insert_person(&self, branch: &str, name: &str, age: i32) { let body = serde_json::to_vec(&ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": name, "age": age })), branch: Some(branch.to_string()), }) @@ -2560,12 +3053,7 @@ mod matrix { ) .await .unwrap(); - assert_eq!( - r.status(), - StatusCode::OK, - "snapshot {} failed", - branch - ); + assert_eq!(r.status(), StatusCode::OK, "snapshot {} failed", branch); let body = to_bytes(r.into_body(), usize::MAX).await.unwrap(); let v: Value = serde_json::from_slice(&body).unwrap(); v["tables"] @@ -2584,10 +3072,7 @@ mod matrix { /// just count. 
pub async fn person_exists(&self, branch: &str, name: &str) -> bool { let body = serde_json::to_vec(&ReadRequest { - query_source: include_str!( - "../../omnigraph/tests/fixtures/test.gq" - ) - .to_string(), + query_source: include_str!("../../omnigraph/tests/fixtures/test.gq").to_string(), query_name: Some("get_person".to_string()), params: Some(json!({ "name": name })), branch: Some(branch.to_string()), @@ -2655,8 +3140,8 @@ mod matrix { /// /change either deadlocks or returns a non-200. pub async fn assert_post_op_sentinel(&self, cell: &str, sentinel: &str) { let body = serde_json::to_vec(&ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": sentinel, "age": 99 })), branch: Some("main".to_string()), }) @@ -2706,12 +3191,12 @@ mod matrix { .unwrap(); let response = app .oneshot( - Request::builder() - .uri("/branches/merge") - .method(Method::POST) - .header("content-type", "application/json") - .body(Body::from(body)) - .unwrap(), + Request::builder() + .uri("/branches/merge") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(body)) + .unwrap(), ) .await .unwrap(); @@ -2734,20 +3219,20 @@ mod matrix { tokio::spawn(async move { barrier.wait().await; let body = serde_json::to_vec(&ChangeRequest { - query_source: MUTATION_QUERIES.to_string(), - query_name: Some("insert_person".to_string()), + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), params: Some(json!({ "name": name, "age": age })), branch: Some(branch), }) .unwrap(); let response = app .oneshot( - Request::builder() - .uri("/change") - .method(Method::POST) - .header("content-type", "application/json") - .body(Body::from(body)) - .unwrap(), + Request::builder() + .uri("/change") + .method(Method::POST) + .header("content-type", "application/json") + 
.body(Body::from(body)) + .unwrap(), ) .await .unwrap(); @@ -2775,12 +3260,12 @@ mod matrix { .unwrap(); let response = app .oneshot( - Request::builder() - .uri("/branches") - .method(Method::POST) - .header("content-type", "application/json") - .body(Body::from(body)) - .unwrap(), + Request::builder() + .uri("/branches") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(body)) + .unwrap(), ) .await .unwrap(); @@ -2802,11 +3287,11 @@ mod matrix { barrier.wait().await; let response = app .oneshot( - Request::builder() - .uri(format!("/branches/{}", name)) - .method(Method::DELETE) - .body(Body::empty()) - .unwrap(), + Request::builder() + .uri(format!("/branches/{}", name)) + .method(Method::DELETE) + .body(Body::empty()) + .unwrap(), ) .await .unwrap(); @@ -2840,14 +3325,8 @@ async fn concurrent_branch_ops_morphological_matrix() { let (sa, sb) = h .run_pair( - matrix::op_merge( - "feature-a-cella".to_string(), - "target-a-cella".to_string(), - ), - matrix::op_merge( - "feature-b-cella".to_string(), - "target-b-cella".to_string(), - ), + matrix::op_merge("feature-a-cella".to_string(), "target-a-cella".to_string()), + matrix::op_merge("feature-b-cella".to_string(), "target-b-cella".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge a", cell); @@ -2890,20 +3369,15 @@ async fn concurrent_branch_ops_morphological_matrix() { let cell = "c:merge×merge:same-source-distinct-targets"; let h = matrix::Harness::new().await; h.create_branch("main", "src-shared-cellc").await; - h.insert_person("src-shared-cellc", "Sharon-cellc", 50).await; + h.insert_person("src-shared-cellc", "Sharon-cellc", 50) + .await; h.create_branch("main", "tgt-1-cellc").await; h.create_branch("main", "tgt-2-cellc").await; let (sa, sb) = h .run_pair( - matrix::op_merge( - "src-shared-cellc".to_string(), - "tgt-1-cellc".to_string(), - ), - matrix::op_merge( - "src-shared-cellc".to_string(), - "tgt-2-cellc".to_string(), - ), + 
matrix::op_merge("src-shared-cellc".to_string(), "tgt-1-cellc".to_string()), + matrix::op_merge("src-shared-cellc".to_string(), "tgt-2-cellc".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] merge into tgt-1", cell); @@ -2945,7 +3419,11 @@ async fn concurrent_branch_ops_morphological_matrix() { let conflict = error .manifest_conflict .expect("merge 409 must include manifest_conflict"); - assert_eq!(conflict.table_key, "node:Person", "[{}] conflict table", cell); + assert_eq!( + conflict.table_key, "node:Person", + "[{}] conflict table", + cell + ); h.assert_persons("main", cell, &["FrankD-celld"], &["EveD-celld"]) .await; } @@ -2998,22 +3476,18 @@ async fn concurrent_branch_ops_morphological_matrix() { let (sa, sb) = h .run_pair( - matrix::op_branch_create( - "alpha-cellf".to_string(), - "gamma-cellf".to_string(), - ), - matrix::op_branch_create( - "beta-cellf".to_string(), - "delta-cellf".to_string(), - ), + matrix::op_branch_create("alpha-cellf".to_string(), "gamma-cellf".to_string()), + matrix::op_branch_create("beta-cellf".to_string(), "delta-cellf".to_string()), ) .await; assert_eq!(sa.status, StatusCode::OK, "[{}] gamma create", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] delta create", cell); // gamma forks off alpha → must contain Eve. - h.assert_persons("gamma-cellf", cell, &["Eve-cellf"], &[]).await; + h.assert_persons("gamma-cellf", cell, &["Eve-cellf"], &[]) + .await; // delta forks off beta → must NOT contain Eve. - h.assert_persons("delta-cellf", cell, &[], &["Eve-cellf"]).await; + h.assert_persons("delta-cellf", cell, &[], &["Eve-cellf"]) + .await; h.assert_post_op_sentinel(cell, "sentinel-cellf").await; } @@ -3034,7 +3508,8 @@ async fn concurrent_branch_ops_morphological_matrix() { assert_eq!(sa.status, StatusCode::OK, "[{}] create newborn", cell); assert_eq!(sb.status, StatusCode::OK, "[{}] delete doomed", cell); // newborn-cellg exists with main's content. 
- h.assert_persons("newborn-cellg", cell, &["Alice"], &[]).await; + h.assert_persons("newborn-cellg", cell, &["Alice"], &[]) + .await; h.assert_post_op_sentinel(cell, "sentinel-cellg").await; } @@ -3164,14 +3639,18 @@ async fn concurrent_branch_ops_morphological_matrix() { let conflict = error .manifest_conflict .expect("merge 409 must include manifest_conflict"); - assert_eq!(conflict.table_key, "node:Person", "[{}] conflict table", cell); + assert_eq!( + conflict.table_key, "node:Person", + "[{}] conflict table", + cell + ); h.assert_persons("main", cell, &["Steve-cellk"], &["Rita-cellk"]) .await; } - // Reopen via a fresh AppState on the same repo. - let repo_uri = format!("{}/server.omni", h._temp.path().display()); - let reopened = AppState::open(repo_uri.clone()).await.unwrap(); + // Reopen via a fresh AppState on the same graph. + let graph_uri = format!("{}/server.omni", h._temp.path().display()); + let reopened = AppState::open(graph_uri.clone()).await.unwrap(); let app2 = build_app(reopened); // Sanity: the same identity check via the new app must see // Rita and Steve. 
@@ -3234,9 +3713,9 @@ query insert_c($name: String) { const SEED_COMPANIES: u64 = 2; const PER_TYPE: usize = 4; - let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let state = AppState::open(repo.to_string_lossy().to_string()) + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) .await .unwrap(); let app = build_app(state); @@ -3246,8 +3725,8 @@ query insert_c($name: String) { let app_p = app.clone(); handles.push(tokio::spawn(async move { let body = serde_json::to_vec(&ChangeRequest { - query_source: PERSON_QUERY.to_string(), - query_name: Some("insert_p".to_string()), + query: PERSON_QUERY.to_string(), + name: Some("insert_p".to_string()), params: Some(json!({ "name": format!("p-{i}"), "age": i as i32 })), branch: Some("main".to_string()), }) @@ -3263,8 +3742,8 @@ query insert_c($name: String) { let app_c = app.clone(); handles.push(tokio::spawn(async move { let body = serde_json::to_vec(&ChangeRequest { - query_source: COMPANY_QUERY.to_string(), - query_name: Some("insert_c".to_string()), + query: COMPANY_QUERY.to_string(), + name: Some("insert_c".to_string()), params: Some(json!({ "name": format!("c-{i}") })), branch: Some("main".to_string()), }) @@ -3309,7 +3788,11 @@ query insert_c($name: String) { let lookup_count = |table_key: &str| -> u64 { body["tables"] .as_array() - .and_then(|tables| tables.iter().find(|t| t["table_key"].as_str() == Some(table_key))) + .and_then(|tables| { + tables + .iter() + .find(|t| t["table_key"].as_str() == Some(table_key)) + }) .and_then(|t| t["row_count"].as_u64()) .unwrap_or_else(|| panic!("snapshot missing {}", table_key)) }; @@ -3354,17 +3837,27 @@ async fn ingest_per_actor_admission_cap_returns_429() { // `AppState::new_with_workload` constructor closes that bug class — // this test no longer mutates global state and no longer needs // `#[serial]`. 
- let temp = init_loaded_repo().await; - let repo = repo_path(temp.path()); - let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap(); + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); let workload = omnigraph_server::workload::WorkloadController::new( 1, // per-actor in-flight cap (the fixture under test) 1_000_000_000, // per-actor byte budget — large so it never bottlenecks ); - let state = AppState::new_with_workload( - repo.to_string_lossy().to_string(), + // MR-723: install a permit-all policy alongside the bearer token so + // /ingest (action=Change) passes Cedar evaluation. The test is + // exercising the admission cap, not policy — the policy is just + // enough to clear the State 3 path so the test reaches workload. + let policy_path = temp.path().join("policy.yaml"); + fs::write(&policy_path, permit_all_policy_yaml(&["act-flooder"])).unwrap(); + let policy_engine = + omnigraph_server::PolicyEngine::load_graph(&policy_path, graph.to_string_lossy().as_ref()) + .unwrap(); + let state = AppState::new_single( + graph.to_string_lossy().to_string(), db, vec![("act-flooder".to_string(), "flooder-token".to_string())], + Some(policy_engine), workload, ); let app = build_app(state); @@ -3459,9 +3952,82 @@ async fn ingest_per_actor_admission_cap_returns_429() { } } +/// Regression for B2 (MR-668): when an `AppState` is built with a +/// per-graph policy and a custom workload, the engine inside the +/// routing's `GraphHandle` MUST have the same policy applied via +/// `Omnigraph::with_policy`. Pre-fix, `new_with_workload(...).with_policy_engine(p)` +/// installed the policy only on the HTTP-layer `handle.policy`; the +/// underlying `Arc` was reused without `with_policy`, so any +/// caller reaching through `state.routing()` could bypass Cedar. 
+/// +/// This test reaches the engine the same way an embedded SDK consumer +/// or future routing code path would, and asserts the policy still +/// fires. The deny path is "act-blocked has a valid bearer but isn't in +/// the policy's allowed group" — i.e., authenticated-but-unauthorised. +#[tokio::test(flavor = "multi_thread")] +async fn engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single() { + use omnigraph_server::GraphRouting; + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + + // Permit `act-allowed` for change actions; `act-blocked` is not in + // any allowed group — every change request from them must deny. + let policy_path = temp.path().join("policy.yaml"); + fs::write(&policy_path, permit_all_policy_yaml(&["act-allowed"])).unwrap(); + let policy_engine = + omnigraph_server::PolicyEngine::load_graph(&policy_path, graph.to_string_lossy().as_ref()) + .unwrap(); + + let workload = omnigraph_server::workload::WorkloadController::new(100, 1_000_000_000); + let state = AppState::new_single( + graph.to_string_lossy().to_string(), + db, + vec![("act-blocked".to_string(), "block-token".to_string())], + Some(policy_engine), + workload, + ); + + // Reach into the routing and pull the engine the same way an + // embedded consumer holding `Arc` would. If `new_single` + // failed to apply `with_policy` to the engine, this `mutate_as` + // would succeed — the HTTP-layer is bypassed entirely. + let handle = match state.routing() { + GraphRouting::Single { handle } => Arc::clone(handle), + GraphRouting::Multi { .. 
} => panic!("expected single-mode routing"), + }; + let engine = Arc::clone(&handle.engine); + + let mut params: omnigraph_compiler::ParamMap = Default::default(); + params.insert( + "name".to_string(), + omnigraph_compiler::Literal::String("EngineLayerBlocked".to_string()), + ); + params.insert("age".to_string(), omnigraph_compiler::Literal::Integer(30)); + let result = engine + .mutate_as( + "main", + MUTATION_QUERIES, + "insert_person", + ¶ms, + Some("act-blocked"), + ) + .await; + match result { + Err(OmniError::Policy(_)) => { /* expected — engine-layer gate fired */ } + Ok(_) => panic!( + "engine-layer policy did NOT fire — act-blocked successfully ran mutate_as via \ + the engine pulled from the registry handle. AppState::new_single failed to apply \ + with_policy to the underlying Omnigraph engine. This is the B2 footgun the \ + with_policy_engine deletion was supposed to close." + ), + Err(other) => panic!("expected OmniError::Policy, got: {other:?}"), + } +} + #[tokio::test(flavor = "multi_thread")] async fn oversized_request_body_returns_payload_too_large() { - let (_temp, app) = app_for_loaded_repo().await; + let (_temp, app) = app_for_loaded_graph().await; let oversized = "x".repeat(1_100_000); let response = app .clone() @@ -3478,3 +4044,1537 @@ async fn oversized_request_body_returns_payload_too_large() { assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE); } + +// ─── MR-723 default-deny mode (State 2: tokens without policy) ────────── +// +// `authorize_request` returns 403 for every action except `Read` when a +// PolicyEngine is not installed but bearer tokens are configured. Pinned +// by the three tests below — Read allowed, Change/SchemaApply denied — +// to prevent regressing back to the pre-MR-723 "tokens configured but +// no policy = fully open" trap. 
+ +#[tokio::test(flavor = "multi_thread")] +async fn default_deny_mode_allows_read_for_authenticated_actor() { + let (_temp, app) = app_for_graph_with_auth_tokens_only( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-andrew", "demo-token")], + ) + .await; + + let (status, _body) = json_response( + &app, + Request::builder() + .uri("/snapshot") + .method(Method::GET) + .header(AUTHORIZATION, "Bearer demo-token") + .body(Body::empty()) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); +} + +#[tokio::test(flavor = "multi_thread")] +async fn default_deny_mode_rejects_change_with_forbidden() { + let (_temp, app) = app_for_graph_with_auth_tokens_only( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-andrew", "demo-token")], + ) + .await; + + let change = ChangeRequest { + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), + params: Some(json!({ "name": "DefaultDeny", "age": 1 })), + branch: Some("main".to_string()), + }; + let (status, body) = json_response( + &app, + Request::builder() + .uri("/change") + .method(Method::POST) + .header(AUTHORIZATION, "Bearer demo-token") + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&change).unwrap())) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::FORBIDDEN); + let error: ErrorOutput = serde_json::from_value(body).unwrap(); + assert!( + error.error.contains("default-deny"), + "expected default-deny in error message, got: {}", + error.error + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn default_deny_mode_rejects_schema_apply_with_forbidden() { + let (_temp, app) = app_for_graph_with_auth_tokens_only( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-andrew", "demo-token")], + ) + .await; + + let req = SchemaApplyRequest { + schema_source: additive_schema_with_nickname(), + ..Default::default() + }; + let (status, body) = json_response( + &app, + Request::builder() + .uri("/schema/apply") + 
.method(Method::POST) + .header(AUTHORIZATION, "Bearer demo-token") + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&req).unwrap())) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::FORBIDDEN); + let error: ErrorOutput = serde_json::from_value(body).unwrap(); + assert!( + error.error.contains("default-deny"), + "expected default-deny in error message, got: {}", + error.error + ); +} + +// ─── SDK ↔ HTTP decision parity (MR-722 PR A) ───────────────────────────── +// +// Engine and HTTP both consult Cedar via `PolicyChecker::check()`; by +// construction they cannot disagree on a decision. These tests pin that +// property explicitly so a future refactor that introduces a separate +// auth path (or copy-pastes Cedar evaluation logic) turns red. +// +// Four cases cover the per-action scope shapes: +// * Change on a protected branch via `mutate_as` / POST /change +// * Change with an actor that has no permit +// * BranchMerge to a protected target via `branch_merge_as` / POST /branches/merge +// * BranchMerge with an actor that has no permit + +const PARITY_POLICY_YAML: &str = r#" +version: 1 +groups: + team: [act-bruno] + admins: [act-ragnor] +protected_branches: [main] +rules: + - id: admins-change-anywhere + allow: + actors: { group: admins } + actions: [change] + branch_scope: any + - id: admins-merge-to-protected + allow: + actors: { group: admins } + actions: [branch_merge] + target_branch_scope: protected +"#; + +#[derive(Clone, Copy, Debug)] +enum ParityDecision { + Allow, + Deny, +} + +async fn build_parity_graph() -> (tempfile::TempDir, PathBuf, PathBuf) { + // Build a graph with `main` loaded and a `feature` branch ready for + // merge. Returns the graph path and a written policy.yaml path. 
+ let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + { + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + db.branch_create_from(ReadTarget::branch("main"), "feature") + .await + .unwrap(); + db.load_as( + "feature", + r#"{"type":"Person","data":{"name":"ParityEve","age":29}}"#, + LoadMode::Append, + None, + ) + .await + .unwrap(); + } + let policy_path = temp.path().join("policy.yaml"); + fs::write(&policy_path, PARITY_POLICY_YAML).unwrap(); + (temp, graph, policy_path) +} + +async fn sdk_change_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision { + let policy = PolicyEngine::load_graph(policy_path, graph.to_string_lossy().as_ref()).unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()) + .await + .unwrap() + .with_policy(Arc::new(policy) as Arc); + let mut params: omnigraph_compiler::ParamMap = Default::default(); + // Parameter keys are bare names (no `$` prefix); the runtime resolves + // `$name` references in the query body to `params["name"]`. 
+ params.insert( + "name".to_string(), + omnigraph_compiler::Literal::String("ParityCharlie".to_string()), + ); + params.insert("age".to_string(), omnigraph_compiler::Literal::Integer(30)); + let result = db + .mutate_as( + "main", + MUTATION_QUERIES, + "insert_person", + ¶ms, + Some(actor), + ) + .await; + match result { + Ok(_) => ParityDecision::Allow, + Err(OmniError::Policy(_)) => ParityDecision::Deny, + Err(other) => panic!("unexpected SDK error for change: {other:?}"), + } +} + +async fn http_change_decision( + graph: &Path, + policy_path: &PathBuf, + actor: &str, + token: &str, +) -> ParityDecision { + let state = AppState::open_with_bearer_tokens_and_policy( + graph.to_string_lossy().to_string(), + vec![(actor.to_string(), token.to_string())], + Some(policy_path), + ) + .await + .unwrap(); + let app = build_app(state); + let req = ChangeRequest { + query: MUTATION_QUERIES.to_string(), + name: Some("insert_person".to_string()), + params: Some(json!({ "name": "ParityCharlie", "age": 30 })), + branch: Some("main".to_string()), + }; + let (status, _body) = json_response( + &app, + Request::builder() + .uri("/change") + .method(Method::POST) + .header(AUTHORIZATION, format!("Bearer {token}")) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&req).unwrap())) + .unwrap(), + ) + .await; + match status { + StatusCode::OK => ParityDecision::Allow, + StatusCode::FORBIDDEN => ParityDecision::Deny, + other => panic!("unexpected HTTP status for change: {other}"), + } +} + +async fn sdk_merge_decision(graph: &Path, policy_path: &Path, actor: &str) -> ParityDecision { + let policy = PolicyEngine::load_graph(policy_path, graph.to_string_lossy().as_ref()).unwrap(); + let db = Omnigraph::open(graph.to_str().unwrap()) + .await + .unwrap() + .with_policy(Arc::new(policy) as Arc); + let result = db.branch_merge_as("feature", "main", Some(actor)).await; + match result { + Ok(_) => ParityDecision::Allow, + Err(OmniError::Policy(_)) => 
ParityDecision::Deny, + Err(other) => panic!("unexpected SDK error for branch_merge: {other:?}"), + } +} + +async fn http_merge_decision( + graph: &Path, + policy_path: &PathBuf, + actor: &str, + token: &str, +) -> ParityDecision { + let state = AppState::open_with_bearer_tokens_and_policy( + graph.to_string_lossy().to_string(), + vec![(actor.to_string(), token.to_string())], + Some(policy_path), + ) + .await + .unwrap(); + let app = build_app(state); + let req = BranchMergeRequest { + source: "feature".to_string(), + target: Some("main".to_string()), + }; + let (status, _body) = json_response( + &app, + Request::builder() + .uri("/branches/merge") + .method(Method::POST) + .header(AUTHORIZATION, format!("Bearer {token}")) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&req).unwrap())) + .unwrap(), + ) + .await; + match status { + StatusCode::OK => ParityDecision::Allow, + StatusCode::FORBIDDEN => ParityDecision::Deny, + other => panic!("unexpected HTTP status for branch_merge: {other}"), + } +} + +#[tokio::test(flavor = "multi_thread")] +async fn policy_decision_parity_change_admin_on_main_allowed() { + // (act-ragnor, change, main) — admins-change-anywhere rule applies. + // Both SDK and HTTP must allow. Each path uses its own fresh graph + // because allow→side-effects. + let (_t1, graph1, policy1) = build_parity_graph().await; + let sdk = sdk_change_decision(&graph1, &policy1, "act-ragnor").await; + let (_t2, graph2, policy2) = build_parity_graph().await; + let http = http_change_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await; + assert!( + matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow), + "SDK={sdk:?} HTTP={http:?} — should both Allow", + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn policy_decision_parity_change_team_on_main_denied() { + // (act-bruno, change, main) — no rule grants bruno change on + // protected. Both SDK and HTTP must deny. 
Same graph is reusable + // because deny→no side-effects. + let (_temp, graph, policy) = build_parity_graph().await; + let sdk = sdk_change_decision(&graph, &policy, "act-bruno").await; + let http = http_change_decision(&graph, &policy, "act-bruno", "bruno-token").await; + assert!( + matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny), + "SDK={sdk:?} HTTP={http:?} — should both Deny", + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn policy_decision_parity_branch_merge_admin_allowed() { + // (act-ragnor, branch_merge, feature→main) — admins-merge-to-protected + // rule applies. Both Allow. Each path uses its own fresh graph — + // a successful merge consumes the feature branch's commit on main. + let (_t1, graph1, policy1) = build_parity_graph().await; + let sdk = sdk_merge_decision(&graph1, &policy1, "act-ragnor").await; + let (_t2, graph2, policy2) = build_parity_graph().await; + let http = http_merge_decision(&graph2, &policy2, "act-ragnor", "ragnor-token").await; + assert!( + matches!(sdk, ParityDecision::Allow) && matches!(http, ParityDecision::Allow), + "SDK={sdk:?} HTTP={http:?} — should both Allow", + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn policy_decision_parity_branch_merge_team_denied() { + // (act-bruno, branch_merge, feature→main) — no rule grants bruno + // branch_merge. Both Deny. + let (_temp, graph, policy) = build_parity_graph().await; + let sdk = sdk_merge_decision(&graph, &policy, "act-bruno").await; + let http = http_merge_decision(&graph, &policy, "act-bruno", "bruno-token").await; + assert!( + matches!(sdk, ParityDecision::Deny) && matches!(http, ParityDecision::Deny), + "SDK={sdk:?} HTTP={http:?} — should both Deny", + ); +} + +// ─── MR-694 PR B: HTTP soft + hard drop semantics + data preservation ──── +// +// SDK-level drop semantics are pinned in `crates/omnigraph/tests/schema_apply.rs`. 
+// These HTTP-side tests mirror the assertions through POST /schema/apply +// and exercise the new `allow_data_loss` field (closes the gap where +// the schema-lint chassis v1.2 shipped Hard mode on the CLI but the +// HTTP request struct had no equivalent field). + +#[tokio::test(flavor = "multi_thread")] +async fn schema_apply_route_soft_drops_property_via_http() { + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-ragnor", "admin-token")], + SCHEMA_APPLY_POLICY_YAML, + ) + .await; + // Load a row that has the column we're about to drop. + let graph = graph_path(temp.path()); + { + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + db.load( + "main", + r#"{"type":"Person","data":{"name":"PreDrop","age":42}}"#, + LoadMode::Append, + ) + .await + .unwrap(); + } + let pre_version = manifest_dataset_version(&graph).await; + + let (status, payload) = json_response( + &app, + Request::builder() + .method(Method::POST) + .uri("/schema/apply") + .header("content-type", "application/json") + .header("authorization", "Bearer admin-token") + .body(Body::from( + serde_json::to_vec(&SchemaApplyRequest { + schema_source: schema_without_age(), + ..Default::default() + }) + .unwrap(), + )) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(payload["applied"], true); + + // Catalog reflects the drop: `age` is gone from the live schema. + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + assert!( + !reopened.catalog().node_types["Person"] + .properties + .contains_key("age"), + "catalog should not contain `age` after drop" + ); + + // Soft drop preserves the prior version — `age` is still readable + // via time travel to the pre-drop manifest version. Mirrors the + // SDK-side assertion in `apply_schema_drops_a_nullable_property_softly_preserves_prior_version`. 
+ let pre_drop_snapshot = reopened.snapshot_at_version(pre_version).await.unwrap(); + let pre_drop_ds = pre_drop_snapshot.open("node:Person").await.unwrap(); + let pre_drop_fields = pre_drop_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + pre_drop_fields.iter().any(|f| f == "age"), + "soft drop should leave the pre-drop dataset's `age` column \ + time-travel-reachable; got fields {pre_drop_fields:?}" + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn schema_apply_route_soft_drops_node_type_via_http() { + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-ragnor", "admin-token")], + SCHEMA_APPLY_POLICY_YAML, + ) + .await; + let graph = graph_path(temp.path()); + + let (status, payload) = json_response( + &app, + Request::builder() + .method(Method::POST) + .uri("/schema/apply") + .header("content-type", "application/json") + .header("authorization", "Bearer admin-token") + .body(Body::from( + serde_json::to_vec(&SchemaApplyRequest { + schema_source: schema_without_company(), + ..Default::default() + }) + .unwrap(), + )) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(payload["applied"], true); + + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + assert!( + !reopened.catalog().node_types.contains_key("Company"), + "catalog should not contain `Company` after drop" + ); + assert!( + !reopened.catalog().edge_types.contains_key("WorksAt"), + "catalog should not contain `WorksAt` after cascade" + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn schema_apply_route_hard_drops_property_with_allow_data_loss() { + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-ragnor", "admin-token")], + SCHEMA_APPLY_POLICY_YAML, + ) + .await; + let graph = graph_path(temp.path()); + { + let db = 
Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + db.load( + "main", + r#"{"type":"Person","data":{"name":"PreDropHard","age":50}}"#, + LoadMode::Append, + ) + .await + .unwrap(); + } + + // Apply with allow_data_loss=true → Hard mode promotion. + let (status, payload) = json_response( + &app, + Request::builder() + .method(Method::POST) + .uri("/schema/apply") + .header("content-type", "application/json") + .header("authorization", "Bearer admin-token") + .body(Body::from( + serde_json::to_vec(&SchemaApplyRequest { + schema_source: schema_without_age(), + allow_data_loss: true, + }) + .unwrap(), + )) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(payload["applied"], true); + + // Catalog reflects the drop. + let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + assert!( + !reopened.catalog().node_types["Person"] + .properties + .contains_key("age"), + "catalog should not contain `age` after Hard drop" + ); + // Plan steps should show DropMode::Hard for property drops. + let steps = payload["steps"].as_array().expect("steps array"); + let drop_step = steps + .iter() + .find(|s| s["kind"] == "drop_property") + .expect("plan should include drop_property step"); + let mode = &drop_step["mode"]; + assert_eq!( + mode, "hard", + "expected hard mode under allow_data_loss=true" + ); +} + +#[tokio::test(flavor = "multi_thread")] +async fn schema_apply_route_keeps_drops_soft_without_flag() { + // Symmetric to the Hard test: same schema change, but no + // allow_data_loss flag → drops stay Soft (prior column data + // remains time-travel-reachable). Pins the default semantics + // against accidental Hard promotion. 
+ let (temp, app) = app_for_graph_with_auth_tokens_and_policy( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-ragnor", "admin-token")], + SCHEMA_APPLY_POLICY_YAML, + ) + .await; + let graph = graph_path(temp.path()); + + let (status, payload) = json_response( + &app, + Request::builder() + .method(Method::POST) + .uri("/schema/apply") + .header("content-type", "application/json") + .header("authorization", "Bearer admin-token") + .body(Body::from( + serde_json::to_vec(&SchemaApplyRequest { + schema_source: schema_without_age(), + allow_data_loss: false, + }) + .unwrap(), + )) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(payload["applied"], true); + + let steps = payload["steps"].as_array().expect("steps array"); + let drop_step = steps + .iter() + .find(|s| s["kind"] == "drop_property") + .expect("plan should include drop_property step"); + let mode = &drop_step["mode"]; + assert_eq!(mode, "soft", "expected soft mode without allow_data_loss"); + let _ = graph; +} + +#[tokio::test(flavor = "multi_thread")] +async fn schema_apply_route_additive_property_preserves_existing_rows() { + // SDK suite covers rename and drop data preservation. Additive + // AddProperty wasn't pinned with a row-count check anywhere. + // Load N rows, apply schema adding nullable property, verify + // every row is still readable and the new column is null. + let (temp, app) = app_for_graph_with_auth_tokens_and_policy( + &fs::read_to_string(fixture("test.pg")).unwrap(), + &[("act-ragnor", "admin-token")], + SCHEMA_APPLY_POLICY_YAML, + ) + .await; + let graph = graph_path(temp.path()); + + // Standard fixture data: 4 Persons + 1 Company. Load it. 
+ let pre_count = { + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + db.load( + "main", + &fs::read_to_string(fixture("test.jsonl")).unwrap(), + LoadMode::Append, + ) + .await + .unwrap(); + let snap = db + .snapshot_of(omnigraph::db::ReadTarget::branch("main")) + .await + .unwrap(); + snap.entry("node:Person").expect("Person").row_count + }; + assert!(pre_count > 0, "fixture should have loaded Person rows"); + + let (status, payload) = json_response( + &app, + Request::builder() + .method(Method::POST) + .uri("/schema/apply") + .header("content-type", "application/json") + .header("authorization", "Bearer admin-token") + .body(Body::from( + serde_json::to_vec(&SchemaApplyRequest { + schema_source: additive_schema_with_nickname(), + ..Default::default() + }) + .unwrap(), + )) + .unwrap(), + ) + .await; + assert_eq!(status, StatusCode::OK); + assert_eq!(payload["applied"], true); + + // Row count preserved. + let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap(); + let snap = db + .snapshot_of(omnigraph::db::ReadTarget::branch("main")) + .await + .unwrap(); + let post_count = snap.entry("node:Person").expect("Person").row_count; + assert_eq!( + post_count, pre_count, + "AddProperty should preserve row count", + ); +} + +// ─── MR-668: multi-graph startup ────────────────────────────────────────── + +mod multi_graph_startup { + use super::*; + use omnigraph::storage::normalize_root_uri; + use omnigraph_server::{ + GraphHandle, GraphId, GraphKey, GraphRegistry, InsertError, ServerConfig, ServerConfigMode, + load_server_settings, + }; + use std::sync::Arc; + + async fn build_multi_mode_app(graph_ids: &[&str]) -> (Vec, Router) { + let mut dirs = Vec::with_capacity(graph_ids.len()); + let mut handles = Vec::with_capacity(graph_ids.len()); + for id in graph_ids { + let dir = tempfile::tempdir().unwrap(); + let graph_uri = dir.path().join(id).to_str().unwrap().to_string(); + let schema = fs::read_to_string(fixture("test.pg")).unwrap(); + let 
engine = Omnigraph::init(&graph_uri, &schema).await.unwrap(); + handles.push(Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from(*id).unwrap()), + uri: graph_uri, + engine: Arc::new(engine), + policy: None, + })); + dirs.push(dir); + } + let workload = omnigraph_server::workload::WorkloadController::from_env(); + let state = AppState::new_multi(handles, Vec::new(), None, workload, None).unwrap(); + let app = build_app(state); + (dirs, app) + } + + /// Cluster route `/graphs/{graph_id}/snapshot` resolves to the right + /// engine. Two graphs side by side; assert each responds to its own + /// id and does NOT respond to the other's URL. + #[tokio::test(flavor = "multi_thread")] + async fn cluster_routes_dispatch_per_graph_handle() { + let (_dirs, app) = build_multi_mode_app(&["alpha", "beta"]).await; + for id in ["alpha", "beta"] { + let resp = app + .clone() + .oneshot( + Request::builder() + .method(Method::GET) + .uri(format!("/graphs/{id}/snapshot?branch=main")) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!( + resp.status(), + StatusCode::OK, + "graph '{id}' must respond OK on its cluster snapshot route" + ); + } + } + + /// Unknown graph id under the cluster prefix yields 404 (not 500, + /// not 410 — `Gone` is reserved for the future DELETE flow). + #[tokio::test(flavor = "multi_thread")] + async fn cluster_route_for_unknown_graph_returns_404() { + let (_dirs, app) = build_multi_mode_app(&["alpha"]).await; + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs/nonexistent/snapshot?branch=main") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } + + /// Coverage net for cluster-route regressions across every + /// protected handler — not just the few that have inner path + /// params. 
Bug-1 surfaced because only `/snapshot` was being + /// exercised in cluster mode, leaving the other six protected + /// routes implicitly untested. This sweep hits each one and + /// asserts the response shows the handler was reached: no 404 + /// (router didn't match), no 500 with "Wrong number of path + /// arguments" (path extractor broke), no 500 with "missing + /// extension" (routing middleware didn't inject the handle). + /// + /// Status codes are negative assertions because each handler's + /// happy-path inputs differ — what matters is "the request + /// reached the handler," not "the handler returned 200." The + /// individual handlers' logic is already tested in single mode. + #[tokio::test(flavor = "multi_thread")] + async fn all_protected_cluster_routes_resolve_to_their_handler() { + let (_dirs, app) = build_multi_mode_app(&["alpha"]).await; + + // (method, path, body) — one minimal request per protected + // cluster route. Bodies are valid enough that the router and + // extractors succeed; whether the engine ultimately returns + // 200 or 4xx is per-handler and not what this test pins. 
+ let cases: &[(Method, &str, Option<&str>)] = &[ + (Method::GET, "/graphs/alpha/snapshot?branch=main", None), + (Method::GET, "/graphs/alpha/schema", None), + (Method::GET, "/graphs/alpha/branches", None), + (Method::GET, "/graphs/alpha/commits", None), + ( + Method::POST, + "/graphs/alpha/read", + Some(r#"{"query_source":"query q() { return {} }"}"#), + ), + ( + Method::POST, + "/graphs/alpha/change", + Some(r#"{"query_source":"query q() { return {} }"}"#), + ), + ( + Method::POST, + "/graphs/alpha/export", + Some(r#"{"branch":"main"}"#), + ), + ( + Method::POST, + "/graphs/alpha/schema/apply", + Some(r#"{"schema_source":"","allow_data_loss":false}"#), + ), + (Method::POST, "/graphs/alpha/ingest", Some(r#"{"data":""}"#)), + ( + Method::POST, + "/graphs/alpha/branches/merge", + Some(r#"{"source":"main","target":"main"}"#), + ), + ]; + + for (method, path, body) in cases { + let req_body = body + .map(|s| Body::from(s.to_string())) + .unwrap_or_else(Body::empty); + let req = Request::builder() + .method(method.clone()) + .uri(*path) + .header("content-type", "application/json") + .body(req_body) + .unwrap(); + let resp = app.clone().oneshot(req).await.unwrap(); + let status = resp.status(); + let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); + let body_str = String::from_utf8_lossy(&bytes); + + assert_ne!( + status, + StatusCode::NOT_FOUND, + "{} {} — router didn't match (cluster-route mounting regression). Body: {}", + method, + path, + body_str, + ); + assert!( + !(status == StatusCode::INTERNAL_SERVER_ERROR + && body_str.contains("Wrong number of path arguments")), + "{} {} — path extractor broke (Bug-1 class regression). Body: {}", + method, + path, + body_str, + ); + assert!( + !(status == StatusCode::INTERNAL_SERVER_ERROR + && body_str.to_lowercase().contains("missing extension")), + "{} {} — routing middleware didn't inject GraphHandle. 
Body: {}", + method, + path, + body_str, + ); + } + } + + /// Regression for the bot-surfaced path-extractor bug: cluster + /// routes whose inner path also captures a parameter + /// (`/graphs/{graph_id}/branches/{branch}`, + /// `/graphs/{graph_id}/commits/{commit_id}`) must extract the + /// inner param cleanly. Axum 0.8 propagates the outer `{graph_id}` + /// capture into nested handlers, so a `Path` extractor + /// would see two values and fail with "Wrong number of path + /// arguments. Expected 1 but got 2." Today both DELETE branch and + /// GET commit-by-id break in multi-mode because their handlers + /// use bare `Path` — this test pins the fix. + /// + /// The broader `all_protected_cluster_routes_resolve_to_their_handler` + /// test sweeps the full route surface; this one stays narrowly + /// targeted at the inner-path-param shape because that's the + /// specific regression class. + #[tokio::test(flavor = "multi_thread")] + async fn cluster_routes_with_inner_path_params_deserialize_correctly() { + let (_dirs, app) = build_multi_mode_app(&["alpha"]).await; + + // Create a branch we can then delete — DELETE /graphs/alpha/branches/feature + let create_resp = app + .clone() + .oneshot( + Request::builder() + .method(Method::POST) + .uri("/graphs/alpha/branches") + .header("content-type", "application/json") + .body(Body::from(r#"{"name":"feature"}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!( + create_resp.status(), + StatusCode::OK, + "branch create on the cluster route must succeed before delete can be tested" + ); + + // DELETE /graphs/{graph_id}/branches/{branch} — exercises a handler + // whose only Path extractor (`branch`) is inside a nested route + // that also captures `graph_id`. The handler must pick `branch` + // by name, not by position. 
+ let delete_resp = app + .clone() + .oneshot( + Request::builder() + .method(Method::DELETE) + .uri("/graphs/alpha/branches/feature") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + let delete_status = delete_resp.status(); + let delete_body = to_bytes(delete_resp.into_body(), usize::MAX).await.unwrap(); + assert_eq!( + delete_status, + StatusCode::OK, + "DELETE /graphs/{{id}}/branches/{{branch}} must extract `branch` cleanly. \ + Body: {}", + String::from_utf8_lossy(&delete_body), + ); + + // GET /graphs/{graph_id}/commits/{commit_id} — same shape: the + // handler's only Path extractor is the inner `commit_id`, which + // must deserialize by name even though `graph_id` is also in scope. + // We don't know a real commit_id, but the failure mode under test + // is path extraction, not commit lookup — a 404 from the engine + // is fine; a 500 with "Wrong number of path arguments" is the bug. + let commit_resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs/alpha/commits/0000000000000000") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + let commit_status = commit_resp.status(); + let commit_body = to_bytes(commit_resp.into_body(), usize::MAX).await.unwrap(); + let body_str = String::from_utf8_lossy(&commit_body); + assert!( + commit_status != StatusCode::INTERNAL_SERVER_ERROR + || !body_str.contains("Wrong number of path arguments"), + "GET /graphs/{{id}}/commits/{{commit_id}} must extract `commit_id` cleanly. \ + Got: {} | {}", + commit_status, + body_str, + ); + } + + /// Flat routes 404 in multi mode — the router only mounts under + /// `/graphs/{graph_id}/...` so `/snapshot` doesn't resolve. 
+ #[tokio::test(flavor = "multi_thread")] + async fn flat_routes_404_in_multi_mode() { + let (_dirs, app) = build_multi_mode_app(&["alpha"]).await; + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/snapshot?branch=main") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } + + /// `GraphId` validation runs at startup — a reserved name in + /// `omnigraph.yaml` produces a clear error rather than getting + /// rejected per-request. + #[test] + fn load_server_settings_rejects_reserved_graph_id() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +graphs: + policies: + uri: /tmp/g1.omni +"#, + ) + .unwrap(); + let err = load_server_settings(Some(&config_path), None, None, None, false).unwrap_err(); + assert!( + err.to_string().contains("invalid graph id 'policies'"), + "expected reserved-name rejection, got: {err}" + ); + } + + #[tokio::test(flavor = "multi_thread")] + async fn registry_rejects_duplicate_normalized_graph_uris() { + let dir = tempfile::tempdir().unwrap(); + let graph_uri = dir.path().join("same").to_str().unwrap().to_string(); + let schema = fs::read_to_string(fixture("test.pg")).unwrap(); + let engine = Arc::new(Omnigraph::init(&graph_uri, &schema).await.unwrap()); + + let alpha = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()), + uri: graph_uri.clone(), + engine: Arc::clone(&engine), + policy: None, + }); + let beta = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("beta").unwrap()), + uri: format!("file://{graph_uri}/"), + engine, + policy: None, + }); + + match GraphRegistry::from_handles(vec![alpha, beta]) { + Err(InsertError::DuplicateUri(uri)) => { + assert!( + normalize_root_uri(&uri).is_ok(), + "duplicate URI should still be parseable, got {uri}" + ); + } + Err(err) => panic!("expected DuplicateUri for 
normalized aliases, got {err:?}"), + Ok(_) => panic!("expected DuplicateUri for normalized aliases, got Ok"), + } + } + + #[tokio::test(flavor = "multi_thread")] + async fn registry_stores_canonical_graph_uri() { + let dir = tempfile::tempdir().unwrap(); + let graph_uri = dir.path().join("canonical").to_str().unwrap().to_string(); + let schema = fs::read_to_string(fixture("test.pg")).unwrap(); + let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap(); + let handle = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()), + uri: format!("file://{graph_uri}/"), + engine: Arc::new(engine), + policy: None, + }); + + let registry = GraphRegistry::from_handles(vec![handle]).unwrap(); + let listed = registry.list(); + assert_eq!(listed.len(), 1); + assert_eq!(listed[0].uri, graph_uri); + } + + // ── Four-rule mode inference matrix ─────────────────────────────── + + /// Rule 1: CLI positional URI → Single. + #[test] + fn mode_inference_cli_uri_is_single() { + let settings = load_server_settings( + None, + Some("/tmp/cli.omni".to_string()), + None, + None, + true, // allow unauth so we get past the runtime-state check + ) + .unwrap(); + match settings.mode { + ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/cli.omni"), + ServerConfigMode::Multi { .. } => panic!("expected Single (rule 1), got Multi"), + } + } + + /// Rule 2: --target picks one graph from `graphs:` map → Single. + #[test] + fn mode_inference_cli_target_is_single() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +graphs: + alpha: + uri: /tmp/alpha.omni + beta: + uri: /tmp/beta.omni +"#, + ) + .unwrap(); + let settings = + load_server_settings(Some(&config_path), None, Some("alpha".into()), None, true) + .unwrap(); + match settings.mode { + ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/alpha.omni"), + ServerConfigMode::Multi { .. 
} => panic!("expected Single (rule 2), got Multi"), + } + } + + /// Rule 3: `server.graph` set → Single (target picked from config). + #[test] + fn mode_inference_server_graph_is_single() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +graphs: + alpha: + uri: /tmp/alpha.omni + beta: + uri: /tmp/beta.omni +server: + graph: beta +"#, + ) + .unwrap(); + let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap(); + match settings.mode { + ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/beta.omni"), + ServerConfigMode::Multi { .. } => panic!("expected Single (rule 3), got Multi"), + } + } + + /// Rule 4: `--config` + non-empty `graphs:` + no single-mode selector → Multi. + #[test] + fn mode_inference_config_plus_graphs_is_multi() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +graphs: + alpha: + uri: /tmp/alpha.omni + beta: + uri: /tmp/beta.omni +"#, + ) + .unwrap(); + let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap(); + match settings.mode { + ServerConfigMode::Multi { graphs, .. } => { + let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect(); + // BTreeMap iteration order is alphabetical. + assert_eq!(ids, vec!["alpha", "beta"]); + } + ServerConfigMode::Single { .. 
} => panic!("expected Multi (rule 4), got Single"), + } + } + + #[test] + fn mode_inference_multi_rejects_top_level_policy_file() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +policy: + file: ./policy.yaml +graphs: + alpha: + uri: /tmp/alpha.omni +"#, + ) + .unwrap(); + let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("top-level `policy.file` is single-graph/CLI-local policy only"), + "expected single-graph policy guidance, got: {msg}" + ); + assert!( + msg.contains("graphs..policy.file"), + "expected per-graph migration guidance, got: {msg}" + ); + assert!( + msg.contains("server.policy.file"), + "expected server policy migration guidance, got: {msg}" + ); + } + + #[test] + fn mode_inference_normalizes_multi_graph_uris() { + let temp = tempfile::tempdir().unwrap(); + let graph = temp.path().join("alpha.omni"); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + format!( + r#" +graphs: + alpha: + uri: file://{}/ +"#, + graph.display() + ), + ) + .unwrap(); + let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap(); + match settings.mode { + ServerConfigMode::Multi { graphs, .. } => { + assert_eq!(graphs[0].uri, graph.to_string_lossy()); + } + ServerConfigMode::Single { .. } => panic!("expected Multi"), + } + } + + /// Rule 5: nothing → error with migration hint. + #[test] + fn mode_inference_no_inputs_errors_with_migration_hint() { + let err = load_server_settings(None, None, None, None, true).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("no graph to serve"), + "expected migration-hint error, got: {msg}" + ); + } + + /// Rule 4 sub-case: `--config` with empty `graphs:` map and no + /// single-mode selector → rule 5 fires (no graph to serve). 
+ #[test] + fn mode_inference_empty_graphs_map_errors() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write(&config_path, "server:\n bind: 127.0.0.1:8080\n").unwrap(); + let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err(); + assert!(err.to_string().contains("no graph to serve")); + } + + /// `--config` + `` together: URI wins → Single (the CLI URI + /// takes precedence over the config's graphs map). + #[test] + fn mode_inference_cli_uri_overrides_graphs_map() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +graphs: + alpha: + uri: /tmp/alpha.omni +"#, + ) + .unwrap(); + let settings = load_server_settings( + Some(&config_path), + Some("/tmp/cli-override.omni".to_string()), + None, + None, + true, + ) + .unwrap(); + match settings.mode { + ServerConfigMode::Single { uri, .. } => { + assert_eq!( + uri, "/tmp/cli-override.omni", + "CLI URI must win over graphs: map" + ); + } + ServerConfigMode::Multi { .. } => { + panic!("expected Single (CLI URI wins), got Multi") + } + } + } + + /// Per-graph `policy.file` is resolved relative to the config base_dir. + #[test] + fn per_graph_policy_file_is_resolved_relative_to_base_dir() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +graphs: + alpha: + uri: /tmp/alpha.omni + policy: + file: ./policies/alpha.yaml + beta: + uri: /tmp/beta.omni +"#, + ) + .unwrap(); + let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap(); + let graphs = match settings.mode { + ServerConfigMode::Multi { graphs, .. } => graphs, + _ => panic!("expected Multi"), + }; + // graphs is BTreeMap-iter order (alphabetical). 
+ let alpha = &graphs[0]; + let beta = &graphs[1]; + assert_eq!(alpha.graph_id, "alpha"); + assert_eq!( + alpha.policy_file.as_ref().unwrap(), + &temp.path().join("policies/alpha.yaml") + ); + assert_eq!(beta.graph_id, "beta"); + assert!(beta.policy_file.is_none()); + } + + /// `server.policy.file` resolves alongside the graphs map. + #[test] + fn server_policy_file_is_resolved_relative_to_base_dir() { + let temp = tempfile::tempdir().unwrap(); + let config_path = temp.path().join("omnigraph.yaml"); + fs::write( + &config_path, + r#" +server: + policy: + file: ./server-policy.yaml +graphs: + alpha: + uri: /tmp/alpha.omni +"#, + ) + .unwrap(); + let settings = load_server_settings(Some(&config_path), None, None, None, true).unwrap(); + match settings.mode { + ServerConfigMode::Multi { + server_policy_file, .. + } => { + assert_eq!( + server_policy_file.unwrap(), + temp.path().join("server-policy.yaml") + ); + } + _ => panic!("expected Multi"), + } + } + + /// `GET /graphs` must NOT leak the registry in Open mode without + /// an explicit server policy. Operators who pass `--unauthenticated` + /// opted into trusting the network for graph DATA, not for leaking + /// server topology (graph IDs + URIs, which may contain S3 bucket + /// paths or internal hostnames). Cedar gating the management + /// surface is the documented contract for `server_graphs_list` + /// ("don't leak the registry until the operator explicitly + /// authorizes it"); enforcing that contract in every runtime + /// state — not just `PolicyEnabled` — is the correct-by-design + /// closure of the open-mode hole the bot-review pass surfaced. + /// + /// Today (pre-fix) this returns 200 because `authorize_request`'s + /// no-policy fallback only denies when `actor.is_some()`, so Open + /// mode (`actor: None`) falls through to `Ok(())`. The fix in the + /// next commit tightens the fallback so server-scoped actions + /// always require explicit policy. 
+ /// + /// Sort-order coverage previously lived here; it has moved to + /// `get_graphs_with_server_policy_authorizes_per_cedar` where + /// the response body is now non-empty and operator-authorized. + #[tokio::test(flavor = "multi_thread")] + async fn get_graphs_denied_in_open_mode_without_server_policy() { + let (_dirs, app) = build_multi_mode_app(&["beta", "alpha"]).await; + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + let status = resp.status(); + let body = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); + let body_str = String::from_utf8_lossy(&body); + assert_eq!( + status, + StatusCode::FORBIDDEN, + "GET /graphs must require an explicit server policy in every \ + runtime state; Open-mode bypass would leak server topology. \ + Body: {body_str}", + ); + } + + /// `GET /graphs` returns 405 in single mode (resource exists in the + /// API surface, just not operational without a `graphs:` map). + #[tokio::test(flavor = "multi_thread")] + async fn get_graphs_returns_405_in_single_mode() { + let temp = init_loaded_graph().await; + let graph = graph_path(temp.path()); + let state = AppState::open(graph.to_string_lossy().to_string()) + .await + .unwrap(); + let app = build_app(state); + let resp = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::METHOD_NOT_ALLOWED); + } + + /// `GET /graphs` requires bearer auth when tokens are configured. + #[tokio::test(flavor = "multi_thread")] + async fn get_graphs_requires_bearer_auth_when_configured() { + use omnigraph_server::{GraphHandle, GraphId, GraphKey}; + // Build a multi-mode app with bearer tokens configured. 
+ let dir = tempfile::tempdir().unwrap(); + let graph_uri = dir.path().join("alpha").to_str().unwrap().to_string(); + let schema = fs::read_to_string(fixture("test.pg")).unwrap(); + let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap(); + let handle = Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()), + uri: graph_uri, + engine: Arc::new(engine), + policy: None, + }); + let tokens = vec![("act-andrew".to_string(), "secret-token".to_string())]; + let workload = omnigraph_server::workload::WorkloadController::from_env(); + let state = AppState::new_multi(vec![handle], tokens, None, workload, None).unwrap(); + let app = build_app(state); + + // No Authorization header → 401. + let resp_no_auth = app + .clone() + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp_no_auth.status(), StatusCode::UNAUTHORIZED); + + // With auth but no server policy → 403 (default-deny, since + // GraphList is not Read). + let resp_authed = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs") + .header("authorization", "Bearer secret-token") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp_authed.status(), StatusCode::FORBIDDEN); + } + + /// `GET /graphs` with a server policy that allows `graph_list` → 200 + /// and returns the registry sorted alphabetically by `graph_id`. + /// `GET /graphs` with a server policy that does NOT allow + /// `graph_list` (viewer group) → 403. + /// + /// This test owns the alphabetical-sort coverage that previously + /// lived in `get_graphs_lists_registered_graphs_in_multi_mode`. + /// That test now asserts denial in Open mode (server-scoped actions + /// require explicit policy in every runtime state), so the positive + /// body-shape assertions need a home where the response is + /// operator-authorized — here. 
+ #[tokio::test(flavor = "multi_thread")] + async fn get_graphs_with_server_policy_authorizes_per_cedar() { + use omnigraph_policy::PolicyEngine; + use omnigraph_server::{GraphHandle, GraphId, GraphKey}; + + let dir = tempfile::tempdir().unwrap(); + + // Two graphs deliberately registered in non-alphabetical order + // so the test would fail if the handler relied on insertion + // order instead of server-side sorting. + let schema = fs::read_to_string(fixture("test.pg")).unwrap(); + let mut handles = Vec::new(); + for id in ["beta", "alpha"] { + let graph_uri = dir.path().join(id).to_str().unwrap().to_string(); + let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap(); + handles.push(Arc::new(GraphHandle { + key: GraphKey::cluster(GraphId::try_from(id).unwrap()), + uri: graph_uri, + engine: Arc::new(engine), + policy: None, + })); + } + + // Server policy: admins can graph_list, viewers cannot. + let policy_path = dir.path().join("server-policy.yaml"); + fs::write( + &policy_path, + r#" +version: 1 +groups: + admins: [act-andrew] + viewers: [act-bruno] +rules: + - id: admins-list-graphs + allow: + actors: { group: admins } + actions: [graph_list] +"#, + ) + .unwrap(); + let server_policy = PolicyEngine::load_server(&policy_path).unwrap(); + + let tokens = vec![ + ("act-andrew".to_string(), "andrew-token".to_string()), + ("act-bruno".to_string(), "bruno-token".to_string()), + ]; + let workload = omnigraph_server::workload::WorkloadController::from_env(); + let state = + AppState::new_multi(handles, tokens, Some(server_policy), workload, None).unwrap(); + let app = build_app(state); + + // Admin → 200, body returns both graphs alphabetically sorted. 
+ let resp_admin = app + .clone() + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs") + .header("authorization", "Bearer andrew-token") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!( + resp_admin.status(), + StatusCode::OK, + "admin must be allowed graph_list" + ); + let body = to_bytes(resp_admin.into_body(), usize::MAX).await.unwrap(); + let json: Value = serde_json::from_slice(&body).unwrap(); + let graphs = json["graphs"].as_array().unwrap(); + assert_eq!(graphs.len(), 2, "response must list both registered graphs"); + assert_eq!( + graphs[0]["graph_id"].as_str().unwrap(), + "alpha", + "server must sort graphs alphabetically by graph_id (insertion order was 'beta', 'alpha')" + ); + assert_eq!(graphs[1]["graph_id"].as_str().unwrap(), "beta"); + + // Viewer → 403 + let resp_viewer = app + .oneshot( + Request::builder() + .method(Method::GET) + .uri("/graphs") + .header("authorization", "Bearer bruno-token") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!( + resp_viewer.status(), + StatusCode::FORBIDDEN, + "viewer must be denied graph_list (Cedar gate)" + ); + } + + /// Loads an `omnigraph.yaml` with two graphs and verifies multi-mode + /// inference plus graph entry resolution. Cluster-route dispatch is + /// covered by the route tests above. + #[tokio::test(flavor = "multi_thread")] + async fn server_settings_load_multi_graph_config_entries() { + let cfg_dir = tempfile::tempdir().unwrap(); + // Real graph storage dirs (the URIs in the config must point to + // a graph init-able location). 
+ let alpha_dir = cfg_dir.path().join("alpha.omni"); + let beta_dir = cfg_dir.path().join("beta.omni"); + let schema = fs::read_to_string(fixture("test.pg")).unwrap(); + Omnigraph::init(alpha_dir.to_str().unwrap(), &schema) + .await + .unwrap(); + Omnigraph::init(beta_dir.to_str().unwrap(), &schema) + .await + .unwrap(); + + let config_path = cfg_dir.path().join("omnigraph.yaml"); + fs::write( + &config_path, + format!( + r#" +graphs: + alpha: + uri: {alpha} + beta: + uri: {beta} +"#, + alpha = alpha_dir.display(), + beta = beta_dir.display(), + ), + ) + .unwrap(); + + let settings: ServerConfig = + load_server_settings(Some(&config_path), None, None, None, true).unwrap(); + assert!(matches!(settings.mode, ServerConfigMode::Multi { .. })); + + match settings.mode { + ServerConfigMode::Multi { graphs, .. } => { + assert_eq!(graphs.len(), 2); + let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect(); + assert_eq!(ids, vec!["alpha", "beta"]); + } + _ => unreachable!(), + } + } +} diff --git a/crates/omnigraph/Cargo.toml b/crates/omnigraph/Cargo.toml index b507389..1fa3436 100644 --- a/crates/omnigraph/Cargo.toml +++ b/crates/omnigraph/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omnigraph-engine" -version = "0.4.2" +version = "0.6.0" edition = "2024" description = "Runtime engine for the Omnigraph graph database." 
license = "MIT" @@ -16,7 +16,8 @@ default = [] failpoints = ["dep:fail", "fail/failpoints"] [dependencies] -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } +omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" } lance = { workspace = true } lance-datafusion = { workspace = true } datafusion = { workspace = true } @@ -50,7 +51,7 @@ chrono = { workspace = true } arc-swap = { workspace = true } [dev-dependencies] -omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" } +omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" } tokio = { workspace = true } lance-namespace-impls = { workspace = true } serial_test = "3" diff --git a/crates/omnigraph/examples/bench_expand.rs b/crates/omnigraph/examples/bench_expand.rs index 1b0011a..c723b24 100644 --- a/crates/omnigraph/examples/bench_expand.rs +++ b/crates/omnigraph/examples/bench_expand.rs @@ -239,7 +239,9 @@ async fn main() { let jsonl = generate_jsonl(n, avg_deg, 42); let t = Instant::now(); - load_jsonl(&mut db, &jsonl, LoadMode::Overwrite).await.unwrap(); + load_jsonl(&mut db, &jsonl, LoadMode::Overwrite) + .await + .unwrap(); let load_elapsed = t.elapsed(); println!( diff --git a/crates/omnigraph/src/db/manifest.rs b/crates/omnigraph/src/db/manifest.rs index f31cc4f..7fcf7de 100644 --- a/crates/omnigraph/src/db/manifest.rs +++ b/crates/omnigraph/src/db/manifest.rs @@ -6,6 +6,8 @@ use lance::Dataset; use lance_namespace::models::CreateTableVersionRequest; use omnigraph_compiler::catalog::Catalog; +#[path = "manifest/graph.rs"] +mod graph; #[path = "manifest/layout.rs"] mod layout; #[path = "manifest/metadata.rs"] @@ -18,11 +20,10 @@ mod namespace; mod publisher; #[path = "manifest/recovery.rs"] mod recovery; -#[path = "manifest/repo.rs"] -mod repo; #[path = "manifest/state.rs"] mod state; +use graph::{init_manifest_graph, open_manifest_graph, snapshot_state_at}; 
use layout::{manifest_uri, open_manifest_dataset, type_name_hash}; pub(crate) use metadata::TableVersionMetadata; #[cfg(test)] @@ -33,11 +34,10 @@ pub(crate) use namespace::open_table_head_for_write; use namespace::{branch_manifest_namespace, staged_table_namespace}; use publisher::{GraphNamespacePublisher, ManifestBatchPublisher}; pub(crate) use recovery::{ - delete_sidecar, has_schema_apply_sidecar, new_sidecar, recover_manifest_drift, write_sidecar, RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin, - SidecarTableRegistration, SidecarTombstone, + SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar, + new_sidecar, recover_manifest_drift, write_sidecar, }; -use repo::{init_manifest_repo, open_manifest_repo, snapshot_state_at}; pub use state::SubTableEntry; #[cfg(test)] use state::string_column; @@ -215,12 +215,12 @@ impl ManifestCoordinator { self } - /// Create a new repo at `root_uri` from a catalog. + /// Create a new graph at `root_uri` from a catalog. /// /// Creates per-type Lance datasets and the namespace `__manifest` table. pub async fn init(root_uri: &str, catalog: &Catalog) -> Result { let root = root_uri.trim_end_matches('/'); - let (dataset, known_state) = init_manifest_repo(root, catalog).await?; + let (dataset, known_state) = init_manifest_graph(root, catalog).await?; Ok(Self::from_parts_with_default_publisher( root, @@ -230,10 +230,10 @@ impl ManifestCoordinator { )) } - /// Open an existing repo's manifest. + /// Open an existing graph's manifest. pub async fn open(root_uri: &str) -> Result { let root = root_uri.trim_end_matches('/'); - let (dataset, known_state) = open_manifest_repo(root, None).await?; + let (dataset, known_state) = open_manifest_graph(root, None).await?; Ok(Self::from_parts_with_default_publisher( root, dataset, @@ -242,14 +242,14 @@ impl ManifestCoordinator { )) } - /// Open an existing repo's manifest at a specific branch. 
+ /// Open an existing graph's manifest at a specific branch. pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result { if branch == "main" { return Self::open(root_uri).await; } let root = root_uri.trim_end_matches('/'); - let (dataset, known_state) = open_manifest_repo(root, Some(branch)).await?; + let (dataset, known_state) = open_manifest_graph(root, Some(branch)).await?; Ok(Self::from_parts_with_default_publisher( root, dataset, @@ -410,7 +410,7 @@ impl ManifestCoordinator { Ok(descendants) } - /// Root URI of the repo. + /// Root URI of the graph. pub fn root_uri(&self) -> &str { &self.root_uri } diff --git a/crates/omnigraph/src/db/manifest/repo.rs b/crates/omnigraph/src/db/manifest/graph.rs similarity index 98% rename from crates/omnigraph/src/db/manifest/repo.rs rename to crates/omnigraph/src/db/manifest/graph.rs index 90a958b..6c414aa 100644 --- a/crates/omnigraph/src/db/manifest/repo.rs +++ b/crates/omnigraph/src/db/manifest/graph.rs @@ -17,7 +17,7 @@ use super::state::{ ManifestState, SubTableEntry, entries_to_batch, manifest_schema, read_manifest_state, }; -pub(super) async fn init_manifest_repo( +pub(super) async fn init_manifest_graph( root_uri: &str, catalog: &Catalog, ) -> Result<(Dataset, ManifestState)> { @@ -47,7 +47,7 @@ pub(super) async fn init_manifest_repo( Ok((dataset, known_state)) } -pub(super) async fn open_manifest_repo( +pub(super) async fn open_manifest_graph( root_uri: &str, branch: Option<&str>, ) -> Result<(Dataset, ManifestState)> { diff --git a/crates/omnigraph/src/db/manifest/migrations.rs b/crates/omnigraph/src/db/manifest/migrations.rs index c568bef..bbb7995 100644 --- a/crates/omnigraph/src/db/manifest/migrations.rs +++ b/crates/omnigraph/src/db/manifest/migrations.rs @@ -24,8 +24,8 @@ //! Only on open-for-write paths (the publisher's `load_publish_state`). //! Reads are side-effect-free by contract; an old-shape `__manifest` reads //! fine, it just lacks the protections introduced by later versions. -//! 
`init_manifest_repo` stamps the current version at creation, so newly -//! initialized repos never need migration. +//! `init_manifest_graph` stamps the current version at creation, so newly +//! initialized graphs never need migration. //! //! ## Forward-version protection //! @@ -78,7 +78,7 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()> if current > INTERNAL_MANIFEST_SCHEMA_VERSION { return Err(OmniError::manifest(format!( "__manifest is stamped at internal schema v{} but this binary expects v{} \ - — upgrade omnigraph before opening this repo for writes", + — upgrade omnigraph before opening this graph for writes", current, INTERNAL_MANIFEST_SCHEMA_VERSION, ))); } @@ -112,7 +112,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()> async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> { dataset .update_field_metadata() - .update("object_id", [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())]) + .update( + "object_id", + [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())], + ) .map_err(|e| OmniError::Lance(e.to_string()))? 
.await .map_err(|e| OmniError::Lance(e.to_string()))?; @@ -121,10 +124,7 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> { async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> { dataset - .update_schema_metadata([( - INTERNAL_SCHEMA_VERSION_KEY.to_string(), - version.to_string(), - )]) + .update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())]) .await .map_err(|e| OmniError::Lance(e.to_string()))?; Ok(()) diff --git a/crates/omnigraph/src/db/manifest/namespace.rs b/crates/omnigraph/src/db/manifest/namespace.rs index 724b3e5..80d206f 100644 --- a/crates/omnigraph/src/db/manifest/namespace.rs +++ b/crates/omnigraph/src/db/manifest/namespace.rs @@ -230,6 +230,11 @@ impl LanceNamespace for BranchManifestNamespace { metadata: None, properties: None, managed_versioning: Some(true), + // Every table we return from describe_table is physically + // materialized (open_manifest_dataset succeeds), never just + // "declared." See lance-namespace 6.0.1 DescribeTableResponse + // field docs. + is_only_declared: Some(false), }) } @@ -373,6 +378,11 @@ impl LanceNamespace for StagedTableNamespace { metadata: None, properties: None, managed_versioning: Some(true), + // Every table we return from describe_table is physically + // materialized (open_manifest_dataset succeeds), never just + // "declared." See lance-namespace 6.0.1 DescribeTableResponse + // field docs. + is_only_declared: Some(false), }) } diff --git a/crates/omnigraph/src/db/manifest/recovery.rs b/crates/omnigraph/src/db/manifest/recovery.rs index 588042c..425499a 100644 --- a/crates/omnigraph/src/db/manifest/recovery.rs +++ b/crates/omnigraph/src/db/manifest/recovery.rs @@ -2,7 +2,7 @@ //! //! This module implements the building blocks of the per-sidecar recovery //! sweep that closes the documented Phase B → Phase C residual (see -//! `docs/runs.md` "Open-time recovery sweep"). The high-level shape: +//! 
`docs/dev/runs.md` "Open-time recovery sweep"). The high-level shape: //! //! 1. Each writer that performs a multi-table commit writes a small JSON //! sidecar at `__recovery/{ulid}.json` BEFORE its per-table @@ -58,7 +58,7 @@ use super::{ManifestChange, SubTableUpdate, TableRegistration, TableTombstone}; /// into the audit row's `recovery_for_actor` field. pub(crate) const RECOVERY_ACTOR: &str = "omnigraph:recovery"; -/// Subdirectory under the repo root holding sidecar files. +/// Subdirectory under the graph root holding sidecar files. pub(crate) const RECOVERY_DIR_NAME: &str = "__recovery"; /// Current sidecar JSON shape version. Bumping this is a breaking change: @@ -142,7 +142,7 @@ pub(crate) struct SidecarTablePin { pub(crate) struct SidecarTableRegistration { /// Stable identifier (`node:Tag`, `edge:WorksAt`, etc.). pub table_key: String, - /// Repo-relative path the manifest will register + /// Graph-relative path the manifest will register /// (e.g. `nodes/{fnv1a64-hex}`); recovery joins this with `root_uri` /// to open the dataset Lance HEAD when constructing the /// accompanying `Update`. @@ -274,8 +274,9 @@ pub(crate) enum TableClassification { /// /// **All-or-nothing**: the writer that produced the sidecar intended an /// atomic publish across every table it listed. Rolling forward only some -/// of them would publish a partial commit and violate `docs/invariants.md` -/// §VI.23. The decision is based on the worst classification: +/// of them would publish a partial commit and violate the manifest-atomic +/// graph visibility invariant in `docs/dev/invariants.md`. The decision is +/// based on the worst classification: /// /// - Any `InvariantViolation` → `Abort` (operator action required). /// - Any `UnexpectedAtP1` / `UnexpectedMultistep` / `NoMovement` → @@ -294,7 +295,7 @@ pub(crate) enum SidecarDecision { Abort, } -/// Build the `__recovery/` directory URI under a repo root. +/// Build the `__recovery/` directory URI under a graph root. 
pub(crate) fn recovery_dir_uri(root_uri: &str) -> String { let trimmed = root_uri.trim_end_matches('/'); format!("{}/{}", trimmed, RECOVERY_DIR_NAME) @@ -463,7 +464,7 @@ pub(crate) fn classify_table( /// Compute the per-sidecar decision from a slice of table classifications. /// -/// All-or-nothing per `docs/invariants.md` §VI.23 — see [`SidecarDecision`]. +/// All-or-nothing per `docs/dev/invariants.md` -- see [`SidecarDecision`]. pub(crate) fn decide(classifications: &[TableClassification]) -> SidecarDecision { use SidecarDecision::*; use TableClassification::*; @@ -1121,7 +1122,7 @@ async fn record_audit( /// the rename so the recovery sweep's roll-forward step sees the new /// catalog. Without this, the disambiguation logic deletes the staging /// files (since manifest still pins the old table set) and leaves the -/// repo with new-schema data on disk but the old `_schema.pg` live — +/// graph with new-schema data on disk but the old `_schema.pg` live — /// real corruption. pub(crate) async fn has_schema_apply_sidecar( root_uri: &str, diff --git a/crates/omnigraph/src/db/manifest/tests.rs b/crates/omnigraph/src/db/manifest/tests.rs index d51a882..effa0b5 100644 --- a/crates/omnigraph/src/db/manifest/tests.rs +++ b/crates/omnigraph/src/db/manifest/tests.rs @@ -1393,7 +1393,10 @@ async fn test_concurrent_publish_with_overlapping_expected_versions_one_succeeds // version (no duplicate version rows). let mc = ManifestCoordinator::open(uri).await.unwrap(); let entry = mc.snapshot().entry("node:Person").unwrap().clone(); - assert!(entry.table_version > 1, "Person should have advanced past v=1"); + assert!( + entry.table_version > 1, + "Person should have advanced past v=1" + ); } #[tokio::test] @@ -1418,7 +1421,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() { let catalog = build_test_catalog(); let mc = ManifestCoordinator::init(uri, &catalog).await.unwrap(); - // Simulate a v1 (pre-stamp) repo by removing the schema-level stamp on disk. 
+ // Simulate a v1 (pre-stamp) graph by removing the schema-level stamp on disk. { let mut ds = open_manifest_dataset(uri, None).await.unwrap(); ds.update_schema_metadata([( @@ -1449,7 +1452,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() { assert_eq!( super::migrations::read_stamp(&post), super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - "publish on a v1 repo should leave the manifest stamped at the current version", + "publish on a v1 graph should leave the manifest stamped at the current version", ); // Manifest should still serve correctly post-migration. diff --git a/crates/omnigraph/src/db/mod.rs b/crates/omnigraph/src/db/mod.rs index b6ab0da..d0b292f 100644 --- a/crates/omnigraph/src/db/mod.rs +++ b/crates/omnigraph/src/db/mod.rs @@ -10,11 +10,11 @@ pub(crate) mod write_queue; pub use commit_graph::GraphCommit; pub use graph_coordinator::{GraphCoordinator, ReadTarget, ResolvedTarget, SnapshotId}; pub use manifest::{Snapshot, SubTableEntry, SubTableUpdate}; -pub use omnigraph::{ - CleanupPolicyOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyResult, - TableCleanupStats, TableOptimizeStats, -}; pub(crate) use omnigraph::ensure_public_branch_ref; +pub use omnigraph::{ + CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyOptions, + SchemaApplyResult, TableCleanupStats, TableOptimizeStats, +}; pub(crate) use run_registry::is_internal_run_branch; pub(crate) const SCHEMA_APPLY_LOCK_BRANCH: &str = "__schema_apply_lock__"; @@ -59,9 +59,7 @@ impl MutationOpKind { pub(crate) fn strict_pre_stage_version_check(self) -> bool { match self { MutationOpKind::Insert | MutationOpKind::Merge => false, - MutationOpKind::Update - | MutationOpKind::Delete - | MutationOpKind::SchemaRewrite => true, + MutationOpKind::Update | MutationOpKind::Delete | MutationOpKind::SchemaRewrite => true, } } } diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index 50d4963..5c92ac3 100644 --- 
a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -18,8 +18,8 @@ use omnigraph_compiler::catalog::{Catalog, EdgeType, NodeType}; use omnigraph_compiler::schema::parser::parse_schema; use omnigraph_compiler::types::ScalarType; use omnigraph_compiler::{ - SchemaIR, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, build_catalog_from_ir, - build_schema_ir, plan_schema_migration, + DropMode, SchemaIR, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, + build_catalog_from_ir, build_schema_ir, plan_schema_migration, }; use crate::db::graph_coordinator::{GraphCoordinator, PublishedSnapshot}; @@ -34,6 +34,7 @@ mod schema_apply; mod table_ops; pub use optimize::{CleanupPolicyOptions, TableCleanupStats, TableOptimizeStats}; +pub use schema_apply::SchemaApplyOptions; use super::commit_graph::GraphCommit; use super::manifest::{ @@ -128,6 +129,22 @@ pub struct Omnigraph { /// every `self.snapshot()` and `self.ensure_commit_graph_initialized()` /// call inside the merge body. merge_exclusive: Arc>, + /// Optional policy checker for engine-layer enforcement (MR-722). + /// `None` = no enforcement; mutating methods are unconditionally + /// allowed (this is the embedded/dev default). `Some` = every + /// mutating method calls `self.enforce(action, scope, actor)` at + /// entry; denial returns `OmniError::Policy`. + /// + /// Per chassis design (see `omnigraph_policy::PolicyChecker`), the + /// trait surface is deliberately coarse — action × scope × actor. + /// Per-row / per-type / per-column scope lives at the query layer + /// (MR-725), which extends the same trait with a different method. + /// Don't be tempted to add per-row enforcement here. + /// + /// Set via `with_policy(checker)` after construction. Today + /// `apply_schema_as` (PR #2 proof-of-concept) and the `branch_*_as` + /// methods consult it; PR #3 fans `enforce()` out to the remaining writers.
+ policy: Option>, } /// Whether [`Omnigraph::open`] runs the open-time recovery sweep. @@ -148,31 +165,137 @@ pub enum OpenMode { ReadOnly, } +/// Options for [`Omnigraph::init_with_options`]. +/// +/// `force` controls the safety preflight that prevents an +/// accidental re-init from overwriting an existing graph's schema +/// metadata. Default behavior (`force: false`) fails fast with +/// [`OmniError::AlreadyInitialized`] if any of `_schema.pg`, +/// `_schema.ir.json`, or `__schema_state.json` already exists at +/// the target URI. With `force: true` the preflight is skipped — +/// existing schema files are overwritten in place. Force does NOT +/// purge old Lance datasets or `__manifest/`; reclaiming those +/// still requires deleting the graph directory by hand (or via a +/// future `DELETE /graphs/{id}`). +#[derive(Debug, Clone, Copy, Default)] +pub struct InitOptions { + /// Skip the existing-graph preflight. Operators set this when + /// they actually mean to overwrite — e.g. `omnigraph init --force`. + pub force: bool, +} + impl Omnigraph { - /// Create a new repo at `uri` from schema source. + /// Create a new graph at `uri` from schema source. /// - /// Creates `_schema.pg`, per-type Lance datasets, and `__manifest`. + /// Strict mode: errors with [`OmniError::AlreadyInitialized`] if + /// `uri` already holds any of the three schema artifacts. To + /// overwrite an existing graph deliberately, call + /// [`Self::init_with_options`] with `InitOptions { force: true }`. pub async fn init(uri: &str, schema_source: &str) -> Result { - Self::init_with_storage(uri, schema_source, storage_for_uri(uri)?).await + Self::init_with_options(uri, schema_source, InitOptions::default()).await + } + + /// Create a new graph at `uri`, with explicit init-time options. + /// + /// See [`InitOptions`] for the safety contract — by default this + /// behaves identically to [`Self::init`]. 
+ pub async fn init_with_options( + uri: &str, + schema_source: &str, + options: InitOptions, + ) -> Result { + Self::init_with_storage(uri, schema_source, storage_for_uri(uri)?, options).await } pub(crate) async fn init_with_storage( uri: &str, schema_source: &str, storage: Arc, + options: InitOptions, ) -> Result { let root = normalize_root_uri(uri)?; + + // Preflight: refuse to clobber an existing graph unless the + // operator passed `force`. This runs BEFORE any parse or + // write so a misdirected `init` against an existing graph + // URI cannot reach a code path that overwrites or, on a + // later cleanup, deletes the schema files. + // + // Closes the "init is destructive against existing state" + // class: there is no longer a code path where strict-mode + // `init` can mutate a populated graph root. + if !options.force { + for candidate in [ + schema_source_uri(&root), + schema_ir_uri(&root), + schema_state_uri(&root), + ] { + if storage.exists(&candidate).await? { + return Err(OmniError::AlreadyInitialized { uri: root.clone() }); + } + } + } + let schema_ir = read_schema_ir_from_source(schema_source)?; let mut catalog = build_catalog_from_ir(&schema_ir)?; fixup_blob_schemas(&mut catalog); - // Write _schema.pg - let schema_path = join_uri(&root, SCHEMA_SOURCE_FILENAME); - storage.write_text(&schema_path, schema_source).await?; - write_schema_contract(&root, storage.as_ref(), &schema_ir).await?; + // Establish an atomic ownership claim on `_schema.pg` before + // writing the remaining init artifacts. A check-then-write preflight + // is not enough under concurrent `init` calls: two callers can both + // observe an empty root, one can successfully initialize, and the + // loser can then fail in Lance `WriteMode::Create`. Only the caller + // that atomically created `_schema.pg` may clean up schema artifacts + // on later failure. 
+ let schema_pg_claimed = if options.force { + false + } else { + let schema_path = join_uri(&root, SCHEMA_SOURCE_FILENAME); + if !storage + .write_text_if_absent(&schema_path, schema_source) + .await? + { + return Err(OmniError::AlreadyInitialized { uri: root.clone() }); + } + if let Err(err) = crate::failpoints::maybe_fail("init.after_schema_pg_written") { + best_effort_cleanup_init_artifacts(&root, storage.as_ref()).await; + return Err(err); + } + true + }; - // Create manifest + per-type datasets - let coordinator = GraphCoordinator::init(&root, &catalog, Arc::clone(&storage)).await?; + // Run the I/O phase. On any error, best-effort-clean schema + // artifacts only when this invocation owns them: strict mode owns + // them after the atomic `_schema.pg` claim above; force mode owns + // destructive overwrite semantics by explicit operator request. + // + // Coverage gap: Lance per-type datasets and `__manifest/` + // directory created by `GraphCoordinator::init` are NOT cleaned + // up here — fully recursive directory deletion requires a + // `StorageAdapter::delete_prefix` primitive that's deferred + // along with `DELETE /graphs/{id}` (PR 2b in the MR-668 plan + // is currently deferred). If `init` fails after coordinator + // init succeeds, operators may need to remove the graph + // directory manually before retrying `init` on the same URI. + // Documented in the PR 2a commit message and `init` rustdoc. 
+ let coordinator = match init_storage_phase( + &root, + schema_source, + &schema_ir, + &catalog, + &storage, + !schema_pg_claimed, + ) + .await + { + Ok(coordinator) => coordinator, + Err(err) => { + if schema_pg_claimed || options.force { + best_effort_cleanup_init_artifacts(&root, storage.as_ref()).await; + } + return Err(err); + } + }; Ok(Self { root_uri: root.clone(), @@ -184,10 +307,11 @@ impl Omnigraph { schema_source: Arc::new(ArcSwap::from_pointee(schema_source.to_string())), write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()), merge_exclusive: Arc::new(tokio::sync::Mutex::new(())), + policy: None, }) } - /// Open an existing repo (read-write). + /// Open an existing graph (read-write). /// /// Reads `_schema.pg`, parses it, builds the catalog, and opens `__manifest`. /// Runs the open-time recovery sweep before returning — see [`OpenMode`]. @@ -195,7 +319,7 @@ impl Omnigraph { Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadWrite).await } - /// Open an existing repo for read-only consumers (NDJSON export, + /// Open an existing graph for read-only consumers (NDJSON export, /// `commit list`, etc.). Skips the recovery sweep — see [`OpenMode`]. pub async fn open_read_only(uri: &str) -> Result { Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadOnly).await @@ -271,6 +395,7 @@ impl Omnigraph { schema_source: Arc::new(ArcSwap::from_pointee(schema_source)), write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()), merge_exclusive: Arc::new(tokio::sync::Mutex::new(())), + policy: None, }) } @@ -303,16 +428,102 @@ impl Omnigraph { &self.root_uri } + /// Install a policy checker for engine-layer enforcement (MR-722). + /// Builder-style setter — consumes `self`, returns `Self`. 
Calling + /// this on an `Omnigraph` previously without policy enables + /// `enforce()` to fire at every mutating engine method that's been + /// wired to call it (currently `apply_schema_as` and the + /// `branch_*_as` methods; PR #3 fans out to the remaining writers). + /// + /// Embedded callers that don't care about authorization should + /// just not call this. Server / CLI callers that have loaded a + /// `PolicyEngine` from `policy.yaml` pass it here. + pub fn with_policy(mut self, checker: Arc) -> Self { + self.policy = Some(checker); + self + } + + /// Engine-layer policy enforcement gate (MR-722 chassis core). + /// + /// * If no policy is installed → no-op (returns `Ok(())`). + /// * If policy is installed AND actor is None → denial with a + /// clear "no actor for engine-layer policy check" message. + /// Forces server / CLI / SDK callers to thread an actor through + /// when policy is configured — silent bypass via "I forgot the + /// actor" is exactly the footgun this gate is here to prevent. + /// * If policy is installed AND actor is Some → call + /// `PolicyChecker::check(action, scope, actor)`; map denial / + /// internal failure to `OmniError::Policy(...)`.
+ pub(crate) fn enforce( + &self, + action: omnigraph_policy::PolicyAction, + scope: &omnigraph_policy::ResourceScope, + actor: Option<&str>, + ) -> Result<()> { + let Some(checker) = self.policy.as_ref() else { + return Ok(()); + }; + let Some(actor) = actor else { + return Err(OmniError::Policy( + "no actor for engine-layer policy check (policy is configured but the call site \ + didn't thread an actor through — this is almost certainly a bug, not an \ + intended bypass)" + .to_string(), + )); + }; + checker + .check(action, scope, actor) + .map_err(|err| OmniError::Policy(err.to_string())) + } + pub(crate) async fn ensure_schema_state_valid(&self) -> Result<()> { validate_schema_contract(self.uri(), Arc::clone(&self.storage)).await } pub async fn plan_schema(&self, desired_schema_source: &str) -> Result { - schema_apply::plan_schema(self, desired_schema_source).await + self.plan_schema_with_options(desired_schema_source, SchemaApplyOptions::default()) + .await + } + + pub async fn plan_schema_with_options( + &self, + desired_schema_source: &str, + options: SchemaApplyOptions, + ) -> Result { + schema_apply::plan_schema(self, desired_schema_source, options).await } pub async fn apply_schema(&self, desired_schema_source: &str) -> Result { - schema_apply::apply_schema(self, desired_schema_source).await + self.apply_schema_as(desired_schema_source, SchemaApplyOptions::default(), None) + .await + } + + pub async fn apply_schema_with_options( + &self, + desired_schema_source: &str, + options: SchemaApplyOptions, + ) -> Result { + self.apply_schema_as(desired_schema_source, options, None) + .await + } + + /// Apply a schema migration with an explicit actor for engine-layer + /// policy enforcement (MR-722). When a `PolicyChecker` is installed + /// via [`Self::with_policy`], this method calls `enforce(SchemaApply, + /// Branch("main"), actor)` before any apply work happens. Denial + /// returns `OmniError::Policy` and leaves the manifest untouched. 
+ /// + /// The no-actor variants (`apply_schema`, `apply_schema_with_options`) + /// pass `None` here. They work fine without a policy; if a policy IS + /// installed and actor is None, enforcement intentionally fails to + /// prevent silent-bypass-via-forgetting-the-actor footguns. + pub async fn apply_schema_as( + &self, + desired_schema_source: &str, + options: SchemaApplyOptions, + actor: Option<&str>, + ) -> Result { + schema_apply::apply_schema(self, desired_schema_source, options, actor).await } pub(crate) async fn ensure_schema_apply_idle(&self, operation: &str) -> Result<()> { @@ -366,7 +577,7 @@ impl Omnigraph { Arc::clone(&self.merge_exclusive) } - /// Engine-level access to the repo's normalized root URI. Used by + /// Engine-level access to the graph's normalized root URI. Used by /// the recovery sidecar protocol to compute `__recovery/` paths. pub(crate) fn root_uri(&self) -> &str { &self.root_uri @@ -406,9 +617,10 @@ impl Omnigraph { let normalized = normalize_branch_name(branch.unwrap_or("main"))?; let coord = self.coordinator.read().await; if normalized.as_deref() == coord.current_branch() { - let snapshot_id = coord.head_commit_id().await?.unwrap_or_else(|| { - SnapshotId::synthetic(coord.current_branch(), coord.version()) - }); + let snapshot_id = coord + .head_commit_id() + .await? + .unwrap_or_else(|| SnapshotId::synthetic(coord.current_branch(), coord.version())); return Ok(ResolvedTarget { requested, branch: coord.current_branch().map(str::to_string), @@ -483,7 +695,7 @@ impl Omnigraph { /// exist. Required BEFORE manifest-drift recovery so a /// SchemaApply roll-forward doesn't publish the manifest while /// the staging files remain unrenamed (which would corrupt the - /// repo: data on new schema, catalog on old). + /// graph: data on new schema, catalog on old). /// 3. `recover_manifest_drift(... RollForwardOnly)` — close the /// finalize→publisher residual via roll-forward; defer rollback /// work to next ReadWrite open. 
@@ -564,7 +776,11 @@ impl Omnigraph { pub async fn resolve_snapshot(&self, branch: &str) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await.resolve_snapshot_id(branch).await + self.coordinator + .read() + .await + .resolve_snapshot_id(branch) + .await } pub(crate) async fn resolved_target( @@ -572,7 +788,11 @@ impl Omnigraph { target: impl Into, ) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await.resolve_target(&target.into()).await + self.coordinator + .read() + .await + .resolve_target(&target.into()) + .await } // ─── Change detection ──────────────────────────────────────────────── @@ -604,7 +824,9 @@ impl Omnigraph { filter: &crate::changes::ChangeFilter, ) -> Result { let coord = self.coordinator.read().await; - let from_commit = coord.resolve_commit(&SnapshotId::new(from_commit_id)).await?; + let from_commit = coord + .resolve_commit(&SnapshotId::new(from_commit_id)) + .await?; let to_commit = coord.resolve_commit(&SnapshotId::new(to_commit_id)).await?; let from_snap = coord .resolve_target(&ReadTarget::Snapshot(SnapshotId::new( @@ -649,7 +871,11 @@ impl Omnigraph { /// Create a Snapshot at any historical manifest version. 
pub async fn snapshot_at_version(&self, version: u64) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await.snapshot_at_version(version).await + self.coordinator + .read() + .await + .snapshot_at_version(version) + .await } pub async fn export_jsonl( @@ -790,11 +1016,20 @@ impl Omnigraph { } pub(crate) async fn active_branch(&self) -> Option { - self.coordinator.read().await.current_branch().map(str::to_string) + self.coordinator + .read() + .await + .current_branch() + .map(str::to_string) } async fn ensure_branch_delete_safe(&self, branch: &str, branches: &[String]) -> Result<()> { - let descendants = self.coordinator.read().await.branch_descendants(branch).await?; + let descendants = self + .coordinator + .read() + .await + .branch_descendants(branch) + .await?; if let Some(descendant) = descendants.first() { return Err(OmniError::manifest_conflict(format!( "cannot delete branch '{}' because descendant branch '{}' still depends on it", @@ -850,7 +1085,12 @@ impl Omnigraph { } async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> { - let active = self.coordinator.read().await.current_branch().map(str::to_string); + let active = self + .coordinator + .read() + .await + .current_branch() + .map(str::to_string); if active.as_deref() == Some(branch) { return Err(OmniError::manifest_conflict(format!( "cannot delete currently active branch '{}'", @@ -887,19 +1127,64 @@ impl Omnigraph { } pub async fn branch_create(&self, name: &str) -> Result<()> { + self.branch_create_as(name, None).await + } + + /// Create a branch from the coordinator's currently-open snapshot, + /// with an explicit actor for engine-layer policy enforcement + /// (MR-722 fan-out). Scope is `TargetBranch(name)` — symmetric with + /// `branch_delete_as`: the branch being acted upon is the target. + /// Cedar rules using `target_branch_scope: protected` therefore see + /// the new-branch name and can deny e.g. 
creating any branch named + /// `main` from a non-privileged actor. + pub async fn branch_create_as(&self, name: &str, actor: Option<&str>) -> Result<()> { + self.enforce( + omnigraph_policy::PolicyAction::BranchCreate, + &omnigraph_policy::ResourceScope::TargetBranch(name.to_string()), + actor, + )?; self.ensure_schema_state_valid().await?; self.ensure_schema_apply_idle("branch_create").await?; ensure_public_branch_ref(name, "branch_create")?; self.coordinator.write().await.branch_create(name).await } - pub async fn branch_create_from( + pub async fn branch_create_from(&self, from: impl Into, name: &str) -> Result<()> { + self.branch_create_from_as(from, name, None).await + } + + /// Create a branch from a specific source branch with an explicit + /// actor for engine-layer policy enforcement (MR-722 fan-out). + /// + /// Scope is `BranchTransition { source, target }` — matches the + /// HTTP-layer convention at `server_branch_create` + /// (branch=Some(from), target_branch=Some(name)), so engine and + /// HTTP fire the same Cedar decision. Pinned-snapshot sources + /// (which aren't a branch ref) materialize as the sentinel + /// `` for the policy check; Cedar rules using + /// `branch_scope: any` still match, rules pinning a specific + /// source branch correctly do not. 
+ pub async fn branch_create_from_as( &self, from: impl Into, name: &str, + actor: Option<&str>, ) -> Result<()> { + let target = from.into(); + let source_branch = match &target { + ReadTarget::Branch(b) => b.clone(), + _ => "".to_string(), + }; + self.enforce( + omnigraph_policy::PolicyAction::BranchCreate, + &omnigraph_policy::ResourceScope::BranchTransition { + source: source_branch, + target: name.to_string(), + }, + actor, + )?; self.ensure_schema_apply_idle("branch_create_from").await?; - self.branch_create_from_impl(from, name, false).await + self.branch_create_from_impl(target, name, false).await } async fn branch_create_from_impl( @@ -945,6 +1230,22 @@ impl Omnigraph { } pub async fn branch_delete(&self, name: &str) -> Result<()> { + self.branch_delete_as(name, None).await + } + + /// Delete a branch with an explicit actor for engine-layer policy + /// enforcement (MR-722 fan-out). Scope is `TargetBranch(name)` — + /// matches the HTTP-layer convention at `server_branch_delete` + /// (branch=None, target_branch=Some(name)). Cedar rules using + /// `target_branch_scope: protected` therefore correctly gate + /// deletion of protected branches (e.g. deny BranchDelete against + /// `main`). 
+ pub async fn branch_delete_as(&self, name: &str, actor: Option<&str>) -> Result<()> { + self.enforce( + omnigraph_policy::PolicyAction::BranchDelete, + &omnigraph_policy::ResourceScope::TargetBranch(name.to_string()), + actor, + )?; self.ensure_schema_state_valid().await?; self.ensure_schema_apply_idle("branch_delete").await?; ensure_public_branch_ref(name, "branch_delete")?; @@ -965,7 +1266,9 @@ impl Omnigraph { pub async fn get_commit(&self, commit_id: &str) -> Result { self.ensure_schema_state_valid().await?; - self.coordinator.read().await + self.coordinator + .read() + .await .resolve_commit(&SnapshotId::new(commit_id)) .await } @@ -1280,6 +1583,71 @@ fn read_schema_ir_from_source(schema_source: &str) -> Result { build_schema_ir(&schema_ast).map_err(|err| OmniError::manifest(err.to_string())) } +/// I/O phase of `Omnigraph::init_with_storage`. Split out so the caller +/// can pattern-match on the result and run cleanup on error before +/// returning the original error. +/// +/// Failpoints fire at the phase boundaries: +/// * `init.after_schema_pg_written` — `_schema.pg` is on disk. In strict mode +/// this fires in the caller immediately after the atomic ownership claim; in +/// force mode it fires here after the explicit overwrite. +/// * `init.after_schema_contract_written` — `_schema.pg` + `_schema.ir.json` +/// + `__schema_state.json` are on disk. +/// * `init.after_coordinator_init` — all schema files plus Lance per-type +/// datasets and `__manifest/` are on disk. (The cleanup wrapper can only +/// remove the schema files; Lance directories need `delete_prefix` — +/// deferred along with `DELETE /graphs/{id}`.) 
+async fn init_storage_phase( + root: &str, + schema_source: &str, + schema_ir: &SchemaIR, + catalog: &Catalog, + storage: &Arc, + write_schema_pg: bool, +) -> Result { + if write_schema_pg { + let schema_path = join_uri(root, SCHEMA_SOURCE_FILENAME); + storage.write_text(&schema_path, schema_source).await?; + crate::failpoints::maybe_fail("init.after_schema_pg_written")?; + } + + write_schema_contract(root, storage.as_ref(), schema_ir).await?; + crate::failpoints::maybe_fail("init.after_schema_contract_written")?; + + let coordinator = GraphCoordinator::init(root, catalog, Arc::clone(storage)).await?; + crate::failpoints::maybe_fail("init.after_coordinator_init")?; + + Ok(coordinator) +} + +/// Best-effort cleanup of init-phase artifacts. Called from +/// `init_with_storage` on any error returned by `init_storage_phase`. +/// +/// Removes the three schema files: `_schema.pg`, `_schema.ir.json`, +/// `__schema_state.json`. Lance datasets and `__manifest/` are not +/// touched here — recursive directory deletion requires a +/// `StorageAdapter::delete_prefix` primitive that's deferred along +/// with `DELETE /graphs/{id}` (MR-668 PR 2b). +/// +/// Failures to delete are logged via `tracing::warn` and do not mask +/// the original init error. 
+async fn best_effort_cleanup_init_artifacts(root: &str, storage: &dyn StorageAdapter) { + for uri in [ + schema_source_uri(root), + schema_ir_uri(root), + schema_state_uri(root), + ] { + if let Err(err) = storage.delete(&uri).await { + tracing::warn!( + target: "omnigraph::init::cleanup", + uri = %uri, + error = %err, + "init failed; best-effort cleanup could not delete artifact", + ); + } + } +} + fn schema_table_key(type_kind: SchemaTypeKind, name: &str) -> String { match type_kind { SchemaTypeKind::Node => format!("node:{}", name), @@ -1489,7 +1857,7 @@ mod tests { use crate::db::manifest::ManifestCoordinator; use async_trait::async_trait; use serde_json::Value; - use std::sync::Mutex; + use std::sync::{Arc, Mutex}; use crate::storage::{LocalStorageAdapter, StorageAdapter, join_uri}; @@ -1543,6 +1911,11 @@ edge WorksAt: Person -> Company self.inner.write_text(uri, contents).await } + async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result { + self.writes.lock().unwrap().push(uri.to_string()); + self.inner.write_text_if_absent(uri, contents).await + } + async fn exists(&self, uri: &str) -> Result { self.exists_checks.lock().unwrap().push(uri.to_string()); self.inner.exists(uri).await @@ -1566,13 +1939,96 @@ edge WorksAt: Person -> Company } } + #[derive(Debug)] + struct InitRaceStorageAdapter { + inner: LocalStorageAdapter, + root: String, + barrier: Arc, + } + + #[async_trait] + impl StorageAdapter for InitRaceStorageAdapter { + async fn read_text(&self, uri: &str) -> Result { + self.inner.read_text(uri).await + } + + async fn write_text(&self, uri: &str, contents: &str) -> Result<()> { + self.inner.write_text(uri, contents).await + } + + async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result { + self.inner.write_text_if_absent(uri, contents).await + } + + async fn exists(&self, uri: &str) -> Result { + let exists = self.inner.exists(uri).await?; + if uri == schema_state_uri(&self.root) { + self.barrier.wait().await; + } + 
Ok(exists) + } + + async fn rename_text(&self, from_uri: &str, to_uri: &str) -> Result<()> { + self.inner.rename_text(from_uri, to_uri).await + } + + async fn delete(&self, uri: &str) -> Result<()> { + self.inner.delete(uri).await + } + + async fn list_dir(&self, dir_uri: &str) -> Result> { + self.inner.list_dir(dir_uri).await + } + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn concurrent_strict_init_does_not_delete_winning_schema_files() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap().to_string(); + let root = normalize_root_uri(&uri).unwrap(); + let storage: Arc = Arc::new(InitRaceStorageAdapter { + inner: LocalStorageAdapter, + root, + barrier: Arc::new(tokio::sync::Barrier::new(2)), + }); + + let left = Omnigraph::init_with_storage( + &uri, + TEST_SCHEMA, + Arc::clone(&storage), + InitOptions::default(), + ); + let right = Omnigraph::init_with_storage( + &uri, + TEST_SCHEMA, + Arc::clone(&storage), + InitOptions::default(), + ); + let (left, right) = tokio::join!(left, right); + let ok_count = usize::from(left.is_ok()) + usize::from(right.is_ok()); + assert_eq!(ok_count, 1, "exactly one concurrent init should win"); + + assert!( + dir.path().join("_schema.pg").exists(), + "winning init must leave _schema.pg in place" + ); + assert!( + dir.path().join("_schema.ir.json").exists(), + "winning init must leave _schema.ir.json in place" + ); + assert!( + dir.path().join("__schema_state.json").exists(), + "winning init must leave __schema_state.json in place" + ); + } + #[tokio::test] async fn test_init_and_open_route_graph_metadata_through_storage_adapter() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); let adapter = Arc::new(RecordingStorageAdapter::default()); - Omnigraph::init_with_storage(uri, TEST_SCHEMA, adapter.clone()) + Omnigraph::init_with_storage(uri, TEST_SCHEMA, adapter.clone(), InitOptions::default()) .await .unwrap(); 
assert!(adapter.writes().contains(&join_uri(uri, "_schema.pg"))); diff --git a/crates/omnigraph/src/db/omnigraph/export.rs b/crates/omnigraph/src/db/omnigraph/export.rs index 3fcd4f4..366f50a 100644 --- a/crates/omnigraph/src/db/omnigraph/export.rs +++ b/crates/omnigraph/src/db/omnigraph/export.rs @@ -16,7 +16,12 @@ pub(super) async fn entity_at( id: &str, version: u64, ) -> Result> { - let snap = db.coordinator.read().await.snapshot_at_version(version).await?; + let snap = db + .coordinator + .read() + .await + .snapshot_at_version(version) + .await?; entity_from_snapshot(db, &snap, table_key, id).await } diff --git a/crates/omnigraph/src/db/omnigraph/optimize.rs b/crates/omnigraph/src/db/omnigraph/optimize.rs index 4d0f0ce..e158dc7 100644 --- a/crates/omnigraph/src/db/omnigraph/optimize.rs +++ b/crates/omnigraph/src/db/omnigraph/optimize.rs @@ -1,7 +1,7 @@ //! Lance compaction + version cleanup exposed at the graph level. //! //! Lance accumulates many small `.lance` fragment files per table over the -//! life of a repo: each `write`, `load`, and `change` op appends one or more +//! life of a graph: each `write`, `load`, and `change` op appends one or more //! fragments and a new manifest. Over long timescales this hurts open times //! and S3 object counts without improving anything. //! 
@@ -176,10 +176,9 @@ pub async fn cleanup_all_tables( clean_referenced_branches: false, delete_rate_limit: None, }; - let removed: RemovalStats = - lance::dataset::cleanup::cleanup_old_versions(&ds, policy) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; + let removed: RemovalStats = lance::dataset::cleanup::cleanup_old_versions(&ds, policy) + .await + .map_err(|e| OmniError::Lance(e.to_string()))?; Ok(TableCleanupStats { table_key, bytes_removed: removed.bytes_removed, @@ -198,12 +197,7 @@ fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec .node_types .keys() .map(|n| format!("node:{}", n)) - .chain( - catalog - .edge_types - .keys() - .map(|n| format!("edge:{}", n)), - ) + .chain(catalog.edge_types.keys().map(|n| format!("edge:{}", n))) .collect(); keys.sort(); keys diff --git a/crates/omnigraph/src/db/omnigraph/schema_apply.rs b/crates/omnigraph/src/db/omnigraph/schema_apply.rs index a01cbc8..0dcf0f9 100644 --- a/crates/omnigraph/src/db/omnigraph/schema_apply.rs +++ b/crates/omnigraph/src/db/omnigraph/schema_apply.rs @@ -1,22 +1,83 @@ use super::*; +/// Operator-supplied options that gate schema-apply behavior. +/// +/// Today the only knob is `allow_data_loss`, which promotes +/// `DropMode::Soft` steps to `DropMode::Hard` (per chassis v1 +/// commit #5). Soft is the default — drops are reversible via Lance +/// time travel until cleanup runs. Hard runs `cleanup_old_versions` +/// on the affected datasets immediately after the manifest publish, +/// making the prior column data unreachable. +#[derive(Debug, Clone, Default)] +pub struct SchemaApplyOptions { + /// Allow destructive (data-loss) schema changes. When true, the + /// planner promotes every `DropMode::Soft` step to + /// `DropMode::Hard`, and the apply path runs + /// `cleanup_old_versions` on affected datasets after the publish. + pub allow_data_loss: bool, +} + +/// Promote every `Soft` drop variant in the plan to `Hard` when +/// `allow_data_loss` is set. 
Idempotent on non-drop steps. +fn promote_drops_to_hard(plan: &mut SchemaMigrationPlan, allow_data_loss: bool) { + if !allow_data_loss { + return; + } + for step in &mut plan.steps { + match step { + SchemaMigrationStep::DropType { mode, .. } + | SchemaMigrationStep::DropProperty { mode, .. } => { + *mode = DropMode::Hard; + } + _ => {} + } + } +} + pub(super) async fn plan_schema( db: &Omnigraph, desired_schema_source: &str, + options: SchemaApplyOptions, ) -> Result { db.ensure_schema_state_valid().await?; let accepted_ir = read_accepted_schema_ir(db.uri(), Arc::clone(&db.storage)).await?; let desired_ir = read_schema_ir_from_source(desired_schema_source)?; - plan_schema_migration(&accepted_ir, &desired_ir) - .map_err(|err| OmniError::manifest(err.to_string())) + let mut plan = plan_schema_migration(&accepted_ir, &desired_ir) + .map_err(|err| OmniError::manifest(err.to_string()))?; + promote_drops_to_hard(&mut plan, options.allow_data_loss); + Ok(plan) } pub(super) async fn apply_schema( db: &Omnigraph, desired_schema_source: &str, + options: SchemaApplyOptions, + actor: Option<&str>, ) -> Result { + // Engine-layer policy gate (MR-722 chassis core). + // + // Fires BEFORE acquiring the schema-apply lock or doing any other + // work. When no PolicyChecker is installed this is a no-op and + // the apply path behaves exactly as it did before MR-722. When + // a PolicyChecker IS installed and the actor is None, this is a + // hard error — see Omnigraph::enforce's docstring for the + // forget-the-actor-footgun reasoning. + // + // Scope is TargetBranch("main") to match the HTTP-layer convention + // for SchemaApply: branch=None, target_branch=Some("main"). Cedar + // policies in the wild use `target_branch_scope: protected` to + // gate schema applies, so the engine-layer call has to set the + // target_branch shape that activates that predicate. Wrong scope + // here = silent policy mismatch with HTTP. 
See + // `omnigraph_policy::ResourceScope::to_branch_pair` for the mapping. + db.enforce( + omnigraph_policy::PolicyAction::SchemaApply, + &omnigraph_policy::ResourceScope::TargetBranch("main".to_string()), + actor, + )?; + acquire_schema_apply_lock(db).await?; - let result = apply_schema_with_lock(db, desired_schema_source).await; + let result = apply_schema_with_lock(db, desired_schema_source, options).await; let release_result = release_schema_apply_lock(db).await; match (result, release_result) { (Ok(result), Ok(())) => Ok(result), @@ -29,13 +90,14 @@ pub(super) async fn apply_schema( pub(super) async fn apply_schema_with_lock( db: &Omnigraph, desired_schema_source: &str, + options: SchemaApplyOptions, ) -> Result { db.ensure_schema_state_valid().await?; let branches = db.coordinator.read().await.all_branches().await?; // Skip `main` and internal system branches. The schema-apply lock branch // is excluded because it is the cluster-wide schema-apply serializer. // `__run__*` branches are no longer created; the filter remains as - // defense-in-depth for legacy repos with leftover staging branches. + // defense-in-depth for legacy graphs with leftover staging branches. // A future production sweep will let this guard go. 
let blocking_branches = branches .into_iter() @@ -43,15 +105,16 @@ pub(super) async fn apply_schema_with_lock( .collect::>(); if !blocking_branches.is_empty() { return Err(OmniError::manifest_conflict(format!( - "schema apply requires a repo with only main; found non-main branches: {}", + "schema apply requires a graph with only main; found non-main branches: {}", blocking_branches.join(", ") ))); } let accepted_ir = read_accepted_schema_ir(db.uri(), Arc::clone(&db.storage)).await?; let desired_ir = read_schema_ir_from_source(desired_schema_source)?; - let plan = plan_schema_migration(&accepted_ir, &desired_ir) + let mut plan = plan_schema_migration(&accepted_ir, &desired_ir) .map_err(|err| OmniError::manifest(err.to_string()))?; + promote_drops_to_hard(&mut plan, options.allow_data_loss); if !plan.supported { let message = plan .steps @@ -78,6 +141,13 @@ pub(super) async fn apply_schema_with_lock( let mut renamed_tables = HashMap::new(); let mut rewritten_tables = BTreeSet::new(); let mut indexed_tables = BTreeSet::new(); + let mut dropped_tables = BTreeSet::new(); + // Hard-drop cleanup targets: (table_key, full_dataset_uri). + // Populated for DropProperty { Hard } and DropType { Hard }; the + // post-publish cleanup runs `cleanup_old_versions` on each + // dataset to reclaim prior versions, making time-travel back + // to pre-drop state unreachable. + let mut hard_cleanup_targets: Vec<(String, String)> = Vec::new(); let mut property_renames = HashMap::>::new(); let mut changed_edge_tables = false; @@ -138,6 +208,79 @@ pub(super) async fn apply_schema_with_lock( } SchemaMigrationStep::UpdateTypeMetadata { .. } | SchemaMigrationStep::UpdatePropertyMetadata { .. } => {} + SchemaMigrationStep::DropProperty { + type_kind, + type_name, + mode, + .. + } => { + // Both Soft and Hard route through the existing + // stage_overwrite rewrite path. 
batch_for_schema_apply_rewrite + // iterates the *target* schema fields, so a property + // absent from desired_catalog is naturally projected + // away in the rebuilt batch. + // + // The difference between Soft and Hard is what + // happens AFTER the manifest publish: + // * Soft: nothing — the prior dataset version + // retains the dropped column; reads at + // snapshot_at_version(pre_drop) still see it. + // * Hard: run cleanup_old_versions on the dataset + // post-publish, removing the prior version (and + // reclaiming any fragments unique to it). After + // cleanup, time-travel back fails. + let table_key = schema_table_key(*type_kind, type_name); + if table_key.starts_with("edge:") { + changed_edge_tables = true; + } + if matches!(mode, DropMode::Hard) { + let entry = snapshot.entry(&table_key).ok_or_else(|| { + OmniError::manifest(format!( + "missing table '{}' for hard property drop", + table_key + )) + })?; + let full_uri = format!("{}/{}", db.root_uri, entry.table_path); + hard_cleanup_targets.push((table_key.clone(), full_uri)); + } + rewritten_tables.insert(table_key); + } + SchemaMigrationStep::DropType { + type_kind, + name, + mode, + } => { + // Both Soft and Hard tombstone the table's entry in + // the current __manifest version (no per-table write). + // + // The difference is what happens after publish: + // * Soft: dataset files retained; prior __manifest + // versions still reference them; Lance time + // travel + branch-from-snapshot can read the + // dropped table. + // * Hard: run cleanup_old_versions on the orphan + // dataset post-publish. Prior dataset versions + // (and their fragments) are reclaimed. The dataset + // directory itself persists until a future + // orphan-cleanup pass — operators who need the + // directory gone too should run `omnigraph cleanup` + // and (for now) remove the directory out-of-band. 
+ let table_key = schema_table_key(*type_kind, name); + if table_key.starts_with("edge:") { + changed_edge_tables = true; + } + if matches!(mode, DropMode::Hard) { + let entry = snapshot.entry(&table_key).ok_or_else(|| { + OmniError::manifest(format!( + "missing table '{}' for hard type drop", + table_key + )) + })?; + let full_uri = format!("{}/{}", db.root_uri, entry.table_path); + hard_cleanup_targets.push((table_key.clone(), full_uri)); + } + dropped_tables.insert(table_key); + } step @ SchemaMigrationStep::UnsupportedChange { .. } => { return Err(OmniError::manifest( step.unsupported_error_message() @@ -208,6 +351,26 @@ pub(super) async fn apply_schema_with_lock( tombstone_version: source_entry.table_version.saturating_add(1), }); } + // Soft DropType: mark each dropped table for tombstoning in the + // recovery sidecar AND in the live table_tombstones map. The + // mechanism mirrors rename's source-table tombstone — manifest + // entry removed at version+1, dataset files retained, time-travel + // reachable until cleanup. No Phase B write happens for these + // tables; the recovery sidecar is purely the manifest delta. + for dropped_table_key in &dropped_tables { + let entry = snapshot.entry(dropped_table_key).ok_or_else(|| { + OmniError::manifest(format!( + "missing table '{}' for soft drop when building recovery sidecar", + dropped_table_key + )) + })?; + let tombstone_version = entry.table_version.saturating_add(1); + sidecar_tombstones.push(crate::db::manifest::SidecarTombstone { + table_key: dropped_table_key.clone(), + tombstone_version, + }); + table_tombstones.insert(dropped_table_key.clone(), tombstone_version); + } // Acquire per-(table_key, branch) queues for every existing table // that schema_apply will rewrite or re-index. New tables (added or @@ -523,6 +686,25 @@ pub(super) async fn apply_schema_with_lock( } } + // Hard-drop cleanup: run cleanup_old_versions on each dataset + // that had a Hard mode drop step. 
Best-effort — the schema apply + // is already durable. If cleanup fails, the prior data fragments + // remain on disk as orphans (reclaimable via `omnigraph cleanup`). + // We do NOT fail the apply on cleanup error; the manifest change + // is the load-bearing operation. + for (table_key, full_uri) in &hard_cleanup_targets { + match cleanup_dataset_old_versions(db, full_uri).await { + Ok(()) => {} + Err(err) => { + tracing::warn!( + error = %err, + table_key = table_key.as_str(), + "hard-drop cleanup_old_versions failed; rerun `omnigraph cleanup` to reclaim", + ); + } + } + } + Ok(SchemaApplyResult { supported: true, applied: true, @@ -531,6 +713,36 @@ pub(super) async fn apply_schema_with_lock( }) } +/// Run `cleanup_old_versions` on a dataset URI with `before_timestamp = now`. +/// Removes every version older than the current, making time-travel back +/// to those versions unreachable. Used by Hard mode drops to enforce +/// "data is gone" semantics post-apply. +/// +/// The dataset itself isn't deleted — for DropType { Hard }, the +/// dataset directory persists with only its current version (or, if +/// no current version was written, its pre-drop version). A future +/// orphan-cleanup pass should remove the directory entirely. 
+async fn cleanup_dataset_old_versions(db: &Omnigraph, full_uri: &str) -> Result<()> { + use chrono::Utc; + use lance::dataset::cleanup::CleanupPolicy; + let ds = lance::Dataset::open(full_uri) + .await + .map_err(|e| OmniError::Lance(e.to_string()))?; + let policy = CleanupPolicy { + before_timestamp: Some(Utc::now()), + before_version: None, + delete_unverified: false, + error_if_tagged_old_versions: false, + clean_referenced_branches: false, + delete_rate_limit: None, + }; + let _removed = lance::dataset::cleanup::cleanup_old_versions(&ds, policy) + .await + .map_err(|e| OmniError::Lance(e.to_string()))?; + let _ = db; + Ok(()) +} + pub(super) async fn ensure_schema_apply_idle(db: &Omnigraph, operation: &str) -> Result<()> { db.refresh_coordinator_only().await?; ensure_schema_apply_not_locked(db, operation).await @@ -568,7 +780,7 @@ pub(super) async fn acquire_schema_apply_lock(db: &Omnigraph) -> Result<()> { if !blocking_branches.is_empty() { let _ = release_schema_apply_lock(db).await; return Err(OmniError::manifest_conflict(format!( - "schema apply requires a repo with only main; found non-main branches: {}", + "schema apply requires a graph with only main; found non-main branches: {}", blocking_branches.join(", ") ))); } diff --git a/crates/omnigraph/src/db/omnigraph/table_ops.rs b/crates/omnigraph/src/db/omnigraph/table_ops.rs index 717f263..0e89c45 100644 --- a/crates/omnigraph/src/db/omnigraph/table_ops.rs +++ b/crates/omnigraph/src/db/omnigraph/table_ops.rs @@ -22,7 +22,12 @@ pub(super) async fn graph_index_for_resolved( } pub(super) async fn ensure_indices(db: &Omnigraph) -> Result<()> { - let current_branch = db.coordinator.read().await.current_branch().map(str::to_string); + let current_branch = db + .coordinator + .read() + .await + .current_branch() + .map(str::to_string); ensure_indices_for_branch(db, current_branch.as_deref()).await } @@ -68,10 +73,7 @@ pub(super) async fn failpoint_publish_table_head_without_index_rebuild_for_test( .await } 
-pub(super) async fn ensure_indices_for_branch( - db: &Omnigraph, - branch: Option<&str>, -) -> Result<()> { +pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&str>) -> Result<()> { db.ensure_schema_state_valid().await?; db.ensure_schema_apply_idle("ensure_indices").await?; let resolved = db.resolved_branch_target(branch).await?; @@ -403,7 +405,12 @@ pub(super) async fn open_for_mutation( table_key: &str, op_kind: crate::db::MutationOpKind, ) -> Result<(Dataset, String, Option)> { - let current_branch = db.coordinator.read().await.current_branch().map(str::to_string); + let current_branch = db + .coordinator + .read() + .await + .current_branch() + .map(str::to_string); open_for_mutation_on_branch(db, current_branch.as_deref(), table_key, op_kind).await } @@ -807,7 +814,12 @@ pub(super) async fn commit_prepared_updates_on_branch( updates: &[crate::db::SubTableUpdate], actor_id: Option<&str>, ) -> Result { - let current_branch = db.coordinator.read().await.current_branch().map(str::to_string); + let current_branch = db + .coordinator + .read() + .await + .current_branch() + .map(str::to_string); let requested_branch = branch.map(str::to_string); if requested_branch == current_branch { return commit_prepared_updates(db, updates, actor_id).await; @@ -835,7 +847,12 @@ pub(super) async fn commit_prepared_updates_on_branch_with_expected( expected_table_versions: &std::collections::HashMap, actor_id: Option<&str>, ) -> Result { - let current_branch = db.coordinator.read().await.current_branch().map(str::to_string); + let current_branch = db + .coordinator + .read() + .await + .current_branch() + .map(str::to_string); let requested_branch = branch.map(str::to_string); if requested_branch == current_branch { return commit_prepared_updates_with_expected( @@ -870,7 +887,12 @@ pub(super) async fn commit_updates( updates: &[crate::db::SubTableUpdate], ) -> Result { db.ensure_schema_apply_not_locked("write commit").await?; - let current_branch = 
db.coordinator.read().await.current_branch().map(str::to_string); + let current_branch = db + .coordinator + .read() + .await + .current_branch() + .map(str::to_string); let prepared = prepare_updates_for_commit(db, current_branch.as_deref(), updates).await?; commit_prepared_updates(db, &prepared, None).await } @@ -879,7 +901,11 @@ pub(super) async fn commit_manifest_updates( db: &Omnigraph, updates: &[crate::db::SubTableUpdate], ) -> Result { - db.coordinator.write().await.commit_manifest_updates(updates).await + db.coordinator + .write() + .await + .commit_manifest_updates(updates) + .await } pub(super) async fn record_merge_commit( @@ -889,7 +915,9 @@ pub(super) async fn record_merge_commit( merged_parent_commit_id: &str, actor_id: Option<&str>, ) -> Result { - db.coordinator.write().await + db.coordinator + .write() + .await .record_merge_commit( manifest_version, parent_commit_id, @@ -923,7 +951,11 @@ pub(super) async fn commit_updates_on_branch_with_expected( } pub(super) async fn ensure_commit_graph_initialized(db: &Omnigraph) -> Result<()> { - db.coordinator.write().await.ensure_commit_graph_initialized().await + db.coordinator + .write() + .await + .ensure_commit_graph_initialized() + .await } pub(super) async fn invalidate_graph_index(db: &Omnigraph) { diff --git a/crates/omnigraph/src/db/recovery_audit.rs b/crates/omnigraph/src/db/recovery_audit.rs index b7d4975..b9e8e7b 100644 --- a/crates/omnigraph/src/db/recovery_audit.rs +++ b/crates/omnigraph/src/db/recovery_audit.rs @@ -93,7 +93,7 @@ pub(crate) struct RecoveryAudit { } impl RecoveryAudit { - /// Open the recovery-audit dataset for the repo, or return a handle + /// Open the recovery-audit dataset for the graph, or return a handle /// with no dataset yet (created on first append). Mirrors the /// optional-dataset pattern from `_graph_commit_actors.lance`. 
pub(crate) async fn open(root_uri: &str) -> Result { @@ -205,9 +205,7 @@ fn recovery_record_to_batch(record: &RecoveryAuditRecord) -> Result vec![ Arc::new(StringArray::from(vec![record.graph_commit_id.clone()])), Arc::new(StringArray::from(vec![record.recovery_kind.as_str()])), - Arc::new(StringArray::from(vec![record - .recovery_for_actor - .clone()])), + Arc::new(StringArray::from(vec![record.recovery_for_actor.clone()])), Arc::new(StringArray::from(vec![record.operation_id.clone()])), Arc::new(StringArray::from(vec![record.sidecar_writer_kind.clone()])), Arc::new(StringArray::from(vec![outcomes_json])), @@ -221,10 +219,14 @@ fn decode_row(batch: &RecordBatch, row: usize) -> Result { let str_col = |name: &str| -> Result<&StringArray> { batch .column_by_name(name) - .ok_or_else(|| OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)))? + .ok_or_else(|| { + OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)) + })? .as_any() .downcast_ref::() - .ok_or_else(|| OmniError::manifest_internal(format!("column '{}' has wrong type", name))) + .ok_or_else(|| { + OmniError::manifest_internal(format!("column '{}' has wrong type", name)) + }) }; let ts_col = batch .column_by_name("created_at") @@ -269,9 +271,7 @@ pub(crate) fn now_micros() -> Result { SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_micros() as i64) - .map_err(|e| { - OmniError::manifest_internal(format!("system clock before unix epoch: {}", e)) - }) + .map_err(|e| OmniError::manifest_internal(format!("system clock before unix epoch: {}", e))) } #[cfg(test)] @@ -307,7 +307,7 @@ mod tests { let root = dir.path().to_str().unwrap(); let mut audit = RecoveryAudit::open(root).await.unwrap(); - // Empty repo: list returns empty. + // Empty graph: list returns empty. assert!(audit.list().await.unwrap().is_empty()); // Append + list. 
diff --git a/crates/omnigraph/src/db/schema_state.rs b/crates/omnigraph/src/db/schema_state.rs index 13dfccc..b131a16 100644 --- a/crates/omnigraph/src/db/schema_state.rs +++ b/crates/omnigraph/src/db/schema_state.rs @@ -61,7 +61,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract( .collect::>(); if !public_non_main.is_empty() { return Err(schema_lock_conflict(format!( - "repo is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely", + "graph is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely", public_non_main.join(", ") ))); } @@ -70,7 +70,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract( Ok((current_source_ir.clone(), state)) } SchemaContractRead::PartialMissing => Err(schema_lock_conflict( - "repo schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)", + "graph schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)", )), } } @@ -84,7 +84,7 @@ pub(crate) async fn validate_schema_contract( SchemaContractRead::Present { ir, state } => (ir, state), SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => { return Err(schema_lock_conflict( - "repo is missing persisted schema state; manual coordination is required before schema changes are allowed", + "graph is missing persisted schema state; manual coordination is required before schema changes are allowed", )); } }; @@ -163,7 +163,7 @@ pub(crate) async fn read_accepted_schema_ir( } SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => { Err(schema_lock_conflict( - "repo is missing persisted schema state; manual coordination is required before schema changes are allowed", + "graph is missing persisted schema state; manual coordination is required before schema changes are allowed", )) } } @@ -221,7 +221,7 @@ async fn read_schema_contract( })?; 
let state = serde_json::from_str::(&state_json).map_err(|err| { schema_lock_conflict(format!( - "repo schema state in {} is invalid: {}", + "graph schema state in {} is invalid: {}", SCHEMA_STATE_FILENAME, err )) })?; @@ -234,7 +234,7 @@ async fn read_schema_contract( fn validate_persisted_schema_contract(ir: &SchemaIR, state: &SchemaState) -> Result<()> { if state.format_version != SCHEMA_STATE_FORMAT_VERSION { return Err(schema_lock_conflict(format!( - "repo schema state format {} is unsupported", + "graph schema state format {} is unsupported", state.format_version ))); } @@ -344,7 +344,7 @@ pub(crate) async fn recover_schema_state_files( // to the new Lance HEADs; we MUST also rename the staging files // forward so the catalog matches. Without this, the disambiguation // logic below sees actual_keys == live_keys (manifest didn't move) - // and deletes the staging files, leaving the repo with new-schema + // and deletes the staging files, leaving the graph with new-schema // data on disk but the old `_schema.pg` live — corruption. if crate::db::manifest::has_schema_apply_sidecar(root_uri, storage.as_ref()).await? { warn!( diff --git a/crates/omnigraph/src/db/write_queue.rs b/crates/omnigraph/src/db/write_queue.rs index bb03022..1f0c53a 100644 --- a/crates/omnigraph/src/db/write_queue.rs +++ b/crates/omnigraph/src/db/write_queue.rs @@ -91,10 +91,7 @@ impl WriteQueueManager { /// Empty input returns an empty Vec without touching the map. /// Duplicates in `keys` are deduped before acquisition (the same /// key acquired twice would deadlock against itself). 
- pub(crate) async fn acquire_many( - &self, - keys: &[TableQueueKey], - ) -> Vec> { + pub(crate) async fn acquire_many(&self, keys: &[TableQueueKey]) -> Vec> { if keys.is_empty() { return Vec::new(); } @@ -167,7 +164,10 @@ mod tests { qm2.acquire_many(&[z_clone, a_clone]).await }) .await; - assert!(result.is_err(), "acquire_many should block on `a`, the lex-first key"); + assert!( + result.is_err(), + "acquire_many should block on `a`, the lex-first key" + ); } #[tokio::test] @@ -180,9 +180,10 @@ mod tests { // Second acquire on same key should NOT complete within 200ms. let qm2 = Arc::clone(&qm); let k2 = k.clone(); - let blocked = timeout(Duration::from_millis(200), async move { - qm2.acquire(&k2).await - }) + let blocked = timeout( + Duration::from_millis(200), + async move { qm2.acquire(&k2).await }, + ) .await; assert!(blocked.is_err(), "second acquire on same key must block"); diff --git a/crates/omnigraph/src/error.rs b/crates/omnigraph/src/error.rs index fc91090..11f4da0 100644 --- a/crates/omnigraph/src/error.rs +++ b/crates/omnigraph/src/error.rs @@ -85,6 +85,21 @@ pub enum OmniError { Manifest(ManifestError), #[error("merge conflicts: {0:?}")] MergeConflicts(Vec), + /// Engine-layer policy enforcement (MR-722). Wraps either a policy + /// denial ("you can't do that") or a policy-evaluation failure + /// ("the policy engine itself blew up"). The HTTP layer maps + /// denials to 403 and evaluation failures to 500; CLI and embedded + /// callers can match on this variant directly. + #[error("policy: {0}")] + Policy(String), + /// `Omnigraph::init` was called against a URI that already holds + /// schema artifacts from a previous init. Strict mode (the default) + /// fails fast with this error before touching disk so an existing + /// graph's metadata cannot be overwritten or destroyed. Operators + /// who actually want to overwrite pass `InitOptions { force: true }` + /// (CLI: `omnigraph init --force`). 
+ #[error("graph already initialized at '{uri}'; pass --force to overwrite")] + AlreadyInitialized { uri: String }, } impl OmniError { diff --git a/crates/omnigraph/src/exec/merge.rs b/crates/omnigraph/src/exec/merge.rs index 7250881..2e5f32e 100644 --- a/crates/omnigraph/src/exec/merge.rs +++ b/crates/omnigraph/src/exec/merge.rs @@ -1062,6 +1062,21 @@ impl Omnigraph { target: &str, actor_id: Option<&str>, ) -> Result { + // Engine-layer policy gate (MR-722 fan-out / PR #3). Scope is + // `BranchTransition { source, target }` — matches the HTTP-layer + // convention at `server_branch_merge` (branch=Some(source), + // target_branch=Some(target)). Cedar rules using + // `target_branch_scope: protected` therefore correctly gate + // merges INTO protected branches without forbidding the + // (symmetric) source-side reference. + self.enforce( + omnigraph_policy::PolicyAction::BranchMerge, + &omnigraph_policy::ResourceScope::BranchTransition { + source: source.to_string(), + target: target.to_string(), + }, + actor_id, + )?; self.ensure_schema_apply_idle("branch_merge").await?; self.branch_merge_impl(source, target, actor_id).await } diff --git a/crates/omnigraph/src/exec/mutation.rs b/crates/omnigraph/src/exec/mutation.rs index e58b718..02b2a21 100644 --- a/crates/omnigraph/src/exec/mutation.rs +++ b/crates/omnigraph/src/exec/mutation.rs @@ -692,6 +692,16 @@ impl Omnigraph { params: &ParamMap, actor_id: Option<&str>, ) -> Result { + // Engine-layer policy gate (MR-722 fan-out / PR #3). Scope is + // `Branch(branch)` to match the HTTP-layer convention at + // `server_change` (branch=Some(branch), target_branch=None). When no + // PolicyChecker is installed this is a no-op; with policy installed + // and actor=None this fails hard (forget-the-actor footgun guard). 
+ self.enforce( + omnigraph_policy::PolicyAction::Change, + &omnigraph_policy::ResourceScope::Branch(branch.to_string()), + actor_id, + )?; self.mutate_with_current_actor(branch, query_source, query_name, params, actor_id) .await } @@ -784,11 +794,8 @@ impl Omnigraph { // post_commit_pin) and tidies up. Failing the user // here would return an error for a write that // already landed. - if let Err(err) = crate::db::manifest::delete_sidecar( - &handle, - self.storage_adapter(), - ) - .await + if let Err(err) = + crate::db::manifest::delete_sidecar(&handle, self.storage_adapter()).await { tracing::warn!( error = %err, @@ -842,15 +849,8 @@ impl Omnigraph { assignments, predicate, } => { - self.execute_update( - type_name, - assignments, - predicate, - params, - branch, - staging, - ) - .await? + self.execute_update(type_name, assignments, predicate, params, branch, staging) + .await? } MutationOpIR::Delete { type_name, @@ -971,14 +971,8 @@ impl Omnigraph { // + iterate pending edges in-memory for the `src` column, // group-by-src. The pending side already includes the row // we just appended (above). 
- validate_edge_cardinality_with_pending( - self, - &ds, - staging, - &table_key, - edge_type, - ) - .await?; + validate_edge_cardinality_with_pending(self, &ds, staging, &table_key, edge_type) + .await?; self.invalidate_graph_index().await; @@ -1369,14 +1363,8 @@ async fn validate_edge_cardinality_with_pending( if edge_type.cardinality.is_default() { return Ok(()); } - let counts = super::staging::count_src_per_edge( - db, - committed_ds, - table_key, - staging, - None, - ) - .await?; + let counts = + super::staging::count_src_per_edge(db, committed_ds, table_key, staging, None).await?; super::staging::enforce_cardinality_bounds(edge_type, &counts) } diff --git a/crates/omnigraph/src/exec/projection.rs b/crates/omnigraph/src/exec/projection.rs index bcfae66..dec13a8 100644 --- a/crates/omnigraph/src/exec/projection.rs +++ b/crates/omnigraph/src/exec/projection.rs @@ -345,10 +345,7 @@ fn evaluate_projection( IRExpr::PropAccess { variable, property } => { let col_name = format!("{}.{}", variable, property); let col = wide_batch.column_by_name(&col_name).ok_or_else(|| { - OmniError::manifest(format!( - "column '{}' not found in wide batch", - col_name - )) + OmniError::manifest(format!("column '{}' not found in wide batch", col_name)) })?; Ok((col_name, col.clone())) } @@ -516,12 +513,10 @@ fn aggregate_return( } let num_groups = group_indices.len(); - let mut result_columns: Vec<(usize, String, ArrayRef)> = - Vec::with_capacity(projections.len()); + let mut result_columns: Vec<(usize, String, ArrayRef)> = Vec::with_capacity(projections.len()); for gk in &group_keys { - let first_row_indices: Vec = - group_indices.iter().map(|rows| rows[0] as u32).collect(); + let first_row_indices: Vec = group_indices.iter().map(|rows| rows[0] as u32).collect(); let take_idx = UInt32Array::from(first_row_indices); let col = arrow_select::take::take(gk.column.as_ref(), &take_idx, None) .map_err(|e| OmniError::Lance(e.to_string()))?; @@ -584,11 +579,19 @@ fn compute_aggregate( } } -fn 
compute_sum(arg: &ArrayRef, group_indices: &[Vec], num_groups: usize) -> Result { +fn compute_sum( + arg: &ArrayRef, + group_indices: &[Vec], + num_groups: usize, +) -> Result { macro_rules! sum_numeric { ($arr_type:ty, $arg:expr, $dt:expr) => {{ let arr = $arg.as_any().downcast_ref::<$arr_type>().ok_or_else(|| { - OmniError::manifest(format!("sum: expected {:?}, got {:?}", $dt, $arg.data_type())) + OmniError::manifest(format!( + "sum: expected {:?}, got {:?}", + $dt, + $arg.data_type() + )) })?; let mut builder = Float64Builder::with_capacity(num_groups); for group in group_indices { @@ -613,24 +616,42 @@ fn compute_sum(arg: &ArrayRef, group_indices: &[Vec], num_groups: usize) dt @ DataType::UInt64 => sum_numeric!(UInt64Array, arg, dt), dt @ DataType::Float32 => sum_numeric!(Float32Array, arg, dt), dt @ DataType::Float64 => sum_numeric!(Float64Array, arg, dt), - dt => Err(OmniError::manifest(format!("sum: unsupported type {:?}", dt))), + dt => Err(OmniError::manifest(format!( + "sum: unsupported type {:?}", + dt + ))), } } -fn compute_avg(arg: &ArrayRef, group_indices: &[Vec], num_groups: usize) -> Result { +fn compute_avg( + arg: &ArrayRef, + group_indices: &[Vec], + num_groups: usize, +) -> Result { macro_rules! 
avg_typed { ($arr_type:ty, $arg:expr) => {{ let arr = $arg.as_any().downcast_ref::<$arr_type>().ok_or_else(|| { - OmniError::manifest(format!("avg: expected {:?}, got {:?}", stringify!($arr_type), $arg.data_type())) + OmniError::manifest(format!( + "avg: expected {:?}, got {:?}", + stringify!($arr_type), + $arg.data_type() + )) })?; let mut builder = Float64Builder::with_capacity(num_groups); for group in group_indices { let mut sum = 0.0f64; let mut count = 0usize; for &i in group { - if !arr.is_null(i) { sum += arr.value(i) as f64; count += 1; } + if !arr.is_null(i) { + sum += arr.value(i) as f64; + count += 1; + } + } + if count > 0 { + builder.append_value(sum / count as f64); + } else { + builder.append_null(); } - if count > 0 { builder.append_value(sum / count as f64); } else { builder.append_null(); } } Ok(Arc::new(builder.finish()) as ArrayRef) }}; @@ -642,15 +663,27 @@ fn compute_avg(arg: &ArrayRef, group_indices: &[Vec], num_groups: usize) DataType::UInt64 => avg_typed!(UInt64Array, arg), DataType::Float32 => avg_typed!(Float32Array, arg), DataType::Float64 => avg_typed!(Float64Array, arg), - dt => Err(OmniError::manifest(format!("avg: unsupported type {:?}", dt))), + dt => Err(OmniError::manifest(format!( + "avg: unsupported type {:?}", + dt + ))), } } -fn compute_min_max(arg: &ArrayRef, group_indices: &[Vec], num_groups: usize, is_min: bool) -> Result { +fn compute_min_max( + arg: &ArrayRef, + group_indices: &[Vec], + num_groups: usize, + is_min: bool, +) -> Result { macro_rules! 
minmax_typed { ($arr_type:ty, $builder_type:ty, $arg:expr, $is_min:expr) => {{ let arr = $arg.as_any().downcast_ref::<$arr_type>().ok_or_else(|| { - OmniError::manifest(format!("min/max: expected {:?}, got {:?}", stringify!($arr_type), $arg.data_type())) + OmniError::manifest(format!( + "min/max: expected {:?}, got {:?}", + stringify!($arr_type), + $arg.data_type() + )) })?; let mut builder = <$builder_type>::with_capacity(num_groups); for group in group_indices { @@ -660,11 +693,20 @@ fn compute_min_max(arg: &ArrayRef, group_indices: &[Vec], num_groups: usi let v = arr.value(i); result = Some(match result { None => v, - Some(cur) => if $is_min { if v < cur { v } else { cur } } else { if v > cur { v } else { cur } }, + Some(cur) => { + if $is_min { + if v < cur { v } else { cur } + } else { + if v > cur { v } else { cur } + } + } }); } } - match result { Some(v) => builder.append_value(v), None => builder.append_null() } + match result { + Some(v) => builder.append_value(v), + None => builder.append_null(), + } } Ok(Arc::new(builder.finish()) as ArrayRef) }}; @@ -688,15 +730,27 @@ fn compute_min_max(arg: &ArrayRef, group_indices: &[Vec], num_groups: usi let v = arr.value(i); result = Some(match result { None => v, - Some(cur) => if is_min { if v < cur { v } else { cur } } else { if v > cur { v } else { cur } }, + Some(cur) => { + if is_min { + if v < cur { v } else { cur } + } else { + if v > cur { v } else { cur } + } + } }); } } - match result { Some(v) => builder.append_value(v), None => builder.append_null() } + match result { + Some(v) => builder.append_value(v), + None => builder.append_null(), + } } Ok(Arc::new(builder.finish()) as ArrayRef) } - dt => Err(OmniError::manifest(format!("min/max: unsupported type {:?}", dt))), + dt => Err(OmniError::manifest(format!( + "min/max: unsupported type {:?}", + dt + ))), } } @@ -715,7 +769,8 @@ fn build_empty_aggregate_result(projections: &[IRProjection]) -> Result { fields.push(Field::new(name, DataType::Float64, 
true)); - columns.push(Arc::new(Float64Array::from(vec![None as Option])) as ArrayRef); + columns + .push(Arc::new(Float64Array::from(vec![None as Option])) as ArrayRef); } }, _ => { diff --git a/crates/omnigraph/src/exec/query.rs b/crates/omnigraph/src/exec/query.rs index 88865d8..7590512 100644 --- a/crates/omnigraph/src/exec/query.rs +++ b/crates/omnigraph/src/exec/query.rs @@ -75,14 +75,7 @@ impl Omnigraph { None }; - execute_query( - &ir, - params, - &snapshot, - graph_index.as_deref(), - &catalog, - ) - .await + execute_query(&ir, params, &snapshot, graph_index.as_deref(), &catalog).await } } @@ -360,11 +353,23 @@ pub async fn execute_query( } let mut wide: Option = None; - execute_pipeline(&ir.pipeline, params, snapshot, graph_index, catalog, &mut wide, &search_mode).await?; + execute_pipeline( + &ir.pipeline, + params, + snapshot, + graph_index, + catalog, + &mut wide, + &search_mode, + ) + .await?; let wide_batch = wide.unwrap_or_else(|| RecordBatch::new_empty(Arc::new(Schema::empty()))); // Project return expressions - let has_aggregates = ir.return_exprs.iter().any(|p| matches!(&p.expr, IRExpr::Aggregate { .. })); + let has_aggregates = ir + .return_exprs + .iter() + .any(|p| matches!(&p.expr, IRExpr::Aggregate { .. 
})); let mut result_batch = project_return(&wide_batch, &ir.return_exprs, params)?; // Apply ordering (skip if search mode already ordered the results) @@ -516,9 +521,9 @@ async fn execute_rrf_query( } fn extract_id_column_by_name(batch: &RecordBatch, col_name: &str) -> Result> { - let col = batch - .column_by_name(col_name) - .ok_or_else(|| OmniError::manifest(format!("batch missing '{}' column for RRF", col_name)))?; + let col = batch.column_by_name(col_name).ok_or_else(|| { + OmniError::manifest(format!("batch missing '{}' column for RRF", col_name)) + })?; let ids = col .as_any() .downcast_ref::() @@ -653,8 +658,19 @@ fn execute_pipeline<'a>( })?; if let Some(batch) = wide.as_mut() { execute_expand( - batch, gi, snapshot, catalog, src_var, dst_var, edge_type, *direction, - dst_type, *min_hops, *max_hops, dst_filters, params, + batch, + gi, + snapshot, + catalog, + src_var, + dst_var, + edge_type, + *direction, + dst_type, + *min_hops, + *max_hops, + dst_filters, + params, ) .await?; } @@ -691,7 +707,9 @@ async fn execute_expand( let src_id_col_name = format!("{}.id", src_var); let src_ids = wide .column_by_name(&src_id_col_name) - .ok_or_else(|| OmniError::manifest(format!("wide batch missing '{}' column", src_id_col_name)))? + .ok_or_else(|| { + OmniError::manifest(format!("wide batch missing '{}' column", src_id_col_name)) + })? .as_any() .downcast_ref::() .ok_or_else(|| OmniError::manifest(format!("'{}' column is not Utf8", src_id_col_name)))? @@ -1037,8 +1055,16 @@ async fn execute_node_scan( let table_key = format!("node:{}", type_name); let ds = snapshot.open(&table_key).await?; - // Build Lance SQL filter string from non-search IR filters - let filter_sql = build_lance_filter(filters, params); + // Lower the IR filters to a DataFusion `Expr` and apply via + // `Scanner::filter_expr` inside the configure closure. 
The string + // pushdown path (`build_lance_filter` → `scanner.filter(&str)`) is + // gone for node scans — structured Expr unlocks `CompOp::Contains` + // pushdown (via `array_has`) and lets DF 53's optimizer rules + // (vectorized IN-list, PhysicalExprSimplifier, CASE-NULL shortcut) + // reach our predicates. Other call sites that still take string SQL + // (hydrate_nodes for the Expand pushdown, count_rows, the mutation + // delete path) migrate in follow-up MRs. + let filter_expr = build_lance_filter_expr(filters, params); // Blob columns must be excluded from scan when a filter is present // (Lance bug: BlobsDescriptions + filter triggers a projection assertion). @@ -1056,10 +1082,15 @@ async fn execute_node_scan( let batches = crate::table_store::TableStore::scan_stream_with( &ds, projection, - filter_sql.as_deref(), + None, None, false, |scanner| { + // Apply the structured IR filter via Lance's Expr pushdown. + if let Some(ref expr) = filter_expr { + scanner.filter_expr(expr.clone()); + } + // Apply FTS queries from hoisted search filters (search/fuzzy/match_text in match clause) for filter in filters { if is_search_filter(filter) { @@ -1288,23 +1319,159 @@ pub(super) fn literal_to_sql(lit: &Literal) -> String { } } +// --------------------------------------------------------------------------- +// Structured DataFusion-Expr pushdown +// +// Parallel to the `ir_*_to_sql` family above, these helpers lower the same +// IR filter shapes to `datafusion::prelude::Expr` so we can call +// `Scanner::filter_expr(Expr)` instead of `Scanner::filter(&str)`. The +// structured form unlocks two things the string path could not express: +// +// 1. `CompOp::Contains` against list-typed columns (lowered to +// `array_has(col, value)` — requires the `nested_expressions` +// feature on the `datafusion` crate, enabled in the workspace). +// 2. 
Optimizer rules in DataFusion 53 that act on `Expr` shapes +// (vectorized `IN`-list eq kernel, `PhysicalExprSimplifier`, the +// `CASE WHEN x THEN y ELSE NULL` shortcut, etc.). +// +// Search predicates (`is_search_filter`) are still handled separately via +// `scanner.full_text_search(...)`, not via filter_expr — they stay None +// here just like in `ir_filter_to_sql`. The `literal_to_sql` path remains +// because the mutation/update layer (`exec/mutation.rs`) still produces +// SQL strings for `Dataset::delete(&str)`; that migration is MR-A's +// territory (Lance #6658 + delete two-phase). + +/// Convert IR filters to a single DataFusion `Expr` (AND-joined), or +/// `None` if no filter is pushable. +pub(super) fn build_lance_filter_expr( + filters: &[IRFilter], + params: &ParamMap, +) -> Option { + use datafusion::logical_expr::Operator; + use datafusion::prelude::Expr; + + let mut acc: Option = None; + for f in filters { + let Some(e) = ir_filter_to_expr(f, params) else { + continue; + }; + acc = Some(match acc { + None => e, + Some(prev) => Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr::new( + Box::new(prev), + Operator::And, + Box::new(e), + )), + }); + } + acc +} + +/// Convert a single IR filter to a DataFusion `Expr`. Returns `None` for +/// search-mode filters (handled via `scanner.full_text_search`) or any +/// expression shape we can't pushdown. +pub(super) fn ir_filter_to_expr( + filter: &IRFilter, + params: &ParamMap, +) -> Option { + use datafusion::functions_nested::expr_fn::array_has; + + if is_search_filter(filter) { + return None; + } + + // List-contains: `prop CONTAINS value` lowers to `array_has(prop, value)`. + // This is the case `ir_filter_to_sql` had to return None for ("Can't + // pushdown list contains"); with structured Expr it pushes down fine. 
+ if matches!(filter.op, CompOp::Contains) { + let left = ir_expr_to_expr(&filter.left, params)?; + let right = ir_expr_to_expr(&filter.right, params)?; + return Some(array_has(left, right)); + } + + let left = ir_expr_to_expr(&filter.left, params)?; + let right = ir_expr_to_expr(&filter.right, params)?; + Some(match filter.op { + CompOp::Eq => left.eq(right), + CompOp::Ne => left.not_eq(right), + CompOp::Gt => left.gt(right), + CompOp::Lt => left.lt(right), + CompOp::Ge => left.gt_eq(right), + CompOp::Le => left.lt_eq(right), + CompOp::Contains => unreachable!("handled above"), + }) +} + +/// Convert an IR expression to a DataFusion `Expr`. Returns `None` for +/// shapes we don't support in pushdown (search funcs, RRF, aggregates, +/// variable refs that aren't a property access). +pub(super) fn ir_expr_to_expr( + expr: &IRExpr, + params: &ParamMap, +) -> Option { + use datafusion::prelude::{col, lit}; + match expr { + IRExpr::PropAccess { property, .. } => Some(col(property)), + IRExpr::Literal(l) => literal_to_expr(l), + IRExpr::Param(name) => params.get(name).and_then(literal_to_expr), + _ => None, + } +} + +/// Convert a Literal to a DataFusion `Expr`. Returns `None` for List +/// (which the existing SQL path also can't pushdown — falls through to +/// post-scan in-memory application). +fn literal_to_expr(lit: &Literal) -> Option { + use datafusion::prelude::lit as df_lit; + Some(match lit { + Literal::Null => df_lit(datafusion::scalar::ScalarValue::Null), + Literal::String(s) => df_lit(s.clone()), + Literal::Integer(n) => df_lit(*n), + Literal::Float(f) => df_lit(*f), + Literal::Bool(b) => df_lit(*b), + // Date/DateTime stored as strings; pass through as string literals + // — Lance/DataFusion handles the comparison against typed columns + // via implicit cast, matching the existing string-SQL behavior. 
+ Literal::Date(s) => df_lit(s.clone()), + Literal::DateTime(s) => df_lit(s.clone()), + Literal::List(_) => return None, + }) +} + fn prefix_batch(batch: &RecordBatch, variable: &str) -> Result { - let fields: Vec = batch.schema().fields().iter().map(|f| { - Field::new(format!("{}.{}", variable, f.name()), f.data_type().clone(), f.is_nullable()) - }).collect(); + let fields: Vec = batch + .schema() + .fields() + .iter() + .map(|f| { + Field::new( + format!("{}.{}", variable, f.name()), + f.data_type().clone(), + f.is_nullable(), + ) + }) + .collect(); let schema = Arc::new(Schema::new(fields)); - RecordBatch::try_new(schema, batch.columns().to_vec()).map_err(|e| OmniError::Lance(e.to_string())) + RecordBatch::try_new(schema, batch.columns().to_vec()) + .map_err(|e| OmniError::Lance(e.to_string())) } fn cross_join_batches(left: &RecordBatch, right: &RecordBatch) -> Result { let n = left.num_rows(); let m = right.num_rows(); if n == 0 || m == 0 { - let mut fields: Vec = left.schema().fields().iter().map(|f| f.as_ref().clone()).collect(); + let mut fields: Vec = left + .schema() + .fields() + .iter() + .map(|f| f.as_ref().clone()) + .collect(); fields.extend(right.schema().fields().iter().map(|f| f.as_ref().clone())); return Ok(RecordBatch::new_empty(Arc::new(Schema::new(fields)))); } - let left_indices: Vec = (0..n as u32).flat_map(|i| std::iter::repeat(i).take(m)).collect(); + let left_indices: Vec = (0..n as u32) + .flat_map(|i| std::iter::repeat(i).take(m)) + .collect(); let right_indices: Vec = (0..n).flat_map(|_| 0..m as u32).collect(); let left_expanded = take_batch(left, &UInt32Array::from(left_indices))?; let right_expanded = take_batch(right, &UInt32Array::from(right_indices))?; @@ -1312,23 +1479,39 @@ fn cross_join_batches(left: &RecordBatch, right: &RecordBatch) -> Result Result { - let mut fields: Vec = left.schema().fields().iter().map(|f| f.as_ref().clone()).collect(); + let mut fields: Vec = left + .schema() + .fields() + .iter() + .map(|f| 
f.as_ref().clone()) + .collect(); if cfg!(debug_assertions) { let left_schema = left.schema(); - let left_names: HashSet<&str> = left_schema.fields().iter().map(|f| f.name().as_str()).collect(); + let left_names: HashSet<&str> = left_schema + .fields() + .iter() + .map(|f| f.name().as_str()) + .collect(); let right_schema = right.schema(); for f in right_schema.fields() { - debug_assert!(!left_names.contains(f.name().as_str()), "hconcat_batches: duplicate column '{}'", f.name()); + debug_assert!( + !left_names.contains(f.name().as_str()), + "hconcat_batches: duplicate column '{}'", + f.name() + ); } } fields.extend(right.schema().fields().iter().map(|f| f.as_ref().clone())); let mut columns: Vec = left.columns().to_vec(); columns.extend(right.columns().to_vec()); - RecordBatch::try_new(Arc::new(Schema::new(fields)), columns).map_err(|e| OmniError::Lance(e.to_string())) + RecordBatch::try_new(Arc::new(Schema::new(fields)), columns) + .map_err(|e| OmniError::Lance(e.to_string())) } fn take_batch(batch: &RecordBatch, indices: &UInt32Array) -> Result { - let columns: Vec = batch.columns().iter() + let columns: Vec = batch + .columns() + .iter() .map(|col| arrow_select::take::take(col.as_ref(), indices, None)) .collect::, _>>() .map_err(|e| OmniError::Lance(e.to_string()))?; diff --git a/crates/omnigraph/src/exec/staging.rs b/crates/omnigraph/src/exec/staging.rs index ad39bc0..0d26fd3 100644 --- a/crates/omnigraph/src/exec/staging.rs +++ b/crates/omnigraph/src/exec/staging.rs @@ -26,10 +26,10 @@ use arrow_schema::SchemaRef; use lance::Dataset; use omnigraph_compiler::catalog::EdgeType; -use crate::db::{MutationOpKind, SubTableUpdate}; use crate::db::manifest::{ - new_sidecar, write_sidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin, + RecoverySidecarHandle, SidecarKind, SidecarTablePin, new_sidecar, write_sidecar, }; +use crate::db::{MutationOpKind, SubTableUpdate}; use crate::error::{OmniError, Result}; /// Whether the per-table accumulator should commit via 
`stage_append` @@ -119,10 +119,12 @@ impl MutationStaging { expected_version: u64, op_kind: MutationOpKind, ) { - self.paths.entry(table_key.to_string()).or_insert(StagedTablePath { - full_path, - table_branch, - }); + self.paths + .entry(table_key.to_string()) + .or_insert(StagedTablePath { + full_path, + table_branch, + }); self.expected_versions .entry(table_key.to_string()) .or_insert(expected_version); @@ -202,7 +204,8 @@ impl MutationStaging { /// Record a delete that already inline-committed at the Lance layer. pub(crate) fn record_inline(&mut self, update: SubTableUpdate) { - self.inline_committed.insert(update.table_key.clone(), update); + self.inline_committed + .insert(update.table_key.clone(), update); } /// Read-your-writes accessor: the accumulated pending batches for @@ -308,18 +311,13 @@ impl MutationStaging { // mode is exempt because no-key node and edge inserts use // ULID-generated ids that are unique within a query. let combined = match table.mode { - PendingMode::Merge => { - dedupe_merge_batches_by_id(&table.schema, table.batches)? - } + PendingMode::Merge => dedupe_merge_batches_by_id(&table.schema, table.batches)?, PendingMode::Append => { if table.batches.len() == 1 { table.batches.into_iter().next().unwrap() } else { - arrow_select::concat::concat_batches( - &table.schema, - &table.batches, - ) - .map_err(|e| OmniError::Lance(e.to_string()))? + arrow_select::concat::concat_batches(&table.schema, &table.batches) + .map_err(|e| OmniError::Lance(e.to_string()))? } } }; @@ -327,9 +325,7 @@ impl MutationStaging { // Stage produces uncommitted fragments + transaction. No // Lance HEAD advance until `commit_all` runs `commit_staged`. let staged = match table.mode { - PendingMode::Append => { - db.table_store().stage_append(&ds, combined, &[]).await? 
- } + PendingMode::Append => db.table_store().stage_append(&ds, combined, &[]).await?, PendingMode::Merge => { db.table_store() .stage_merge_insert( @@ -420,7 +416,7 @@ impl StagedMutation { /// /// Revalidation: between `stage_all` and `commit_all`, another /// writer (in the same process or another process sharing the - /// repo) may have committed to one of our touched tables, advancing + /// graph) may have committed to one of our touched tables, advancing /// the manifest pin past our `expected_version`. We revalidate /// under the queue and fail-fast with `manifest_conflict` before /// any `commit_staged` so the orphaned uncommitted fragments stay @@ -462,9 +458,8 @@ impl StagedMutation { // from interleaving between our delete and our publish, which // would otherwise leave a Lance-HEAD-ahead residual the // delete-only sidecar (added below) would have to recover. - let mut queue_keys: Vec<(String, Option)> = Vec::with_capacity( - staged.len() + inline_committed.len(), - ); + let mut queue_keys: Vec<(String, Option)> = + Vec::with_capacity(staged.len() + inline_committed.len()); for entry in &staged { queue_keys.push((entry.table_key.clone(), entry.path.table_branch.clone())); } @@ -565,9 +560,8 @@ impl StagedMutation { // Finding 3 hazard: delete-only mutations would otherwise skip // the sidecar, leaving any commit→publish residual unreachable // by recovery. - let mut pins: Vec = Vec::with_capacity( - staged.len() + inline_committed.len(), - ); + let mut pins: Vec = + Vec::with_capacity(staged.len() + inline_committed.len()); for entry in &staged { pins.push(SidecarTablePin { table_key: entry.table_key.clone(), @@ -899,10 +893,7 @@ pub(crate) async fn count_src_per_edge( /// Count pending edges per `src` with NO dedup. Correct when caller /// guarantees pending rows have unique primary keys (engine inserts via /// fresh ULID; loader Append mode). 
-fn count_pending_src_naive( - pending_batches: &[RecordBatch], - counts: &mut HashMap, -) { +fn count_pending_src_naive(pending_batches: &[RecordBatch], counts: &mut HashMap) { for batch in pending_batches { let Some(col) = batch.column_by_name("src") else { continue; @@ -947,12 +938,15 @@ fn count_pending_src_with_dedupe( dedupe_key_column ))); }; - let key_arr = key_col.as_any().downcast_ref::().ok_or_else(|| { - OmniError::Lance(format!( - "count_src_per_edge: pending '{}' column is not Utf8", - dedupe_key_column - )) - })?; + let key_arr = key_col + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OmniError::Lance(format!( + "count_src_per_edge: pending '{}' column is not Utf8", + dedupe_key_column + )) + })?; let src_arr = batch .column_by_name("src") .and_then(|c| c.as_any().downcast_ref::()); diff --git a/crates/omnigraph/src/lib.rs b/crates/omnigraph/src/lib.rs index d781096..ff0b3d6 100644 --- a/crates/omnigraph/src/lib.rs +++ b/crates/omnigraph/src/lib.rs @@ -1,3 +1,12 @@ +// Lance 6's trait surface (heavier futures/streams nesting around the +// staged-write API in `storage_layer.rs`) pushes us past the default +// trait-resolution recursion limit of 128 on Linux builds. Raising to +// 256 here is the upstream-suggested fix from rustc itself +// ("consider increasing the recursion limit"). macOS happens to short- +// circuit before tripping the limit; CI on Linux does not. Revisit if +// future Lance bumps stop needing this. +#![recursion_limit = "256"] + pub mod changes; pub mod db; pub mod embedding; diff --git a/crates/omnigraph/src/loader/mod.rs b/crates/omnigraph/src/loader/mod.rs index a795f28..cade1f4 100644 --- a/crates/omnigraph/src/loader/mod.rs +++ b/crates/omnigraph/src/loader/mod.rs @@ -90,6 +90,18 @@ impl Omnigraph { mode: LoadMode, actor_id: Option<&str>, ) -> Result { + // Engine-layer policy gate (MR-722 fan-out / PR #3). Scope is + // `Branch(branch)` for the data-write portion. 
If ingest creates + // a new branch as a side-effect (target branch doesn't exist), + // the inner `branch_create_from_as` call below additionally + // checks `BranchCreate` — both authorities are genuinely needed + // for "ingest into a fresh branch", so the layered check is + // correct, not redundant. + self.enforce( + omnigraph_policy::PolicyAction::Change, + &omnigraph_policy::ResourceScope::Branch(branch.to_string()), + actor_id, + )?; self.ingest_with_current_actor(branch, from, data, mode, actor_id) .await } @@ -135,8 +147,18 @@ impl Omnigraph { .iter() .any(|name| name == &target_branch); if branch_created { - self.branch_create_from(crate::db::ReadTarget::branch(&base_branch), &target_branch) - .await?; + // Thread the actor through to the implicit BranchCreate so + // policy decisions match what an explicit `branch_create_from_as` + // call would see. Calling the no-actor variant here would + // bypass BranchCreate enforcement when policy is installed — + // the footgun guard catches that case too, but threading is + // the correct fix. + self.branch_create_from_as( + crate::db::ReadTarget::branch(&base_branch), + &target_branch, + actor_id, + ) + .await?; } let result = self.load_as(&target_branch, data, mode, actor_id).await?; @@ -160,6 +182,17 @@ impl Omnigraph { mode: LoadMode, actor_id: Option<&str>, ) -> Result { + // Engine-layer policy gate (MR-722 fan-out / PR #3). Scope is + // `Branch(branch)` to match the HTTP-layer Change convention. + // `ingest_as` also calls `load_as` after enforcing its own + // Change gate — that double-check is fine because both gates + // resolve to identical Cedar decisions for the same actor + + // branch (the second check is a structurally-correct no-op). 
+ self.enforce( + omnigraph_policy::PolicyAction::Change, + &omnigraph_policy::ResourceScope::Branch(branch.to_string()), + actor_id, + )?; self.ensure_schema_state_valid().await?; // Reject internal `__run__*` / system-prefixed branches at the // public write boundary. Direct-publish paths assert this @@ -179,14 +212,22 @@ impl Omnigraph { .await } - pub async fn load_file( + pub async fn load_file(&self, branch: &str, path: &str, mode: LoadMode) -> Result { + self.load_file_as(branch, path, mode, None).await + } + + /// Read a file into memory and delegate to `load_as`. Used by the + /// CLI's `omnigraph load` so file-path-based writes flow through + /// the same engine-layer policy gate as in-memory `load_as` calls. + pub async fn load_file_as( &self, branch: &str, path: &str, mode: LoadMode, + actor_id: Option<&str>, ) -> Result { let data = std::fs::read_to_string(path).map_err(|e| OmniError::Io(e))?; - self.load(branch, &data, mode).await + self.load_as(branch, &data, mode, actor_id).await } async fn load_direct_on_branch( @@ -411,13 +452,7 @@ async fn load_jsonl_reader( for (edge_name, rows) in &edge_rows { let edge_type = &catalog.edge_types[edge_name]; let from_ids = if use_staging { - collect_node_ids_with_pending( - db, - branch, - &edge_type.from_type, - &staging, - ) - .await? + collect_node_ids_with_pending(db, branch, &edge_type.from_type, &staging).await? } else { collect_node_ids( db, @@ -430,13 +465,7 @@ async fn load_jsonl_reader( .await? }; let to_ids = if use_staging { - collect_node_ids_with_pending( - db, - branch, - &edge_type.to_type, - &staging, - ) - .await? + collect_node_ids_with_pending(db, branch, &edge_type.to_type, &staging).await? 
} else { collect_node_ids( db, @@ -535,12 +564,7 @@ async fn load_jsonl_reader( let table_key = format!("edge:{}", edge_name); if use_staging { validate_edge_cardinality_with_pending_loader( - db, - branch, - edge_type, - &table_key, - &staging, - mode, + db, branch, edge_type, &table_key, &staging, mode, ) .await?; } else if let Some(update) = overwrite_updates.iter().find(|u| u.table_key == table_key) { @@ -1653,8 +1677,7 @@ async fn validate_edge_cardinality_with_pending_loader( LoadMode::Append | LoadMode::Overwrite => None, }; let counts = - crate::exec::staging::count_src_per_edge(db, &ds, table_key, staging, dedupe_key) - .await?; + crate::exec::staging::count_src_per_edge(db, &ds, table_key, staging, dedupe_key).await?; crate::exec::staging::enforce_cardinality_bounds(edge_type, &counts) } diff --git a/crates/omnigraph/src/storage.rs b/crates/omnigraph/src/storage.rs index 5d2e568..564b577 100644 --- a/crates/omnigraph/src/storage.rs +++ b/crates/omnigraph/src/storage.rs @@ -7,7 +7,8 @@ use async_trait::async_trait; use futures::TryStreamExt; use object_store::aws::AmazonS3Builder; use object_store::path::Path as ObjectPath; -use object_store::{DynObjectStore, ObjectStore, PutPayload}; +use object_store::{DynObjectStore, ObjectStore, PutMode, PutPayload}; +use tokio::io::AsyncWriteExt; use url::Url; use crate::error::{OmniError, Result}; @@ -19,6 +20,13 @@ const S3_SCHEME_PREFIX: &str = "s3://"; pub trait StorageAdapter: Debug + Send + Sync { async fn read_text(&self, uri: &str) -> Result; async fn write_text(&self, uri: &str, contents: &str) -> Result<()>; + /// Write a text object only if no object exists at `uri`. + /// + /// Returns `Ok(true)` when this call created the object, `Ok(false)` + /// when the object already existed, and propagates every other storage + /// error. Callers use this to establish ownership before running + /// best-effort cleanup on partial failure. 
+ async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result; async fn exists(&self, uri: &str) -> Result; /// Move a file from `from_uri` to `to_uri`, replacing any existing file at /// `to_uri`. Atomic on local POSIX; on S3 implemented as copy + delete @@ -66,7 +74,7 @@ impl StorageAdapter for LocalStorageAdapter { // Ensure parent directory exists. S3 has no equivalent (PutObject // is path-agnostic). For local fs, callers like the recovery // sidecar protocol expect transparent directory creation under - // the repo root (the `__recovery/` directory doesn't pre-exist; + // the graph root (the `__recovery/` directory doesn't pre-exist; // first sidecar write creates it). if let Some(parent) = path.parent() { if !parent.as_os_str().is_empty() { @@ -77,6 +85,30 @@ impl StorageAdapter for LocalStorageAdapter { Ok(()) } + async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result { + let path = local_path_from_uri(uri)?; + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() { + tokio::fs::create_dir_all(parent).await?; + } + } + let mut file = match tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&path) + .await + { + Ok(file) => file, + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => return Ok(false), + Err(err) => return Err(err.into()), + }; + if let Err(err) = file.write_all(contents.as_bytes()).await { + let _ = tokio::fs::remove_file(&path).await; + return Err(err.into()); + } + Ok(true) + } + async fn exists(&self, uri: &str) -> Result { Ok(local_path_from_uri(uri)?.exists()) } @@ -146,6 +178,24 @@ impl StorageAdapter for S3StorageAdapter { Ok(()) } + async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result { + let location = self.object_path(uri)?; + match self + .store + .put_opts( + &location, + PutPayload::from(contents.as_bytes().to_vec()), + PutMode::Create.into(), + ) + .await + { + Ok(_) => Ok(true), + Err(object_store::Error::AlreadyExists { 
.. }) + | Err(object_store::Error::Precondition { .. }) => Ok(false), + Err(err) => Err(storage_backend_error("write_if_absent", uri, err)), + } + } + async fn exists(&self, uri: &str) -> Result { let location = self.object_path(uri)?; match self.store.head(&location).await { @@ -398,10 +448,13 @@ mod tests { #[test] fn storage_backend_selection_is_scheme_aware() { - assert_eq!(storage_kind_for_uri("/tmp/repo"), StorageKind::Local); - assert_eq!(storage_kind_for_uri("file:///tmp/repo"), StorageKind::Local); + assert_eq!(storage_kind_for_uri("/tmp/graph"), StorageKind::Local); assert_eq!( - storage_kind_for_uri("s3://omnigraph-preview/repo"), + storage_kind_for_uri("file:///tmp/graph"), + StorageKind::Local + ); + assert_eq!( + storage_kind_for_uri("s3://omnigraph-preview/graph"), StorageKind::S3 ); } @@ -440,8 +493,20 @@ mod tests { #[test] fn parse_s3_uri_splits_bucket_and_key() { - let location = parse_s3_uri("s3://bucket/repo/_schema.pg").unwrap(); + let location = parse_s3_uri("s3://bucket/graph/_schema.pg").unwrap(); assert_eq!(location.bucket, "bucket"); - assert_eq!(location.key, "repo/_schema.pg"); + assert_eq!(location.key, "graph/_schema.pg"); + } + + #[tokio::test] + async fn local_write_text_if_absent_creates_once_without_overwrite() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().join("claim.txt"); + let uri = uri.to_str().unwrap(); + let storage = LocalStorageAdapter; + + assert!(storage.write_text_if_absent(uri, "first").await.unwrap()); + assert!(!storage.write_text_if_absent(uri, "second").await.unwrap()); + assert_eq!(storage.read_text(uri).await.unwrap(), "first"); } } diff --git a/crates/omnigraph/src/storage_layer.rs b/crates/omnigraph/src/storage_layer.rs index f4fc657..dac9482 100644 --- a/crates/omnigraph/src/storage_layer.rs +++ b/crates/omnigraph/src/storage_layer.rs @@ -10,11 +10,15 @@ //! ## Transitional residuals on the trait //! //! Several inline-commit methods remain on the trait surface as -//! 
documented residuals: `delete_where` (Lance 4.0.0's `DeleteJob` is -//! `pub(crate)` — see [#6658](https://github.com/lance-format/lance/issues/6658)), +//! documented residuals: `delete_where` +//! ([#6658](https://github.com/lance-format/lance/issues/6658) closed +//! 2026-05-14, but the public `DeleteBuilder::execute_uncommitted` API +//! did not backport to the 6.x release line — it first ships in +//! `v7.0.0-beta.10`. Migration to staged two-phase delete is tracked as +//! MR-A and is gated on the Lance v7.x bump, not the current v6.0.1 pin), //! `create_vector_index` (segment-commit-path requires //! `build_index_metadata_from_segments` which is `pub(crate)` — see -//! [#6666](https://github.com/lance-format/lance/issues/6666)), and the +//! [#6666](https://github.com/lance-format/lance/issues/6666), still open), and the //! legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` / //! `create_btree_index` / `create_inverted_index` paths kept while //! engine call sites finish migrating off of them (Phase 1b / Phase 9 @@ -33,8 +37,8 @@ //! `SnapshotHandle` and `StagedHandle` wrap `lance::Dataset` and //! `StagedWrite` respectively. Their inner Lance types are //! `pub(crate)` — engine code outside `table_store` cannot reach -//! through. This is the §III.9 alignment: `lance::Dataset` does not -//! appear in trait signatures. +//! through. This aligns with the storage-boundary invariant: +//! `lance::Dataset` does not appear in trait signatures. //! //! ## Migration status (MR-793 PR #70) //! @@ -90,7 +94,9 @@ impl SnapshotHandle { /// Construct from a Lance dataset. `pub(crate)` — only /// `TableStore` should produce these. pub(crate) fn new(ds: Dataset) -> Self { - Self { inner: Arc::new(ds) } + Self { + inner: Arc::new(ds), + } } /// Borrow the underlying Lance dataset. 
`pub(crate)` so only the @@ -238,16 +244,10 @@ pub trait TableStorage: sealed::Sealed + Send + Sync + Debug { async fn scan_batches(&self, snapshot: &SnapshotHandle) -> Result>; - async fn scan_batches_for_rewrite( - &self, - snapshot: &SnapshotHandle, - ) -> Result>; + async fn scan_batches_for_rewrite(&self, snapshot: &SnapshotHandle) + -> Result>; - async fn count_rows( - &self, - snapshot: &SnapshotHandle, - filter: Option, - ) -> Result; + async fn count_rows(&self, snapshot: &SnapshotHandle, filter: Option) -> Result; async fn count_rows_with_staged( &self, @@ -280,11 +280,8 @@ pub trait TableStorage: sealed::Sealed + Send + Sync + Debug { filter: &str, ) -> Result>; - async fn table_state( - &self, - dataset_uri: &str, - snapshot: &SnapshotHandle, - ) -> Result; + async fn table_state(&self, dataset_uri: &str, snapshot: &SnapshotHandle) + -> Result; // ── Staged writes (no HEAD advance) ──────────────────────────────── @@ -561,11 +558,7 @@ impl TableStorage for TableStore { TableStore::scan_batches_for_rewrite(self, snapshot.dataset()).await } - async fn count_rows( - &self, - snapshot: &SnapshotHandle, - filter: Option, - ) -> Result { + async fn count_rows(&self, snapshot: &SnapshotHandle, filter: Option) -> Result { TableStore::count_rows(self, snapshot.dataset(), filter).await } @@ -587,14 +580,8 @@ impl TableStorage for TableStore { filter: Option<&str>, ) -> Result> { let staged_writes = staged_handles_as_writes(staged); - TableStore::scan_with_staged( - self, - snapshot.dataset(), - &staged_writes, - projection, - filter, - ) - .await + TableStore::scan_with_staged(self, snapshot.dataset(), &staged_writes, projection, filter) + .await } async fn scan_with_pending( @@ -654,18 +641,10 @@ impl TableStorage for TableStore { when_matched: WhenMatched, when_not_matched: WhenNotMatched, ) -> Result { - let ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); - TableStore::stage_merge_insert( - self, - ds, - batch, - key_columns, 
- when_matched, - when_not_matched, - ) - .await - .map(StagedHandle::new) + let ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); + TableStore::stage_merge_insert(self, ds, batch, key_columns, when_matched, when_not_matched) + .await + .map(StagedHandle::new) } async fn commit_staged( @@ -716,8 +695,7 @@ impl TableStorage for TableStore { snapshot: SnapshotHandle, batch: RecordBatch, ) -> Result<(SnapshotHandle, TableState)> { - let mut ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); + let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); let state = TableStore::append_batch(self, dataset_uri, &mut ds, batch).await?; Ok((SnapshotHandle::new(ds), state)) } @@ -731,8 +709,7 @@ impl TableStorage for TableStore { when_matched: WhenMatched, when_not_matched: WhenNotMatched, ) -> Result { - let ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); + let ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); TableStore::merge_insert_batches( self, dataset_uri, @@ -751,8 +728,7 @@ impl TableStorage for TableStore { snapshot: SnapshotHandle, batch: RecordBatch, ) -> Result<(SnapshotHandle, TableState)> { - let mut ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); + let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); let state = TableStore::overwrite_batch(self, dataset_uri, &mut ds, batch).await?; Ok((SnapshotHandle::new(ds), state)) } @@ -763,8 +739,7 @@ impl TableStorage for TableStore { snapshot: SnapshotHandle, filter: &str, ) -> Result<(SnapshotHandle, DeleteState)> { - let mut ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); + let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); let state = TableStore::delete_where(self, dataset_uri, &mut ds, filter).await?; 
Ok((SnapshotHandle::new(ds), state)) } @@ -786,8 +761,7 @@ impl TableStorage for TableStore { snapshot: SnapshotHandle, columns: &[&str], ) -> Result { - let mut ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); + let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); TableStore::create_btree_index(self, &mut ds, columns).await?; Ok(SnapshotHandle::new(ds)) } @@ -797,8 +771,7 @@ impl TableStorage for TableStore { snapshot: SnapshotHandle, column: &str, ) -> Result { - let mut ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); + let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); TableStore::create_inverted_index(self, &mut ds, column).await?; Ok(SnapshotHandle::new(ds)) } @@ -808,8 +781,7 @@ impl TableStorage for TableStore { snapshot: SnapshotHandle, column: &str, ) -> Result { - let mut ds = Arc::try_unwrap(snapshot.into_arc()) - .unwrap_or_else(|arc| (*arc).clone()); + let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone()); TableStore::create_vector_index(self, &mut ds, column).await?; Ok(SnapshotHandle::new(ds)) } @@ -833,6 +805,13 @@ impl TableStorage for TableStore { // Note: existing TableStore::scan_stream is an associated fn that // takes &Dataset, so we delegate via the dataset reference held by // the snapshot. 
- TableStore::scan_stream(snapshot.dataset(), projection, filter, order_by, with_row_id).await + TableStore::scan_stream( + snapshot.dataset(), + projection, + filter, + order_by, + with_row_id, + ) + .await } } diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index 22d8148..ddab706 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -8,15 +8,17 @@ use lance::Dataset; use lance::blob::BlobArrayBuilder; use lance::dataset::scanner::{ColumnOrdering, DatasetRecordBatchStream, Scanner}; use lance::dataset::transaction::{Operation, Transaction, TransactionBuilder}; +use lance::dataset::write::merge_insert::SourceDedupeBehavior; use lance::dataset::{ CommitBuilder, InsertBuilder, MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams, }; use lance::datatypes::BlobKind; +use lance::index::DatasetIndexExt; use lance::index::scalar::IndexDetails; use lance_file::version::LanceFileVersion; use lance_index::scalar::{InvertedIndexParams, ScalarIndexParams}; -use lance_index::{DatasetIndexExt, IndexType, is_system_index}; +use lance_index::{IndexType, is_system_index}; use lance_linalg::distance::MetricType; use lance_table::format::{Fragment, IndexMetadata, RowIdMeta}; use lance_table::rowids::{RowIdSequence, write_row_ids}; @@ -651,15 +653,58 @@ impl TableStore { return self.table_state(dataset_uri, &ds).await; } + // Precondition for the FirstSeen workaround below: every caller of + // this primitive must hand in a source batch that is unique by + // `key_columns`. Without this check, `SourceDedupeBehavior::FirstSeen` + // would silently collapse genuine duplicates instead of erroring. + check_batch_unique_by_keys(&batch, &key_columns, "merge_insert_batch")?; + // TODO(lance-upstream): MergeInsertBuilder does not accept WriteParams, // so allow_external_blob_outside_bases cannot be set here. 
External URI // blobs via merge_insert (LoadMode::Merge, mutations) are unsupported // until Lance exposes WriteParams on MergeInsertBuilder. let ds = Arc::new(ds); - let job = MergeInsertBuilder::try_new(ds, key_columns) - .map_err(|e| OmniError::Lance(e.to_string()))? - .when_matched(when_matched) - .when_not_matched(when_not_matched) + let mut builder = MergeInsertBuilder::try_new(ds, key_columns) + .map_err(|e| OmniError::Lance(e.to_string()))?; + builder.when_matched(when_matched); + builder.when_not_matched(when_not_matched); + // Workaround for a Lance 4.0.x bug class where sequential + // merge_insert calls against rows previously rewritten by + // merge_insert produce a spurious "Ambiguous merge inserts: + // multiple source rows match the same target row on (id = ...)" + // error. Lance's `processed_row_ids: Mutex>` + // (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`) + // double-processes the same source/target match against + // datasets previously rewritten by merge_insert, and the default + // `SourceDedupeBehavior::Fail` errors on the second insertion. + // `FirstSeen` makes Lance skip the duplicate match instead. + // + // Covers both observed surfaces: + // - PR #98 (sequential `load --mode merge` against same keys). + // - MR-920 (sequential `update T set {f} where x=y` on same row). + // + // Correctness-preserving for OmniGraph because every call path + // that reaches this primitive either pre-dedupes the source batch + // by id, or surfaces a real source dup via the + // `check_batch_unique_by_keys` precondition above (which fires + // before the FirstSeen setter has a chance to silently collapse + // anything): + // - Load path: `enforce_unique_constraints_intra_batch` + // (`loader/mod.rs:1453`) errors on intra-batch `@key` dups. + // - Mutate path: `MutationStaging::finalize` (`exec/staging.rs`) + // accumulates and dedupes by `id`. 
+ // - Branch-merge path: `compute_source_delta` / + // `compute_three_way_delta` (`exec/merge.rs`) walk via + // `OrderedTableCursor` and `push_row` each id at most once. + // So FirstSeen only suppresses the spurious Lance behavior, never + // user data. Pinned by `loader_rejects_intra_batch_duplicate_keys` + // in `tests/consistency.rs` plus the + // `check_batch_unique_by_keys` precondition. + // + // Retire when upstream Lance fixes the bug class. Tracked at + // MR-957; upstream: lance-format/lance#6877. + builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen); + let job = builder .try_build() .map_err(|e| OmniError::Lance(e.to_string()))?; @@ -870,11 +915,26 @@ impl TableStore { "stage_merge_insert called with empty batch".to_string(), )); } + + // Precondition for FirstSeen below. See the comment on + // `merge_insert_batch` for why this check is here, not on the caller: + // every call path that reaches stage_merge_insert (load, + // MutationStaging::finalize, branch_merge::publish_rewritten_merge_table) + // must hand in a source batch that is unique by `key_columns`. + check_batch_unique_by_keys(&batch, &key_columns, "stage_merge_insert")?; + let ds = Arc::new(ds); - let job = MergeInsertBuilder::try_new(ds, key_columns) - .map_err(|e| OmniError::Lance(e.to_string()))? - .when_matched(when_matched) - .when_not_matched(when_not_matched) + let mut builder = MergeInsertBuilder::try_new(ds, key_columns) + .map_err(|e| OmniError::Lance(e.to_string()))?; + builder.when_matched(when_matched); + builder.when_not_matched(when_not_matched); + // See `merge_insert_batch` for the FirstSeen rationale. Workaround + // for the Lance 4.0.x bug class where sequential merge_insert / + // update against rows previously rewritten by merge_insert trips + // Lance's `processed_row_ids` HashSet and errors under the default + // `SourceDedupeBehavior::Fail`. Retire when upstream Lance is fixed. 
+ builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen); + let job = builder .try_build() .map_err(|e| OmniError::Lance(e.to_string()))?; let schema = batch.schema(); @@ -1651,3 +1711,106 @@ fn combine_committed_with_staged(ds: &Dataset, staged: &[StagedWrite]) -> Vec Result<()> { + if key_columns.len() != 1 { + return Err(OmniError::manifest_internal(format!( + "{}: check_batch_unique_by_keys currently supports single-column keys only, got {:?}", + context, key_columns + ))); + } + let key_col_name = &key_columns[0]; + let column = batch.column_by_name(key_col_name).ok_or_else(|| { + OmniError::manifest_internal(format!( + "{}: source batch missing key column '{}'", + context, key_col_name + )) + })?; + let strs = column + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OmniError::manifest_internal(format!( + "{}: key column '{}' is not a StringArray (got {:?})", + context, + key_col_name, + column.data_type() + )) + })?; + + let mut seen: std::collections::HashSet<&str> = + std::collections::HashSet::with_capacity(batch.num_rows()); + for i in 0..strs.len() { + if !strs.is_valid(i) { + continue; + } + let v = strs.value(i); + if !seen.insert(v) { + return Err(OmniError::manifest(format!( + "{}: duplicate source row for key '{}' (column '{}'); \ + callers must hand in a batch unique by `key_columns` \ + — see MR-957", + context, v, key_col_name + ))); + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::StringArray; + use arrow_schema::{DataType, Field, Schema}; + + fn batch_with_ids(ids: &[&str]) -> RecordBatch { + let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)])); + let col = Arc::new(StringArray::from(ids.to_vec())) as ArrayRef; + RecordBatch::try_new(schema, vec![col]).unwrap() + } + + #[test] + fn check_batch_unique_by_keys_passes_when_all_unique() { + let batch = batch_with_ids(&["a", "b", "c"]); + check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap(); + } + + 
#[test] + fn check_batch_unique_by_keys_errors_on_duplicate_id() { + let batch = batch_with_ids(&["a", "b", "a"]); + let err = check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("duplicate source row for key 'a'"), + "unexpected error: {msg}" + ); + assert!( + msg.contains("MR-957"), + "error should reference MR-957: {msg}" + ); + } + + #[test] + fn check_batch_unique_by_keys_rejects_multi_column_keys() { + let batch = batch_with_ids(&["a"]); + let err = + check_batch_unique_by_keys(&batch, &["id".to_string(), "other".to_string()], "test") + .unwrap_err(); + assert!(err.to_string().contains("single-column keys only")); + } +} diff --git a/crates/omnigraph/tests/branching.rs b/crates/omnigraph/tests/branching.rs index 4d292f7..5a0c47d 100644 --- a/crates/omnigraph/tests/branching.rs +++ b/crates/omnigraph/tests/branching.rs @@ -4,7 +4,8 @@ use std::fs; use arrow_array::{Array, Int32Array, UInt64Array}; use futures::TryStreamExt; -use lance_index::{DatasetIndexExt, is_system_index}; +use lance::index::DatasetIndexExt; +use lance_index::is_system_index; use omnigraph::db::commit_graph::CommitGraph; use omnigraph::db::{MergeOutcome, Omnigraph, ReadTarget}; diff --git a/crates/omnigraph/tests/composite_flow.rs b/crates/omnigraph/tests/composite_flow.rs index 63ec8b2..6c720da 100644 --- a/crates/omnigraph/tests/composite_flow.rs +++ b/crates/omnigraph/tests/composite_flow.rs @@ -56,7 +56,7 @@ async fn composite_flow_canonical_lifecycle() { let uri = dir.path().to_str().unwrap(); // ───────────────────────────────────────────────────────────────── - // Step 1: init a fresh repo with the standard test schema. + // Step 1: init a fresh graph with the standard test schema. 
// ───────────────────────────────────────────────────────────────── let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); let v_init = version_branch(&db, "main").await.unwrap(); @@ -70,7 +70,9 @@ async fn composite_flow_canonical_lifecycle() { // Step 2: load JSONL seed data (Person + Company nodes, // Knows + WorksAt edges). // ───────────────────────────────────────────────────────────────── - load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Append) + .await + .unwrap(); let v_after_load = version_branch(&db, "main").await.unwrap(); assert!( v_after_load > v_init, @@ -119,19 +121,13 @@ async fn composite_flow_canonical_lifecycle() { "feature", MUTATION_QUERIES, "insert_person_and_friend", - &mixed_params( - &[("$name", "Frank"), ("$friend", "Eve")], - &[("$age", 33)], - ), + &mixed_params(&[("$name", "Frank"), ("$friend", "Eve")], &[("$age", 33)]), ) .await .expect("multi-statement insert+edge on feature"); // After: feature has 4 + Eve + Frank = 6 Persons. - let snap = db - .snapshot_of(ReadTarget::branch("feature")) - .await - .unwrap(); + let snap = db.snapshot_of(ReadTarget::branch("feature")).await.unwrap(); let person_ds = snap.open("node:Person").await.unwrap(); assert_eq!( person_ds.count_rows(None).await.unwrap(), @@ -321,14 +317,10 @@ async fn composite_flow_canonical_lifecycle() { ); // Re-run a query to verify post-optimize correctness. - let post_optimize_total = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); + let post_optimize_total = + query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); assert!( !post_optimize_total.batches().is_empty(), "queries must still work after optimize" @@ -385,14 +377,9 @@ async fn composite_flow_canonical_lifecycle() { // post-cleanup. 
Post-cleanup mutation is omitted here pending // resolution of the optimize-vs-manifest-pin interaction documented // in Step 10. - let final_total = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); + let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); assert!(!final_total.batches().is_empty()); } @@ -431,10 +418,12 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() { // Step 1: init + load on handle A. let mut db_a = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); - load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append).await.unwrap(); + load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append) + .await + .unwrap(); assert_eq!(count_rows(&db_a, "node:Person").await, 4); - // Step 2: open handle B on the same repo. B's in-memory schema_source + // Step 2: open handle B on the same graph. B's in-memory schema_source // cache is now a snapshot of `_schema.pg` at open time. let db_b = Omnigraph::open(uri).await.unwrap(); @@ -444,7 +433,7 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() { // to disk. const TEST_SCHEMA_V2: &str = "node Person {\n name: String @key\n age: I32?\n nickname: String?\n}\n\nnode Company {\n name: String @key\n}\n\nedge Knows: Person -> Person {\n since: Date?\n}\n\nedge WorksAt: Person -> Company\n"; let plan = db_a.apply_schema(TEST_SCHEMA_V2).await.unwrap(); - assert!(plan.applied, "apply_schema must succeed on a clean repo"); + assert!(plan.applied, "apply_schema must succeed on a clean graph"); assert!( !plan.steps.is_empty(), "apply_schema must record the AddProperty step" @@ -561,7 +550,9 @@ async fn composite_flow_multi_branch_sequential_merges() { // edges from test.jsonl). 
// ───────────────────────────────────────────────────────────────── let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); - load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Append) + .await + .unwrap(); assert_eq!(count_rows(&db, "node:Person").await, 4); assert_eq!(count_rows(&db, "edge:Knows").await, 3); @@ -687,10 +678,7 @@ async fn composite_flow_multi_branch_sequential_merges() { "feat-a", MUTATION_QUERIES, "insert_person_and_friend", - &mixed_params( - &[("$name", "Grace"), ("$friend", "Eve")], - &[("$age", 28)], - ), + &mixed_params(&[("$name", "Grace"), ("$friend", "Eve")], &[("$age", 28)]), ) .await .expect("insert Grace + Knows(Grace → Eve) on feat-a"); @@ -821,15 +809,14 @@ async fn composite_flow_multi_branch_sequential_merges() { // `total_people` returns count(Person) = 10. Catches regressions in // group-by/count execution against a multi-fragment table whose // current shape was produced by two sequential merges. - let total_post_merges = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); - assert_total(&total_post_merges, 10, "post both merges, main must total 10 Persons"); + let total_post_merges = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); + assert_total( + &total_post_merges, + 10, + "post both merges, main must total 10 Persons", + ); // ───────────────────────────────────────────────────────────────── // Step 14: time-travel to pre-merge-a-version. Reads must return @@ -1021,14 +1008,9 @@ async fn composite_flow_multi_branch_sequential_merges() { // correctly to disk but the reopened catalog can't bind them. 
// ───────────────────────────────────────────────────────────────── let mut db = db; - let post_reopen_total = query_main( - &mut db, - TEST_QUERIES, - "total_people", - &ParamMap::default(), - ) - .await - .unwrap(); + let post_reopen_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default()) + .await + .unwrap(); assert_total( &post_reopen_total, 10, diff --git a/crates/omnigraph/tests/consistency.rs b/crates/omnigraph/tests/consistency.rs index 63dc3f7..26517db 100644 --- a/crates/omnigraph/tests/consistency.rs +++ b/crates/omnigraph/tests/consistency.rs @@ -119,6 +119,187 @@ async fn load_merge_upserts_existing_and_inserts_new() { } } +/// Regression: two sequential `LoadMode::Merge` invocations against the +/// same set of keys must both succeed. Pre-fix, the second one failed +/// with `Ambiguous merge inserts are prohibited: multiple source rows +/// match the same target row on (id = "TEST-1")` even though every +/// source batch had one row per key. +/// +/// Triggered by Lance's `processed_row_ids: Mutex>` +/// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`) double- +/// processing the same source/target match against datasets previously +/// rewritten by merge_insert. Worked around by opting +/// `MergeInsertBuilder` into `SourceDedupeBehavior::FirstSeen` in +/// `crates/omnigraph/src/table_store.rs` — see that file for the full +/// rationale and the safety pin (`loader_rejects_intra_batch_duplicate_keys`). +/// Tracked at MR-957; upstream: lance-format/lance#6877. +#[tokio::test] +async fn load_merge_repeated_against_overlapping_keys_succeeds() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let schema = r#" +node Thing { + key: String @key + required_val: String + optional_val: String? +} +"#; + let mut db = Omnigraph::init(uri, schema).await.unwrap(); + + // Seed with 50 fully-populated rows (id + required + optional). 
+ let mut seed = String::new(); + for i in 1..=50 { + seed.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}} +"#, + )); + } + load_jsonl(&mut db, &seed, LoadMode::Overwrite) + .await + .unwrap(); + + // Partial-schema delta — mirrors the bug report exactly: omits + // `optional_val`. 25 existing keys + 5 new keys, one row per key. + let mut delta = String::new(); + for i in (1..=25).chain(51..=55) { + delta.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}} +"#, + )); + } + + load_jsonl(&mut db, &delta, LoadMode::Merge) + .await + .expect("first merge must succeed"); + assert_eq!(count_rows(&db, "node:Thing").await, 55); + + load_jsonl(&mut db, &delta, LoadMode::Merge) + .await + .expect("second merge against same keys must succeed"); + assert_eq!(count_rows(&db, "node:Thing").await, 55); +} + +/// Safety pin for the `SourceDedupeBehavior::FirstSeen` workaround in +/// `crates/omnigraph/src/table_store.rs`. FirstSeen tells Lance to +/// silently skip a duplicate source row instead of erroring. Our use of +/// it depends on user-provided duplicates being rejected *before* the +/// batch reaches Lance — otherwise FirstSeen could silently drop user +/// data. +/// +/// Defense in depth: +/// 1. The loader's `enforce_unique_constraints_intra_batch` +/// (`loader/mod.rs:1453`), invoked unconditionally on any node type +/// with a `@key`, errors on intra-batch duplicate `@key` values at +/// intake — pinned by this test across every `LoadMode`. +/// 2. The `check_batch_unique_by_keys` precondition at the top of +/// `merge_insert_batch` and `stage_merge_insert` is the final +/// fail-fast guard: even if a future caller bypasses the loader path +/// (e.g. 
branch-merge's `publish_rewritten_merge_table` builds its +/// own source batch directly), a real duplicate id reaches Lance +/// only after surfacing as an `OmniError::Manifest`, never silently +/// via FirstSeen. Pinned by the unit tests in `table_store::tests`. +#[tokio::test] +async fn loader_rejects_intra_batch_duplicate_keys() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let schema = r#" +node Thing { + key: String @key + value: String +} +"#; + let mut db = Omnigraph::init(uri, schema).await.unwrap(); + + let dupes = r#"{"type":"Thing","data":{"key":"DUP","value":"first"}} +{"type":"Thing","data":{"key":"DUP","value":"second"}} +"#; + + for mode in [LoadMode::Overwrite, LoadMode::Append, LoadMode::Merge] { + let err = load_jsonl(&mut db, dupes, mode).await.unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("@unique violation") && msg.contains("DUP"), + "load mode {mode:?} must reject intra-batch duplicate @key (got: {msg})" + ); + assert_eq!( + count_rows(&db, "node:Thing").await, + 0, + "load mode {mode:?} must not persist any rows when the batch is rejected" + ); + } +} + +/// Canary for the upstream Lance gap that the `FirstSeen` workaround +/// in `table_store.rs` masks. The bug class is "Window 2": load → +/// indices built explicitly → merge → merge. Even with the engine +/// fully aligned to the "indexes are derived state" invariant +/// (MR-848), as long as an `id` index has been built between the +/// first and second merge_insert, the Lance internal that triggers +/// the bug remains reachable. +/// +/// This test runs the Window-2 sequence under the FirstSeen workaround. +/// It is expected to pass today. If a future Lance upgrade or local +/// change makes it START failing, the workaround has lost effectiveness +/// (upstream Lance changed something, or the FirstSeen setter was +/// dropped from `table_store.rs`). 
If a future Lance upgrade fixes the +/// bug class, this test continues to pass and the FirstSeen setter can +/// be retired. +/// +/// Tracked at MR-957; upstream: lance-format/lance#6877. +#[tokio::test] +async fn load_merge_window_2_documents_upstream_lance_gap() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let schema = r#" +node Thing { + key: String @key + required_val: String + optional_val: String? +} +"#; + let mut db = Omnigraph::init(uri, schema).await.unwrap(); + + let mut seed = String::new(); + for i in 1..=50 { + seed.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}} +"#, + )); + } + load_jsonl(&mut db, &seed, LoadMode::Overwrite) + .await + .unwrap(); + + // Explicit ensure_indices between seed and the merges — the Window + // 2 trigger. The eager-build behavior (MR-583) means the BTREE on + // `id` is already present here, but calling explicitly pins the + // invariant for the post-MR-848 future where the eager build is + // gone. + db.ensure_indices().await.unwrap(); + + let mut delta = String::new(); + for i in (1..=25).chain(51..=55) { + delta.push_str(&format!( + r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}} +"#, + )); + } + + // Both merges must succeed under the FirstSeen workaround. + // `processed_row_ids` re-processes the same target row_id under + // the default `SourceDedupeBehavior::Fail`; FirstSeen tolerates it. 
+ load_jsonl(&mut db, &delta, LoadMode::Merge) + .await + .expect("first merge after ensure_indices must succeed"); + db.ensure_indices().await.unwrap(); + load_jsonl(&mut db, &delta, LoadMode::Merge).await.expect( + "second merge after ensure_indices must succeed \ + (Window 2 canary: drop the FirstSeen setter in table_store.rs \ + only when this stays green WITHOUT it)", + ); + assert_eq!(count_rows(&db, "node:Thing").await, 55); +} + #[tokio::test] async fn cross_type_traversal_deduplicates_duplicate_edges() { let dir = tempfile::tempdir().unwrap(); @@ -163,7 +344,7 @@ async fn explicit_target_query_sees_other_writer_commits_without_refresh() { let uri = dir.path().to_str().unwrap(); - // Two independent handles to the same repo + // Two independent handles to the same graph let mut db1 = Omnigraph::open(uri).await.unwrap(); let mut db2 = Omnigraph::open(uri).await.unwrap(); diff --git a/crates/omnigraph/tests/end_to_end.rs b/crates/omnigraph/tests/end_to_end.rs index 6a41830..a0fdb0e 100644 --- a/crates/omnigraph/tests/end_to_end.rs +++ b/crates/omnigraph/tests/end_to_end.rs @@ -1866,3 +1866,70 @@ async fn ensure_indices_does_not_error_on_repeated_call() { let ds = snap.open("node:Person").await.unwrap(); assert_eq!(ds.count_rows(None).await.unwrap(), 4); } + +// ─── DataFusion-Expr filter pushdown (Tier-1 follow-up to the Lance v6 bump) ── + +/// Regression for `CompOp::Contains` pushdown via `array_has` in +/// `ir_filter_to_expr`. Before the Expr-pushdown refactor, the +/// `ir_filter_to_sql` family returned `None` for list-contains (the +/// comment said *"Can't pushdown list contains"*) and the predicate was +/// applied post-scan in memory. 
With `Scanner::filter_expr(Expr)` and +/// DF's `array_has` builtin, the contains predicate now pushes down to +/// Lance — the test confirms results are correct AND the pushdown path +/// is exercised (a regression on the pushdown would land all rows in +/// the scan, then be filtered post-hoc; that still produces the right +/// count so this test pins correctness, while `lance_surface_guards.rs` +/// is the structural pin for the surface itself). +#[tokio::test] +async fn ir_filter_with_list_contains_pushes_down() { + let schema = r#" +node Doc { + slug: String @key + tags: [String] +} +"#; + let data = r#"{"type":"Doc","data":{"slug":"alpha","tags":["red","blue"]}} +{"type":"Doc","data":{"slug":"bravo","tags":["green"]}} +{"type":"Doc","data":{"slug":"charlie","tags":["red","green"]}} +{"type":"Doc","data":{"slug":"delta","tags":[]}}"#; + + let dir = tempfile::tempdir().unwrap(); + let mut db = Omnigraph::init(dir.path().to_str().unwrap(), schema) + .await + .unwrap(); + load_jsonl(&mut db, data, LoadMode::Overwrite) + .await + .unwrap(); + + let queries = r#" +query docs_with_tag($tag: String) { + match { + $d: Doc + $d.tags contains $tag + } + return { $d.slug } +} +"#; + let result = query_main( + &mut db, + queries, + "docs_with_tag", + &params(&[("$tag", "red")]), + ) + .await + .unwrap(); + + let batch = result.concat_batches().unwrap(); + let slugs = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let mut got: Vec<&str> = (0..slugs.len()).map(|i| slugs.value(i)).collect(); + got.sort(); + assert_eq!( + got, + vec!["alpha", "charlie"], + "contains-pushdown should return exactly the rows whose tags list contains 'red'" + ); +} diff --git a/crates/omnigraph/tests/failpoints.rs b/crates/omnigraph/tests/failpoints.rs index e8de05e..5ea71c5 100644 --- a/crates/omnigraph/tests/failpoints.rs +++ b/crates/omnigraph/tests/failpoints.rs @@ -66,7 +66,7 @@ async fn graph_publish_failpoint_triggers_before_commit_append() { // Atomic schema apply: schema
apply writes staging files first, then commits // the manifest, then renames staging → final. Tests below inject crashes at -// the two boundaries and assert that reopening the repo yields a consistent +// the two boundaries and assert that reopening the graph yields a consistent // state. #[tokio::test] @@ -303,14 +303,10 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() { let person_uri = node_table_uri(&uri, "Person"); { - let _pause_delete = ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause"); + let _pause_delete = + ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause"); let delete_params = helpers::params(&[("$name", "Alice")]); - let delete = db.mutate( - "main", - MUTATION_QUERIES, - "remove_person", - &delete_params, - ); + let delete = db.mutate("main", MUTATION_QUERIES, "remove_person", &delete_params); tokio::pin!(delete); let mut concurrent_update_succeeded = false; @@ -325,15 +321,18 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() { "set_age", &mixed_params(&[("$name", "Bob")], &[("$age", 26)]), ) - .await - .is_ok() + .await + .is_ok() { concurrent_update_succeeded = true; break; } tokio::time::sleep(std::time::Duration::from_millis(20)).await; } - assert!(concurrent_update_succeeded, "concurrent update must land while delete is paused"); + assert!( + concurrent_update_succeeded, + "concurrent update must land while delete is paused" + ); fail::remove("mutation.delete_node_pre_primary_delete"); let err = delete.await.unwrap_err(); @@ -464,7 +463,7 @@ async fn recovery_rolls_forward_load_on_feature_branch() { #[tokio::test] async fn recovery_rolls_forward_ensure_indices_on_feature_branch() { - use lance_index::DatasetIndexExt; + use lance::index::DatasetIndexExt; use omnigraph::loader::{LoadMode, load_jsonl}; use omnigraph::table_store::TableStore; @@ -925,13 +924,13 @@ async fn ensure_indices_stage_btree_failure_leaves_existing_tables_writable() { .expect("Person mutation 
must succeed after the failed schema apply — existing tables are not drifted"); } -fn assert_no_staging_files(repo: &std::path::Path) { +fn assert_no_staging_files(graph: &std::path::Path) { for name in [ "_schema.pg.staging", "_schema.ir.json.staging", "__schema_state.json.staging", ] { - let path = repo.join(name); + let path = graph.join(name); assert!( !path.exists(), "staging file {} still exists after recovery", @@ -1164,7 +1163,7 @@ edge WorksAt: Person -> Company // NEW schema (city column on Person, Tag node type) — not the old. // Without the schema-staging coordination, the schema-state // recovery would have deleted the staging files (because manifest - // hadn't advanced when it ran), leaving a corrupt repo with new- + // hadn't advanced when it ran), leaving a corrupt graph with new- // schema data on disk but old-schema catalog. let live_schema = std::fs::read_to_string(dir.path().join("_schema.pg")).unwrap(); assert!( @@ -1667,3 +1666,143 @@ async fn ensure_indices_phase_b_failure_does_not_leak_sidecar_when_no_work_neede "_graph_commit_recoveries.lance must NOT exist when no sidecar was processed" ); } + +// ─── MR-668 PR 2a: Omnigraph::init cleanup on partial failure ────────────── +// +// `init_with_storage` writes three schema artifacts before invoking +// `GraphCoordinator::init`. Without cleanup, a failure between any of those +// steps left orphan files behind, making the URI unusable for a retry of +// `init` (it would refuse because `_schema.pg` already exists). The tests +// below pin: on failpoint trigger at each of the three phase boundaries, +// the three schema files are removed before the error is returned. +// +// Coverage note: the third boundary (`init.after_coordinator_init`) only +// asserts cleanup of the schema files. Lance per-type directories and +// `__manifest/` are NOT cleaned up — that requires a recursive +// `StorageAdapter::delete_prefix` primitive deferred along with +// `DELETE /graphs/{id}` (MR-668 PR 2b). 
The orphan Lance directories +// after a coordinator-init-phase failure are documented as a known +// limitation. + +#[tokio::test] +async fn init_failpoint_after_schema_pg_written_cleans_up_schema_file() { + let _scenario = FailScenario::setup(); + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let _failpoint = ScopedFailPoint::new("init.after_schema_pg_written", "return"); + + let err = match Omnigraph::init(uri, helpers::TEST_SCHEMA).await { + Ok(_) => panic!("expected Omnigraph::init to fail at the configured failpoint"), + Err(e) => e, + }; + assert!( + err.to_string() + .contains("injected failpoint triggered: init.after_schema_pg_written"), + "got: {err}" + ); + + // Only `_schema.pg` was written at this phase boundary, but the + // cleanup attempts all three — `delete` treats not-found as Ok, + // so the other two deletes are no-ops. + assert!( + !dir.path().join("_schema.pg").exists(), + "_schema.pg must be cleaned up after init failure" + ); +} + +#[tokio::test] +async fn init_failpoint_after_schema_contract_written_cleans_up_all_schema_files() { + let _scenario = FailScenario::setup(); + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let _failpoint = ScopedFailPoint::new("init.after_schema_contract_written", "return"); + + let err = match Omnigraph::init(uri, helpers::TEST_SCHEMA).await { + Ok(_) => panic!("expected Omnigraph::init to fail at the configured failpoint"), + Err(e) => e, + }; + assert!( + err.to_string() + .contains("injected failpoint triggered: init.after_schema_contract_written"), + "got: {err}" + ); + + assert!( + !dir.path().join("_schema.pg").exists(), + "_schema.pg must be cleaned up" + ); + assert!( + !dir.path().join("_schema.ir.json").exists(), + "_schema.ir.json must be cleaned up" + ); + assert!( + !dir.path().join("__schema_state.json").exists(), + "__schema_state.json must be cleaned up" + ); +} + +#[tokio::test] +async fn 
init_failpoint_after_coordinator_init_cleans_up_schema_files() { + let _scenario = FailScenario::setup(); + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let _failpoint = ScopedFailPoint::new("init.after_coordinator_init", "return"); + + let err = match Omnigraph::init(uri, helpers::TEST_SCHEMA).await { + Ok(_) => panic!("expected Omnigraph::init to fail at the configured failpoint"), + Err(e) => e, + }; + assert!( + err.to_string() + .contains("injected failpoint triggered: init.after_coordinator_init"), + "got: {err}" + ); + + // Schema files are cleaned up by `best_effort_cleanup_init_artifacts`. + assert!( + !dir.path().join("_schema.pg").exists(), + "_schema.pg must be cleaned up after late-phase init failure" + ); + assert!( + !dir.path().join("_schema.ir.json").exists(), + "_schema.ir.json must be cleaned up after late-phase init failure" + ); + assert!( + !dir.path().join("__schema_state.json").exists(), + "__schema_state.json must be cleaned up after late-phase init failure" + ); + + // Documented limitation: Lance per-type datasets and `__manifest/` + // created by `GraphCoordinator::init` are NOT cleaned up — recursive + // deletion requires the deferred `delete_prefix` primitive. This + // assertion does NOT check for their absence; it merely documents + // the boundary by noting we don't validate orphan directories here. + // When PR 2b lands, this test can be tightened to assert the graph + // root is fully empty. +} + +#[tokio::test] +async fn init_failpoint_returns_original_error_not_cleanup_error() { + // The cleanup is best-effort. If `storage.delete` fails (e.g. transient + // network blip on S3), the original init failpoint error must still + // surface — not be masked by a cleanup failure. This test triggers the + // failpoint and asserts the returned error references the failpoint, + // not the cleanup. 
(The cleanup currently logs via `tracing::warn`; + // we can't easily fault-inject delete failures without another seam, + // so this is a smoke test for the precedence contract.) + let _scenario = FailScenario::setup(); + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let _failpoint = ScopedFailPoint::new("init.after_schema_pg_written", "return"); + + let err = match Omnigraph::init(uri, helpers::TEST_SCHEMA).await { + Ok(_) => panic!("expected Omnigraph::init to fail at the configured failpoint"), + Err(e) => e, + }; + // Failpoint message wins; no "cleanup" substring expected. + let msg = err.to_string(); + assert!( + msg.contains("init.after_schema_pg_written"), + "init error must surface the failpoint cause, got: {msg}" + ); +} diff --git a/crates/omnigraph/tests/forbidden_apis.rs b/crates/omnigraph/tests/forbidden_apis.rs index cc9f163..1936815 100644 --- a/crates/omnigraph/tests/forbidden_apis.rs +++ b/crates/omnigraph/tests/forbidden_apis.rs @@ -95,11 +95,11 @@ const FORBIDDEN_PATTERNS: &[&str] = &[ /// provide the staged primitives or to maintain the system tables /// (commit graph, manifest). const ALLOW_LIST_FILES: &[&str] = &[ - "table_store.rs", // The storage layer itself. - "storage_layer.rs", // The trait module. - "commit_graph.rs", // Maintains `_graph_commits.lance` system table. - "graph_coordinator.rs", // Drives the manifest publisher / branch coordinator. - "recovery_audit.rs", // Maintains `_graph_commit_recoveries.lance` (recovery audit trail). + "table_store.rs", // The storage layer itself. + "storage_layer.rs", // The trait module. + "commit_graph.rs", // Maintains `_graph_commits.lance` system table. + "graph_coordinator.rs", // Drives the manifest publisher / branch coordinator. + "recovery_audit.rs", // Maintains `_graph_commit_recoveries.lance` (recovery audit trail). ]; /// Directories exempt from the guard. 
Files under these paths may use @@ -168,10 +168,7 @@ fn engine_code_does_not_call_forbidden_lance_apis() { // comments are documentation, not code use. The trait // surface (sealed + trait-only) is the actual enforcement; // this test only catches code use. - if trimmed.starts_with("//") - || trimmed.starts_with("/*") - || trimmed.starts_with("*") - { + if trimmed.starts_with("//") || trimmed.starts_with("/*") || trimmed.starts_with("*") { continue; } // Allow lines marked with the sentinel on the SAME line or diff --git a/crates/omnigraph/tests/helpers/mod.rs b/crates/omnigraph/tests/helpers/mod.rs index e7e1efb..c97ff72 100644 --- a/crates/omnigraph/tests/helpers/mod.rs +++ b/crates/omnigraph/tests/helpers/mod.rs @@ -44,7 +44,7 @@ query insert_person_and_friend($name: String, $age: I32, $friend: String) { } "#; -/// Init a repo and load the standard test data. +/// Init a graph and load the standard test data. pub async fn init_and_load(dir: &tempfile::TempDir) -> Omnigraph { let uri = dir.path().to_str().unwrap(); let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -249,7 +249,7 @@ pub fn vector_and_string_params( map } -pub fn s3_test_repo_uri(suite: &str) -> Option { +pub fn s3_test_graph_uri(suite: &str) -> Option { let bucket = std::env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?; let prefix = std::env::var("OMNIGRAPH_S3_TEST_PREFIX") .ok() diff --git a/crates/omnigraph/tests/helpers/recovery.rs b/crates/omnigraph/tests/helpers/recovery.rs index 3a8505f..c76009e 100644 --- a/crates/omnigraph/tests/helpers/recovery.rs +++ b/crates/omnigraph/tests/helpers/recovery.rs @@ -110,8 +110,8 @@ impl FollowUpMutation { } } -pub fn single_sidecar_operation_id(repo_root: &Path) -> String { - let ids = sidecar_operation_ids(repo_root); +pub fn single_sidecar_operation_id(graph_root: &Path) -> String { + let ids = sidecar_operation_ids(graph_root); assert_eq!( ids.len(), 1, @@ -121,8 +121,8 @@ pub fn single_sidecar_operation_id(repo_root: &Path) -> String { 
ids.into_iter().next().unwrap() } -pub fn sidecar_operation_ids(repo_root: &Path) -> Vec { - let dir = repo_root.join("__recovery"); +pub fn sidecar_operation_ids(graph_root: &Path) -> Vec { + let dir = graph_root.join("__recovery"); if !dir.exists() { return Vec::new(); } @@ -143,10 +143,10 @@ pub fn sidecar_operation_ids(repo_root: &Path) -> Vec { ids } -pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result { +pub async fn branch_head_commit_id(graph_root: &Path, branch: &str) -> Result { let graph = match branch { - "main" => CommitGraph::open(&repo_uri(repo_root)).await?, - branch => CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?, + "main" => CommitGraph::open(&graph_uri(graph_root)).await?, + branch => CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?, }; graph.head_commit_id().await?.ok_or_else(|| { OmniError::manifest_internal(format!("commit graph for branch {branch} has no head")) @@ -154,52 +154,52 @@ pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result Result<()> { match expectation { RecoveryExpectation::RolledForward { tables } => { - assert_sidecar_absent(repo_root, operation_id); - let audit = read_audit_row(repo_root, operation_id).await?; + assert_sidecar_absent(graph_root, operation_id); + let audit = read_audit_row(graph_root, operation_id).await?; assert_eq!( audit.recovery_kind, "RolledForward", "audit row for {operation_id} recorded the wrong recovery_kind", ); - assert_manifest_pins_match_lance_heads(repo_root, &tables).await?; - assert_audit_to_versions_match_lance_heads(repo_root, &audit, &tables).await?; - assert_recovery_commit_shape(repo_root, &audit, &tables).await?; - assert_non_main_did_not_move_main(repo_root, &tables).await?; - assert_idempotent_reopen(repo_root, operation_id).await?; - run_follow_up_mutations(repo_root, tables).await?; + assert_manifest_pins_match_lance_heads(graph_root, &tables).await?; + 
assert_audit_to_versions_match_lance_heads(graph_root, &audit, &tables).await?; + assert_recovery_commit_shape(graph_root, &audit, &tables).await?; + assert_non_main_did_not_move_main(graph_root, &tables).await?; + assert_idempotent_reopen(graph_root, operation_id).await?; + run_follow_up_mutations(graph_root, tables).await?; } RecoveryExpectation::RolledBack { tables } => { - assert_sidecar_absent(repo_root, operation_id); - let audit = read_audit_row(repo_root, operation_id).await?; + assert_sidecar_absent(graph_root, operation_id); + let audit = read_audit_row(graph_root, operation_id).await?; assert_eq!( audit.recovery_kind, "RolledBack", "audit row for {operation_id} recorded the wrong recovery_kind", ); assert_rollback_outcomes_record_drift(&audit); - assert_recovery_commit_shape(repo_root, &audit, &tables).await?; - assert_non_main_did_not_move_main(repo_root, &tables).await?; - assert_idempotent_reopen(repo_root, operation_id).await?; - run_follow_up_mutations(repo_root, tables).await?; + assert_recovery_commit_shape(graph_root, &audit, &tables).await?; + assert_non_main_did_not_move_main(graph_root, &tables).await?; + assert_idempotent_reopen(graph_root, operation_id).await?; + run_follow_up_mutations(graph_root, tables).await?; } RecoveryExpectation::Deferred => { assert!( - sidecar_path(repo_root, operation_id).exists(), + sidecar_path(graph_root, operation_id).exists(), "deferred recovery must leave sidecar {operation_id} on disk", ); assert!( - read_audit_row(repo_root, operation_id).await.is_err(), + read_audit_row(graph_root, operation_id).await.is_err(), "deferred recovery must not record an audit row for {operation_id}", ); } RecoveryExpectation::NoOp => { - assert_sidecar_absent(repo_root, operation_id); + assert_sidecar_absent(graph_root, operation_id); assert!( - read_audit_row(repo_root, operation_id).await.is_err(), + read_audit_row(graph_root, operation_id).await.is_err(), "no-op recovery must not record an audit row for {operation_id}", ); } 
@@ -216,24 +216,24 @@ fn branch_context(tables: &[TableExpectation]) -> Option { .map(str::to_string) } -fn sidecar_path(repo_root: &Path, operation_id: &str) -> PathBuf { - repo_root +fn sidecar_path(graph_root: &Path, operation_id: &str) -> PathBuf { + graph_root .join("__recovery") .join(format!("{operation_id}.json")) } -fn assert_sidecar_absent(repo_root: &Path, operation_id: &str) { +fn assert_sidecar_absent(graph_root: &Path, operation_id: &str) { assert!( - !sidecar_path(repo_root, operation_id).exists(), + !sidecar_path(graph_root, operation_id).exists(), "recovery sidecar {operation_id} must be deleted after successful recovery", ); } async fn assert_manifest_pins_match_lance_heads( - repo_root: &Path, + graph_root: &Path, tables: &[TableExpectation], ) -> Result<()> { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let db = Omnigraph::open(&uri).await?; for table in tables { let (entry, lance_head) = entry_and_lance_head(&db, &uri, table).await?; @@ -254,11 +254,11 @@ async fn assert_manifest_pins_match_lance_heads( } async fn assert_audit_to_versions_match_lance_heads( - repo_root: &Path, + graph_root: &Path, audit: &RecoveryAuditRow, tables: &[TableExpectation], ) -> Result<()> { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let db = Omnigraph::open(&uri).await?; for table in tables { let (_, lance_head) = entry_and_lance_head(&db, &uri, table).await?; @@ -301,10 +301,10 @@ fn assert_rollback_outcomes_record_drift(audit: &RecoveryAuditRow) { } async fn assert_non_main_did_not_move_main( - repo_root: &Path, + graph_root: &Path, tables: &[TableExpectation], ) -> Result<()> { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let db = Omnigraph::open(&uri).await?; let main = db.snapshot_of(ReadTarget::branch("main")).await?; for table in tables { @@ -327,14 +327,14 @@ async fn assert_non_main_did_not_move_main( } async fn assert_recovery_commit_shape( - repo_root: &Path, + graph_root: &Path, 
audit: &RecoveryAuditRow, tables: &[TableExpectation], ) -> Result<()> { let branch = branch_context(tables); let expected_parent = expected_recovery_parent(tables)?; let branch = branch.as_deref(); - let commit = read_recovery_commit(repo_root, audit, branch).await?; + let commit = read_recovery_commit(graph_root, audit, branch).await?; assert_eq!( commit.actor_id.as_deref(), @@ -362,7 +362,7 @@ async fn assert_recovery_commit_shape( ); if let Some(branch) = branch { - let graph = CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?; + let graph = CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?; let commits = graph.load_commits().await?; let parent = commit.parent_commit_id.as_deref().ok_or_else(|| { OmniError::manifest_internal(format!( @@ -403,12 +403,12 @@ fn expected_recovery_parent(tables: &[TableExpectation]) -> Result Result<()> { - let before = matching_audit_rows(repo_root, operation_id).await?; - let uri = repo_uri(repo_root); +async fn assert_idempotent_reopen(graph_root: &Path, operation_id: &str) -> Result<()> { + let before = matching_audit_rows(graph_root, operation_id).await?; + let uri = graph_uri(graph_root); let _db = Omnigraph::open(&uri).await?; - assert_sidecar_absent(repo_root, operation_id); - let after = matching_audit_rows(repo_root, operation_id).await?; + assert_sidecar_absent(graph_root, operation_id); + let after = matching_audit_rows(graph_root, operation_id).await?; assert_eq!( after.len(), before.len(), @@ -417,14 +417,14 @@ async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Resul Ok(()) } -async fn run_follow_up_mutations(repo_root: &Path, tables: Vec) -> Result<()> { +async fn run_follow_up_mutations(graph_root: &Path, tables: Vec) -> Result<()> { let mut db: Option = None; for table in tables { let Some(mutation) = table.follow_up_mutation else { continue; }; if db.is_none() { - db = Some(Omnigraph::open(&repo_uri(repo_root)).await?); + db = 
Some(Omnigraph::open(&graph_uri(graph_root)).await?); } let db = db.as_mut().unwrap(); db.mutate( @@ -480,11 +480,11 @@ async fn lance_head_for_entry(root_uri: &str, entry: &SubTableEntry) -> Result, ) -> Result { - let uri = repo_uri(repo_root); + let uri = graph_uri(graph_root); let graph = match branch { Some(branch) => CommitGraph::open_at_branch(&uri, branch).await?, None => CommitGraph::open(&uri).await?, @@ -502,8 +502,8 @@ async fn read_recovery_commit( }) } -async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result { - let mut rows = matching_audit_rows(repo_root, operation_id).await?; +async fn read_audit_row(graph_root: &Path, operation_id: &str) -> Result { + let mut rows = matching_audit_rows(graph_root, operation_id).await?; if rows.len() != 1 { return Err(OmniError::manifest_internal(format!( "expected exactly one recovery audit row for {operation_id}, got {}", @@ -514,10 +514,10 @@ async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result Result> { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return Ok(Vec::new()); } @@ -575,6 +575,6 @@ fn string_column<'a>(batch: &'a RecordBatch, name: &str) -> Result<&'a StringArr }) } -fn repo_uri(repo_root: &Path) -> String { - repo_root.to_str().unwrap().to_string() +fn graph_uri(graph_root: &Path) -> String { + graph_root.to_str().unwrap().to_string() } diff --git a/crates/omnigraph/tests/lance_surface_guards.rs b/crates/omnigraph/tests/lance_surface_guards.rs new file mode 100644 index 0000000..b65a808 --- /dev/null +++ b/crates/omnigraph/tests/lance_surface_guards.rs @@ -0,0 +1,244 @@ +//! Lance API surface guards. +//! +//! Each guard pins a Lance API surface that OmniGraph relies on. If a future +//! Lance bump silently renames a variant, restructures a public struct, or +//! 
flips a method to async, the corresponding guard either fails to compile +//! (compile-time guards) or fails at runtime (runtime guards). The purpose +//! is to turn silent-break risks into red CI bars on the *next* Lance bump, +//! rather than into wrong-state recovery in production. +//! +//! Pair this file with `docs/dev/lance.md`'s alignment audit stanza: any +//! Lance bump runs `cargo test -p omnigraph-engine --test lance_surface_guards` +//! first as the smoke check. +//! +//! ## Compile-only guards +//! +//! Functions prefixed with `_compile_` are gated with a broad `#[allow(...)]` +//! and never called. They exist to make `cargo build -p omnigraph-engine --tests` +//! enforce the API shape. Using `unimplemented!()` as a placeholder lets type +//! inference proceed without running anything. +//! +//! ## Runtime guards +//! +//! Functions decorated `#[tokio::test]` actually run; they construct real +//! values and assert field shapes / types. + +use std::sync::Arc; + +use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray}; +use arrow_schema::{DataType, Field, Schema}; +use lance::Dataset; +use lance::dataset::builder::DatasetBuilder; +use lance::dataset::optimize::{CompactionOptions, compact_files}; +use lance::dataset::write::delete::DeleteResult; +use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams}; +use lance_file::version::LanceFileVersion; +use lance_namespace::LanceNamespace; +use lance_table::io::commit::ManifestNamingScheme; + +/// Helper: build a small fresh dataset in a tempdir. Pinned at V2_2 to match +/// production write paths (blob v2 requires V2_2; see `docs/dev/lance.md`). 
+async fn fresh_dataset(uri: &str) -> Dataset {
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Utf8, false),
+        Field::new("value", DataType::Int32, false),
+    ]));
+    let batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(StringArray::from(vec!["alice", "bob"])),
+            Arc::new(Int32Array::from(vec![1, 2])),
+        ],
+    )
+    .unwrap();
+    let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
+    let params = WriteParams {
+        mode: WriteMode::Create,
+        enable_stable_row_ids: true,
+        data_storage_version: Some(LanceFileVersion::V2_2),
+        ..Default::default()
+    };
+    Dataset::write(reader, uri, Some(params)).await.unwrap()
+}
+
+// --- Guard 1: LanceError::TooMuchWriteContention variant exists ------------
+//
+// `db/manifest/publisher.rs::map_lance_publish_error` pattern-matches on this
+// variant to surface typed `OmniError::ManifestRowLevelCasContention`. If
+// Lance renames the variant or removes the builder, this guard fails.
+
+#[tokio::test]
+async fn lance_error_too_much_write_contention_variant_exists() {
+    let err = lance::Error::too_much_write_contention("guard");
+    assert!(
+        matches!(err, lance::Error::TooMuchWriteContention { .. }),
+        "Lance::Error::TooMuchWriteContention variant missing or renamed; \
+         update db/manifest/publisher.rs::map_lance_publish_error and \
+         this guard, then re-pin docs/dev/lance.md."
+    );
+}
+
+// --- Guard 2: ManifestLocation field shape ---------------------------------
+//
+// `db/manifest/metadata.rs:84-88` reads `.path`, `.size`, `.e_tag`,
+// `.naming_scheme` off `dataset.manifest_location()`. If any field renames
+// or changes type, this guard fails to compile.
+
+#[tokio::test]
+async fn manifest_location_field_shape() {
+    let dir = tempfile::tempdir().unwrap();
+    let uri = dir.path().join("guard.lance");
+    let ds = fresh_dataset(uri.to_str().unwrap()).await;
+
+    let loc = ds.manifest_location();
+    // Explicit type bindings — these are the load-bearing assertions. If a
+    // type drifts (e.g. .size: Option<u64> → .size: u64), this fails to
+    // compile.
+    let _path: &object_store::path::Path = &loc.path;
+    let _size: Option<u64> = loc.size;
+    let _e_tag: Option<String> = loc.e_tag.clone();
+    let _scheme: ManifestNamingScheme = loc.naming_scheme;
+    // Runtime sanity — naming_scheme should produce a Debug string we use
+    // verbatim in `TableVersionMetadata::naming_scheme`.
+    assert!(!format!("{:?}", loc.naming_scheme).is_empty());
+}
+
+// --- Guard 3: checkout_version + restore async chain -----------------------
+//
+// `db/manifest/recovery.rs:505-522` chains `Dataset::open(...).await?
+// .checkout_version(N).await?.restore().await?` as the recovery rollback
+// hammer. Compile-only — never runs.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_checkout_version_then_restore_signature() -> lance::Result<()> {
+    let ds: Dataset = unimplemented!();
+    let mut ds: Dataset = ds.checkout_version(1u64).await?;
+    // `restore()` takes `&mut self` and returns `Result<()>`; the dataset
+    // mutates in place. If Lance flips this to return a fresh `Dataset`
+    // (consuming `self`), this guard fails to compile.
+    let _: () = ds.restore().await?;
+    Ok(())
+}
+
+// --- Guard 4: DatasetBuilder::from_namespace fluent chain ------------------
+//
+// `db/manifest/namespace.rs:162-174` chains
+// `DatasetBuilder::from_namespace(ns, vec![id]).await?.with_branch(...).with_version(...).load().await?`.
+// Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_dataset_builder_from_namespace_signature(
+    ns: Arc<dyn LanceNamespace>,
+) -> lance::Result<()> {
+    let builder: DatasetBuilder =
+        DatasetBuilder::from_namespace(ns, vec!["table".to_string()]).await?;
+    let builder: DatasetBuilder = builder.with_branch("b", None);
+    let builder: DatasetBuilder = builder.with_version(1u64);
+    let _ds: Dataset = builder.load().await?;
+    Ok(())
+}
+
+// --- Guard 5: MergeInsertBuilder fluent chain ------------------------------
+//
+// `db/manifest/publisher.rs:370-391` is the manifest CAS. If any method on
+// the builder renames or changes signature, the publisher silently breaks.
+// Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_merge_insert_builder_method_chain() -> lance::Result<()> {
+    use lance::dataset::MergeStats;
+
+    let ds: Arc<Dataset> = unimplemented!();
+    let job = MergeInsertBuilder::try_new(ds, vec!["object_id".to_string()])?
+        .when_matched(WhenMatched::UpdateAll)
+        .when_not_matched(WhenNotMatched::InsertAll)
+        .conflict_retries(0)
+        .use_index(false)
+        .try_build()?;
+
+    // execute_reader takes `impl StreamingWriteSource` (lance trait), which
+    // RecordBatchIterator implements. Pin the return shape
+    // `(Arc<Dataset>, MergeStats)` — the publisher's CAS loop depends on
+    // both: the new Dataset to advance HEAD, the stats for the audit row.
+    let source: RecordBatchIterator<std::vec::IntoIter<Result<RecordBatch, arrow_schema::ArrowError>>> =
+        unimplemented!();
+    let result: (Arc<Dataset>, MergeStats) = job.execute_reader(source).await?;
+    let _ds: Arc<Dataset> = result.0;
+    let _stats: MergeStats = result.1;
+    Ok(())
+}
+
+// --- Guard 6: WriteParams::default() leaves data_storage_version = None ----
+//
+// Our V2_2 pin is load-bearing for blob v2 (verified earlier this session
+// when V2_1 produced "Blob v2 requires file version >= 2.2" on 13 blob
+// tests). If Lance changes the default to pin some version itself, audit
+// every `data_storage_version: Some(LanceFileVersion::V2_2)` site.
+
+#[test]
+fn write_params_default_does_not_set_storage_version() {
+    let params = WriteParams::default();
+    assert_eq!(
+        params.data_storage_version, None,
+        "WriteParams::default().data_storage_version is no longer None; \
+         audit every explicit V2_2 pin (see rg 'LanceFileVersion::V2_2')."
+    );
+}
+
+// --- Guard 7: compact_files signature --------------------------------------
+//
+// `db/omnigraph/optimize.rs:107` calls `compact_files(&mut ds, options, None)`.
+// Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_compact_files_signature() -> lance::Result<()> {
+    let mut ds: Dataset = unimplemented!();
+    let options: CompactionOptions = CompactionOptions::default();
+    let _metrics = compact_files(&mut ds, options, None).await?;
+    Ok(())
+}
+
+// --- Guard 8: Dataset::delete returns DeleteResult { new_dataset, num_deleted_rows } ---
+//
+// `table_store.rs::delete_where` consumes both fields. When MR-A migrates
+// `delete_where` to two-phase via `DeleteBuilder::execute_uncommitted`, this
+// guard updates to pin the staged path. Compile-only.
+
+#[allow(
+    dead_code,
+    unreachable_code,
+    unused_variables,
+    unused_mut,
+    clippy::diverging_sub_expression
+)]
+async fn _compile_delete_result_field_shape() -> lance::Result<()> {
+    let mut ds: Dataset = unimplemented!();
+    let result: DeleteResult = ds.delete("x = 1").await?;
+    let _new_dataset: Arc<Dataset> = result.new_dataset;
+    let _num_deleted: u64 = result.num_deleted_rows;
+    Ok(())
+}
diff --git a/crates/omnigraph/tests/lifecycle.rs b/crates/omnigraph/tests/lifecycle.rs index d555cbe..a56a80c 100644 --- a/crates/omnigraph/tests/lifecycle.rs +++ b/crates/omnigraph/tests/lifecycle.rs @@ -2,14 +2,14 @@ mod helpers; use std::fs; -use omnigraph::db::{Omnigraph, ReadTarget}; -use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json}; +use omnigraph::db::{InitOptions, Omnigraph, ReadTarget}; use omnigraph_compiler::schema::parser::parse_schema; +use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json}; use helpers::*; #[tokio::test] -async fn init_creates_repo() { +async fn init_creates_graph() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); @@ -34,7 +34,7 @@ async fn init_creates_repo() { } #[tokio::test] -async fn open_reads_existing_repo() { +async fn open_reads_existing_graph() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); @@ -49,7 +49,7 @@ async fn open_reads_existing_repo() { } #[tokio::test] -async fn open_bootstraps_legacy_schema_state_for_main_only_repo() { +async fn open_bootstraps_legacy_schema_state_for_main_only_graph() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -64,7 +64,7 @@ async fn open_bootstraps_legacy_schema_state_for_main_only_repo() { } #[tokio::test] -async fn open_rejects_legacy_repo_with_public_branch() { +async fn open_rejects_legacy_graph_with_public_branch() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); let mut db =
Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -74,7 +74,7 @@ async fn open_rejects_legacy_repo_with_public_branch() { fs::remove_file(dir.path().join("__schema_state.json")).unwrap(); let err = match Omnigraph::open(uri).await { - Ok(_) => panic!("expected legacy repo with public branch to fail schema bootstrap"), + Ok(_) => panic!("expected legacy graph with public branch to fail schema bootstrap"), Err(err) => err, }; assert!( @@ -185,3 +185,122 @@ async fn snapshot_version_is_pinned() { assert_eq!(snap1.version(), v1); } + +/// Regression for the `Omnigraph::init` re-init footgun (MR-668 +/// follow-up): a second `init` against a URI that already holds a +/// graph must NOT modify or destroy the existing graph's schema +/// artifacts. Today's behavior is destructive either way — the +/// `write_text(_schema.pg, ...)` call at the top of +/// `init_storage_phase` overwrites the existing file before any +/// preflight, and `best_effort_cleanup_init_artifacts` will later +/// delete all three files if the inner `GraphCoordinator::init` +/// fails. Both outcomes corrupt an existing graph. +/// +/// After the fix: strict-mode `init` (no `force` flag) errors out +/// before touching any file, and the original schema artifacts +/// match their pre-attempt contents byte-for-byte. +#[tokio::test] +async fn init_on_existing_graph_uri_does_not_destroy_existing_schema() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + + // Establish the first graph and snapshot its three schema files. 
+ Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); + let original_schema_pg = fs::read_to_string(dir.path().join("_schema.pg")).unwrap(); + let original_schema_ir = fs::read_to_string(dir.path().join("_schema.ir.json")).unwrap(); + let original_schema_state = fs::read_to_string(dir.path().join("__schema_state.json")).unwrap(); + + // Attempt a re-init with a deliberately different schema so any + // overwrite would be observable in the file contents. + let different_schema = "node Other { id: String @key }\n"; + let result = Omnigraph::init(uri, different_schema).await; + + // The new init must report the conflict, not silently mutate. + assert!( + result.is_err(), + "init against an existing graph URI must error, not silently overwrite" + ); + + // The three schema files must remain present and byte-identical to + // their pre-attempt contents. + assert!( + dir.path().join("_schema.pg").exists(), + "_schema.pg must not be deleted by a failed re-init" + ); + assert!( + dir.path().join("_schema.ir.json").exists(), + "_schema.ir.json must not be deleted by a failed re-init" + ); + assert!( + dir.path().join("__schema_state.json").exists(), + "__schema_state.json must not be deleted by a failed re-init" + ); + assert_eq!( + fs::read_to_string(dir.path().join("_schema.pg")).unwrap(), + original_schema_pg, + "_schema.pg contents must be preserved when re-init is rejected" + ); + assert_eq!( + fs::read_to_string(dir.path().join("_schema.ir.json")).unwrap(), + original_schema_ir, + "_schema.ir.json contents must be preserved when re-init is rejected" + ); + assert_eq!( + fs::read_to_string(dir.path().join("__schema_state.json")).unwrap(), + original_schema_state, + "__schema_state.json contents must be preserved when re-init is rejected" + ); +} + +/// Happy-path sibling to the strict re-init regression above: +/// `InitOptions { force: true }` must skip the schema-file preflight +/// when the operator deliberately wants to recover from orphan +/// schema artifacts (e.g. 
files left behind by a failed prior init). +/// +/// Documented semantics per `InitOptions::force`: skips the preflight +/// only. Force does NOT purge existing Lance datasets or `__manifest/` +/// — that needs `StorageAdapter::delete_prefix`, which is tracked +/// separately. The realistic recovery scenario is "schema files +/// exist but Lance state doesn't," which this test reproduces. +/// +/// Without this test, a future refactor could invert the `if !force` +/// branch and silently break the operator-facing escape hatch. +#[tokio::test] +async fn init_with_force_recovers_from_orphan_schema_files() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + + // Simulate orphan schema files: write `_schema.pg` to disk + // without running a full init. The preflight will see it and + // bail in strict mode. + fs::write(dir.path().join("_schema.pg"), TEST_SCHEMA).unwrap(); + + // Strict mode refuses because `_schema.pg` exists. + let strict_err = match Omnigraph::init(uri, TEST_SCHEMA).await { + Ok(_) => panic!("strict init must refuse when orphan _schema.pg exists"), + Err(e) => e, + }; + assert!( + strict_err.to_string().contains("already initialized"), + "strict init must surface AlreadyInitialized (sanity check); got: {strict_err}" + ); + + // Force init succeeds: it skips the preflight, overwrites the + // orphan file, and proceeds to initialize Lance state (which + // didn't exist, so `GraphCoordinator::init` is unblocked). + let db = Omnigraph::init_with_options(uri, TEST_SCHEMA, InitOptions { force: true }) + .await + .expect("force init must succeed when only orphan schema files block strict init"); + + // Confirm the catalog is populated as expected — proves the + // graph is functional after force-recovery, not just that the + // call returned Ok. 
+ assert!( + db.catalog().node_types.contains_key("Person"), + "force-recovered graph must have the new catalog installed" + ); + assert!( + dir.path().join("__schema_state.json").exists(), + "force-recovered graph must have full schema state written" + ); +} diff --git a/crates/omnigraph/tests/maintenance.rs b/crates/omnigraph/tests/maintenance.rs index 6bb81f2..3c6ab30 100644 --- a/crates/omnigraph/tests/maintenance.rs +++ b/crates/omnigraph/tests/maintenance.rs @@ -1,6 +1,6 @@ // Maintenance tests: `optimize` (Lance compact_files) and `cleanup` // (Lance cleanup_old_versions) at the graph level. Covers no-op edges -// (empty repo, already-optimized repo), the policy-validation contract on +// (empty graph, already-optimized graph), the policy-validation contract on // `cleanup`, and the keep-versions cap that protects head. mod helpers; @@ -13,7 +13,7 @@ use omnigraph::loader::{LoadMode, load_jsonl}; use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load}; #[tokio::test] -async fn optimize_on_empty_repo_returns_stats_per_table_with_no_changes() { +async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() { let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); @@ -37,7 +37,7 @@ async fn optimize_after_load_then_again_is_idempotent() { // First pass may compact (load wrote real fragments). let _first = db.optimize().await.unwrap(); - // Second pass should be a no-op: already-compacted repo produces no + // Second pass should be a no-op: already-compacted graph produces no // fragments_removed / fragments_added. let second = db.optimize().await.unwrap(); for s in &second { @@ -119,7 +119,9 @@ async fn cleanup_older_than_zero_preserves_head() { // Smoke test: after aggressive cleanup, we can still read and write the // graph — head wasn't pruned. 
- load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Merge) + .await + .unwrap(); } #[tokio::test] @@ -151,6 +153,8 @@ async fn cleanup_then_optimize_preserves_rows_and_table_remains_writable() { assert_eq!(count_rows(&db, "node:Company").await, companies_before); // Table is still writable after the cleanup+optimize sequence. - load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap(); + load_jsonl(&mut db, TEST_DATA, LoadMode::Merge) + .await + .unwrap(); assert_eq!(count_rows(&db, "node:Person").await, people_before); } diff --git a/crates/omnigraph/tests/policy_engine_chassis.rs b/crates/omnigraph/tests/policy_engine_chassis.rs new file mode 100644 index 0000000..def5349 --- /dev/null +++ b/crates/omnigraph/tests/policy_engine_chassis.rs @@ -0,0 +1,423 @@ +//! Engine-layer policy enforcement (MR-722 chassis core, PR #2 + PR #3). +//! +//! These tests exercise `Omnigraph::with_policy()` + every `_as` writer +//! via the SDK directly — *no HTTP layer involved*. They're the proof +//! that engine-layer enforcement works for embedded callers and CLI +//! direct-engine writes, not just server requests. +//! +//! PR #2 wired `apply_schema_as`. PR #3 fans the same `enforce()` call +//! out to the remaining six writers — `mutate_as`, `load_as`, +//! `ingest_as`, `branch_create_as` / `branch_create_from_as`, +//! `branch_delete_as`, `branch_merge_as`. Each writer pair below +//! covers allow + deny via the engine-side gate; the allow case proves +//! the enforce call is correctly scoped (i.e. doesn't reject a legit +//! actor), the deny case proves it actually denies an unauthorized +//! actor — and both together pin the action × scope shape to match the +//! HTTP-layer authorize_request convention so engine and HTTP fire the +//! same Cedar decision. 
+
+mod helpers;
+
+use std::fs;
+use std::path::Path;
+use std::sync::Arc;
+
+use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions};
+use omnigraph::error::OmniError;
+use omnigraph::loader::LoadMode;
+use omnigraph_policy::{PolicyChecker, PolicyEngine};
+
+use helpers::*;
+
+/// Cedar policy: `act-allowed` may do every write; `act-denied` is in
+/// the known-actors set (so Cedar evaluates the policy and doesn't
+/// reject as unknown) but has no permit rule and is therefore implicitly
+/// denied for every action.
+///
+/// The rule split mirrors the per-action scope convention: Change uses
+/// `branch_scope`; SchemaApply, BranchCreate, BranchDelete, BranchMerge
+/// use `target_branch_scope` (see `PolicyAction::uses_branch_scope` and
+/// `uses_target_branch_scope` in `omnigraph-policy`).
+const POLICY_YAML: &str = r#"
+version: 1
+groups:
+  writers: [act-allowed]
+  readers: [act-denied]
+protected_branches: [main]
+rules:
+  - id: writers-data
+    allow:
+      actors: { group: writers }
+      actions: [change]
+      branch_scope: any
+  - id: writers-branches-schema
+    allow:
+      actors: { group: writers }
+      actions: [schema_apply, branch_create, branch_delete, branch_merge]
+      target_branch_scope: any
+"#;
+
+fn additive_schema() -> String {
+    helpers::TEST_SCHEMA.replace(
+        " age: I32?\n}",
+        " age: I32?\n nickname: String?\n}",
+    )
+}
+
+fn install_policy(db: Omnigraph, dir_path: &Path) -> (Omnigraph, Arc<PolicyEngine>) {
+    let policy_path = dir_path.join("policy.yaml");
+    fs::write(&policy_path, POLICY_YAML).unwrap();
+    let engine = PolicyEngine::load_graph(&policy_path, dir_path.to_str().unwrap()).unwrap();
+    let engine = Arc::new(engine);
+    let db = db.with_policy(Arc::clone(&engine) as Arc<dyn PolicyChecker>);
+    (db, engine)
+}
+
+async fn init_with_policy(dir: &tempfile::TempDir) -> (Omnigraph, Arc<PolicyEngine>) {
+    let db = init_and_load(dir).await;
+    install_policy(db, dir.path())
+}
+
+/// Variant for tests that need a pre-created feature branch (branch_delete /
+/// branch_merge setup). Create the branch BEFORE wrapping with policy so the
+/// setup itself doesn't need to satisfy BranchCreate.
+async fn init_with_policy_and_feature_branch(
+    dir: &tempfile::TempDir,
+    branch: &str,
+) -> (Omnigraph, Arc<PolicyEngine>) {
+    let db = init_and_load(dir).await;
+    db.branch_create_from(ReadTarget::branch("main"), branch)
+        .await
+        .expect("setup: create feature branch before installing policy");
+    install_policy(db, dir.path())
+}
+
+// `MUTATION_QUERIES` from helpers/mod.rs already defines `insert_person($name, $age)`
+// — reuse it rather than redefining one here, so this test exercises the
+// same surface the engine integration tests do.
+
+/// One JSONL record for `load_as` / `ingest_as` exercises.
+const ONE_PERSON_JSONL: &str = r#"{"type": "Person", "data": {"name": "Eve"}}"#;
+
+fn assert_denied<T: std::fmt::Debug>(result: std::result::Result<T, OmniError>, what: &str) {
+    match result {
+        Err(OmniError::Policy(msg)) => {
+            assert!(
+                msg.contains("denied"),
+                "{what}: expected denial message, got: {msg}"
+            );
+        }
+        Err(other) => panic!("{what}: expected OmniError::Policy, got: {other:?}"),
+        Ok(value) => panic!("{what}: expected denial, got Ok({value:?})"),
+    }
+}
+
+#[tokio::test]
+async fn apply_schema_as_denies_when_policy_rejects_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let desired = additive_schema();
+    let result = db
+        .apply_schema_as(&desired, SchemaApplyOptions::default(), Some("act-denied"))
+        .await;
+
+    match result {
+        Err(OmniError::Policy(msg)) => {
+            assert!(
+                msg.contains("denied"),
+                "expected denial message, got: {msg}"
+            );
+        }
+        Err(other) => panic!("expected OmniError::Policy, got: {other:?}"),
+        Ok(_) => panic!("expected denial — act-denied should not be able to SchemaApply"),
+    }
+}
+
+#[tokio::test]
+async fn apply_schema_as_allows_when_policy_permits_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let desired = additive_schema();
+    let result = db
+        .apply_schema_as(&desired, SchemaApplyOptions::default(), Some("act-allowed"))
+        .await
+        .expect("act-allowed should be able to SchemaApply");
+    assert!(result.applied);
+}
+
+#[tokio::test]
+async fn apply_schema_without_actor_when_policy_is_installed_denies() {
+    // MR-722 footgun guard: if a PolicyChecker is installed AND the
+    // call site forgets to pass an actor, enforce() fails hard. Silent
+    // bypass via "I forgot the actor" is exactly what the gate is
+    // here to prevent.
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let desired = additive_schema();
+    // `apply_schema(...)` is the no-actor variant — delegates to
+    // apply_schema_as with actor=None.
+    let result = db.apply_schema(&desired).await;
+
+    match result {
+        Err(OmniError::Policy(msg)) => {
+            assert!(
+                msg.contains("no actor"),
+                "expected 'no actor' message, got: {msg}"
+            );
+        }
+        Err(other) => panic!("expected OmniError::Policy('no actor ...'), got: {other:?}"),
+        Ok(_) => panic!("expected denial — policy is installed but no actor was threaded"),
+    }
+}
+
+#[tokio::test]
+async fn apply_schema_without_policy_still_works() {
+    // Baseline: when no policy is installed (the embedded/dev default),
+    // apply_schema and apply_schema_as both work regardless of whether
+    // an actor is passed. The enforce() gate is a strict no-op in this
+    // shape — proves PR #2 doesn't regress the no-policy path.
+    let dir = tempfile::tempdir().unwrap();
+    let db = init_and_load(&dir).await;
+
+    let desired = additive_schema();
+    // No-actor variant.
+    db.apply_schema(&desired)
+        .await
+        .expect("no policy → no enforcement → apply succeeds");
+}
+
+// ─── PR #3 writer fan-out ─────────────────────────────────────────────────
+//
+// One allow + one deny test per newly-wired writer. The allow case
+// proves the enforce scope is correctly shaped (i.e. doesn't reject a
+// legit actor whose policy permit matches the engine-side scope). The
+// deny case proves the gate actually fires for an unauthorized actor.
+// Footgun-guard (no-actor + policy-installed) is already proved by
+// `apply_schema_without_actor_when_policy_is_installed_denies` and
+// applies identically to every `_as` variant — duplicating it per
+// writer would be redundant.
+
+#[tokio::test]
+async fn mutate_as_denies_when_policy_rejects_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let params = mixed_params(&[("$name", "Eve")], &[("$age", 22)]);
+    let result = db
+        .mutate_as(
+            "main",
+            MUTATION_QUERIES,
+            "insert_person",
+            &params,
+            Some("act-denied"),
+        )
+        .await;
+    assert_denied(result, "mutate_as");
+}
+
+#[tokio::test]
+async fn mutate_as_allows_when_policy_permits_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let params = mixed_params(&[("$name", "Eve")], &[("$age", 22)]);
+    db.mutate_as(
+        "main",
+        MUTATION_QUERIES,
+        "insert_person",
+        &params,
+        Some("act-allowed"),
+    )
+    .await
+    .expect("act-allowed should be able to Change on main");
+}
+
+#[tokio::test]
+async fn load_as_denies_when_policy_rejects_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let result = db
+        .load_as(
+            "main",
+            ONE_PERSON_JSONL,
+            LoadMode::Merge,
+            Some("act-denied"),
+        )
+        .await;
+    assert_denied(result, "load_as");
+}
+
+#[tokio::test]
+async fn load_as_allows_when_policy_permits_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    db.load_as(
+        "main",
+        ONE_PERSON_JSONL,
+        LoadMode::Merge,
+        Some("act-allowed"),
+    )
+    .await
+    .expect("act-allowed should be able to load on main");
+}
+
+#[tokio::test]
+async fn load_file_as_denies_when_policy_rejects_actor() {
+    // `load_file_as` was added in PR #104 as the actor-aware mirror of
+    // `load_file`, used by the CLI's `omnigraph load`. Tested
+    // indirectly via CLI integration; this test closes the direct-SDK
+    // gap so a regression in the file-read path doesn't ride through
+    // unnoticed.
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+    let data_path = dir.path().join("one-person.jsonl");
+    fs::write(&data_path, ONE_PERSON_JSONL).unwrap();
+
+    let result = db
+        .load_file_as(
+            "main",
+            data_path.to_str().unwrap(),
+            LoadMode::Merge,
+            Some("act-denied"),
+        )
+        .await;
+    assert_denied(result, "load_file_as");
+}
+
+#[tokio::test]
+async fn load_file_as_allows_when_policy_permits_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+    let data_path = dir.path().join("one-person.jsonl");
+    fs::write(&data_path, ONE_PERSON_JSONL).unwrap();
+
+    db.load_file_as(
+        "main",
+        data_path.to_str().unwrap(),
+        LoadMode::Merge,
+        Some("act-allowed"),
+    )
+    .await
+    .expect("act-allowed should be able to load_file_as on main");
+}
+
+#[tokio::test]
+async fn ingest_as_denies_when_policy_rejects_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let result = db
+        .ingest_as(
+            "main",
+            Some("main"),
+            ONE_PERSON_JSONL,
+            LoadMode::Merge,
+            Some("act-denied"),
+        )
+        .await;
+    assert_denied(result, "ingest_as");
+}
+
+#[tokio::test]
+async fn ingest_as_allows_when_policy_permits_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    db.ingest_as(
+        "main",
+        Some("main"),
+        ONE_PERSON_JSONL,
+        LoadMode::Merge,
+        Some("act-allowed"),
+    )
+    .await
+    .expect("act-allowed should be able to ingest on main");
+}
+
+#[tokio::test]
+async fn branch_create_as_denies_when_policy_rejects_actor() {
+    let dir = tempfile::tempdir().unwrap();
+    let (db, _engine) = init_with_policy(&dir).await;
+
+    let result = db.branch_create_as("feature", Some("act-denied")).await;
+    assert_denied(result, "branch_create_as");
+}
+
+#[tokio::test] +async fn branch_create_as_allows_when_policy_permits_actor() { + let dir = tempfile::tempdir().unwrap(); + let (db, _engine) = init_with_policy(&dir).await; + + db.branch_create_as("feature", Some("act-allowed")) + .await + .expect("act-allowed should be able to BranchCreate"); +} + +#[tokio::test] +async fn branch_create_from_as_denies_when_policy_rejects_actor() { + let dir = tempfile::tempdir().unwrap(); + let (db, _engine) = init_with_policy(&dir).await; + + let result = db + .branch_create_from_as(ReadTarget::branch("main"), "feature", Some("act-denied")) + .await; + assert_denied(result, "branch_create_from_as"); +} + +#[tokio::test] +async fn branch_create_from_as_allows_when_policy_permits_actor() { + let dir = tempfile::tempdir().unwrap(); + let (db, _engine) = init_with_policy(&dir).await; + + db.branch_create_from_as(ReadTarget::branch("main"), "feature", Some("act-allowed")) + .await + .expect("act-allowed should be able to BranchCreate from main"); +} + +#[tokio::test] +async fn branch_delete_as_denies_when_policy_rejects_actor() { + let dir = tempfile::tempdir().unwrap(); + let (db, _engine) = init_with_policy_and_feature_branch(&dir, "feature").await; + + let result = db.branch_delete_as("feature", Some("act-denied")).await; + assert_denied(result, "branch_delete_as"); +} + +#[tokio::test] +async fn branch_delete_as_allows_when_policy_permits_actor() { + let dir = tempfile::tempdir().unwrap(); + let (db, _engine) = init_with_policy_and_feature_branch(&dir, "feature").await; + + db.branch_delete_as("feature", Some("act-allowed")) + .await + .expect("act-allowed should be able to BranchDelete"); +} + +#[tokio::test] +async fn branch_merge_as_denies_when_policy_rejects_actor() { + let dir = tempfile::tempdir().unwrap(); + let (db, _engine) = init_with_policy_and_feature_branch(&dir, "feature").await; + + let result = db + .branch_merge_as("feature", "main", Some("act-denied")) + .await; + assert_denied(result, "branch_merge_as"); +} + 
+#[tokio::test] +async fn branch_merge_as_allows_when_policy_permits_actor() { + let dir = tempfile::tempdir().unwrap(); + let (db, _engine) = init_with_policy_and_feature_branch(&dir, "feature").await; + + // No diverging writes on feature → merge is a no-op fast-forward, + // but it still goes through enforce(BranchMerge, ...). That's the + // path under test; the actual merge outcome is incidental. + db.branch_merge_as("feature", "main", Some("act-allowed")) + .await + .expect("act-allowed should be able to BranchMerge"); +} diff --git a/crates/omnigraph/tests/recovery.rs b/crates/omnigraph/tests/recovery.rs index 5ad87e8..a090178 100644 --- a/crates/omnigraph/tests/recovery.rs +++ b/crates/omnigraph/tests/recovery.rs @@ -22,16 +22,16 @@ use helpers::recovery::{RecoveryExpectation, TableExpectation, assert_post_recov const TEST_SCHEMA: &str = include_str!("fixtures/test.pg"); -fn write_sidecar_file(repo_root: &Path, operation_id: &str, json: &str) { - let dir = repo_root.join("__recovery"); +fn write_sidecar_file(graph_root: &Path, operation_id: &str, json: &str) { + let dir = graph_root.join("__recovery"); if !dir.exists() { std::fs::create_dir(&dir).unwrap(); } std::fs::write(dir.join(format!("{}.json", operation_id)), json).unwrap(); } -fn list_recovery_dir(repo_root: &Path) -> Vec { - let dir = repo_root.join("__recovery"); +fn list_recovery_dir(graph_root: &Path) -> Vec { + let dir = graph_root.join("__recovery"); if !dir.exists() { return Vec::new(); } @@ -41,7 +41,7 @@ fn list_recovery_dir(repo_root: &Path) -> Vec { .collect() } -/// Full URI of a node-type Lance dataset under a fresh Omnigraph repo. +/// Full URI of a node-type Lance dataset under a fresh Omnigraph graph. /// Mirrors the `nodes/{fnv1a64-hex(type_name)}` layout in `db/manifest/layout.rs`. 
fn node_table_uri(root: &str, type_name: &str) -> String { let h: u64 = fnv1a64(type_name.as_bytes()); @@ -283,8 +283,8 @@ async fn recovery_rolls_back_synthetic_drift_on_open() { // ===================================================================== /// Helper: count rows in `_graph_commit_recoveries.lance` at the given root. -async fn count_recovery_audit_rows(repo_root: &Path) -> usize { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); +async fn count_recovery_audit_rows(graph_root: &Path) -> usize { + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return 0; } @@ -306,9 +306,9 @@ async fn count_recovery_audit_rows(repo_root: &Path) -> usize { /// Helper: read the most recent recovery audit row's `recovery_kind`, /// `recovery_for_actor`, and `operation_id`. Returns `None` if no rows. async fn read_latest_recovery_audit( - repo_root: &Path, + graph_root: &Path, ) -> Option<(String, Option, String, String)> { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return None; } @@ -357,8 +357,8 @@ async fn read_latest_recovery_audit( /// storage order (multiple batches concatenated). Used by the /// multi-sidecar fresh-snapshot test as a diagnostic alongside the /// post-recovery Lance HEAD assertion. -async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec { - let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance"); +async fn list_recovery_audit_kinds(graph_root: &Path) -> Vec { + let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance"); if !recoveries_dir.exists() { return Vec::new(); } @@ -391,8 +391,8 @@ async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec { } /// Helper: count `_graph_commits.lance` rows tagged with the recovery actor. 
-async fn count_recovery_actor_commits(repo_root: &Path) -> usize { - let actors_dir = repo_root.join("_graph_commit_actors.lance"); +async fn count_recovery_actor_commits(graph_root: &Path) -> usize { + let actors_dir = graph_root.join("_graph_commit_actors.lance"); if !actors_dir.exists() { return 0; } @@ -908,7 +908,7 @@ async fn recovery_ensure_indices_steady_state_no_sidecar() { /// ran) and rolls back any sibling table's legitimate index work. /// /// Integration verification: after a real init + ensure_indices on a -/// repo where every table is empty, the recovery sweep must complete +/// graph where every table is empty, the recovery sweep must complete /// cleanly (no leftover sidecar) AND the next ensure_indices must also /// leave no sidecar — proving the empty-table-scoping behavior lets /// steady-state runs incur zero sidecar I/O. The @@ -930,7 +930,7 @@ async fn recovery_ensure_indices_handles_empty_tables() { db.ensure_indices().await.unwrap(); assert!( list_recovery_dir(dir.path()).is_empty(), - "ensure_indices on an all-empty repo must not leave a sidecar" + "ensure_indices on an all-empty graph must not leave a sidecar" ); // Reopen + ensure_indices — still steady state, still no sidecar. 
drop(db); @@ -938,7 +938,7 @@ async fn recovery_ensure_indices_handles_empty_tables() { db.ensure_indices().await.unwrap(); assert!( list_recovery_dir(dir.path()).is_empty(), - "second ensure_indices on an all-empty repo must also not leave a sidecar" + "second ensure_indices on an all-empty graph must also not leave a sidecar" ); } diff --git a/crates/omnigraph/tests/runs.rs b/crates/omnigraph/tests/runs.rs index 4e363bf..cfff3fc 100644 --- a/crates/omnigraph/tests/runs.rs +++ b/crates/omnigraph/tests/runs.rs @@ -127,10 +127,7 @@ async fn multi_statement_mutation_is_atomic_with_read_your_writes() { "main", MUTATION_QUERIES, "insert_person_and_friend", - &mixed_params( - &[("$name", "Eve"), ("$friend", "Alice")], - &[("$age", 22)], - ), + &mixed_params(&[("$name", "Eve"), ("$friend", "Alice")], &[("$age", 22)]), ) .await .unwrap(); @@ -187,10 +184,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() { "main", MUTATION_QUERIES, "insert_person_and_friend", - &mixed_params( - &[("$name", "Eve"), ("$friend", "Missing")], - &[("$age", 22)], - ), + &mixed_params(&[("$name", "Eve"), ("$friend", "Missing")], &[("$age", 22)]), ) .await .expect_err("op-2 must fail"); @@ -521,6 +515,10 @@ query delete_two_persons($first: String, $second: String) { delete Person where name = $first delete Person where name = $second } + +query update_age_by_name($name: String, $age: I32) { + update Person set { age: $age } where name = $name +} "#; /// D₂: a query mixing inserts/updates with deletes is rejected at parse @@ -539,10 +537,7 @@ async fn mutation_rejects_mixed_insert_and_delete_at_parse_time() { "main", STAGED_QUERIES, "mixed_insert_and_delete", - &mixed_params( - &[("$name", "Eve"), ("$victim", "Alice")], - &[("$age", 22)], - ), + &mixed_params(&[("$name", "Eve"), ("$victim", "Alice")], &[("$age", 22)]), ) .await .expect_err("D₂ must reject mixed insert+delete"); @@ -555,7 +550,9 @@ async fn mutation_rejects_mixed_insert_and_delete_at_parse_time() { 
manifest_err.message, ); assert!( - manifest_err.message.contains("split into separate mutations"), + manifest_err + .message + .contains("split into separate mutations"), "error message should direct user to split: {}", manifest_err.message, ); @@ -664,11 +661,7 @@ async fn multiple_appends_to_same_edge_coalesce_to_one_append() { "main", STAGED_QUERIES, "insert_two_friends", - ¶ms(&[ - ("$from", "Alice"), - ("$a", "Bob"), - ("$b", "Eve"), - ]), + ¶ms(&[("$from", "Alice"), ("$a", "Bob"), ("$b", "Eve")]), ) .await .unwrap(); @@ -778,8 +771,14 @@ async fn load_with_bad_edge_reference_unblocks_next_load() { // No write made it to disk: counts unchanged. let mid_persons = count_rows(&db, "node:Person").await; let mid_edges = count_rows(&db, "edge:Knows").await; - assert_eq!(mid_persons, pre_persons, "failed load must not advance Person count"); - assert_eq!(mid_edges, pre_edges, "failed load must not advance Knows count"); + assert_eq!( + mid_persons, pre_persons, + "failed load must not advance Person count" + ); + assert_eq!( + mid_edges, pre_edges, + "failed load must not advance Knows count" + ); // Second load against the same tables — succeeds (no HEAD drift). let good = r#"{"type": "Person", "data": {"name": "Pat", "age": 55}}"#; @@ -820,7 +819,9 @@ edge WorksAt: Person -> Company @card(0..1) {"type": "Company", "data": {"name": "Acme"}} {"type": "Company", "data": {"name": "Bigco"}} "#; - load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap(); + load_jsonl(&mut db, seed, LoadMode::Overwrite) + .await + .unwrap(); let pre_works = count_rows(&db, "edge:WorksAt").await; @@ -1010,7 +1011,10 @@ query cascade_then_explicit($name: String, $other: String) { // — Bob→Diana would survive. The exact-count check makes both ops // independently observable. 
let pre_knows = count_rows(&db, "edge:Knows").await; - assert_eq!(pre_knows, 3, "fixture invariant: TEST_DATA seeds 3 Knows edges"); + assert_eq!( + pre_knows, 3, + "fixture invariant: TEST_DATA seeds 3 Knows edges" + ); db.mutate( "main", @@ -1062,7 +1066,9 @@ query add_friend($from: String, $to: String) { let seed = r#"{"type": "Person", "data": {"name": "Alice"}} {"type": "Person", "data": {"name": "Bob"}} "#; - load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap(); + load_jsonl(&mut db, seed, LoadMode::Overwrite) + .await + .unwrap(); // Single insert: count=1 < min=2 → reject with clear message. let err = db @@ -1078,8 +1084,7 @@ query add_friend($from: String, $to: String) { panic!("expected Manifest error, got {err:?}"); }; assert!( - manifest_err.message.contains("@card violation") - && manifest_err.message.contains("min 2"), + manifest_err.message.contains("@card violation") && manifest_err.message.contains("min 2"), "unexpected error: {}", manifest_err.message, ); @@ -1117,7 +1122,9 @@ edge WorksAt: Person -> Company @card(0..1) {"type": "Company", "data": {"name": "Bigco"}} {"edge": "WorksAt", "from": "Alice", "to": "Acme", "data": {"id": "w1"}} "#; - load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap(); + load_jsonl(&mut db, seed, LoadMode::Overwrite) + .await + .unwrap(); // Merge-update the same edge id w1 to point at Bigco. Counted naively // as union, Alice has 2 WorksAt (committed Acme + pending Bigco) which @@ -1163,7 +1170,9 @@ edge WorksAt: Person -> Company @card(0..1) {"type": "Company", "data": {"name": "Acme"}} {"type": "Company", "data": {"name": "Bigco"}} "#; - load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap(); + load_jsonl(&mut db, seed, LoadMode::Overwrite) + .await + .unwrap(); // Merge load with the SAME edge id twice — the second row supersedes // the first in the finalize-time dedupe. 
If pending-counting doesn't @@ -1360,5 +1369,95 @@ query insert_then_update_note( ) .await .unwrap(); - assert_eq!(qr.num_rows(), 0, "letter must not be visible after early error"); + assert_eq!( + qr.num_rows(), + 0, + "letter must not be visible after early error" + ); +} + +/// MR-920 regression: two sequential `update T set {f:v} where x=y` +/// invocations against the same row must both succeed. Pre-fix, the +/// second one failed with `Ambiguous merge inserts are prohibited: +/// multiple source rows match the same target row on (id = "Alice")` +/// even though the scan returned exactly one row. +/// +/// Root cause hypothesis (per MR-920): Lance's +/// `processed_row_ids: Mutex>` +/// (`src/dataset/write/merge_insert.rs:2099`) double-processes the +/// same target row_id against datasets previously rewritten by +/// merge_insert. `SourceDedupeBehavior::FirstSeen` makes Lance skip +/// rather than error. +/// +/// Companion to `consistency.rs::load_merge_repeated_against_overlapping_keys_succeeds` +/// (PR #98 / Window 1 of the bug class via the load surface). 
+#[tokio::test] +async fn second_sequential_update_on_same_row_succeeds() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + + db.mutate( + "main", + STAGED_QUERIES, + "update_age_by_name", + &mixed_params(&[("$name", "Alice")], &[("$age", 99)]), + ) + .await + .expect("first sequential update on Alice must succeed"); + + let batches = read_table(&db, "node:Person").await; + let alice_count: usize = batches + .iter() + .map(|b| { + let names = b + .column_by_name("name") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + (0..b.num_rows()) + .filter(|i| names.is_valid(*i) && names.value(*i) == "Alice") + .count() + }) + .sum(); + assert_eq!( + alice_count, 1, + "after first update, exactly one Alice row should be visible" + ); + + db.mutate( + "main", + STAGED_QUERIES, + "update_age_by_name", + &mixed_params(&[("$name", "Alice")], &[("$age", 42)]), + ) + .await + .expect("second sequential update on Alice must succeed"); + + let batches = read_table(&db, "node:Person").await; + let mut alice_age: Option = None; + for batch in &batches { + let names = batch + .column_by_name("name") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + let ages = batch + .column_by_name("age") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + if names.is_valid(i) && names.value(i) == "Alice" && ages.is_valid(i) { + alice_age = Some(ages.value(i)); + } + } + } + assert_eq!( + alice_age, + Some(42), + "Alice's age must reflect the second update" + ); } diff --git a/crates/omnigraph/tests/s3_storage.rs b/crates/omnigraph/tests/s3_storage.rs index 5b90022..7e4f0a3 100644 --- a/crates/omnigraph/tests/s3_storage.rs +++ b/crates/omnigraph/tests/s3_storage.rs @@ -7,8 +7,8 @@ use omnigraph::loader::{LoadMode, load_jsonl}; use helpers::*; #[tokio::test(flavor = "multi_thread")] -async fn s3_compatible_repo_lifecycle_works() { - let Some(uri) = s3_test_repo_uri("omnigraph-runtime") else { +async fn 
s3_compatible_graph_lifecycle_works() { + let Some(uri) = s3_test_graph_uri("omnigraph-runtime") else { eprintln!("skipping s3 runtime test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -81,7 +81,7 @@ async fn s3_compatible_repo_lifecycle_works() { #[tokio::test(flavor = "multi_thread")] async fn s3_branch_change_merge_flow_works() { - let Some(uri) = s3_test_repo_uri("omnigraph-branching") else { + let Some(uri) = s3_test_graph_uri("omnigraph-branching") else { eprintln!("skipping s3 branch test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; @@ -135,7 +135,7 @@ async fn s3_branch_change_merge_flow_works() { #[tokio::test(flavor = "multi_thread")] async fn s3_public_load_uses_hidden_run_and_publishes() { - let Some(uri) = s3_test_repo_uri("omnigraph-public-load") else { + let Some(uri) = s3_test_graph_uri("omnigraph-public-load") else { eprintln!("skipping s3 public load test: OMNIGRAPH_S3_TEST_BUCKET is not set"); return; }; diff --git a/crates/omnigraph/tests/schema_apply.rs b/crates/omnigraph/tests/schema_apply.rs index fac5ab4..cc0cae2 100644 --- a/crates/omnigraph/tests/schema_apply.rs +++ b/crates/omnigraph/tests/schema_apply.rs @@ -74,7 +74,7 @@ async fn apply_schema_rejects_when_non_main_branch_exists() { let err = db.apply_schema(&desired).await.unwrap_err(); assert!( err.to_string() - .contains("schema apply requires a repo with only main") + .contains("schema apply requires a graph with only main") ); } @@ -101,17 +101,23 @@ async fn apply_schema_unsupported_plan_does_not_advance_manifest() { ); } -// ─── Destructive / safety-tier rejections ──────────────────────────────────── +// ─── Destructive / safety-tier behavior ────────────────────────────────────── // -// Schema migration v1 only accepts additive change: add type, add nullable -// property, add index, rename. Every other shape returns an -// `UnsupportedChange` step that surfaces as an error from `apply_schema`, -// without advancing the manifest. 
These tests pin that contract for the -// destructive shapes (drop type, drop property, narrow type, add required, -// remove constraint) so a regression in the planner can't silently allow them. +// Schema migration v1 accepts: +// - Additive change: add type, add nullable property, add index, rename. +// - DropProperty { Soft } via the schema-lint v1 chassis (commit #3 of MR-694) +// — the dropped column is removed from the current manifest version but +// remains reachable via Lance time travel at the prior version, until +// `omnigraph cleanup` runs. Hard mode (immediate data cleanup) lands in +// commit #5 gated by `--allow-data-loss`. +// +// Every other destructive shape (drop type, narrow type, add required without +// backfill, remove constraint) still returns an `UnsupportedChange` step that +// surfaces as an error from `apply_schema`. These tests pin the current +// contract so a regression in the planner can't silently change behavior. #[tokio::test] -async fn apply_schema_rejects_dropping_a_property_with_data() { +async fn apply_schema_drops_a_nullable_property_softly_preserves_prior_version() { let dir = tempfile::tempdir().unwrap(); let mut db = init_and_load(&dir).await; @@ -122,29 +128,100 @@ async fn apply_schema_rejects_dropping_a_property_with_data() { .unwrap() .version(); - // Drop `age` from Person. v1 doesn't support property removal even when - // the column is nullable — it would silently destroy data. + // Drop `age` from Person. v1 + chassis commit #3 emit + // `DropProperty { Soft }`; the rewrite path projects to the + // target schema (no `age`), commits via stage_overwrite. Row + // counts are unchanged — only the column is dropped from the + // current schema view. let desired = TEST_SCHEMA.replace(" age: I32?\n", ""); - let err = db.apply_schema(&desired).await.unwrap_err(); - let msg = err.to_string(); + + // Confirm the plan emits DropProperty { Soft } (not UnsupportedChange). 
+ let plan = db.plan_schema(&desired).await.unwrap(); + assert!(plan.supported, "drop-property plan must be supported"); assert!( - msg.contains("OG-DS-104"), - "expected schema-lint code OG-DS-104 in error, got: {msg}" + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropProperty { + type_kind: SchemaTypeKind::Node, + type_name, + property_name, + mode: omnigraph_compiler::DropMode::Soft, + .. + } if type_name == "Person" && property_name == "age" + )), + "expected DropProperty {{ type=Person, property=age, mode=Soft }} in plan; got {plan:?}", ); - // Manifest didn't advance and existing rows are untouched. - assert_eq!( - db.snapshot_of(ReadTarget::branch("main")) - .await - .unwrap() - .version(), - before_version + let result = db.apply_schema(&desired).await.unwrap(); + assert!(result.supported); + assert!(result.applied); + + // Manifest advanced; row count unchanged. + let after_version = db + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap() + .version(); + assert!( + after_version > before_version, + "manifest version should advance after soft drop; before={before_version}, after={after_version}", ); assert_eq!(count_rows(&db, "node:Person").await, people_before); + + // (a) Current snapshot: `age` is gone from the dataset schema. + let current_snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); + let current_ds = current_snapshot.open("node:Person").await.unwrap(); + let current_fields = current_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + !current_fields.iter().any(|f| f == "age"), + "current Person dataset schema must not include 'age' after soft drop; got fields {current_fields:?}", + ); + + // (b) Time travel: at the pre-drop manifest version, the prior + // Person dataset version still has `age`. Soft drop is reversible + // via Lance's version graph until `omnigraph cleanup` runs. 
+ let pre_drop_snapshot = db.snapshot_at_version(before_version).await.unwrap(); + let pre_drop_ds = pre_drop_snapshot.open("node:Person").await.unwrap(); + let pre_drop_fields = pre_drop_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + pre_drop_fields.iter().any(|f| f == "age"), + "pre-drop Person dataset schema must still include 'age' (time-travel reversibility); got fields {pre_drop_fields:?}", + ); + + // (c) Reopen consistency: close the engine, reopen, verify the + // drop is preserved (column still absent from current schema). + let uri = dir.path().to_str().unwrap().to_string(); + drop(db); + let reopened = Omnigraph::open(&uri).await.unwrap(); + let reopened_snapshot = reopened + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap(); + let reopened_ds = reopened_snapshot.open("node:Person").await.unwrap(); + let reopened_fields = reopened_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + !reopened_fields.iter().any(|f| f == "age"), + "after reopen, Person dataset schema must still lack 'age'; got fields {reopened_fields:?}", + ); } #[tokio::test] -async fn apply_schema_rejects_dropping_a_node_type() { +async fn apply_schema_drops_node_and_referencing_edge_softly() { let dir = tempfile::tempdir().unwrap(); let mut db = init_and_load(&dir).await; let before_version = db @@ -153,7 +230,11 @@ async fn apply_schema_rejects_dropping_a_node_type() { .unwrap() .version(); - // Drop the `Company` node type and its outgoing edge that references it. + // Drop the `Company` node type and the `WorksAt` edge that references it. + // Per schema-lint v1 chassis commit #4 (MR-694), this emits two + // `DropType { Soft }` steps; apply tombstones both manifest entries. + // Lance dataset files are retained, so time-travel back to the + // pre-drop manifest version still resolves both tables. 
let desired = r#" node Person { name: String @key @@ -164,23 +245,96 @@ edge Knows: Person -> Person { since: Date? } "#; - let err = db.apply_schema(desired).await.unwrap_err(); - let msg = err.to_string(); + + // Confirm the plan emits both DropType { Soft } steps. + let plan = db.plan_schema(desired).await.unwrap(); + assert!(plan.supported, "drop-type plan must be supported"); assert!( - msg.contains("OG-DS-102") || msg.contains("OG-DS-103"), - "expected schema-lint code OG-DS-102 or OG-DS-103 in error, got: {msg}" + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Node, + name, + mode: omnigraph_compiler::DropMode::Soft, + } if name == "Company" + )), + "expected DropType {{ Node, Company, Soft }} in plan: {plan:?}", ); - assert_eq!( - db.snapshot_of(ReadTarget::branch("main")) - .await - .unwrap() - .version(), - before_version + assert!( + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Edge, + name, + mode: omnigraph_compiler::DropMode::Soft, + } if name == "WorksAt" + )), + "expected DropType {{ Edge, WorksAt, Soft }} in plan: {plan:?}", + ); + + let result = db.apply_schema(desired).await.unwrap(); + assert!(result.supported); + assert!(result.applied); + + let after_version = db + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap() + .version(); + assert!( + after_version > before_version, + "manifest version should advance after soft type drop; before={before_version}, after={after_version}", + ); + + // (a) Current snapshot: both manifest entries are gone. 
+ let current_snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); + assert!( + current_snapshot.entry("node:Company").is_none(), + "current manifest must not list node:Company after soft drop", + ); + assert!( + current_snapshot.entry("edge:WorksAt").is_none(), + "current manifest must not list edge:WorksAt after soft drop", + ); + // Person + Knows still present (Person wasn't dropped; Knows is in desired). + assert!( + current_snapshot.entry("node:Person").is_some(), + "node:Person must remain in the manifest", + ); + + // (b) Time travel: at the pre-drop manifest version, both dropped + // tables are still listed. Soft drop is reversible via Lance's + // version graph until `omnigraph cleanup` runs. + let pre_drop_snapshot = db.snapshot_at_version(before_version).await.unwrap(); + assert!( + pre_drop_snapshot.entry("node:Company").is_some(), + "pre-drop manifest must still list node:Company (time-travel reversibility)", + ); + assert!( + pre_drop_snapshot.entry("edge:WorksAt").is_some(), + "pre-drop manifest must still list edge:WorksAt (time-travel reversibility)", + ); + + // (c) Reopen consistency: drop is preserved across engine restart. 
+ let uri = dir.path().to_str().unwrap().to_string(); + drop(db); + let reopened = Omnigraph::open(&uri).await.unwrap(); + let reopened_snapshot = reopened + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap(); + assert!( + reopened_snapshot.entry("node:Company").is_none(), + "after reopen, node:Company must still be absent from the current manifest", + ); + assert!( + reopened_snapshot.entry("edge:WorksAt").is_none(), + "after reopen, edge:WorksAt must still be absent from the current manifest", ); } #[tokio::test] -async fn apply_schema_rejects_dropping_an_edge_type() { +async fn apply_schema_drops_an_edge_type_softly() { let dir = tempfile::tempdir().unwrap(); let mut db = init_and_load(&dir).await; let before_version = db @@ -189,20 +343,50 @@ async fn apply_schema_rejects_dropping_an_edge_type() { .unwrap() .version(); - // Drop only the `WorksAt` edge. + // Drop only the `WorksAt` edge. Per chassis v1 commit #4, this + // emits `DropType { Edge, WorksAt, Soft }`; apply tombstones the + // edge:WorksAt manifest entry. The Company node and Person node + // remain intact. 
let desired = TEST_SCHEMA.replace("\nedge WorksAt: Person -> Company", ""); - let err = db.apply_schema(&desired).await.unwrap_err(); - let msg = err.to_string(); + + let plan = db.plan_schema(&desired).await.unwrap(); + assert!(plan.supported); assert!( - msg.contains("OG-DS-103"), - "expected schema-lint code OG-DS-103 in error, got: {msg}" + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Edge, + name, + mode: omnigraph_compiler::DropMode::Soft, + } if name == "WorksAt" + )), + "expected DropType {{ Edge, WorksAt, Soft }} in plan: {plan:?}", ); - assert_eq!( - db.snapshot_of(ReadTarget::branch("main")) - .await - .unwrap() - .version(), - before_version + + let result = db.apply_schema(&desired).await.unwrap(); + assert!(result.applied); + + let after_version = db + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap() + .version(); + assert!(after_version > before_version); + + let current_snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); + assert!( + current_snapshot.entry("edge:WorksAt").is_none(), + "current manifest must not list edge:WorksAt", + ); + // Other tables untouched. + assert!(current_snapshot.entry("node:Person").is_some()); + assert!(current_snapshot.entry("node:Company").is_some()); + assert!(current_snapshot.entry("edge:Knows").is_some()); + + let pre_drop_snapshot = db.snapshot_at_version(before_version).await.unwrap(); + assert!( + pre_drop_snapshot.entry("edge:WorksAt").is_some(), + "pre-drop manifest must still list edge:WorksAt", ); } @@ -218,10 +402,7 @@ async fn apply_schema_rejects_adding_a_required_property_without_backfill() { // Add `email: String` (required, non-nullable, no @rename_from). Existing // rows have no value to fill in, so this is unsupported in v1. 
- let desired = TEST_SCHEMA.replace( - " age: I32?\n}", - " age: I32?\n email: String\n}", - ); + let desired = TEST_SCHEMA.replace(" age: I32?\n}", " age: I32?\n email: String\n}"); let err = db.apply_schema(&desired).await.unwrap_err(); let msg = err.to_string(); assert!( @@ -253,7 +434,10 @@ async fn plan_schema_for_property_type_narrowing_is_not_supported() { .unwrap(); let plan = db.plan_schema(TEST_SCHEMA).await.unwrap(); - assert!(!plan.supported, "narrowing I64 -> I32 must not be supported"); + assert!( + !plan.supported, + "narrowing I64 -> I32 must not be supported" + ); assert!(plan.steps.iter().any(|step| matches!( step, SchemaMigrationStep::UnsupportedChange { code, .. } @@ -338,3 +522,217 @@ edge WorksAt: Human -> Company "old node:Person table key should be unmapped after rename" ); } + +// ─── Hard-mode drops (chassis v1 commit #5 — --allow-data-loss) ────────────── +// +// Hard mode promotes every `DropMode::Soft` step to `DropMode::Hard` and runs +// `cleanup_old_versions` on affected datasets immediately after the manifest +// publish. For DropProperty Hard, this removes the prior dataset version +// (where the column lived), making `snapshot_at_version(pre_drop)` unable to +// open the dataset at that version. For DropType Hard, the dataset is +// untouched by the schema apply itself (no per-table write), so +// cleanup_old_versions is currently a no-op for it — the dataset directory +// persists. Full orphan-dataset deletion is a separate follow-up. + +#[tokio::test] +async fn apply_schema_with_allow_data_loss_promotes_drops_to_hard() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + + let desired = TEST_SCHEMA.replace(" age: I32?\n", ""); + + // Default plan (no flag) → Soft. + let plan_soft = db.plan_schema(&desired).await.unwrap(); + assert!(plan_soft.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropProperty { + mode: omnigraph_compiler::DropMode::Soft, + .. 
+ } + ))); + + // With --allow-data-loss → Hard. + let plan_hard = db + .plan_schema_with_options( + &desired, + omnigraph::db::SchemaApplyOptions { + allow_data_loss: true, + }, + ) + .await + .unwrap(); + assert!(plan_hard.supported); + assert!( + plan_hard.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropProperty { + mode: omnigraph_compiler::DropMode::Hard, + .. + } + )), + "with --allow-data-loss, DropProperty should be promoted to Hard: {plan_hard:?}", + ); + // Negative: no remaining Soft drops in the promoted plan. + assert!( + !plan_hard.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropProperty { + mode: omnigraph_compiler::DropMode::Soft, + .. + } | SchemaMigrationStep::DropType { + mode: omnigraph_compiler::DropMode::Soft, + .. + } + )), + "promoted plan should have no Soft drops left: {plan_hard:?}", + ); + + // Apply with flag succeeds. + let result = db + .apply_schema_with_options( + &desired, + omnigraph::db::SchemaApplyOptions { + allow_data_loss: true, + }, + ) + .await + .unwrap(); + assert!(result.applied); +} + +#[tokio::test] +async fn apply_schema_hard_drops_property_makes_prior_version_unreachable() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + let before_version = db + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap() + .version(); + + // Hard drop the `age` column. Soft drop would leave the prior + // dataset version intact; Hard drop runs cleanup_old_versions on + // the dataset post-apply, removing the prior version. + let desired = TEST_SCHEMA.replace(" age: I32?\n", ""); + let result = db + .apply_schema_with_options( + &desired, + omnigraph::db::SchemaApplyOptions { + allow_data_loss: true, + }, + ) + .await + .unwrap(); + assert!(result.applied); + + // Current snapshot: column gone from the dataset schema. 
+ let current_snapshot = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); + let current_ds = current_snapshot.open("node:Person").await.unwrap(); + let current_fields = current_ds + .schema() + .fields + .iter() + .map(|f| f.name.clone()) + .collect::>(); + assert!( + !current_fields.iter().any(|f| f == "age"), + "current Person schema must not include 'age' after hard drop; got {current_fields:?}", + ); + + // Time travel: at the pre-drop manifest version, the entry points + // at the OLD dataset version which has been cleaned up. Opening + // the dataset at that snapshot should fail (Lance can't load the + // dropped version). This is the Hard-mode contract — the prior + // data is unreachable. + let pre_drop = db.snapshot_at_version(before_version).await.unwrap(); + let open_result = pre_drop.open("node:Person").await; + assert!( + open_result.is_err(), + "after hard drop + cleanup, pre-drop snapshot.open() must fail (prior version was reclaimed); got {open_result:?}", + ); +} + +#[tokio::test] +async fn apply_schema_hard_drops_node_and_edge_with_flag_succeeds() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + let before_version = db + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap() + .version(); + + let desired = r#" +node Person { + name: String @key + age: I32? +} + +edge Knows: Person -> Person { + since: Date? +} +"#; + + let plan = db + .plan_schema_with_options( + desired, + omnigraph::db::SchemaApplyOptions { + allow_data_loss: true, + }, + ) + .await + .unwrap(); + assert!(plan.supported); + assert!( + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Node, + mode: omnigraph_compiler::DropMode::Hard, + .. 
+ } + )), + "with --allow-data-loss, DropType {{ Node }} should be Hard: {plan:?}", + ); + assert!( + plan.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::DropType { + type_kind: SchemaTypeKind::Edge, + mode: omnigraph_compiler::DropMode::Hard, + .. + } + )), + "with --allow-data-loss, DropType {{ Edge }} should be Hard: {plan:?}", + ); + + let result = db + .apply_schema_with_options( + desired, + omnigraph::db::SchemaApplyOptions { + allow_data_loss: true, + }, + ) + .await + .unwrap(); + assert!(result.applied); + + let after_version = db + .snapshot_of(ReadTarget::branch("main")) + .await + .unwrap() + .version(); + assert!(after_version > before_version); + + // Current manifest: both dropped entries gone. + let current = db.snapshot_of(ReadTarget::branch("main")).await.unwrap(); + assert!(current.entry("node:Company").is_none()); + assert!(current.entry("edge:WorksAt").is_none()); + + // NOTE: DropType Hard's cleanup of the orphan dataset directory + // is a known follow-up (the manifest entry is tombstoned and the + // dataset's prior versions are cleaned, but the directory itself + // persists until an orphan-cleanup pass is implemented). For the + // current contract, the data is *unreachable* via omnigraph + // (no manifest entry), which is the user-facing guarantee. 
+} diff --git a/crates/omnigraph/tests/search.rs b/crates/omnigraph/tests/search.rs index a611a0f..c4454cf 100644 --- a/crates/omnigraph/tests/search.rs +++ b/crates/omnigraph/tests/search.rs @@ -3,7 +3,8 @@ mod helpers; use std::env; use arrow_array::{Array, StringArray}; -use lance_index::{DatasetIndexExt, is_system_index}; +use lance::index::DatasetIndexExt; +use lance_index::is_system_index; use serial_test::serial; use omnigraph::db::Omnigraph; diff --git a/crates/omnigraph/tests/staged_writes.rs b/crates/omnigraph/tests/staged_writes.rs index 30ef28b..021b36e 100644 --- a/crates/omnigraph/tests/staged_writes.rs +++ b/crates/omnigraph/tests/staged_writes.rs @@ -132,7 +132,11 @@ async fn stage_merge_insert_dedupes_superseded_committed_fragment() { .await .unwrap(); let ids = collect_ids(&batches); - assert_eq!(ids, vec!["alice"], "merge_insert must not surface duplicates"); + assert_eq!( + ids, + vec!["alice"], + "merge_insert must not surface duplicates" + ); // Confirm the visible row is the rewritten one. let total: usize = batches.iter().map(|b| b.num_rows()).sum(); @@ -382,12 +386,7 @@ async fn scan_with_staged_with_filter_silently_drops_staged_rows() { // Actual: dave (staged, age=35) is dropped — only the committed matches // come back. let batches = store - .scan_with_staged( - &ds, - std::slice::from_ref(&staged), - None, - Some("age >= 30"), - ) + .scan_with_staged(&ds, std::slice::from_ref(&staged), None, Some("age >= 30")) .await .unwrap(); assert_eq!( @@ -403,12 +402,7 @@ async fn scan_with_staged_with_filter_silently_drops_staged_rows() { // Without filter, staged data IS visible — confirms the issue is // specifically filter pushdown, not fragment scanning per se. 
let unfiltered = store - .scan_with_staged( - &ds, - std::slice::from_ref(&staged), - None, - None, - ) + .scan_with_staged(&ds, std::slice::from_ref(&staged), None, None) .await .unwrap(); assert_eq!( @@ -686,10 +680,7 @@ async fn stage_create_inverted_index_does_not_advance_head_until_commit() { .unwrap(); let pre_version = ds.version().version; - let staged = store - .stage_create_inverted_index(&ds, "id") - .await - .unwrap(); + let staged = store.stage_create_inverted_index(&ds, "id").await.unwrap(); assert_eq!( ds.version().version, pre_version, @@ -781,13 +772,9 @@ async fn create_vector_index_advances_head_inline_documents_residual() { let id_arr = StringArray::from(ids); let flat: Vec = (0..(n_rows * dim)).map(|i| i as f32).collect(); let values = arrow_array::Float32Array::from(flat); - let vec_arr = - FixedSizeListArray::new(item_field, dim as i32, Arc::new(values), None); - let batch = RecordBatch::try_new( - schema.clone(), - vec![Arc::new(id_arr), Arc::new(vec_arr)], - ) - .unwrap(); + let vec_arr = FixedSizeListArray::new(item_field, dim as i32, Arc::new(values), None); + let batch = + RecordBatch::try_new(schema.clone(), vec![Arc::new(id_arr), Arc::new(vec_arr)]).unwrap(); let mut ds = TableStore::write_dataset(&uri, batch).await.unwrap(); let pre_version = ds.version().version; diff --git a/crates/omnigraph/tests/traversal.rs b/crates/omnigraph/tests/traversal.rs index 6b6fbe3..6efe7de 100644 --- a/crates/omnigraph/tests/traversal.rs +++ b/crates/omnigraph/tests/traversal.rs @@ -504,9 +504,21 @@ query fof_chain($name: String) { let batch = result.concat_batches().unwrap(); assert_eq!(batch.num_rows(), 1); - let col0 = batch.column(0).as_any().downcast_ref::().unwrap(); - let col1 = batch.column(1).as_any().downcast_ref::().unwrap(); - let col2 = batch.column(2).as_any().downcast_ref::().unwrap(); + let col0 = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let col1 = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); 
+ let col2 = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); assert_eq!(col0.value(0), "Alice"); assert_eq!(col1.value(0), "Bob"); assert_eq!(col2.value(0), "Diana"); @@ -574,8 +586,16 @@ query at_acme_named() { let batch = result.concat_batches().unwrap(); assert_eq!(batch.num_rows(), 1); - let person = batch.column(0).as_any().downcast_ref::().unwrap(); - let company = batch.column(1).as_any().downcast_ref::().unwrap(); + let person = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let company = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); assert_eq!(person.value(0), "Alice"); assert_eq!(company.value(0), "Acme"); } @@ -608,8 +628,16 @@ query at_company($company: String) { let batch = result.concat_batches().unwrap(); assert_eq!(batch.num_rows(), 1); - let person = batch.column(0).as_any().downcast_ref::().unwrap(); - let company = batch.column(1).as_any().downcast_ref::().unwrap(); + let person = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let company = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); assert_eq!(person.value(0), "Bob"); assert_eq!(company.value(0), "Globex"); } @@ -633,19 +661,22 @@ query fan_out($name: String) { "#; // Alice knows Bob and Charlie, works at Acme. // Each friend paired with her company → 2 rows. 
- let result = query_main( - &mut db, - queries, - "fan_out", - &params(&[("$name", "Alice")]), - ) - .await - .unwrap(); + let result = query_main(&mut db, queries, "fan_out", &params(&[("$name", "Alice")])) + .await + .unwrap(); let batch = result.concat_batches().unwrap(); assert_eq!(batch.num_rows(), 2); - let friends = batch.column(0).as_any().downcast_ref::().unwrap(); - let companies = batch.column(1).as_any().downcast_ref::().unwrap(); + let friends = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let companies = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); let mut pairs: Vec<(&str, &str)> = (0..batch.num_rows()) .map(|i| (friends.value(i), companies.value(i))) diff --git a/crates/omnigraph/tests/validators.rs b/crates/omnigraph/tests/validators.rs index 96483d3..4c7a2f3 100644 --- a/crates/omnigraph/tests/validators.rs +++ b/crates/omnigraph/tests/validators.rs @@ -76,7 +76,9 @@ async fn init_with(schema: &str, data: &str) -> (tempfile::TempDir, Omnigraph) { let uri = dir.path().to_str().unwrap(); let mut db = Omnigraph::init(uri, schema).await.unwrap(); if !data.is_empty() { - load_jsonl(&mut db, data, LoadMode::Overwrite).await.unwrap(); + load_jsonl(&mut db, data, LoadMode::Overwrite) + .await + .unwrap(); } (dir, db) } diff --git a/docs/ci.md b/docs/ci.md deleted file mode 100644 index d9855b0..0000000 --- a/docs/ci.md +++ /dev/null @@ -1,10 +0,0 @@ -# CI / Release Workflows - -`.github/workflows/`: - -- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repo PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`). -- **AWS feature build job**: `cargo build/test -p omnigraph-server --features aws` on ubuntu-latest. 
-- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_repo_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`. -- **release-edge.yml**: on every push to main, retags `edge`, builds Linux/macOS-Intel/macOS-arm64 archives + sha256, publishes a rolling prerelease. -- **release.yml**: on `v*` tags, builds the 3-platform matrix and updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`. -- **package.yml**: manual ECR image build; emits two image tags per commit (``, `-aws`) via CodeBuild. diff --git a/docs/cli.md b/docs/cli.md deleted file mode 100644 index ae8c152..0000000 --- a/docs/cli.md +++ /dev/null @@ -1,100 +0,0 @@ -# CLI Guide - -## Core Repo Flow - -```bash -omnigraph init --schema ./schema.pg ./repo.omni -omnigraph load --data ./data.jsonl --mode overwrite ./repo.omni -omnigraph snapshot ./repo.omni --branch main --json -omnigraph read --uri ./repo.omni --query ./queries.gq --name get_person --params '{"name":"Alice"}' -omnigraph change --uri ./repo.omni --query ./queries.gq --name insert_person --params '{"name":"Mina","age":28}' -``` - -## Branching And Reviewable Data Flows - -```bash -omnigraph branch create --uri ./repo.omni --from main feature-x -omnigraph branch list --uri ./repo.omni -omnigraph branch merge --uri ./repo.omni feature-x --into main - -omnigraph ingest --data ./batch.jsonl --branch review/import-2026-04-09 ./repo.omni -omnigraph export ./repo.omni --branch main --type Person > people.jsonl -omnigraph commit list ./repo.omni --branch main --json -omnigraph commit show --uri ./repo.omni --json -``` - -## Remote Server Mode - -Serve a repo: - -```bash -omnigraph-server ./repo.omni --bind 127.0.0.1:8080 -``` - -Read through the HTTP API: - -```bash -omnigraph read \ - --target http://127.0.0.1:8080 \ - --query ./queries.gq \ - --name get_person \ - --params '{"name":"Alice"}' -``` - -If the 
server requires auth, set `OMNIGRAPH_SERVER_BEARER_TOKEN` on the server -and configure the matching `bearer_token_env` in `omnigraph.yaml`. - -## Runs, Policy, And Diagnostics - -```bash -omnigraph query lint --query ./queries.gq --schema ./schema.pg --json -omnigraph query check --query ./queries.gq ./repo.omni --json - -omnigraph schema plan --schema ./next.pg ./repo.omni --json -omnigraph schema apply --schema ./next.pg ./repo.omni --json -omnigraph policy validate --config ./omnigraph.yaml -omnigraph policy test --config ./omnigraph.yaml -omnigraph policy explain --config ./omnigraph.yaml --actor act-alice --action read --branch main - -omnigraph commit list ./repo.omni --json -omnigraph commit show --uri ./repo.omni --json -``` - -(The legacy `omnigraph run list/show/publish/abort` subcommands were removed in MR-771; mutations and loads publish atomically and the commit graph (`omnigraph commit list`) is the audit surface.) - -`query lint` and `query check` are the same command surface. In v1, repo-backed -lint uses local or `s3://` repo URIs; HTTP targets are only supported when you -also pass `--schema`. - -## Config - -`omnigraph.yaml` lets the CLI and server share named graphs, defaults, and -query roots: - -```yaml -graphs: - local: - uri: ./demo.omni - dev: - uri: http://127.0.0.1:8080 - bearer_token_env: OMNIGRAPH_BEARER_TOKEN - -cli: - graph: local - branch: main - -query: - roots: - - queries - - . -``` - -The config file can also define: - -- server bind defaults -- auth env files -- query aliases for common read and change commands -- `policy.file` for Cedar authorization rules - -When policy is enabled, `schema apply` is authorized through the -`schema_apply` action and is typically limited to admins on protected `main`. 
diff --git a/docs/architecture.md b/docs/dev/architecture.md similarity index 83% rename from docs/architecture.md rename to docs/dev/architecture.md index 173d37a..8b7fca2 100644 --- a/docs/architecture.md +++ b/docs/dev/architecture.md @@ -10,7 +10,7 @@ Three views, increasing zoom: 2. **Layer view** — the eight-layer stack inside one OmniGraph process. 3. **Component zoom-ins** — what's inside each layer. -For runtime flows (read query, mutation), see [`docs/execution.md`](execution.md). For the on-disk layout of a repo, see [`docs/storage.md`](storage.md). +For runtime flows (read query, mutation), see [`docs/dev/execution.md`](execution.md). For the on-disk layout of a graph, see [`docs/user/storage.md`](../user/storage.md). L1 (orange in the diagrams) is what we inherit from Lance; L2 (blue) is what OmniGraph adds. The L1/L2 framing is also called out in prose at the bottom of this doc. @@ -63,7 +63,7 @@ flowchart TB subgraph engine[omnigraph engine] plan[exec query and mutation]:::l2 gi[graph index CSR/CSC
RuntimeCache LRU 8]:::l2 - coord[coordinator
ManifestRepo · CommitGraph]:::l2 + coord[coordinator
ManifestCoordinator · CommitGraph]:::l2 end subgraph storage[storage trait — wraps Lance] @@ -86,7 +86,7 @@ flowchart TB lance_layer -- bytes --> object_store ``` -The `storage trait` row is partly aspirational. Today the engine calls `lance::Dataset` methods through `table_store`; a capability-bearing `Dataset` trait per [`docs/invariants.md`](invariants.md) §I.4 is on the roadmap (MR-737). The diagram shows the intended seam. +The storage seam is partly aspirational. `TableStorage` exists as the sealed staged-write trait, but capability/stat surfaces and full call-site migration are still roadmap. The diagram shows the intended boundary. ## Component zoom-ins @@ -132,7 +132,7 @@ flowchart TB subgraph state[graph state] coord[GraphCoordinator]:::l2 - mr[ManifestRepo
db/manifest.rs]:::l2 + mr[ManifestCoordinator
db/manifest.rs]:::l2 cg[CommitGraph
_graph_commits.lance]:::l2 stg[MutationStaging
per-query in-memory accumulator
exec/staging.rs]:::l2 end @@ -166,7 +166,7 @@ Code paths: - Read entry: `Omnigraph::query` at `crates/omnigraph/src/exec/query.rs:7` - Mutation entry: `Omnigraph::mutate` at `crates/omnigraph/src/exec/mutation.rs:511` -- Manifest commit: `ManifestRepo::commit` at `crates/omnigraph/src/db/manifest.rs:280` +- Manifest commit: `ManifestCoordinator::commit` at `crates/omnigraph/src/db/manifest.rs:280` - Graph index: `crates/omnigraph/src/graph_index/` - Loader: `Omnigraph::ingest` at `crates/omnigraph/src/loader/mod.rs:74` @@ -174,7 +174,7 @@ Code paths: Inserts and updates inside `mutate_as` and the bulk loader's Append/Merge modes go through `MutationStaging` -([`crates/omnigraph/src/exec/staging.rs`](../crates/omnigraph/src/exec/staging.rs)), +([`crates/omnigraph/src/exec/staging.rs`](../../crates/omnigraph/src/exec/staging.rs)), a per-query in-memory accumulator. No Lance HEAD advance happens during op execution; one `stage_*` + `commit_staged` per touched table runs at end-of-query, then the publisher commits the manifest atomically. @@ -204,11 +204,10 @@ contracts: the committed snapshot at the captured `expected_version` and unions with a DataFusion `MemTable` over the pending batches. -This pattern realizes [docs/invariants.md §VI.25](invariants.md) -(read-your-writes within a multi-statement mutation) and §VI.32 -(failure scope bounded) for inserts/updates by construction at the -writer layer. See [docs/runs.md](runs.md) for the publisher CAS -contract this builds on. +This pattern realizes read-your-writes within a multi-statement mutation +and keeps failure scope bounded for inserts/updates by construction at +the writer layer. See [docs/dev/invariants.md](invariants.md) and +[docs/dev/runs.md](runs.md) for the publisher CAS contract this builds on. ### Storage trait — today vs. roadmap @@ -222,10 +221,10 @@ flowchart LR d2[storage.rs
S3 / file URI plumbing]:::now end - subgraph roadmap[Roadmap — invariants §I.4] + subgraph roadmap[Roadmap - storage capabilities] t[trait Dataset
schema · stats · placement
capabilities · scan · write]:::future impl1[LanceStorage]:::future - impl2[MemStorage for tests]:::future + impl2[future test impl]:::future end today -.-> roadmap @@ -233,7 +232,7 @@ flowchart LR t --> impl2 ``` -The storage layer's trait surface is aspirational. Today the engine calls `lance::Dataset` methods directly. The roadmap (per [`docs/invariants.md`](invariants.md) §I.4 and MR-737) is a `Dataset` trait that surfaces capabilities and statistics so the planner can reason about pushdown opportunities. +The staged-write trait exists today as `TableStorage`, implemented by `TableStore`. Full engine migration plus capability and statistics surfaces remain roadmap, so the planner cannot yet reason about all pushdown opportunities through a documented trait surface. ### Index lifecycle — today vs. roadmap @@ -247,7 +246,7 @@ flowchart LR manual[called manually
or from optimize]:::now end - subgraph roadmap[Roadmap — invariants §VII.38] + subgraph roadmap[Roadmap - manifest reconciler] rec[Reconciler
observes manifest]:::future diff[coverage diff
fragments − fragment_bitmap]:::future wp[worker pool
builds index segments]:::future @@ -258,7 +257,7 @@ flowchart LR rec --> diff --> wp ``` -Today, indexes are built explicitly via `ensure_indices`. Reads degrade gracefully when index coverage is partial — Lance's scanner unions indexed and scan paths automatically. The roadmap reconciler (per [`docs/invariants.md`](invariants.md) §VII.38) observes manifest state and converges coverage in the background. +Today, indexes are built explicitly via `ensure_indices`. Reads degrade gracefully when index coverage is partial — Lance's scanner unions indexed and scan paths automatically. The roadmap reconciler observes manifest state and converges coverage in the background. ### Server / CLI @@ -279,7 +278,7 @@ flowchart LR eng --> wq ``` -The server applies Cedar policy at the HTTP boundary today (per [`docs/invariants.md`](invariants.md) §VII.45, the roadmap is to push policy into the planner as predicates). After Cedar, mutating handlers go through `WorkloadController` (per-actor admission cap + byte budget; PR 2 / MR-686) before reaching the engine. The engine itself holds an `Arc` so concurrent mutations on the same `(table, branch)` serialize at the queue, while disjoint keys run in parallel — see [server.md](server.md) "Per-actor admission control" and [runs.md](runs.md). The CLI bypasses the HTTP layer (and admission) and calls the engine API directly. +The server applies Cedar policy at the HTTP boundary today. The roadmap, called out in [docs/dev/invariants.md](invariants.md) as a known gap, is to push policy into the planner as predicates. After Cedar, mutating handlers go through `WorkloadController` (per-actor admission cap + byte budget; PR 2 / MR-686) before reaching the engine. The engine itself holds an `Arc` so concurrent mutations on the same `(table, branch)` serialize at the queue, while disjoint keys run in parallel — see [docs/user/server.md](../user/server.md) "Per-actor admission control" and [docs/dev/runs.md](runs.md). 
The CLI bypasses the HTTP layer (and admission) and calls the engine API directly. Code paths: diff --git a/docs/branch-protection.md b/docs/dev/branch-protection.md similarity index 91% rename from docs/branch-protection.md rename to docs/dev/branch-protection.md index 2f878de..9b2fa78 100644 --- a/docs/branch-protection.md +++ b/docs/dev/branch-protection.md @@ -16,12 +16,12 @@ This page explains what the policy says and how to change it. | **Disallow force pushes** | `true` | No history rewrites on `main`. | | **Disallow branch deletions** | `true` | `main` cannot be deleted. | | **Required conversation resolution** | `true` | All review comment threads must be resolved before merge. | -| **Enforce on admins** | `true` | Even repo admins go through the gates. The point is no bypasses. | +| **Enforce on admins** | `false` | Repository admins are currently exempt from the gates (`enforce_admins` is `false` in `.github/branch-protection.json`). Set it back to `true` to remove the bypass. | | **Required signed commits** | not yet | Not enabled. Would lock out maintainers until everyone enrolls GPG/SSH commit signing. Tracked as a follow-up. | ## How to apply -Run from the repo root: +Run from the repository root: ```bash ./scripts/apply-branch-protection.sh @@ -29,7 +29,7 @@ Run from the repo root: The script reads `.github/branch-protection.json`, strips the human-readable `_comment` field (the GitHub API rejects unknown keys), and PUTs to `repos/ModernRelay/omnigraph/branches/main/protection`. -Requires `gh` authenticated with a token that has admin permissions on the repo. +Requires `gh` authenticated with a token that has admin permissions on the repository. To preview without applying: @@ -57,7 +57,7 @@ Outputs the live policy. Compare against `.github/branch-protection.json` to det - **Audit trail**: `git log .github/branch-protection.json` shows every change with a reviewable diff and a merge commit. - **Disaster recovery**: if branch protection is accidentally removed or weakened via the UI, the JSON is the canonical recovery point. 
-- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repo policy lives in the repo. +- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repository policy lives in the repository. ## What this gates @@ -69,7 +69,7 @@ After branch protection is applied, every PR targeting `main` must: 4. Have all review conversations resolved. 5. Be squash- or rebase-merged (no merge commits). -Even repo admins are subject to these rules. +Repository admins are exempt from these rules while `enforce_admins` is `false` in `.github/branch-protection.json`. ## Subsequent hardening (not in this PR) @@ -77,7 +77,7 @@ The branch-protection policy is the foundation. Future hardening adds: - **Required signed commits** (`required_signatures: true`) — once maintainers enroll GPG/SSH signing. - **Tag protection** for `v*` tags via `repos/.../tags/protection`. -- **Required reviewers from specific teams** for high-leverage paths (e.g., `docs/invariants.md`) via CODEOWNERS tier expansion + the N-unique-approvers CI workaround. +- **Required reviewers from specific teams** for high-leverage paths (e.g., `docs/dev/invariants.md`) via CODEOWNERS tier expansion + the N-unique-approvers CI workaround. - **More required CI checks**: `cargo deny`, `cargo audit`, `cargo fmt --check`, `cargo clippy -D warnings`, CodeQL, secret scanning, schema-lint (MR-946). See the hardening playbook for the full plan. diff --git a/docs/dev/ci.md b/docs/dev/ci.md new file mode 100644 index 0000000..1124cb4 --- /dev/null +++ b/docs/dev/ci.md @@ -0,0 +1,11 @@ +# CI / Release Workflows + +`.github/workflows/`: + +- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repository PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`). 
+- **AWS feature build job**: `cargo build/test -p omnigraph-server --features aws` on ubuntu-latest. +- **Windows binary build job**: `cargo build --release --locked -p omnigraph-cli -p omnigraph-server` on windows-latest with smoke checks for `omnigraph.exe version`, `omnigraph-server.exe --help`, and PowerShell installer syntax. +- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_graph_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`. +- **release-edge.yml**: on every push to main, retags `edge`, builds Linux x86_64 / macOS arm64 archives and Windows x86_64 zip + sha256, publishes a rolling prerelease, then smoke-tests the Windows PowerShell installer against `edge`. +- **release.yml**: on `v*` tags, builds the Linux x86_64 / macOS arm64 archives and Windows x86_64 zip release matrix, updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`, and smoke-tests the Windows PowerShell installer against the tag. +- **package.yml**: manual ECR image build; emits two image tags per commit (``, `-aws`) via CodeBuild. diff --git a/docs/codeowners.md b/docs/dev/codeowners.md similarity index 73% rename from docs/codeowners.md rename to docs/dev/codeowners.md index ad388ea..9a7fb50 100644 --- a/docs/codeowners.md +++ b/docs/dev/codeowners.md @@ -2,16 +2,16 @@ `.github/CODEOWNERS` is **generated** — not hand-edited. The source of truth is `.github/codeowners-roles.yml`, expanded by `.github/scripts/render-codeowners.py`. CI rejects drift between the two and rejects direct edits to `CODEOWNERS` that don't accompany a yml change. -This setup gives every role change a reviewable PR and a permanent in-repo audit trail (`git log .github/codeowners-roles.yml`). +This setup gives every role change a reviewable PR and a permanent in-repository audit trail (`git log .github/codeowners-roles.yml`). 
## Current roles | Role | Members | Scope | |---|---|---| -| `engineering` | `@aaltshuler` | All code under `crates/**`, repo infrastructure, default for unmapped paths | -| `docs` | `@aaltshuler`, `@ragnorc` | `docs/**`, README.md, AGENTS.md, CLAUDE.md, SECURITY.md | +| `engineering` | `@ragnorc` | All code under `crates/**`, repository infrastructure, default for unmapped paths | +| `docs` | `@ragnorc` | `docs/**`, README.md, AGENTS.md, CLAUDE.md, SECURITY.md | -GitHub treats multiple owners in a CODEOWNERS line as **"any one of them satisfies the review requirement"**. For docs, either named member can approve. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured). +GitHub treats multiple owners in a CODEOWNERS line as **"any one of them satisfies the review requirement"**. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured). ## How to change role membership or path mappings @@ -34,4 +34,4 @@ CI fails the PR if: - **Audit trail**: `git log .github/codeowners-roles.yml` is the canonical record of every role change. The rendered `CODEOWNERS` is a derived artifact. - **Roles are first-class**: paths reference roles, not raw handles. Renaming a person or rotating a role updates one place, not every path. - **Future extension**: scheduled rotation (weekly on-call, quarterly leads) plugs into the same yml without changing the path mappings. Not enabled today. -- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repo's code-owner policy follows the same "policy as reviewed code" pattern. +- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repository's code-owner policy follows the same "policy as reviewed code" pattern. 
diff --git a/docs/execution.md b/docs/dev/execution.md similarity index 98% rename from docs/execution.md rename to docs/dev/execution.md index bd4842c..f5c2840 100644 --- a/docs/execution.md +++ b/docs/dev/execution.md @@ -147,7 +147,7 @@ sequenceDiagram - End-of-query Lance commit: `TableStore::stage_append`, `stage_merge_insert`, `commit_staged` at `crates/omnigraph/src/table_store.rs` - Manifest commit primitive: `commit_updates_on_branch_with_expected` at `crates/omnigraph/src/db/omnigraph/table_ops.rs` -Atomicity guarantee for multi-statement mutations: a mid-query failure leaves Lance HEAD untouched on staged tables (no inline commit happened during op execution), so the next mutation proceeds normally with no `ExpectedVersionMismatch`. The publisher CAS at the very end either succeeds (manifest advances atomically across all touched sub-tables) or fails with a typed `ManifestConflictDetails::ExpectedVersionMismatch` (no partial publish). See [docs/invariants.md §VI.25 / §VI.32](invariants.md) and [docs/runs.md](runs.md). +Atomicity guarantee for multi-statement mutations: a mid-query failure leaves Lance HEAD untouched on staged tables (no inline commit happened during op execution), so the next mutation proceeds normally with no `ExpectedVersionMismatch`. The publisher CAS at the very end either succeeds (manifest advances atomically across all touched sub-tables) or fails with a typed `ManifestConflictDetails::ExpectedVersionMismatch` (no partial publish). See [docs/dev/invariants.md](invariants.md) and [docs/dev/runs.md](runs.md). ## Bulk loader (`loader/mod.rs`) @@ -177,4 +177,4 @@ For Append/Merge, a mid-load failure (RI / cardinality violation, validation err ## Embeddings during load -If a node type has `@embed` properties, the loader calls the engine embedding client (Gemini, RETRIEVAL_DOCUMENT) per row to populate the vector column. See [embeddings.md](embeddings.md). 
+If a node type has `@embed` properties, the loader calls the engine embedding client (Gemini, RETRIEVAL_DOCUMENT) per row to populate the vector column. See [embeddings.md](../user/embeddings.md). diff --git a/docs/dev/index.md b/docs/dev/index.md new file mode 100644 index 0000000..83df8c8 --- /dev/null +++ b/docs/dev/index.md @@ -0,0 +1,67 @@ +# Developer Docs + +**Audience:** contributors, maintainers, and coding agents + +This is the contributor-facing entry point. These docs explain architecture, +invariants, implementation contracts, test ownership, and upstream Lance +constraints. User-facing behavior should still be documented through +[docs/user/index.md](../user/index.md) and the relevant public reference docs. + +## Required For Every Non-Trivial Change + +| Need | Read | +|---|---| +| Architectural rules, known gaps, deny-list | [invariants.md](invariants.md) | +| Upstream Lance source-of-truth index | [lance.md](lance.md) | +| Existing test coverage and test placement | [testing.md](testing.md) | + +## Architecture And Storage + +| Area | Read | +|---|---| +| System structure, L1/L2 framing, component diagrams | [architecture.md](architecture.md) | +| On-disk layout, manifest schema, URI behavior | [storage.md](../user/storage.md) | +| Direct-publish writes, D2, staged writes, recovery sidecars | [runs.md](runs.md) | +| Query execution, mutation execution, loader flow | [execution.md](execution.md) | +| Index lifecycle and graph topology indexes | [indexes.md](../user/indexes.md) | +| Branch and commit internals | [branches-commits.md](../user/branches-commits.md) | +| Three-way merge implementation and conflicts | [merge.md](merge.md) | +| Diff/change-feed implementation | [changes.md](../user/changes.md) | +| Branch protection policy | [branch-protection.md](branch-protection.md) | +| CODEOWNERS source of truth | [codeowners.md](codeowners.md) | + +## Language, Runtime, And Boundaries + +| Area | Read | +|---|---| +| Schema grammar, catalog, 
migration planner | [schema-language.md](../user/schema-language.md) | +| Query grammar, IR, lints, mutation restrictions | [query-language.md](../user/query-language.md) | +| Embedding client and `@embed` integration | [embeddings.md](../user/embeddings.md) | +| Cedar policy surface and server gating | [policy.md](../user/policy.md) | +| Server auth, OpenAPI, endpoint handlers | [server.md](../user/server.md) | +| Error taxonomy and serialization | [errors.md](../user/errors.md) | +| Constants and tunables | [constants.md](../user/constants.md) | +| Transaction model public contract | [transactions.md](../user/transactions.md) | + +## Project Operations + +| Area | Read | +|---|---| +| CI and release workflows | [ci.md](ci.md) | +| Install and deployment packaging | [install.md](../user/install.md), [deployment.md](../user/deployment.md) | +| Release history | [releases/](../releases/) | + +## Active Implementation Plans + +Working documents for in-flight feature work. Removed when the work lands. + +| Area | Read | +|---|---| +| Schema-lint chassis v1 (MR-694) — `--allow-data-loss`, soft/hard drops | [schema-lint-v1-plan.md](schema-lint-v1-plan.md) | +| Inline + stored queries, request/response envelope, MCP (MR-656 / MR-976 / MR-969) | [rfc-001-queries-envelope-mcp.md](rfc-001-queries-envelope-mcp.md) | + +## Boundary + +Developer docs may mention implementation details, stale gaps, upstream Lance +blockers, and review rules. User docs should not require that context unless +the detail changes the public contract. diff --git a/docs/dev/invariants.md b/docs/dev/invariants.md new file mode 100644 index 0000000..958042f --- /dev/null +++ b/docs/dev/invariants.md @@ -0,0 +1,206 @@ +# Architectural Invariants + +**Type:** standing review checklist +**Status:** living document +**Audience:** anyone proposing, reviewing, or implementing an OmniGraph change + +This file is intentionally short. 
It records the rules that should be in +working memory for every non-trivial change. Detailed mechanics live in the +area docs linked below. + +Use it this way: + +- Review the change against **Hard Invariants** and the **Deny-list**. +- If code and docs disagree, either fix the code or add/update a **Known Gap**. +- Keep implementation ledgers, roadmap detail, and historical MR notes in the + per-area docs. This file is the filter, not the encyclopedia. + +## Hard Invariants + +1. **Respect the substrate.** Lance owns columnar storage, per-dataset + versioning, fragments, branches, compaction, cleanup, and index primitives. + DataFusion should own relational execution where it fits. Do not add custom + WALs, transaction managers, buffer pools, page formats, or local clones of + substrate behavior. Read [lance.md](lance.md) before guessing. + +2. **Graph visibility is manifest-atomic.** Lance commits are per dataset. + OmniGraph's graph-level atomicity comes from publishing one manifest update + for the whole graph, guarded by expected table versions and sidecar recovery. + No write path may make a subset of touched node/edge tables visible as a + graph commit. + +3. **A query reads one snapshot.** Query execution captures a manifest snapshot + for its lifetime. Do not re-read branch head mid-query to discover newer + table versions. + +4. **Mutations publish at one boundary.** A `mutate_as` or `load` operation + accumulates constructive writes, commits each touched table at the end, then + publishes one manifest update. Do not commit per statement. Delete-only + queries are the documented inline residual; the parse-time D2 rule prevents + mixing deletes with insert/update until Lance exposes two-phase delete. + Read [runs.md](runs.md) and [execution.md](execution.md). + +5. **Recovery is part of the commit protocol.** Writers that can advance Lance + HEAD before manifest publish must write `__recovery/{ulid}.json` sidecars. 
+ `Omnigraph::open` in read-write mode runs the all-or-nothing sweep, and + `refresh` runs roll-forward-only recovery for long-lived processes. Do not + add a new writer kind without sidecar coverage or an explicit proof that no + Lance HEAD can move before manifest publish. + +6. **Strong consistency is the default.** Reads are snapshot-isolated, writes + are durable before acknowledgement, and branch reads observe the current + committed graph state. Any eventual-consistency mode must be explicit, + read-only, auditable, and non-default. + +7. **Indexes are derived state.** Reads must see the correct result for the + branch they read even when index coverage is partial. Expensive index work + should converge from manifest state instead of extending the critical write + path. Scalar staged index builds and vector inline residuals are documented + in [runs.md](runs.md) and [indexes.md](../user/indexes.md). + +8. **Schema identity survives renames.** Accepted schema identity must remain + stable across type and property renames. Rename support belongs in migration + planning, not in "drop and recreate" behavior. See the known gap below. + +9. **Schema/data integrity failures are loud.** Type errors, required-field + misses, invalid edge endpoints, cardinality violations, and unsupported + mixed mutation modes fail before a graph commit is published. The system must + not invent placeholder nodes or silently weaken integrity. + +10. **Query semantics are first-class IR concepts.** Search modes, mutations, + polymorphism, traversal, retrieval scores, imports, and policy predicates + belong in typed AST/IR/planner structures. Do not smuggle semantics through + strings, side tables, global state, or transport-specific flags. + +11. **Transport/auth stay at the boundary.** Kernel crates should not depend on + HTTP, OpenAPI, bearer-token parsing, or future transport protocols. The + server resolves bearer tokens to actors; clients cannot set actor identity + directly. 
+ +12. **Bearer-token plaintext is not retained.** Server startup hashes bearer + tokens, authentication uses constant-time comparison, and request handling + carries only the resolved actor identity and hash-derived match state. + +13. **Operational failures are bounded and observable.** Timeout, memory, OOM, + partial result, recovery, and conflict paths must fail loudly or degrade in + a documented way. If a metric affects plan choice or operator behavior, it + must be exposed through the relevant trait or observability surface. + +14. **Tests match the boundary being changed.** Prefer extending the existing + test that owns the area. Planner changes need planner-level coverage, + storage changes need storage/recovery coverage, and end-to-end tests are not + a substitute for missing lower-level assertions. Read [testing.md](testing.md) + before adding tests. + +## Current Truth Matrix + +| Area | Current state | Source | +|---|---|---| +| Multi-table commit | Manifest CAS plus recovery sidecars; not a single Lance primitive | [runs.md](runs.md), [architecture.md](architecture.md) | +| Constructive mutations | In-memory `MutationStaging`, one end-of-query table commit per touched table, then one manifest publish | [runs.md](runs.md), [execution.md](execution.md) | +| Deletes | Inline-commit residual; delete-only queries allowed, mixed insert/update/delete rejected by D2 | [query-language.md](../user/query-language.md), [runs.md](runs.md) | +| Schema validation | Type checks, required fields, defaults, edge endpoint checks, and edge cardinality are enforced on write paths | [schema-language.md](../user/schema-language.md), [execution.md](execution.md) | +| Unique constraints | Intra-batch and write-path checks exist; full cross-version uniqueness is still a gap | [schema-language.md](../user/schema-language.md) | +| Storage trait | `TableStorage` exists as the sealed staged-write surface; full call-site migration and capability/stat surfaces are incomplete | 
[runs.md](runs.md), [architecture.md](architecture.md) | +| Index lifecycle | `ensure_indices` is explicit today; reconciler-based convergence is roadmap | [indexes.md](../user/indexes.md), [maintenance.md](../user/maintenance.md) | +| Traversal IDs | Runtime still builds `TypeIndex`; Lance stable row-id based graph IDs are roadmap | [architecture.md](architecture.md), [query-language.md](../user/query-language.md) | +| Auth | Bearer token hashing and server-side actor resolution are implemented at the HTTP boundary | [server.md](../user/server.md), [policy.md](../user/policy.md) | +| Tests | Tempdir-backed Lance tests are the current substrate; there is no `MemStorage` test backend | [testing.md](testing.md) | + +## Known Gaps + +Do not hide these behind invariant wording. Either move them forward or keep +them explicit. + +- **Rename-stable schema identity:** the invariant is that accepted IDs survive + renames. The current compiler still derives type IDs from `kind:name`; this + must be fixed before relying on renamed IDs across accepted schemas. +- **Storage abstraction:** `TableStorage` is present, sealed, and canonical for + staged writes, but older inherent `TableStore` call sites and inline residuals + remain. New write paths should use the staged shape unless a documented Lance + blocker applies. +- **Deletes and vector indexes:** `delete_where` and vector index creation still + advance Lance HEAD inline because the required public Lance APIs are missing. + Keep D2 and recovery coverage in place until those residuals are removed. +- **Planner capability/stat surfaces:** cost-aware planning, complete + capability advertisement, and explain-with-cost are roadmap. Do not describe + them as implemented. +- **Traversal execution:** current multi-hop execution still uses `TypeIndex`, + ad-hoc ID filtering, and eager materialization in places. Stable row IDs, SIP, + and factorization are target patterns, not current fact. 
+- **Retrieval ranks:** hybrid search works, but rank/score are not yet carried + everywhere as ordinary columns through the plan. +- **Policy pushdown and `Source`:** Cedar enforcement is at the HTTP boundary + today, and imports are still loader-shaped. Planner predicates and a unified + `Source` operator are roadmap. +- **Resource bounds:** some operations still lack enforced per-query memory or + time budgets. New long-running work should add explicit bounds rather than + widening the gap. + +## Deny-list + +If a proposal fits one of these, the burden is on the proposer to prove why the +case is exceptional. + +- Custom WAL, transaction manager, buffer pool, page format, or storage engine. +- Per-table graph publishing outside the manifest publisher. +- Re-reading current branch head during a query instead of using the captured + snapshot. +- New write paths that can advance Lance HEAD before manifest publish without a + recovery sidecar. +- Cross-query `BEGIN`/`COMMIT` transactions in the OSS engine. Use branches and + merges for multi-query workflows. +- Acknowledging writes before durable Lance and manifest persistence. +- Silent fallback to eventual consistency, partial results, or dropped rows. +- State that drifts from Lance or the manifest when it can be derived. +- Job queues for manifest-derivable state where a reconciler is the right shape. +- Synchronous inline vector/FTS index rebuilds on the query commit path, except + for documented Lance API residuals. +- Side-channels for query semantics: hidden globals, magic strings, transport + flags, or out-of-band metadata. +- Cost-blind plan choice when statistics are available or required. +- Hidden statistics for behavior that affects planning or operator choice. +- Hash-map iteration order in result ordering, plan choice, or migration output. +- String-flattened SQL/filter generation when a structured pushdown API is + available. +- Eager multi-hop cross-product materialization when factorization fits. 
+- Ad-hoc `IN`-list filtering where SIP or another structured selectivity path + fits. +- Discarding retrieval score/rank before fusion or projection decisions. +- Auto-creating placeholder nodes for orphan edges. +- Wire-protocol-specific code in compiler or engine crates. +- Cloud-only correctness fixes or forks of the OSS engine for correctness. +- Mutating immutable substrate state in place, including Lance fragments or + index segments. +- Shipping observable behavior as if it were not part of the contract. Output + ordering, error text, timestamp precision, defaults, and latency profiles all + become dependencies once exposed. + +## Review Checklist + +Use this as yes/no/NA for any non-trivial design or PR: + +- Does it respect Lance/DataFusion instead of rebuilding them? +- Does it preserve manifest-atomic graph visibility? +- Does every query keep one snapshot for its lifetime? +- Do mutations publish once at the commit boundary? +- Can every Lance-HEAD-before-manifest gap recover all-or-nothing? +- Are schema and edge integrity checks strict by default? +- Are query semantics represented in AST/IR/planner structures? +- Are transport, auth, and policy boundaries preserved? +- Are failures bounded, typed, and observable? +- Are result ordering and plan choices deterministic within a snapshot? +- Are stats/capabilities exposed when behavior depends on them? +- Are existing known gaps left no worse and documented if touched? +- Does the test live at the same boundary as the change? +- Does the change avoid every deny-list pattern, or justify the exception? + +## Maintenance Policy + +Update this file when an invariant changes, a known gap opens or closes, or a +new review anti-pattern deserves deny-list treatment. Prefer stable headings +over numbered sections so other docs can link here without churn. + +Removing or relaxing a hard invariant requires the same review process as code. 
+Adding a known gap is acceptable when it makes reality explicit; leaving stale +claims is not. diff --git a/docs/lance.md b/docs/dev/lance.md similarity index 55% rename from docs/lance.md rename to docs/dev/lance.md index f8d78fd..ef83f2c 100644 --- a/docs/lance.md +++ b/docs/dev/lance.md @@ -1,12 +1,12 @@ # Lance Docs Index (for OmniGraph agents) -OmniGraph sits on top of Lance. Many problems — index lifecycle, branching, transactions, fragments, compaction, vector/FTS internals — are answered upstream in Lance's docs, not in this repo. +OmniGraph sits on top of Lance. Many problems — index lifecycle, branching, transactions, fragments, compaction, vector/FTS internals — are answered upstream in Lance's docs, not in this codebase. This file is the curated entry point. **When you hit a Lance-shaped problem, find the matching topic below and fetch the listed URL(s) before guessing.** Don't grep our codebase for behavior that is documented authoritatively in Lance. -Base URL: `https://lance.org`. **Fetch the FULL page content, not summaries** — use `npx mdrip <url>` (or `npx mdrip --max-chars 200000 <url>` for very long pages). Tools that summarize pages (like Claude's `WebFetch`) routinely drop load-bearing details — defaults, `pub(crate)` blockers, sub-specs hidden behind navigation hubs. If `npx mdrip` is unavailable, fall back to `curl <url> | pandoc -f html -t markdown` or paste the rendered page text manually; **never act on a summarized fetch alone**. Keep this index curated to relevant material — the upstream sitemap has hundreds of URLs (notably the Namespace REST API model surface, Spark/Trino/Databricks integrations) that we don't use. +Base URL: `https://lance.org`. **Fetch the FULL page content, not summaries** — use `curl -sL <url> | pandoc -f html -t markdown` or paste the rendered page text manually. Tools that summarize pages (like Claude's `WebFetch`) routinely drop load-bearing details — defaults, `pub(crate)` blockers, sub-specs hidden behind navigation hubs.
**Never act on a summarized fetch alone.** Keep this index curated to relevant material — the upstream sitemap has hundreds of URLs (notably the Namespace REST API model surface, Spark/Trino/Databricks integrations) that we don't use. -> **Substrate boundary check.** Before fetching, recall [docs/invariants.md §I](invariants.md): if Lance already does the thing, we don't reimplement it. The most common reason to read these docs is to confirm a substrate behavior, not to learn what to clone. +> **Substrate boundary check.** Before fetching, recall [docs/dev/invariants.md](invariants.md): if Lance already does the thing, we don't reimplement it. The most common reason to read these docs is to confirm a substrate behavior, not to learn what to clone. ## Quick-start (read these once per project) @@ -129,7 +129,7 @@ Touching `omnigraph optimize` / `cleanup`, the underlying `compact_files` / `cle ### DataFusion integration -The runtime substrate that may carry our query execution. See [docs/invariants.md §I.4](invariants.md): we don't rebuild relational machinery. +The runtime substrate that may carry our query execution. See [docs/dev/invariants.md](invariants.md): we don't rebuild relational machinery. | Topic | URL | |---|---| @@ -156,13 +156,26 @@ If a future need pulls one of these into scope, add a row to the matching domain When Lance ships a major release that changes any of the above (file format bump, new index type, transaction semantics change, new branching primitive), refresh this index in the same change as the omnigraph upgrade. Stale Lance pointers are worse than no pointers. -### Last alignment audit: 2026-05-02 (Lance 4.0.1 upstream; omnigraph pinned at 4.0.0) +### Last alignment audit: 2026-05-22 (Lance 6.0.1 upstream; omnigraph pinned at 6.0.1) -A full read-through of every index page above was performed in the MR-793 cycle. 
Findings (no code changes required for PR #70): +Migration from Lance 4.0.0 → 6.0.1 landed in this cycle (DataFusion 52 → 53, Arrow 57 → 58, lance-tokenizer 6.0.1 added, tantivy* removed). Direct 4 → 6 jump; v5.x was not used as an intermediate (rationale in `~/.claude/plans/shimmering-percolating-duckling.md`). Behavior-affecting findings: -- The MemWAL "three sub-pages" (Overview / Details / Implementation) turned out to be **anchor sections on the single existing page** at `https://lance.org/format/table/mem_wal/` — not separate URLs. Fetched in full via `npx mdrip`. Findings: MemWAL is opt-in (requires an unenforced primary key + explicit shard config; omnigraph doesn't use it), operates intra-table (LSM-tree for streaming writes into one Lance table), and does NOT overlap with MR-847's cross-table manifest-vs-Lance-HEAD recovery problem. MR-847's design is unaffected. -- The distributed-indexing guide names Python APIs (`commit_existing_index_segments`, `merge_existing_index_segments`); the Rust analogues exist via `CreateIndexBuilder::execute_uncommitted` for scalar indices but **`build_index_metadata_from_segments` is `pub(crate)`** and blocks vector-index two-phase commits from outside the lance crate. Filed [lance-format/lance#6666](https://github.com/lance-format/lance/issues/6666) as a companion to [#6658](https://github.com/lance-format/lance/issues/6658). -- "Stable Row ID for Index" is documented as **experimental** in lance-4.0.x. Our datasets enable stable row IDs at the dataset level (`WriteParams::enable_stable_row_ids = true`); confirming whether our created indices opt into stable-row-id mode is a follow-up worth doing before MR-848 (index reconciler) lands. -- Fragment Reuse Index (FRI) is documented as one of three compaction strategies. omnigraph currently uses option 2 (immediate index rewrite at compaction time, via `omnigraph optimize`'s post-compaction rebuild). 
Adopting FRI is the explicit option for compaction-friendly index updates; relevant to MR-848. +- **DatasetIndexExt moved** from `lance-index` to `lance::index` (Lance PR #6280, v5.0). Six import sites updated. `lance-index::IndexType` and `lance-index::is_system_index` stayed in `lance-index`. `omnigraph-cli` and `omnigraph-server` gained `lance = { workspace = true }` in their dev-dependencies. +- **`DescribeTableResponse` gained `is_only_declared: Option<bool>`** (lance-namespace 6.0+, v5.0 PR #6186). Set to `Some(false)` in both `BranchManifestNamespace::describe_table` and `StagedTableNamespace::describe_table` — every table we return is physically materialized via `Dataset::open`, never "declared-only." +- **`MergeInsertBuilder` execute_reader return shape preserved** `(Arc<Dataset>, MergeStats)`; the publisher CAS chain at `db/manifest/publisher.rs:370-391` works unchanged. Pinned by `tests/lance_surface_guards.rs::_compile_merge_insert_builder_method_chain`. +- **`LanceError::TooMuchWriteContention` variant retained** in v6.0.1 (no rename). The typed publisher translation at `db/manifest/publisher.rs:417-430` continues to apply. Pinned by `lance_surface_guards.rs::lance_error_too_much_write_contention_variant_exists`. +- **`ManifestLocation` field shape stable**: `.path: object_store::path::Path`, `.size: Option<u64>`, `.e_tag: Option<String>`, `.naming_scheme: ManifestNamingScheme`. Pinned by `lance_surface_guards.rs::manifest_location_field_shape`. +- **`LanceFileVersion::default()` flipped V2_0 → V2_1** (v5.0). No effect — every `data_storage_version` callsite explicitly pins `Some(LanceFileVersion::V2_2)` (load-bearing for blob v2: `Blob v2 requires file version >= 2.2` enforced in `lance/src/dataset/write.rs:748`). +- **`Dataset::checkout_version(N).await?.restore().await?`**: `restore()` takes `&mut self` and returns `Result<()>` (mutates in place, does not consume + return a new dataset). The recovery rollback hammer at `db/manifest/recovery.rs:505-522` continues to work.
Pinned by `lance_surface_guards.rs::_compile_checkout_version_then_restore_signature`. +- **`DatasetBuilder::from_namespace(...).with_branch(...).with_version(...).load()`** surface preserved (the namespace builder chain at `db/manifest/namespace.rs:162-174`). Pinned by `lance_surface_guards.rs::_compile_dataset_builder_from_namespace_signature`. +- **`compact_files(&mut ds, CompactionOptions::default(), None)`** signature stable. `CompactionOptions` still does not expose `data_storage_version`; `compact_files` builds its own `WriteParams { ..Default::default() }`. Note: `LanceFileVersion::default()` is now V2_1 in v6, so optimize-rewritten fragments come out at V2_1 by default (was V2_0 in v4). Existing explicit V2_2 pins on creates/appends still apply. +- **`Dataset::delete(predicate)` returns `DeleteResult { new_dataset: Arc<Dataset>, num_deleted_rows: u64 }`** — unchanged shape. Pinned by `lance_surface_guards.rs::_compile_delete_result_field_shape`. MR-A will repurpose this guard to the staged two-phase variant once `DeleteBuilder::execute_uncommitted` migration lands. +- **File reader read methods now async** (Lance PR #6710, v6.0). No effect — omnigraph reaches Lance exclusively through `Dataset::scan` and the staged-write API. +- **Tokenizer vendored as `lance-tokenizer`** (Lance PR #6512, v6.0). No effect — no direct tokenizer imports. +- **Lance #6658 closed** (2026-05-14) but `DeleteBuilder::execute_uncommitted` did **not** ship in v6.0.1 — binary search across the release stream shows it first appears in `v7.0.0-beta.10` (the closing commits landed on main but didn't backport to the 6.x line). Tracked as MR-A: migrate `delete_where` to staged, retire the parse-time D2 mutation rule, extend recovery sidecar coverage. **Gated on the Lance v7.x bump**, not this PR. v7.0.0-rc.1 was released on 2026-05-21. +- **Lance #6666 still open** (`build_index_metadata_from_segments` public): vector-index two-phase blocked; inline `create_vector_index` residual retained.
+- **Lance #6877 still open** (`MergeInsertBuilder` dup-rowid): PR #109's `SourceDedupeBehavior::FirstSeen` + `check_batch_unique_by_keys` precondition stay load-bearing. + +Surface guards added: `crates/omnigraph/tests/lance_surface_guards.rs` (8 named guards; 3 runtime + 5 compile-only). Future Lance bumps re-run this file first as the smoke check. Two additional guards from the original plan deferred to follow-up (`manifest_cas_returns_row_level_contention_variant` needs full publisher-race harness; `table_version_metadata_byte_compatible_with_v4` needs `pub(crate)` reach extension). Bump this date stanza on the next alignment pass. diff --git a/docs/merge.md b/docs/dev/merge.md similarity index 100% rename from docs/merge.md rename to docs/dev/merge.md diff --git a/docs/dev/rfc-001-queries-envelope-mcp.md b/docs/dev/rfc-001-queries-envelope-mcp.md new file mode 100644 index 0000000..b5d62d4 --- /dev/null +++ b/docs/dev/rfc-001-queries-envelope-mcp.md @@ -0,0 +1,351 @@ +# RFC: Inline + Stored Queries, Request/Response Envelope, MCP + +**Status:** Proposed +**Date:** 2026-05-28 +**Tickets:** MR-656 (inline `-e` + URL rename), MR-668 (multi-graph, shipped), MR-976 (Phase 1 envelope parent: MR-977 / MR-978 / MR-979 / MR-980), MR-969 (stored queries + MCP) +**Target release:** v0.6.x patch series (MR-656 + Phase 1) → v0.7.0 (MR-969 PRs 1-3) + +## Summary + +OmniGraph today exposes `POST /read` and `POST /change` with a weakly-contracted body (counts only on writes) and no per-query authorization. This RFC consolidates the work landing across three Linear tickets into one coherent design: + +1. **MR-656**: rename `/read` → `/query` and `/change` → `/mutate`, add inline `-e` CLI flag, ship three-channel deprecation on the legacy URLs. **In flight, PR #110.** +2. 
**Envelope hardening** (this RFC adds it as a Phase 1 before MR-969): make today's mutation surface agent-grade with idempotency keys, preconditions, deadlines, and a structured response envelope carrying `audit_id`, `commit_id`, `snapshot_id`, and cost stats. +3. **MR-969**: add a stored-query registry, `POST /queries/{name}`, a new `InvokeQuery` Cedar action with per-query scope, inline pragmas in `.gq` (`@description`, `@returns`, `@mcp`), and MCP transport over the same routing primitive. + +The bet: inline and stored queries serve different stages of the same lifecycle, run through the same engine code, and are gated by different Cedar actions. HelixDB collapsed to stored-only. Postgres has neither stored-query Cedar nor MCP. The window for an OSS, declarative, agent-grade graph query surface is open. + +## Motivation + +Three problems today: + +- **Mutation responses are too thin.** `ChangeOutput { node_count, edge_count }` is the entire memory the API has of what just happened. No `commit_id`, no `audit_id`, no `snapshot_id`. Agents reporting results have nothing to cite. Humans can't reproduce a read. +- **No agent-safe surface.** Cedar gates `read` and `change` at the action level. A token either runs *any* query or *no* query of that kind. There is no way to express "this agent can invoke `find_user` and nothing else." +- **No discovery primitive.** Agents need a tool list. SDKs need a stable contract per operation. Both are absent. + +The MR-656 rename solves the cosmetic asymmetry (`/read` was a poor pair for the future `/queries/{name}`). The envelope work and MR-969 solve the substantive gaps. + +## Non-Goals + +- Compiled query bundles (HelixDB's `queries.json` shape). `.gq` files are already declarative; the file *is* the artifact. +- Hot reload of the registry. Restart-only matches the multi-graph operational model from MR-668. +- Per-query rate limits in v1. Existing `WorkloadController` covers the bulk of the risk. Punt to a future ticket. 
+- Cross-graph tool listing in MCP. Agents loop over per-graph endpoints when they need multi-graph access. Avoid namespacing in the contract. +- Web dashboard / control-plane management of the registry. Operators edit `.gq` + `policy.yaml` and restart. +- Schema introspection through MCP. Schema is an operator concern; agents see types through declared return shapes on the queries they're allowed to invoke. +- Per-environment override files. Environment-specific differences live in `policy.yaml`, which already has per-env variants. + +## Background + +OmniGraph runs on Lance 6.x with a property graph layered on top: typed nodes/edges in per-type Lance datasets, atomic multi-table commits via a `__manifest` table, branchable and time-travelable through Lance versioning. The HTTP server (`omnigraph-server`) is Axum + utoipa with bearer-token auth and Cedar policy enforcement at every `_as` writer. + +MR-668 shipped multi-graph mode in v0.6.0. One server process can host 1-10 graphs, with per-graph endpoints under `/graphs/{id}/...`. Cedar policy resolves against `Server::"root"` (for management actions) and `Graph::"prod"` (for per-graph actions). + +MR-656 is currently in PR #110 (CONFLICTING / DIRTY against main; rebase planned). It renames the URL surface, adds inline source support, and ships three-channel deprecation (OpenAPI `deprecated: true`, RFC 9745 `Deprecation: true` header, RFC 8288 successor `Link`). 
+ +## Design + +### Two paths, one engine + +| Dimension | Inline (`/query`, `/mutate`) | Stored (`/queries/{name}`) | +|---|---|---| +| Source location | Request body | `queries/*.gq` on disk | +| Parse + typecheck | Per request | Once at server boot | +| Cedar action | `read` / `change` | `invoke_query` (per-name scope) | +| MCP-exposed | No (not enumerable) | Yes (when `@mcp(expose=true)`) | +| Output schema | Inferred | Declared via `@returns`, asserted at boot | +| Audit log shape | Records query hash | Records query name | +| Failure visibility | Runtime 400 | Boot-time refusal | + +Both paths converge in the engine: + +``` +POST /query ─parse→─┐ +POST /mutate ─parse→─┤ + ├─→ run_query / run_mutate(ast, params, branch) ─→ envelope +POST /queries/{name} ───────┤ +POST /mcp/invoke ───────────┘ (MCP adapter on top of the same call) +``` + +The MR-656 rebase widens `run_query` / `run_mutate` to accept a parsed AST or source string. Inline parses on each call. Stored looks up the pre-parsed AST in the registry. Same execution path beyond that point. + +### Cedar split (the LLM-safe wedge) + +Inline and stored coexist safely because they're gated by different actions: + +```yaml +# Production policy — agents locked to a curated stored-query set +- deny: + actors: { group: agents } + actions: [read, change] # blocks /query, /mutate, /read, /change + +- allow: + actors: { group: agents } + actions: [invoke_query] + resource: Graph::"prod" + query_scope: { names: [find_user, list_orders, search_docs] } +``` + +The agent's effective surface: three stored queries by name. Cannot compose inline. Cannot enumerate schema. Cannot read arbitrary entities. A developer in the same deployment with `dev-engineers` group membership might have `[read, change, invoke_query]` allowed — full access to both paths. + +Same server, same data, two completely different API surfaces depending on token. This is the posture MR-969 calls "LLM-safe API surface." 
+ +### `.gq` pragmas + +Stored queries self-describe at the top of the source file: + +```gq +@description("Look up a user by ID. Returns name, email, last_login.") +@returns({ name: String, email: String, last_login: DateTime? }) +@mcp(expose=true) + +query find_user($id: String) { + match { $u: User { id: $id } } + return { $u.name, $u.email, $u.last_login } +} +``` + +Three pragmas in v1: + +- `@description("...")` — string surfaced in `omnigraph queries explain` and MCP tool descriptions. +- `@returns({...})` — optional output type assertion. Compiler verifies the inferred type matches; mismatch fails server startup. +- `@mcp(expose=true|false, tool_name="alt_name"?)` — controls MCP visibility. Default is `expose=false` (callable via HTTP, hidden from MCP). `tool_name` defaults to the query name. + +Pragmas live in source, not in a separate YAML registry. Drop a file in `queries/`, restart, the registry picks it up. The full agent contract is reviewable in one diff. + +### Request envelope ("before") + +Today's request carries auth + body. The envelope adds seven fields (four headers, three body fields), all optional: + +```http +POST /graphs/prod/queries/find_user +Authorization: Bearer <token> +Idempotency-Key: 01HXYZ... # mutations only +If-Match: 01HABC... # optimistic concurrency +X-Deadline: 2026-05-28T19:30:00Z # or X-Timeout-Ms: 5000 +X-Trace-Id: 01HDEF... +Content-Type: application/json + +{ + "params": { "id": "u-42" }, + "branch": "main", + "expect": "read_only", # scope assertion + "dry_run": false, # mutations only + "fields": ["name", "email"] # result projection +} +``` + +Field semantics: + +| Field | Applies to | Purpose | +|---|---|---| +| `Idempotency-Key` | Mutations | Server caches `(token, key)` → response for 10 minutes. Replays return cached response with `Idempotency-Replay: true` header. Prevents double-write on retry. | +| `If-Match` | Mutations | Run only if branch HEAD matches the given commit ID. 412 Precondition Failed otherwise. Enables read-then-write without races.
| +| `X-Deadline` / `X-Timeout-Ms` | All | Server respects; returns 504-typed error past the deadline. Bounds execution for context-budget-constrained callers. | +| `X-Trace-Id` | All | Caller-supplied; server echoes back. Lets agents correlate multi-call sequences. | +| `expect` | All | Caller asserts shape: `"read_only"`, `{"max_rows_scanned": 10000}`. Server validates against parsed AST or planner estimate; rejects before running. | +| `dry_run` | Mutations | Returns what *would* happen without committing. Implemented via scratch branch + diff + discard. | +| `fields` | Reads | Server returns only listed columns. Saves bandwidth + agent context window. | + +All of these fields are optional; today's call shape continues working. + +### Response envelope ("after") + +The response envelope replaces today's bare-result shape with a structured wrapper. Every endpoint (inline, stored, MCP) returns the same envelope: + +```json +{ + "result": { "name": "Alice", "email": "alice@..." }, + "audit_id": "01HGHI...", + "snapshot_id": "01HJKL...", + "commit_id": null, + "stats": { + "rows_scanned": 1, + "ms_elapsed": 4, + "bytes_read": 128 + }, + "warnings": [] +} +``` + +Response headers: + +| Header | When | Purpose | +|---|---|---| +| `Idempotency-Replay: true\|false` | Mutations | Was this response served from the idempotency cache? | +| `X-Trace-Id` | All | Echo of the request's trace ID, or server-minted if absent. | +| `Deprecation: true` | `/read`, `/change` only | RFC 9745 signal from MR-656. | +| `Link: <successor-url>; rel="successor-version"` | `/read`, `/change` only | RFC 8288 successor pointer from MR-656 (`/query` for `/read`, `/mutate` for `/change`). | + +Body envelope fields: + +| Field | When | Purpose | +|---|---|---| +| `result` | All | The actual response payload. Shape determined by the query's return type. | +| `audit_id` | All | ULID for the audit log entry. Lets the caller cite exactly what ran. | +| `snapshot_id` | All | Manifest snapshot the query observed. Reproducibility — replay with `?snapshot=<snapshot_id>`.
| +| `commit_id` | Mutations | ULID of the new commit. Null for reads. Lets the caller cite what changed. | +| `stats` | All | `{rows_scanned, ms_elapsed, bytes_read}`. Lets agents learn what's expensive. | +| `warnings` | All | Non-fatal observations: deprecated property access, full-scan despite available index, scan exceeded soft row limit. Empty array when none. | + +The envelope is the API's *memory of what happened*. Without `audit_id` + `commit_id` + `snapshot_id`, agent reports are hearsay and reads are not reproducible. With them, provenance is a first-class property of every response. + +### MCP integration with multi-graph + +MCP routes are per-graph, matching the rest of MR-668's hierarchy: + +``` +GET /graphs/{id}/mcp/tools # tool list for this graph, this token +POST /graphs/{id}/mcp/invoke # invoke a tool on this graph +``` + +Single-mode collapses to `/mcp/tools` and `/mcp/invoke` at the root (same shape, no `/graphs/{id}` prefix). Both modes route through identical handler code. + +Tool list response: + +```json +{ + "tools": [ + { + "name": "find_user", + "description": "Look up a user by ID.", + "inputSchema": { "id": { "type": "string", "required": true } }, + "outputSchema": { "name": "string", "email": "string", "last_login": "datetime?" }, + "read_only": true + } + ], + "graph_id": "prod", + "snapshot_id": "01HJKL..." +} +``` + +The tool list is the subset of registered queries where (a) `@mcp(expose=true)` in source and (b) Cedar permits `invoke_query` for this token on this name on this graph. Computed per request — cheap because it's just iterating the registry + one Cedar evaluation per name. + +**Token scoping.** Most tokens carry one graph claim. Cross-graph access requires multiple Cedar rules (one per graph) and is uncommon. Agents that genuinely operate across graphs loop over `/graphs/{id}/mcp/tools` themselves. The contract stays clean; graph renames don't break tool names. 
+ +**Discovery.** Agents are told their MCP URL at provisioning: `https://omnigraph.example.com/graphs/prod/mcp`. Token authorizes; URL identifies. Same model as every OAuth-style API. + +**`/mcp/invoke` is a protocol adapter.** Unwrap MCP protocol envelope, call the same code path as `/queries/{name}`, wrap the response in MCP shape. No new execution semantics. + +### CLI surface + +The CLI mirrors the HTTP routes. Post-MR-656 and post-MR-969: + +```bash +# Inline (MR-656) +omnigraph query -e 'query test() { ... }' # /query +omnigraph mutate -e 'query bump() { update ... }' # /mutate + +# Stored (MR-969) +omnigraph queries list # GET /queries (future) +omnigraph queries explain find_user # show params + return shape + source +omnigraph queries invoke find_user --param id=u-42 # POST /queries/find_user + +# Pragma + registry validation +omnigraph lint queries/find_user.gq # parses + verifies pragmas +omnigraph queries lint # validates the whole registry +``` + +`omnigraph queries invoke` reads bearer + URL from `omnigraph.yaml` like the other remote commands. Local invocations work the same way the existing `omnigraph query`/`mutate` do. + +### Lifecycle + +The promotion path from inline to stored is the load-bearing DX story: + +``` +1. EXPLORE omnigraph query -e 'query find_user($id: String) { ... }' --params '{"id": "u-42"}' + └─ POST /query, iterate freely + +2. STABILIZE write queries/find_user.gq with @description, @returns, @mcp pragmas + └─ git diff shows the full agent contract in one file + +3. AUTHORIZE add Cedar rule allowing invoke_query for the appropriate actor group + └─ query_scope: { names: [find_user] } + +4. DEPLOY restart server + └─ /queries/find_user goes live + └─ /mcp/tools auto-lists it for any token with invoke_query[find_user] + +5. RETIRE deny: [read, change] for the agent group + └─ inline access closed; stored remains + └─ MR-969's "LLM-safe API surface" reached +``` + +Same `.gq` source through all five steps. No rewrite. No language shift.
The pragmas are the only added syntax between exploration and production. + +## Migration + +Existing callers see no breakage: + +- `POST /read` and `POST /change` keep working, now with `Deprecation: true` headers (MR-656). +- `ChangeRequest` field names `query_source` / `query_name` accepted as serde aliases (MR-656). +- `aliases:` block in `omnigraph.yaml` unchanged; both `read`/`change` and `query`/`mutate` accepted as `command:` values (MR-656). +- New envelope fields are additive; old clients ignoring them keep working. +- `Idempotency-Key`, `If-Match`, `X-Deadline` are opt-in headers; absence is the current behavior. + +Callers move at their own pace. The envelope upgrades + URL rename ship in v0.6.x (small PRs). Stored queries + MCP ship in v0.7.0. + +## Sequencing + +**Phase 1: envelope (v0.6.x, before MR-969).** Four small PRs, ~100-200 LOC each. + +1. Wrap responses in the structured envelope. Add `audit_id`, `snapshot_id`, `commit_id`, `stats`, `warnings`. Backward-compatible if we keep today's top-level fields and add new ones alongside; cleaner break if we move to nested `result.*`. Pick one and live with it. +2. Honor `Idempotency-Key` on `/mutate` (and the deprecated `/change`). Server-side cache keyed by `(token, key)`. +3. Honor `If-Match` on `/mutate`. Wire through to the publisher CAS layer. +4. Honor `X-Deadline` / `X-Timeout-Ms` on every endpoint. Return 504-typed error past deadline. + +**Phase 2: MR-969 PR 1 (registry).** The stored-query registry, `/queries/{name}` route, `InvokeQuery` Cedar action with per-name scope, `.gq` pragma parsing (`@description`, `@returns`, `@mcp`), read-vs-mutate classification at registry load. Inline keeps working unchanged. + +**Phase 3: MR-969 PR 2 (MCP).** `/graphs/{id}/mcp/tools` and `/graphs/{id}/mcp/invoke`. Tool schemas projected from declared return types and parameter declarations. Single-graph-scoped tokens. 
+ +**Phase 4: MR-969 PR 3 (Cedar deny-on-ad-hoc sugar).** Small Cedar-language addition so operators can lock down `/read` / `/query` while keeping `/queries/*` open. Independent of PRs 1-2. + +**Phase 5: deferred.** +- Cross-graph MCP namespacing (wait for usage signal). +- Per-query rate limits (extend `WorkloadController`). +- Schema introspection as a separate Cedar action (3-line PR). +- CLI verb consolidation (`omnigraph call `). +- Cache warming (HelixDB-style; not load-bearing). + +## Rejected Alternatives + +**Per-environment override files (`_overrides.yaml`).** Initial design had a sparse YAML file for per-env tweaks: MCP exposure, row caps, kill-switch, param locks. Rejected because every override candidate either belongs in source (`@mcp` flag), Cedar policy (per-actor visibility, per-env), or `omnigraph.yaml` (operator config). Splitting query metadata across files makes it harder to review what an agent can see. Keep source authoritative; let Cedar express the per-env differences. + +**Compiled query bundle (HelixDB's `queries.json`).** HelixDB compiles their Rust-DSL queries to JSON. Rejected because `.gq` files are already declarative. The file is the artifact. Reviewers diff source, not bytecode. + +**Stored-queries-only (HelixDB's posture).** Rejected because the personal-graph / dev-iteration use case dies without inline. Inline `-e` is the REPL for human exploration; stored is the contract for production agents. Both first-class. + +**Cross-graph tool-name prefixing (`prod.find_user`).** Rejected because graph renames would break agent contracts. Per-graph URLs let graph identity live in the URL, not in tool names. + +**Body-field graph dispatch (`{tool, graph, params}`).** Rejected because it doubles the contract surface (every tool is identified by two fields). Per-graph URLs are simpler. 
+ +**Pragmas in YAML instead of source.** Rejected because two-file definitions (source + metadata YAML) make diffs harder to review and create drift opportunities. Source is the source of truth. + +**Pragmas as in-source comments (`#[mcp]` HelixDB-style).** Considered; chose `@mcp(...)` because comment-flavored pragmas conflate documentation and machine-readable metadata. The `@` prefix makes the pragma's role explicit. + +## Open Questions + +1. **Envelope breakage vs additive.** Phase 1.1 wraps responses in a structured envelope. Do we keep today's top-level fields *and* add new ones (additive, ugly), or move result to `result.*` (clean break, requires SDK updates)? Lean toward additive — let the new envelope coexist with the old shape until v0.7.0, then collapse. + +2. **`@returns` strictness.** Should mismatched declared-vs-inferred return type be a boot-time error or a warning? Lean toward error — silent drift defeats the assertion's purpose. Operators who want flexibility omit `@returns`. + +3. **MCP protocol transport.** Streamable HTTP (the new MCP standard) vs stdio (Anthropic's original). Both have Rust crates. Lean toward streamable HTTP since we're already an HTTP server. + +4. **Stored mutation routing.** A `.gq` file that contains both reads and writes — does the registry reject it at load (parse-time D2 rule from MR-656), or accept and classify as "mixed"? Lean toward reject. Mixed queries are a footgun; force operators to split. + +5. **`expect` field strictness.** `expect: "read_only"` against a parsed mutating query is an obvious 400. But `expect: {max_rows_scanned: 10000}` requires planner estimates that don't exist today. Either ship `expect` with only the "read_only" assertion in v1 and grow it, or wait for the planner. Lean toward shipping the partial form. + +6. **CLI `queries invoke` shape.** Today's `omnigraph query` takes a file or alias. `omnigraph queries invoke find_user` takes a stored query name. 
Should `omnigraph query --name find_user` also work (auto-detect)? Cleaner to keep them separate verbs — the stored vs inline distinction is part of the contract. + +## References + +- MR-656: [Support inline query strings in CLI and HTTP server](https://linear.app/modernrelay/issue/MR-656) +- MR-668: [Multi-graph server mode](https://linear.app/modernrelay/issue/MR-668) (shipped, PR #119) +- MR-969: [Stored queries with MCP exposure and per-query Cedar authorization](https://linear.app/modernrelay/issue/MR-969) +- PR #110: [feat: inline query strings in CLI and HTTP server](https://github.com/ModernRelay/omnigraph/pull/110) +- HelixDB docs: [docs.helix-db.com/llms-full.txt](https://docs.helix-db.com/llms-full.txt) — `#[mcp]` macro, scoped API keys, stored query model +- RFC 9745 (`Deprecation` header) +- RFC 8288 (`Link` relations, `successor-version`) +- MCP spec: [modelcontextprotocol.io](https://modelcontextprotocol.io) +- [invariants.md](./invariants.md) — substrate boundaries this work respects +- [../user/server.md](../user/server.md) — current HTTP surface (post-MR-656 picks up the `/query`+`/mutate` rename and deprecation) diff --git a/docs/runs.md b/docs/dev/runs.md similarity index 98% rename from docs/runs.md rename to docs/dev/runs.md index 5c8dfd8..816f2ac 100644 --- a/docs/runs.md +++ b/docs/dev/runs.md @@ -22,7 +22,7 @@ A `.gq` query with multiple ops (e.g. `insert Person … insert Knows …`) must observe earlier ops' writes when validating later ops (referential integrity, edge cardinality). 
After MR-794 step 2+ this is implemented via an in-memory `MutationStaging` accumulator in -[`crates/omnigraph/src/exec/staging.rs`](../crates/omnigraph/src/exec/staging.rs), +[`crates/omnigraph/src/exec/staging.rs`](../../crates/omnigraph/src/exec/staging.rs), shared by both `mutate_as` and the bulk loader: - On the first touch of each table, the pre-write manifest version is @@ -48,9 +48,8 @@ shared by both `mutate_as` and the bulk loader: prevents inserts/updates from coexisting with deletes in one query, so the inline path is safe for delete-only mutations. -This upholds [docs/invariants.md §VI.23](invariants.md) (atomicity per -query) and §VI.25 (read-your-writes within a multi-statement mutation, -upheld). +This upholds the manifest-atomic mutation and read-your-writes invariants +tracked in [docs/dev/invariants.md](invariants.md). ### D₂ — parse-time mixed-mode rejection @@ -233,7 +232,7 @@ success and one failure. The losing writer's error is `ManifestConflictDetails::ExpectedVersionMismatch { table_key, expected, actual }`. The HTTP server maps this to **409 Conflict** with body `{"error": "...", "code": "conflict", "manifest_conflict": { "table_key": -"...", "expected": N, "actual": M }}` — see [docs/server.md](server.md). +"...", "expected": N, "actual": M }}` — see [docs/user/server.md](../user/server.md). ## Audit diff --git a/docs/dev/schema-lint-v1-plan.md b/docs/dev/schema-lint-v1-plan.md new file mode 100644 index 0000000..86eeb4b --- /dev/null +++ b/docs/dev/schema-lint-v1-plan.md @@ -0,0 +1,86 @@ +# Schema-lint chassis v1 — implementation plan + +Work-in-progress checklist for the next slice of the chassis. v0 (the code-tagged diagnostics layer, MR-694 first PR #87) shipped on `main`. v1 brings the chassis to **enforced behavior**: `--allow-data-loss` flag, the `Soft | Hard` mode dimension on drops, and the first real "destructive but supported" migration step. 
+ +This document tracks scope so the PR can land in incremental commits without losing the thread. Delete after the work is merged. + +## Lance substrate alignment (revised 2026-05-13) + +After a substrate audit against the [Lance data-evolution guide](https://lance.org/guide/data_evolution/), the v1 plan was simplified. Lance's `drop_columns()` is **already metadata-only and reversible via time travel until cleanup**: + +> `drop_columns` is metadata-only and remains reversible as long as old versions are retained. After `compact_files()` rewrites data files and `cleanup_old_versions()` removes old manifests/files, removed data may become permanently unrecoverable. + +This means: +- **Soft mode = `Dataset::drop_columns([name])`**. No separate `tombstoned: bool` catalog field. Lance's version graph IS the tombstone. +- **Hard mode = `drop_columns()` + `compact_files()` + `cleanup_old_versions()`** (the existing `omnigraph cleanup` pipeline). +- **No `omnigraph schema unhide` command needed**. Undo is `omnigraph snapshot --at <version>` or `omnigraph branch create --from <version>` — the existing time-travel surface. + +Two commits dropped from the v1 plan as a result: the old commit 3 (tombstone fields on catalog IR) and commit 8 (unhide command). Net: ~250 LoC less surface, more substrate-aligned, fewer new concepts. + +The broader substrate migration (using Lance native APIs across all migration steps, not just drop) is tracked in **MR-948** — out of scope for this branch but linked from each commit below. + +## Done in this branch so far + +- [x] **Commit 1** — `SchemaMigrationStep::diagnostic()` helper + CLI plan output displays tier alongside the code: `unsupported change on node:Person.age [OG-DS-104, destructive]: ...`. No behavior change. All 11 existing `schema_apply` tests still pass. +- [x] **Commit 2** — `DropMode { Soft, Hard }` enum + dormant `DropType` and `DropProperty` variants on `SchemaMigrationStep`.
Apply path has an exhaustive-match arm returning `manifest_internal` if either variant arrives via deserialization. Serde round-trip pinned for stable wire shape. + +## Next commits (in order) + +### Commit 3 — Planner emits `DropProperty { Soft }` + apply calls `Dataset::drop_columns` + +Replaces the earlier "tombstone fields on catalog IR" commit. No catalog IR changes needed — Lance handles the tombstone via its version graph. + +- [ ] In `plan_properties`'s leftover-property branch: emit `DropProperty { Soft }` instead of `UnsupportedChange` for OG-DS-104. +- [ ] Same for node-type removal (`plan_nodes` leftover → `DropType { Soft }`, OG-DS-102) and edge-type removal (`plan_edges` leftover → `DropType { Soft }`, OG-DS-103). +- [ ] `apply_schema_with_lock` handles `DropProperty { Soft }`: calls `Dataset::drop_columns(&[property_name])` and commits via the staged-write path. **Substrate primitive: Lance metadata-only commit.** +- [ ] `apply_schema_with_lock` handles `DropType { Soft }`: marks the table tombstoned in `__manifest` (data files retained). Reversible via branch / snapshot restore. +- [ ] Recovery sidecar: standard `catalog_only` discipline — the Lance commit IS the recoverable unit. +- [ ] CLI plan output renders the new variants with mode visible. +- [ ] Integration test (extends `tests/schema_apply.rs`): remove a property, assert apply succeeds, row count preserved, current-version schema query no longer surfaces the property, prior-version time-travel query still sees it. + +### Commit 4 — Convert PR #62 destructive-rejection tests + +- [ ] `tests/schema_apply.rs`'s 6 PR #62 tests currently assert "removing X fails with `OG-DS-XXX`". Convert each to: + - **Without `--allow-data-loss`**: soft drop succeeds (Lance metadata-only, rows preserved in current version, recoverable via time travel). + - **With `--allow-data-loss`**: hard drop succeeds (column data deleted after compact + cleanup) — covered by commit 5. 
+- [ ] Add a new test that asserts **time-travel reversibility**: drop a column, query at prior version, verify the column is still present in that snapshot. + +### Commit 5 — `--allow-data-loss` CLI flag + `Hard` mode + +- [ ] Add `--allow-data-loss` boolean flag to `omnigraph schema apply` and `omnigraph schema plan` (plan shows what would happen if applied with the flag). +- [ ] Thread through to `apply_schema_with_lock(.., allow_data_loss: bool)`. +- [ ] Planner: when `--allow-data-loss` is set, emit `Hard` mode instead of `Soft` for drop paths. +- [ ] Apply path for `Hard` mode `DropProperty`: `drop_columns()` + `compact_files()` + `cleanup_old_versions()`. **Substrate primitives:** Lance's existing cleanup pipeline; same APIs `omnigraph cleanup` already uses. +- [ ] Apply path for `Hard` mode `DropType`: remove the manifest entry + drop the Lance dataset. +- [ ] Recovery sidecar discipline: `full_rewrite` (the cleanup phase is the rewrite). +- [ ] Integration tests: hard drop deletes data; without flag, hard drop is impossible (planner only emits soft). + +## Open questions + +- **`Hard` mode cleanup: inline vs. deferred.** Should `--allow-data-loss` on apply run `compact_files` + `cleanup_old_versions` **inline** (operator gets data deletion immediately) or **defer** to the next `omnigraph cleanup` run (operator's existing pipeline)? Recommend **inline** for ergonomic guarantee — the flag is explicit consent and operators who said "yes data loss" expect data to actually be gone after the apply returns. +- **Query-level enforcement on dropped types**: should a `match { $p: DroppedType }` query fail at parse time, lint time, or runtime? Recommend: lint warning at parse time (new code in the QL family); runtime returns empty result (Lance no longer has the column). Different from before: no "tombstoned" state to surface — the type is genuinely gone in the current version's catalog. 
+- **`Hard` mode for `DropType` data forensics**: dataset deletion via Lance is irreversible after `cleanup_old_versions`. Operators who want forensics should take a snapshot or tag first. Document this in `docs/schema-language.md` once this lands. No special escape hatch in the apply path. + +## Not in v1 scope (deferred) + +- Severity config in `omnigraph.yaml` (per-rule `error` / `warn` / `force`). +- `@allow(OG-XXX-NNN, "rationale")` suppression directives. +- Pre-migration checks (MR-941). +- CD / VE / LK / NM family rules (MR-942..MR-945). +- CI integration (MR-946). +- The remaining 12 of 17 `UnsupportedChange` paths still untagged with codes (interface removal, edge endpoint change, edge cardinality change, etc.). Each goes through its own MR-XXX issue. +- **Substrate alignment of non-drop migration steps** (Lance native `add_columns`, `alter_columns` for renames, type casts, defaults). Tracked in **MR-948**. Today's `AddProperty` and `RenameProperty` still go through `stage_overwrite`; v1 doesn't change that. + +## What changed from the original plan + +For posterity / reviewers comparing against the initial plan committed in commit 1: + +| Was | Now | Why | +|---|---|---| +| Commit 3: `tombstoned: bool` on `NodeIR`/`EdgeIR`/`PropertyIR` | **Removed.** No catalog IR change needed. | Lance's `drop_columns()` is metadata-only; Lance's version graph is the tombstone. | +| Commit 5: apply path writes `tombstoned: true` into catalog | Apply path calls `Dataset::drop_columns([name])` | Substrate-aligned. Lance handles the metadata commit. | +| Commit 7 Hard mode: `stage_overwrite` removing the column | `drop_columns` + `compact_files` + `cleanup_old_versions` | Substrate-aligned. Reuses `omnigraph cleanup` pipeline. | +| Commit 8: `omnigraph schema unhide ` | **Removed.** | Time travel is the undo. `omnigraph snapshot --at ` reaches the pre-drop version. | +| 8 commits total | 5 commits total | Smaller surface, fewer new concepts. 
| + +The chassis types (`DropMode` enum, `DropType` / `DropProperty` variants — commit 2) are kept exactly as designed; only the implementation strategy changed. diff --git a/docs/testing.md b/docs/dev/testing.md similarity index 84% rename from docs/testing.md rename to docs/dev/testing.md index 43643e9..e6989ba 100644 --- a/docs/testing.md +++ b/docs/dev/testing.md @@ -1,6 +1,6 @@ # Testing -This file is the always-on map of the test surface. **Consult it before every task** so you know what tests already cover the area you're about to change, what helpers to reuse, and where a new test belongs. The architectural invariant *"tests at every boundary, not just end-to-end"* lives in [docs/invariants.md §VIII.47](invariants.md). +This file is the always-on map of the test surface. **Consult it before every task** so you know what tests already cover the area you're about to change, what helpers to reuse, and where a new test belongs. The architectural invariant for boundary-matched tests lives in [docs/dev/invariants.md](invariants.md). ## Where tests live, per crate @@ -22,7 +22,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav | `merge_truth_table.rs` | Merge-pair truth table (MR-786): all 9×9 `(left_op, right_op)` cells from `{noop, addNode, removeNode, addEdge, removeEdge, setProperty, dropProperty, addLabel, removeLabel}`. Adding a new op to `OpVariant` forces a compile error in `build_case` until the new row + column are dispositioned. 36 executable cells run through real `branch_merge` with a structured oracle (`MergeOutcome` / `MergeConflictKind` + graph-state assert); 45 cells involving `dropProperty`/`addLabel`/`removeLabel` are recorded as `Unsupported` until the mutation grammar grows. 
| | `runs.rs` | Direct-publish writes: cancellation, concurrent-writer CAS, multi-statement atomicity, MR-794 staged-write rewire (D₂ rejection, insert+update coalesce, multi-append coalesce, partial-failure recovery, load RI/cardinality recovery) | | `staged_writes.rs` | TableStore staged-write primitives (`stage_append`, `stage_merge_insert`, `commit_staged`, `scan_with_staged`, `count_rows_with_staged`) — primitive-level only; engine code uses the in-memory `MutationStaging` accumulator instead | -| `lifecycle.rs` | Repo lifecycle, schema state | +| `lifecycle.rs` | Graph lifecycle, schema state | | `point_in_time.rs` | Snapshots, time travel (`snapshot_at_version`, `entity_at`) | | `changes.rs` | `diff_between` / `diff_commits` | | `consistency.rs` | Cross-table snapshot isolation, atomic publish | @@ -31,7 +31,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav | `traversal.rs` | `Expand`, variable-length hops, anti-join | | `aggregation.rs` | `count`, `sum`, `avg`, `min`, `max` | | `export.rs` | NDJSON streaming export filters | -| `s3_storage.rs` | S3-backed repo (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) | +| `s3_storage.rs` | S3-backed graph (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) | | `lance_version_columns.rs` | Per-row `_row_last_updated_at_version` behavior | | `validators.rs` | Schema constraint enforcement (enum, range, unique, cardinality) across JSONL, insert, update paths | | `maintenance.rs` | `optimize` (compaction) + `cleanup` (version GC): empty/idempotent/no-op edges, policy validation, head preservation | @@ -45,11 +45,11 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav ## Test helpers -- **Engine** — `crates/omnigraph/tests/helpers/mod.rs`: `init_and_load()` (bootstrap a temp repo + load standard fixture), `snapshot_main()`, `snapshot_branch()`, query/mutation runners, row collection and counting. Use these instead of hand-rolling. 
+- **Engine** — `crates/omnigraph/tests/helpers/mod.rs`: `init_and_load()` (bootstrap a temp graph + load standard fixture), `snapshot_main()`, `snapshot_branch()`, query/mutation runners, row collection and counting. Use these instead of hand-rolling. - **CLI** — `crates/omnigraph-cli/tests/support/mod.rs`: `Command`-style wrapper for invoking `omnigraph`, server-process spawning, fixture resolution, output assertion helpers. - **Server** — no shared helpers; server tests call the `Omnigraph` engine API directly and exercise endpoints over the wire. -> Note: there is **no `MemStorage` or in-memory backend** today. Tests use `tempfile::tempdir()` for local FS. If you find yourself needing one for layer isolation, that's an architectural ask — see [docs/invariants.md §VIII.48](invariants.md) (reference impl + test impl per trait). +> Note: there is **no `MemStorage` or in-memory backend** today. Tests use `tempfile::tempdir()` for local FS. If you find yourself needing one for layer isolation, that's an architectural ask — keep it explicit in [docs/dev/invariants.md](invariants.md) under known gaps. ## Failpoints (fault injection) @@ -63,23 +63,23 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav CI runs three S3-backed tests against a containerized RustFS server (`.github/workflows/ci.yml` → `rustfs_integration` job): - `cargo test -p omnigraph-engine --test s3_storage` -- `cargo test -p omnigraph-server --test server server_opens_s3_repo_directly_and_serves_snapshot_and_read` +- `cargo test -p omnigraph-server --test server server_opens_s3_graph_directly_and_serves_snapshot_and_read` - `cargo test -p omnigraph-cli --test system_local local_cli_s3_end_to_end_init_load_read_flow` Locally, set `OMNIGRAPH_S3_TEST_BUCKET` (and the usual `AWS_*` vars including `AWS_ENDPOINT_URL_S3` for non-AWS) before running. Without those, S3 tests skip gracefully. 
## OpenAPI drift -`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json` and diffs against the checked-in copy. CI auto-commits the regeneration on same-repo PRs and otherwise runs in strict-check mode (env: `OMNIGRAPH_UPDATE_OPENAPI`). +`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json` and diffs against the checked-in copy. CI auto-commits the regeneration on same-repository PRs and otherwise runs in strict-check mode (env: `OMNIGRAPH_UPDATE_OPENAPI`). ## Examples & benches - `crates/omnigraph/examples/bench_expand.rs` — runnable example (not part of CI). -- No `benches/` directories. The architectural rule [docs/invariants.md §VIII.50](invariants.md) requires benchmark motivation before optimization, so add `benches/` per crate when you ship a perf-driven change. +- No `benches/` directories. Add `benches/` per crate when you ship a perf-driven change, and include the motivating workload with the optimization. ## Coverage tooling — what's missing -There is **no** coverage tooling in the repo today: no `tarpaulin.toml`, no `codecov.yml`, no coverage CI step. If you want to know whether your change is covered, the answer comes from reading and running the relevant integration tests, not from a tool. +There is **no** coverage tooling in the repository today: no `tarpaulin.toml`, no `codecov.yml`, no coverage CI step. If you want to know whether your change is covered, the answer comes from reading and running the relevant integration tests, not from a tool. If introducing coverage tooling is in scope for your task, the natural first step is `cargo-llvm-cov` wired into a separate CI job, and a per-crate threshold rather than a global one. @@ -97,7 +97,7 @@ How to check: - *Existing test covers the area but not your case* → **add an assertion or a fixture row to the existing test**, don't write a new function with `init_and_load()` again. 
- *No existing coverage in any test file* → only then write a new test; put it in the file that owns the area, or open a new file only if the area itself is new. -Three duplicated `init_and_load() → run_query → assert_eq` blocks where one parameterized test would do is the most common form of test rot in this repo. Don't add to it. +Three duplicated `init_and_load() → run_query → assert_eq` blocks where one parameterized test would do is the most common form of test rot in this repository. Don't add to it. ## Before-every-task checklist @@ -106,10 +106,10 @@ When you pick up any change, walk through this: 1. **Find existing coverage** (per the principle above). Don't just look at the first test file by name — grep for the symbol you're touching across every crate's `tests/`. 2. **Run those tests locally before editing.** `cargo test --workspace --locked` for the broad pass; `-p <crate> --test <file>` for a focused loop. Confirm a clean baseline. 3. **Decide extend-vs-new** explicitly. If you can extend an existing test (assertion, fixture row, parameterization), do that. Only add a new test fn or new file if no existing one owns the area. -4. **Reuse the helpers.** `init_and_load()`, fixture files, the CLI `support` harness — re-use them. Don't bootstrap a fresh repo by hand if a helper exists. -5. **Mind the boundary.** Per [docs/invariants.md §VIII.47](invariants.md), test at the layer the change lives at — planner-level changes deserve planner-level tests, not just end-to-end. +4. **Reuse the helpers.** `init_and_load()`, fixture files, the CLI `support` harness — re-use them. Don't bootstrap a fresh graph by hand if a helper exists. +5. **Mind the boundary.** Per [docs/dev/invariants.md](invariants.md), test at the layer the change lives at — planner-level changes deserve planner-level tests, not just end-to-end. 6. **For substrate-touching changes** (Lance behavior), reach for `failpoints` or fixture-driven scenarios, not stubbed-out mocks. 7.
**For server / API changes**, confirm the OpenAPI regeneration happens in `openapi.rs` and that the diff lands in `openapi.json`. 8. **Verify your change makes an existing test fail before it makes the new one pass.** If you can break the code without breaking a test, your coverage gap is the problem to fix first. -When in doubt, re-read [docs/invariants.md §VIII](invariants.md) — quality gates apply to every change. +When in doubt, re-read [docs/dev/invariants.md](invariants.md) — quality gates apply to every change. diff --git a/docs/invariants.md b/docs/invariants.md deleted file mode 100644 index 420d605..0000000 --- a/docs/invariants.md +++ /dev/null @@ -1,305 +0,0 @@ -# Architectural Invariants & Patterns - -**Type:** Reference / standing document -**Status:** Living — updated as decisions accrue -**Audience:** anyone proposing, reviewing, or implementing a change to any part of OmniGraph - -This document captures two things: - -- **Invariants** (Parts I–VI, VIII): load-bearing principles that hold across the architecture. Breaking one is rare and requires explicit justification. -- **Current architectural patterns** (Part VII): how we realize the invariants today. These are committed conventions, not eternal facts; they may evolve as the engine matures, but until they do, they constrain new work. - -These are not query-engine-specific. They apply to every layer. - -## Status legend - -- *Status: decided.* No annotation needed; this is the default. -- *Status: open — see MR-X.* The principle is captured, but the concrete default or mechanism is still under discussion. Future work should follow the captured intent or update this document with the resolution. -- *Status: aspirational.* The invariant describes the target state; current code may not yet uphold it. PRs that move toward upholding it are welcome; PRs that drift away need explicit justification. 
- -Capturing aspirational invariants on purpose: we'd rather record what we want to be true and have current code be measured against it than not have the rule at all. - -## How to use - -- **Writing an RFC or design proposal:** walk through the relevant sections and state how the proposal upholds each invariant — or why a documented exception is justified. -- **Reviewing a PR or design:** scan for invariants the change might violate. The deny-list (§IX) is the fastest first pass. -- **Debating a tradeoff:** invoke the relevant invariant and check whether the tradeoff respects it. -- **Updating this document:** add to the deny-list freely. Removing or relaxing an invariant requires the same review process as any other architectural decision. - ---- - -## I. Substrate respect — delegate, don't rebuild - -The first question for any new component: does the substrate already do this? - -Current substrate is **Lance** for storage, indexes, and MVCC; **DataFusion** is the working assumption for relational machinery. These are committed choices (MR-737 §2.2, §5.11) but not eternal facts. The invariants below are about respecting *whatever* substrate we adopt. - -1. **Don't rebuild what the substrate owns.** Storage format, durability (WAL, transaction journal), buffer pool, MVCC, index lifecycle — all delegated. Building parallel implementations turns the project into a different one and locks us out of substrate improvements. - *Check:* Does this proposal introduce a parallel storage format, custom on-disk pages, custom serialization, custom WAL, custom buffer pool? - -2. **Don't rebuild relational machinery** provided by the runtime substrate. Joins, aggregations, parallelism, spill — extension via the substrate's trait surfaces; never reimplementation. - *Check:* Are we extending the substrate via traits, or reimplementing parts of it? - -3. **Don't maintain state parallel to the substrate.** Observe substrate state and derive what we need. 
State that drifts from the substrate is a bug. - *Check:* Does this proposal track index coverage, manifest versions, or fragment locations independently of the substrate? - -## II. Layering — the seams hold - -4. **The IR is the contract between frontend and backend.** Frontends emit IR; planner / executor consume it. No frontend logic leaks downward; no executor concerns leak upward. - *Check:* Does the proposal add to the IR, or to a layer? If to a layer, does it cross another layer's concern? - -5. **Capabilities and statistics flow upward; data flows downward.** Lower layers expose what they can do (capabilities) and what they know (statistics). Upper layers consume both. Methods alone are insufficient — methods without capability advertisement force one-size-fits-all plans. - *Check:* When adding a method to a layer trait, did we also expose the capability so the planner can reason about it? - -6. **One trait boundary per layer.** Crossing a layer means going through its trait. Direct calls to lower-layer concrete types from upper layers are forbidden. - *Check:* Does this code call `lance::Dataset` directly outside engine-storage? Call planner internals from the executor? - -7. **No god modules.** Single-module concerns: storage, IR, planner, executor, frontend, reconciler, schema, policy. Each crate has a reference test suite that runs without the others. - *Check:* Does this PR add a concern to a crate that already owns a different one? - -8. **Wire protocols are interchangeable; the IR is the contract.** The kernel produces `Stream` end-to-end; transports (HTTP/JSON, Arrow Flight, FlightSQL, future protocols) deliver them at the server boundary. No wire-protocol-specific code in kernel crates. - *Status: aspirational — Flight not yet implemented; tracked in MR-765.* - *Check:* Does this code import `arrow_flight` (or any transport crate) outside the server layer? - -## III. 
Distributability — kernel stays remote-friendly - -These are technical constraints, independent of whether we ship a distributed product. They preserve the architectural seam. - -9. **The kernel admits parallel and remote implementations.** Trait surfaces are thread-safe; no in-process-only assumptions; remote dataset descriptors (URI, snapshot ref, fragment ID) are accepted without requiring an open in-process handle. - -10. **IR is location-neutral.** No IR operator embeds an assumption about where data lives. - -11. **Cost models accept new dimensions** (network, latency-tier) as additive extensions. No place hard-codes "all cost is local I/O." - -12. **Background work admits alternate implementations.** In-process default; separable worker fleet for distributed deployment uses the same trait. - *Status: aspirational — distributed deployment is out of scope today (MR-737 §2.2); these constraints preserve the seam.* - -## IV. Evolution — additive over rewrite - -13. **Additive over rewrite.** New IR variants and planner rules slot in. No "tear out and replace" PRs. - -14. **Capabilities are additive enums.** New variants are additive. Existing implementations keep working. - -15. **Feature-flag behavior changes.** Every change that alters runtime behavior ships behind a flag. Old code path stays until the new one is proven. - -16. **No data drops without a migration.** When data needs to move (e.g., adopting stable row IDs), use in-place or dual-write windows. Never "drop and recreate." - -17. **No breaking schema changes without a migration plan.** Schema-IR changes go through the migration planner with safety tier classification. See the MR-694 family. - -## V. Honesty — what the system tells operators - -18. **Estimate-vs-actual logging on every estimator.** Cost models drift; calibration is a continuous process, not a one-off. - -19. 
**Operationally important state is observable.** Index coverage, reconciler lag, cost-model accuracy — surfaced through the storage trait's `capabilities()` and a unified observability API. - -20. **Honest failure modes.** Cost-model misses degrade gracefully (spill, partial-result, bounded abort). No silent OOM. - -21. **Per-query resource consumption is bounded and exposed.** Memory cap, wall-clock timeout, max-rows-scanned, max-fragments-scanned. Operators respect them; bounds exposed via explain. - -22. **Plans are explainable.** Every executed query can be inspected as IR + physical plan + cost annotations. No "you'd have to read the source to know what this does." See MR-684. - -## VI. Database guarantees — what OmniGraph promises as a system of record - -These are user-visible commitments. They state what the engine guarantees and what it does not. For an "agent-native system of record," credibility lives here. - -Specific defaults (timeout values, memory caps, TTL windows) are *configuration*, not invariants — see [docs/constants.md](constants.md) and per-deployment configuration. The invariant is that bounds and contracts exist, not their numerical values. - -23. **Atomicity is per-query.** Every `.gq` query is atomic — multi-statement mutations are all-or-nothing via the substrate's atomic-commit primitive. No cross-query `BEGIN`/`COMMIT`; branches and merges fill that role for agent workflows. 
- *Status: upheld at the writer-trait surface, across process boundaries, AND in-process for the common case under concurrent writers (PR 2 / MR-686) — the sealed `TableStorage` trait routes inserts / updates / scalar-index builds / merge_insert / overwrite through `stage_*` + `commit_staged` (Phase A is drift-free); the open-time recovery sweep in `db/manifest/recovery.rs` (sidecars at `__recovery/{ulid}.json` written by `MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) closes the per-table commit_staged → manifest publish residual on the next `Omnigraph::open`; `Omnigraph::refresh` runs roll-forward-only recovery in-process so long-running servers close the common case without restart; and the per-(table, branch) writer-queue (`db/write_queue.rs`) + revalidation under the queue (`MutationStaging::commit_all`) prevents concurrent writers on the same key from corrupting each other once the HTTP server's global `RwLock` is removed (PR 2 Step F). The "Lance HEAD ahead of `__manifest`" drift class is unreachable for op-execution failures, recoverable across process boundaries for all writer kinds, and recoverable in-process for roll-forward-eligible sidecars. Sidecars that would require `Dataset::restore` are deferred to the next ReadWrite open (restore unsafe under concurrency); continuous in-process rollback recovery is the goal of a future background reconciler (MR-870). Two writer paths still inline-commit pending upstream Lance work: `delete_where` (lance-format/lance#6658) and `create_vector_index` (lance-format/lance#6666).* - -24. **Schema integrity is strict at commit.** Type validation, required-field presence (auto-filled from `@default` if declared), uniqueness across batches and versions, and referential integrity — all enforced before commit succeeds. Per-write softening flags are opt-in, never default. - *Status: aspirational — referential integrity at scale requires SIP-backed cross-table validation; not yet implemented. 
Cross-batch / cross-version uniqueness tracked in MR-714.* - -25. **Isolation: per-query snapshot; read-your-writes within and across queries in a session.** Each query reads from one consistent manifest version. Within a multi-statement mutation, the read subplan inside each write operator sees the writes from earlier statements. Across queries in a session, reads always resolve the latest manifest version — no reader pinning to older snapshots. - *Status: upheld for inserts/updates — `MutationStaging`'s in-memory accumulator + `TableStore::scan_with_pending` (DataFusion `MemTable` union with the committed Lance scan, with merge-shadow semantics for chained updates) implements read-your-writes within a multi-statement mutation. Delete-touching mutations are limited to delete-only by parse-time D₂; closing the within-query RYW gap for deletes requires Lance's two-phase delete API (Lance-upstream lance-format/lance#6658). The "Lance HEAD ahead of `__manifest`" drift class is unreachable for op-execution failures (the partial-failure test pins this), and the narrower finalize→publisher residual is closed across one open cycle by the open-time recovery sweep — see [docs/runs.md](runs.md) "Open-time recovery sweep".* - -26. **Durability before acknowledgement.** Commit returns only after the substrate has confirmed durable persistence. No "fast" or "fire-and-forget" durability levels. - -27. **Causal consistency across sessions.** If session A commits and session B subsequently reads, session B sees A's write. Single-coordinator: trivially via single-source manifest. Multi-coordinator: enforced via leader-for-writes plus session-token replica reads. Never weakened. - *Status: aspirational on the multi-coordinator side.* - -28. **Determinism within a snapshot.** Same query + same snapshot + same parameters → order-stable results (deterministic tie-breaks). Plan choice is deterministic given identical statistics. 
Cross-version determinism is best-effort, not guaranteed (statistics change, plans change). - *Status: aspirational — current code may rely on HashMap iteration in some paths.* - -29. **Writes are idempotent under retry.** Insert / Update / Merge take an explicit `on_conflict` policy. Clients may provide an idempotency key on writes; the server deduplicates retries within a configurable TTL window. Schema migrations are idempotent under replay. - *Status: open — `on_conflict` policy lands with mutation IR (MR-737 Phase 8); idempotency-key TTL default is undecided.* - -30. **No silent data loss or corruption.** Substrate-level checksums are trusted for storage integrity. Semantic-invariant checks at every commit catch higher-level cases (orphan edges, type drift, broken uniqueness). Every operation succeeds, fails loudly with cause, or degrades observably with metrics. - -31. **Every operation has a documented bound.** "May run forever" is forbidden as a default. Defaults are configurable; the invariant is that bounds exist, are documented, and are enforced. - -32. **Failure scope is bounded.** A failing query, fragment-level corruption, or background-task crash does not cascade. Per-table fragment isolation at the storage tier; per-query memory and timeout in the executor. - *Status: aspirational on the per-query side — per-query memory cap not yet enforced; planned with MR-737 Phase 7.* - -33. **Crash recovery via the same code paths as steady-state.** No special "recovery mode." On restart, the engine reads the manifest, finds the latest committed state, and resumes. Substrate atomicity ensures no partial writes survive. - -34. **Strong consistency by default; relaxation is per-query, never per-default.** Strong (read-your-writes, monotonic, snapshot) is the default for every query. Eventual consistency is opt-in per read query for analytical workloads where staleness is acceptable. Never available on writes; always logged for audit. 
- *Status: aspirational — eventual-consistency opt-in flag tracked in MR-425.* - -35. **Branches are the cross-query coordination primitive.** Branches are cheap to create, fully isolated, per-branch SI, with durable queryable metadata (creator, intent, parent, fork point). Agents use branches for any multi-step coordination that needs atomicity beyond a single query. Lifecycle policies (TTL, auto-cleanup) are deployment configuration; the invariant is that branches *exist* as first-class durable objects with full SI parity to main. - *Status: upheld. Lance shallow-clone gives cheap creation; per-branch SI is the same code path as main; metadata in `_refs/branches/{name}.json` already supports a queryable `metadata` map.* - -36. **Per-query isolation is adjustable per-query, never per-default.** Default is Snapshot Isolation (§VI.25). Queries can opt **up** to Serializable for cross-table-invariant safety (`USING SERIALIZABLE`) or **down** to eventual consistency for analytical reads (`USING EVENTUAL`). Stricter than Serializable (Strict Serial / linearizable-across-queries) is **not offered**; branches (§VI.35) replace that role for high-stakes coordination. Stronger and weaker are both per-query opt-ins, never per-default. - *Status: SI default upheld. Serializable opt-in aspirational — predicate revalidation under MR-686's per-(table, branch) queue is the implementation seam. Eventual-read opt-in aspirational — tracked in MR-425. Subsumes §VI.34 (which only covers the downgrade direction); §VI.34 is preserved for now to keep its MR-425 pointer addressable.* - -37. **Merges are type-aware and agent-resolvable.** Branch merge resolution combines two layers. **Structural** (row-level last-write-wins by deterministic tie-break) is exact for sets of independent rows. **Semantic** (per-type policies declared in schema) handles CRDT-shaped operations: grow-only set, monotonic counter, last-writer-wins-with-timestamp, multi-valued register, first-writer-wins. 
Conflicts no policy resolves pause the merge with structured `MergeConflictKind` rows; agents produce resolution rows and resume. Auto-resolution never silently picks a side when policies are ambiguous. - *Status: structural merge upheld via `OrderedTableCursor` + `StagedTableWriter`. Type-declared semantic policies aspirational. Pausable merges aspirational — current code fails on conflict, doesn't pause.* - -### Explicit non-commitments - -These are *not* part of the OmniGraph contract. Listed so reviewers and downstream users see what is intentionally out of scope. - -- **Strict Serializable across queries.** Branches (§VI.35) are the replacement for cross-query strict-serial coordination. -- **Cross-process linearizable single-object writes** in multi-coordinator deployments without explicit external coordination (Postgres advisory, S3 sentinel, leader election). §VI.27 multi-coordinator stays aspirational with a clear cost model. -- **Automatic semantic conflict resolution.** §VI.37 is explicit: ambiguous conflicts always pause for agent or human resolution; auto-resolution requires a per-type policy. - -## VII. Current architectural patterns - -These are *how* we realize the invariants today. They are committed conventions — until we explicitly revise them, new code follows them. They are not eternal: a future architecture review may replace any of these with a different mechanism that upholds the same invariants. The deny-list (§IX) protects them in the meantime. - -38. **Reconciler pattern for derivable state.** Index coverage, statistics, anything derivable from manifest state — reconciled, not job-queued. *Realizes the "don't maintain state parallel to the substrate" invariant.* See MR-737 §5.16. - *Status: partial after MR-793 PR #70 — scalar index builds (BTree, Inverted) now route through the staged primitives `stage_create_*_index` + `commit_staged` instead of inline `create_*_index`; this is the building block. 
The reconciler pattern itself (background `IndexReconciler` task driven by manifest commits, removing synchronous index work from the publish path) is tracked in MR-848. Vector indices remain inline-commit until lance-format/lance#6666 ships.* - -39. **Polymorphism via Union, not per-feature lowering.** Interfaces / wildcards / alternation on nodes and edges share one IR (`Polymorphism`) and one lowering (Union of per-type concrete plans). *Realizes "shared mechanism for shared shape."* See MR-737 §5.13. - *Status: aspirational — node interfaces in MR-579; edge wildcards in MR-744.* - -40. **Mutations wrap read subplans.** Insert / Update / Delete / Merge are operators that consume read-shaped subplans. Same planner, same cost model, same storage trait. *Realizes "writes share the planner with reads."* See MR-737 §5.12. - *Status: aspirational — current mutation path is separate from reads.* - -41. **SIP for cross-operator selectivity propagation.** Producers publish ID bitmaps; downstream scans consume them through structured pushdown. *Realizes "downstream operators prune via upstream selectivity."* - *Status: aspirational — current code uses IN-list flattening in `Expand`.* - -42. **Factorize multi-hop, flatten only at projection.** Lists carry multiplicity through intermediate operators. `Flatten` is inserted by the planner where required, not eagerly. *Realizes "intermediate state shouldn't materialize cross-products eagerly."* - *Status: aspirational — current code materializes cross-products eagerly.* - -43. **Stable row IDs as dense graph IDs.** Don't maintain parallel string→u32 maps. Lance's stable row IDs are the substrate's identity layer; we use them directly. *Realizes "use the substrate's identity layer."* - *Status: aspirational — current code rebuilds `TypeIndex` per query.* - -44. **Rank and score are columns.** Retrieval operators emit `_score`, `_rank`. Fusion operators consume rank-bearing batches. 
*Realizes "rank/score is data, not metadata."* - *Status: aspirational — current RRF runs the pipeline twice and discards rank.* - -45. **Policy as predicates.** Authorization decisions are filter expressions injected into the planner, not enforcement at the API boundary. *Realizes "authorization pushes down with other filters."* - *Status: aspirational — Cedar enforcement currently at HTTP boundary only; tracked in MR-722 / MR-725.* - -46. **Imports unify under `Source`; transport is interchangeable.** A single `Source` IR operator with provider variants (File, Flight, Lance, Stream) handles all imports. Lance-to-Lance is a fast-path that bypasses Arrow encode/decode. *Realizes "external data sources share one operator surface."* - *Status: aspirational — current loader is JSONL-only; tracked in MR-765.* - -## VIII. Quality gates — every change passes - -47. **Tests at every boundary.** `MemStorage` for engine tests; planner-only tests; executor-only tests with a stub storage. No layer tested only via end-to-end. - -48. **Reference implementation per trait.** Every trait has a primary impl (Lance for storage) and at least a test impl. - *Status: partial after MR-793 PR #70 — `TableStorage` (the engine-internal staged-write trait, sealed) has its primary impl on `TableStore` (Lance-backed). The trait's signatures use opaque `SnapshotHandle` / `StagedHandle` types so a future test impl (e.g., `MemStorage`) can land without changing call sites. No test impl yet; `tempfile::tempdir()` + Lance is the de-facto test substrate today (see [docs/testing.md](testing.md)).* - -49. **Documented capability surface.** New capabilities are documented with what they advertise, who consumes them, how the planner uses them. - -50. **Benchmark before optimization.** New optimizations land with a benchmark that motivates them; if the motivating workload doesn't exist, the feature waits. - -## IX. 
Anti-patterns — deny-list - -If a proposal fits one of these, the burden is on the proposer to justify why this case is the exception. - -### Invariant violations (high bar to override) - -- **Custom WAL / transaction manager / buffer pool.** Substrate owns these (§I.1). -- **Wire-protocol-specific code in kernel crates.** Kernel produces `Stream`; transport adapters live at the server boundary only (§II.8). -- **In-process-only `Dataset` impls.** Trait surfaces stay remote-friendly (§III.9). -- **State that drifts from the substrate / manifest.** Derive from observable state (§I.3). -- **Cross-query `BEGIN`/`COMMIT` transactions.** Branches replace them in OSS (§VI.23). -- **Acks before durable persistence.** "Best-effort commit" is forbidden (§VI.26). -- **Reads that see partial commits.** Atomicity is non-negotiable (§VI.23). -- **Operations without time bounds.** Every operation has a documented timeout or backoff (§VI.31). -- **"Recovery mode" code paths separate from steady-state.** Recovery uses the same code as ordinary reads (§VI.33). -- **Eventual consistency as a default.** Strong is default; eventual is opt-in per query, never on writes (§VI.34). -- **Schema migrations that are not idempotent under replay.** Idempotency is required for replay safety (§VI.29). -- **Plan choice that varies given identical input statistics.** Determinism is required (§VI.28). -- **HashMap iteration order in result ordering or plan choice.** Use deterministic tie-breaks (§VI.28). -- **Cost-blind plan choice.** Lowering-order execution is not a planner. -- **Hidden statistics.** If a metric matters for plan choice, it must be exposed through the trait surface (§II.5). -- **Side-channels for query semantics.** Search modes, mutations, polymorphism, imports — all first-class IR concepts (§II.4). -- **Hand-rolling something the substrate already does.** Check the spec first (§I.1). -- **Mutating in place** state that should be immutable (Lance fragments, index segments). 
New segments instead. -- **Silent failures.** OOM, timeout, partial result — all surfaced and bounded (§V.20). -- **Shipping observable behavior as if it weren't part of the contract.** Output ordering, error-message text, timestamp precision, default-flag values, latency profile, query-result column order — every observable behavior gets depended on once shipped (Hyrum's Law). Don't expose what you don't want to commit to; treat changes to undocumented-but-observable behavior as breaking changes. -- **Strict-serial coordination expressed as locks held across queries.** Branches are the agent-native primitive for that (§VI.35). -- **Auto-resolving merge conflicts when the per-type policy is silent or absent.** Pause and surface the conflict; never silently pick a side (§VI.37). - -### Pattern violations (overridable with justification) - -These protect the *current* architectural patterns (§VII). A future review may revise them. - -- **Synchronous-inline index updates** for indexes expensive to build (vector ANN, FTS). Reconciler pattern instead (§VII.38). -- **Job queue for state derivable from manifest.** Reconciler pattern instead (§VII.38). -- **Per-feature lowering for shapes that share a structure** (interfaces, wildcards, alternation). Use one mechanism (§VII.39). -- **Per-format import code paths** (one path for JSONL, another for Parquet, another for Flight). Use the `Source` IR operator (§VII.46). -- **Eager materialization of cross-products** in multi-hop. Factorize (§VII.42). -- **Ad-hoc `IN`-list filtering** when SIP fits (§VII.41). -- **String-flattened SQL filter generation** when structured pushdown is available. -- **Discarding rank in retrieval.** Score and rank propagate as columns (§VII.44). -- **Auto-creating placeholder nodes for orphan edges** (silent invention of data). Reject by default; opt-in per write (§VI.24). -- **Double-encoding data when both endpoints speak the same format** (e.g., Lance → Arrow → Lance when both are Lance). 
Use a fast-path (§VII.46). -- **Per-write durability fast paths** until MemWAL is stable AND a use case justifies the latency vs. risk tradeoff. - -## X. Review checklist (use against any non-trivial change) - -Print this when reviewing an RFC or PR. Each line is **yes / no / N/A**. - -- Does it respect the substrate? (§I) -- Does it cross only one trait boundary per layer? (§II) -- Are capabilities and stats exposed for any new behavior? (§II.5) -- If touching the wire / transport surface, does kernel code stay protocol-agnostic? (§II.8) -- Do trait surfaces stay remote-friendly? (§III) -- Additive, not rewrite? Feature-flagged where behavior changes? (§IV) -- Any new estimator has estimate-vs-actual logging? (§V.18) -- Coverage / lag / budget metrics surfaced? (§V.19–21) -- Failure modes graceful, bounded, observable? (§V.20) -- Atomicity scope respected per query? (§VI.23) -- Schema integrity enforced strict at commit unless explicit opt-out? (§VI.24) -- Isolation level matches default (per-query snapshot, read-your-writes)? (§VI.25) -- Durability ack only after manifest commit? (§VI.26) -- Determinism preserved (order-stable, plan-deterministic)? (§VI.28) -- Idempotency: explicit `on_conflict`; idempotency keys honored if used? (§VI.29) -- Bounded operations: explicit timeout / memory / concurrency limits? (§VI.31) -- If proposing cross-query strict-serial coordination, is it expressed via branches rather than long-held locks? (§VI.35) -- If touching merge resolution, are silent-pick paths explicitly absent? (§VI.37) -- If touching imports / external data, does it go through `Source`? (§VII.46) -- If implementing a graph / retrieval feature: reuses an existing pattern (reconciler, Union, mutation-wrap-read, SIP, factorize, Source) where applicable? (§VII) -- Tests at every boundary, not just end-to-end? (§VIII.47) -- Reference impl + test impl for any new trait? (§VIII.48) -- None of the deny-list patterns apply? (§IX) - -## XI. 
Living document policy - -This document is updated when: - -- A new architectural decision establishes a new invariant — add it. -- An existing invariant is challenged and either reaffirmed (with the case sharpened) or revised (with explicit migration of any affected code). -- A new architectural pattern is adopted — add to §VII. -- A current pattern (§VII) is replaced — update or remove the entry; update the deny-list. -- A new anti-pattern surfaces in review and deserves a place on the deny-list — add it. -- An *aspirational* invariant becomes upheld — remove the status annotation. -- An *open* invariant is decided — record the decision and remove the status annotation. - -Updates require the same review process as code. Adding to the deny-list (§IX) is cheap; removing or relaxing an invariant (§I–VI, VIII) requires explicit justification in the proposal. Replacing a pattern (§VII) requires a design discussion linking to the new pattern; until that lands, the existing pattern stays. - -When an invariant is contested in the moment, the resolution path is: (a) state the case in the relevant RFC or PR; (b) link it from this document; (c) update this document if the resolution changes the rule. - -## XII. 
Source / origin - -These invariants and patterns were extracted from the architectural decisions in: - -- **MR-737** — Query Engine v2 RFC (the kernel scope and seams) -- **MR-744** — Edge wildcards / alternation (one cell of the polymorphic-bindings matrix) -- **MR-765** — Arrow Flight transport (query, import, export) -- The schema migration program (**MR-694** family — additive evolution, safety tiers, idempotent replay) -- The policy program (**MR-722** / **MR-725** — predicate pushdown) -- The reconciler / index-lifecycle work (**MR-737 §5.16**, **MR-688**, **MR-679**, **MR-680**) -- The factorization and SIP work (**MR-737 §5.2**, **§5.3** — Kuzu / Ladybug inspiration) -- The polymorphic-bindings framing (**MR-737 §5.13** — one mechanism for eight cells) -- The Source-operator framing (**MR-737 §5.12** — one mechanism for all imports) -- The database-guarantees discussion (§VI): ACID dimensions, CAP-style consistency model, scale-system precedents (ClickHouse, Turbopuffer, LanceDB, Postgres). Each invariant in §VI corresponds to a specific named decision; see prior architecture discussions for the option space considered. -- **MR-686** — Per-table writer queues and per-actor admission. Source for §VI.35–37 and the explicit non-commitments subsection (MR-686's queue is the seam that makes Serializable opt-in implementable, and the reason §VI.27 multi-coordinator stays aspirational). - -General precedent: Lance + LanceDB Enterprise architecture; ClickHouse merge subsystem; Kubernetes controllers; Postgres autovacuum; the FDAL stack (Flight + DataFusion + Arrow + Lance). - -Adding a new invariant or pattern here means we've learned something — either from a hard call we made and want to preserve, or from a mistake we don't want to repeat. Both are worth recording. 
diff --git a/docs/policy.md b/docs/policy.md deleted file mode 100644 index 746e35f..0000000 --- a/docs/policy.md +++ /dev/null @@ -1,44 +0,0 @@ -# Authorization (Cedar policy) - -OmniGraph integrates AWS Cedar (`cedar-policy = 4.9`) for ABAC. - -## Policy actions - -1. `read` — query / snapshot / list branches & commits -2. `export` — NDJSON export -3. `change` — mutations -4. `schema_apply` — apply schema migrations -5. `branch_create` -6. `branch_delete` -7. `branch_merge` -8. `run_publish` -9. `run_abort` -10. `admin` — reserved - -## Scope kinds - -- `branch_scope` — applied to source branch (`read`, `export`, `change`) -- `target_branch_scope` — applied to destination (`schema_apply`, branch ops, run ops) -- `protected_branches` — named list with special rules; rule scopes are `any | protected | unprotected` - -## Configuration - -`omnigraph.yaml`: - -```yaml -policy: - file: ./policy.yaml # Cedar rules + groups - tests: ./policy.tests.yaml # declarative test cases -``` - -Each rule must use exactly one of `branch_scope` or `target_branch_scope`. - -## CLI - -- `omnigraph policy validate` — parse + count actors, exit 1 on parse error. -- `omnigraph policy test` — run cases in `policy.tests.yaml`, exit 1 on any expectation mismatch. -- `omnigraph policy explain --actor … --action … [--branch …] [--target-branch …]` — show decision and matched rule. - -## Server enforcement - -Every mutating endpoint calls `authorize_request()` *before* the handler runs; decisions are logged with actor / action / branch / outcome / matched rule. diff --git a/docs/releases/v0.4.0.md b/docs/releases/v0.4.0.md index d77ebfc..efb2da7 100644 --- a/docs/releases/v0.4.0.md +++ b/docs/releases/v0.4.0.md @@ -65,7 +65,7 @@ manifest. The next mutation against that table fails with `ExpectedVersionMismatch`. Most validation runs before any Lance write, so single-statement mutations are unaffected; the narrow path is multi-statement queries with late-op failures. 
Tracked as a follow-up; -see [docs/runs.md](../runs.md#known-limitation-mid-query-partial-failure-on-the-same-table) +see [docs/dev/runs.md](../dev/runs.md#known-limitation-mid-query-partial-failure-on-the-same-table) for the workaround. ## Upgrade notes diff --git a/docs/releases/v0.4.1.md b/docs/releases/v0.4.1.md index fcc9743..78211e4 100644 --- a/docs/releases/v0.4.1.md +++ b/docs/releases/v0.4.1.md @@ -19,7 +19,7 @@ mutation proceeds normally. HEAD on every staged table is untouched and the next mutation proceeds normally. A narrowed residual remains at the finalize→publisher boundary (multi-table `commit_staged` is not - atomic with the manifest commit) — see [docs/runs.md](../runs.md) + atomic with the manifest commit) — see [docs/dev/runs.md](../dev/runs.md) "Finalize → publisher residual" for details. - **D₂ parse-time rule**: a single mutation query is either insert/update-only or delete-only. Mixed → rejected with a clear @@ -40,8 +40,8 @@ mutation proceeds normally. `restore_coordinator` API and `CoordinatorRestoreGuard` are removed from `mutation.rs`. (`merge.rs` keeps its own swap pattern; that's a separate workflow.) -- **`docs/invariants.md` §VI.25** flips from `aspirational/open` to - `upheld for inserts/updates`. The within-query read-your-writes +- **`docs/dev/invariants.md` mutation atomicity / read-your-writes status** + flips from `aspirational/open` to `upheld for inserts/updates`. The within-query read-your-writes guarantee is now load-bearing for the publisher CAS contract. ## Behavior changes @@ -105,29 +105,29 @@ mutation proceeds normally. - `Cargo.toml` (workspace) + `crates/omnigraph/Cargo.toml` — added `datafusion = "52"` direct dep (transitively pulled by Lance already; required for `MemTable`). -- `docs/runs.md` — removed "Known limitation" section; documented +- `docs/dev/runs.md` — removed "Known limitation" section; documented the new accumulator + D₂ + LoadMode::Overwrite residual. 
-- `docs/invariants.md` — §VI.25 status flipped to `upheld for - inserts/updates`. -- `docs/architecture.md` — added "Mutation atomicity — in-memory +- `docs/dev/invariants.md` — mutation atomicity / read-your-writes status + flipped to `upheld for inserts/updates`. +- `docs/dev/architecture.md` — added "Mutation atomicity — in-memory accumulator" subsection; refreshed the engine + state diagrams to drop `RunRegistry` and add `MutationStaging`. -- `docs/execution.md` — rewrote the mutation flow sequence diagram +- `docs/dev/execution.md` — rewrote the mutation flow sequence diagram for the staged-write path; updated the `LoadMode` table to call out per-mode commit semantics; rewrote `load` vs `ingest`. -- `docs/query-language.md` — documented the D₂ parse-time rule. -- `docs/errors.md` — added the D₂ `BadRequest` rejection path. -- `docs/storage.md` — dropped the live `_graph_runs.lance` reference +- `docs/user/query-language.md` — documented the D₂ parse-time rule. +- `docs/user/errors.md` — added the D₂ `BadRequest` rejection path. +- `docs/user/storage.md` — dropped the live `_graph_runs.lance` reference from the layout diagram and prose. -- `docs/branches-commits.md` — moved `__run__` to a legacy note; +- `docs/user/branches-commits.md` — moved `__run__` to a legacy note; removed `publish_run` from the publish-trigger list. -- `docs/audit.md` — current `_as` API list refreshed; legacy +- `docs/user/audit.md` — current `_as` API list refreshed; legacy `RunRecord.actor_id` moved to a historical note. -- `docs/constants.md` — marked the run registry / branch-prefix rows +- `docs/user/constants.md` — marked the run registry / branch-prefix rows as legacy. -- `docs/cli.md` — replaced the legacy `omnigraph run *` quickstart +- `docs/user/cli.md` — replaced the legacy `omnigraph run *` quickstart block with `omnigraph commit list/show`. 
-- `docs/testing.md` — extended the `runs.rs` row to cover the new +- `docs/dev/testing.md` — extended the `runs.rs` row to cover the new staged-write contract tests; added the `staged_writes.rs` row. - `AGENTS.md` (CLAUDE.md symlink) — updated the atomic-per-query description and the L2 capability matrix row. diff --git a/docs/releases/v0.5.0.md b/docs/releases/v0.5.0.md new file mode 100644 index 0000000..16e284e --- /dev/null +++ b/docs/releases/v0.5.0.md @@ -0,0 +1,171 @@ +# Omnigraph v0.5.0 + +Omnigraph v0.5.0 is a substrate, security, and migration-safety release. It +jumps the storage substrate from Lance 4 to Lance 6.0.1 (DataFusion 52 → 53, +Arrow 57 → 58), introduces engine-wide Cedar policy enforcement on every +authoring path, and ships a structured schema-lint v1 chassis with +code-tagged diagnostics, soft drops, and an explicit `--allow-data-loss` +flag for destructive migrations. + +## Highlights + +- **Lance 6.0.1 substrate**: bump from Lance 4.0.0 → 6.0.1, DataFusion 52 → + 53, Arrow 57 → 58. New optimizer rules (vectorized `IN`-list eq kernel, + `PhysicalExprSimplifier`, push-limit-into-hash-join, CASE-NULL shortcut) + reach predicates that flow through the engine. `lance-tokenizer` replaces + tantivy internally; FTS behavior preserved. +- **Cedar policy engine**: a new `omnigraph-policy` crate wires + `Omnigraph::enforce(action, scope, actor)` into every `_as` writer + (`mutate_as`, `load_as`, `apply_schema_as`, `branch_create_as`, + `branch_merge_as`, `branch_delete_as`, plus the load and change + variants). The HTTP server defaults to deny-all when no Cedar policy is + configured; a YAML policy file is required to enable writes. Actor + identity comes only from signed token claims — clients cannot set actor + identity directly. +- **Schema lint v1 chassis**: diagnostics now carry stable codes of the form + `OG-XXX-NNN` instead of free-form messages. 
`omnigraph schema plan` and + `apply` understand soft drops on properties and types — destructive drops + require the new `--allow-data-loss` flag (Hard mode) at the CLI and an + equivalent JSON flag over HTTP. +- **Structured filter pushdown**: query-language predicates lower to + DataFusion `Expr` and push down through Lance's `Scanner::filter_expr` + instead of being flattened to SQL strings. This unlocks `CompOp::Contains` + pushdown (via `array_has`), which previously fell through to in-memory + post-scan filtering, and lets the DataFusion 53 optimizer rules above act + on our predicates. +- **HTTP `allow_data_loss` parity**: the destructive-drop guard now exists + on both the CLI (`--allow-data-loss`) and HTTP (`allow_data_loss: true` in + the schema-apply request body). +- **Inline query strings on CLI and HTTP**: `omnigraph read` / + `omnigraph mutate` and the corresponding HTTP endpoints accept inline + `.gq` source, not just a file path. Easier ad-hoc queries, clearer + request logs. +- **Browser CORS layer**: optional CORS layer on `omnigraph-server` for + browser-based UIs, gated by `OMNIGRAPH_CORS_ORIGINS`. +- **Merge-insert dup-rowid fix**: Lance's `MergeInsertBuilder` could surface + spurious `"Ambiguous merge inserts"` errors on sequential merges against + rows previously rewritten by `merge_insert`. The engine now opts into + `SourceDedupeBehavior::FirstSeen` with a `check_batch_unique_by_keys` + fail-fast precondition that guarantees source-side dedup happens before + Lance sees the batch. +- **Branch-merge error-path recovery**: a branch merge that failed + mid-flight could leave the in-process coordinator pointing at a stale + active branch. The error path now restores the prior coordinator, + matching the success path's invariant. +- **Branch merge with blob columns**: external blob URIs are now + materialized correctly during branch merge instead of being dropped or + pointing at the source branch. 
+- **Lance API surface guards**: a new test file + (`crates/omnigraph/tests/lance_surface_guards.rs`) pins eight specific + Lance API surfaces (`LanceError::TooMuchWriteContention`, + `ManifestLocation` fields, `MergeInsertBuilder` return shape, + `WriteParams::default`, `compact_files` signature, etc.) so the next + Lance bump fails compile or runtime on any silent drift rather than + producing wrong-state recovery in production. + +## Behavior changes + +- **On-disk format unchanged**: existing v0.4.2 datasets open unchanged. + The Lance file format pin stays at V2_2 (required by Lance's blob v2 + feature). +- **`omnigraph-server` defaults to deny-all under `--policy`**: starting a + server with the policy feature enabled but no Cedar YAML policy + configured rejects every write. Operators must supply a policy file to + authorize anything. +- **Schema-lint diagnostics carry stable codes**: messages now lead with + `OG-XXX-NNN`. CI parsers or tooling that keyed off the v0.4.2 free-form + text need to switch to code-based matching. +- **Destructive schema drops require `--allow-data-loss`**: dropping a + property or type returns a structured diagnostic by default. + `omnigraph schema apply --allow-data-loss` (CLI) or + `{"allow_data_loss": true}` (HTTP) opts into Hard mode. +- **`HashJoinExec` null-aware semantics on anti-join**: a side effect of + the DataFusion 53 bump — `NOT IN` semantics under null-valued anti-join + columns are now correct per SQL standard. Queries that depended on the + prior behavior would have been incorrect. + +## Upgrade Notes + +### Migration + +- No data migration. v0.4.2 repos open directly on v0.5.0. + +### Clients + +- HTTP and SDK clients should switch any string-matching schema-lint + parsing to code-based matching against the `OG-XXX-NNN` prefix. +- Clients exercising destructive schema drops (`DropProperty`, `DropType`) + must add the `allow_data_loss` request field (HTTP) or + `--allow-data-loss` flag (CLI). 
Default is soft-drop-or-reject. +- Clients consuming `mutate_as` / `load_as` / `apply_schema_as` / branch + authoring APIs now flow through the policy enforcer. Anything bypassing + authorization on v0.4.2 will be rejected on v0.5.0 once a policy is + configured. + +### Operators + +- Configure a Cedar policy YAML for production servers before enabling + writes; deny-all is the new default. The `omnigraph policy validate` / + `test` / `explain` CLI commands are unchanged. +- Bearer tokens continue to be the actor-identity source; review the + signed-token-claim-only invariant in `docs/dev/invariants.md` if you've + built custom authentication. +- If your local CI uses RustFS for S3-compatible storage testing, our CI + pins `rustfs/rustfs:1.0.0-beta.3` (the last known-good tag before the + upstream credentials-policy change). Mirror the pin or set + `RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true` for the new image + versions. + +## Tests added or strengthened + +- `crates/omnigraph/tests/lance_surface_guards.rs` — 8 named guards pinning + Lance API surfaces against silent drift on future bumps. +- `crates/omnigraph/tests/policy_engine_chassis.rs` — engine-level policy + enforcement coverage; complements the existing HTTP policy tests. +- Policy chassis e2e gap-fills — branch-merge, branch-create, branch-delete + policy paths now have explicit end-to-end tests over HTTP and CLI. +- Merge-pair truth table — exhaustive op-variant matrix for three-way + merge across `noop`, `addNode`, `removeNode`, `addEdge`, `removeEdge`, + `setProperty`, `dropProperty`, `addLabel`, `removeLabel`; the build + fails to compile when a new op variant is added without dispositioning + every pairing. 
+- Merge-insert: regression for the dup-rowid bug class on the load surface + (`load_merge_repeated_against_overlapping_keys_succeeds`), the update + surface (`second_sequential_update_on_same_row_succeeds`), and the + upstream-Lance-gap canary + (`load_merge_window_2_documents_upstream_lance_gap`). +- Maintenance + destructive-migration coverage — `omnigraph optimize` / + `cleanup` boundary cases, plus schema-apply soft-drop and Hard-mode + paths. +- Stable-row-id preservation across `stage_overwrite` — pins the invariant + that staged overwrites carry stable row IDs through to the committed + fragment set. +- `CompOp::Contains` pushdown regression + (`ir_filter_with_list_contains_pushes_down`) — pins the new structured + Expr pushdown path that retired the in-memory fallback. + +## Included Changes + +- Lance 4 → 6.0.1, DataFusion 52 → 53, Arrow 57 → 58 substrate upgrade. +- `omnigraph-policy` crate with engine-wide Cedar enforcement and + signed-token-claim-only actor identity. +- Schema-lint v1 chassis with `OG-XXX-NNN` codes, soft `DropProperty` / + `DropType` semantics, and `--allow-data-loss` for Hard mode. +- HTTP `allow_data_loss` request field parity with the CLI flag. +- Structured DataFusion `Expr` filter pushdown via + `Scanner::filter_expr`, with `CompOp::Contains` lowered through + `array_has`. +- Inline `.gq` source acceptance on CLI and HTTP read/mutate endpoints. +- Optional CORS layer on `omnigraph-server` for browser UIs. +- Bug fixes: merge-insert dup-rowid (FirstSeen + uniqueness precondition), + branch-merge coordinator restore on error, blob-column materialization + during branch merge. +- New Lance API surface-guard test file as the canary for future Lance + bumps. +- Recovery-sidecar coverage extended across the four write paths + (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, + `ensure_indices`) with failpoint regression tests. 
+- CI: pinned `rustfs/rustfs:1.0.0-beta.3` after the upstream `:latest` + introduced a credentials-policy change. +- Version bump to `0.5.0` across workspace crates, `Cargo.lock`, + `openapi.json`, and the `AGENTS.md` surveyed version. diff --git a/docs/releases/v0.6.0.md b/docs/releases/v0.6.0.md new file mode 100644 index 0000000..7984056 --- /dev/null +++ b/docs/releases/v0.6.0.md @@ -0,0 +1,141 @@ +# Omnigraph v0.6.0 + +Three pieces of work land in this release: + +1. The **graph terminology rename** (renamed `Repo` → `Graph` across the Cedar resource model, policy API, and query-lint schema source). +2. **Multi-graph server mode** — one `omnigraph-server` process can now serve 1–10 graphs concurrently behind cluster routes (`/graphs/{graph_id}/...`), with per-graph and server-level Cedar policy, read-only `GET /graphs` enumeration, and CLI parity (`omnigraph graphs list`). +3. **Inline + canonical-named queries and mutations.** New `POST /query` and `POST /mutate` endpoints pair with the CLI's new `-e/--query-string` flag for ad-hoc execution without a temp file. `POST /read` and `POST /change` continue serving indefinitely as deprecated aliases that carry RFC 9745 `Deprecation: true` and RFC 8288 `Link: ; rel="successor-version"` response headers, plus `deprecated: true` in `openapi.json`. Same canonicalization on the CLI: `omnigraph query`, `omnigraph mutate`, and top-level `omnigraph lint` / `omnigraph check` replace `omnigraph read`, `omnigraph change`, and the nested `omnigraph query lint` / `omnigraph query check`. Every deprecated spelling remains a `visible_alias` that warns to stderr once per invocation. + +Runtime add/remove (`POST /graphs`, `DELETE /graphs/{id}`, `omnigraph graphs create`) is **not** in v0.6.0. Operators add or remove graphs by editing `omnigraph.yaml` and restarting. 
The first cut of `POST /graphs` shipped behind an atomic-YAML-rewrite design that we pulled before release once its concurrency guarantees were challenged (flock-on-renamed-inode race, duplicate-check outside the critical section, and an init-cleanup path that could destroy an existing graph's schema on re-init). The correct fix is a Lance-style cluster catalog (reserve → init → publish with recovery sidecars); that work is deferred. + +## Breaking Changes + +### Graph terminology rename + +- Renamed the Cedar resource entity from `Omnigraph::Repo` to `Omnigraph::Graph`. +- Renamed policy API terminology from `repo_id` to `graph_id` on `PolicyCompiler::compile` (and on the new `PolicyEngine::load_graph` / `PolicyEngine::load_server` loaders described below). +- Renamed query-lint schema source JSON from `"repo"` to `"graph"` for `schema_source.kind`. + +### Multi-graph server mode + +- **Multi-graph deployments lose flat routes.** Single-graph invocation (`omnigraph-server <URI>`) is unchanged — same flat `/snapshot`, `/read`, `/branches`, etc. Multi-graph deployments serve those routes under `/graphs/{graph_id}/...`; bare flat paths return 404 in multi mode. +- **`ServerConfig` shape change** (programmatic embedders only): `ServerConfig { uri, policy_file }` is replaced by `ServerConfig { mode: ServerConfigMode }`, where `ServerConfigMode = Single { uri, policy_file } | Multi { graphs, config_path, server_policy_file }`. Callers that use `load_server_settings` are unaffected; callers that construct `ServerConfig` directly need to wrap their fields in `ServerConfigMode::Single`. +- **`AppState`'s routing surface** is `AppState::routing() -> &GraphRouting`, where `GraphRouting = Single { handle } | Multi { registry, config_path }`. The previous `AppState::uri()`, `AppState::mode()`, `AppState::registry()` accessors and the `ServerMode` enum are gone — embedders read `state.routing()` and match on the arm they need. Per-graph URIs live on `handle.uri`.
+- **`AppState::new_multi`** is the new multi-graph constructor. Single-mode `new_*` / `open_*` constructors are unchanged. +- **`AuthenticatedActor(Arc)` → `ResolvedActor { actor_id, tenant_id, scopes, source }`** (programmatic embedders only). The struct shape changes, but the HTTP contract — bearer auth and the bearer-derived-actor-identity guarantee — is unchanged. Cluster-mode call sites construct with `tenant_id: None`, `scopes: vec![Scope::Full]`, `source: AuthSource::Static`. The new fields are forward-compat seams for future multi-tenant and OAuth deployments; they're inert in this release. +- **`PolicyEngine::load(path, graph_id)` removed** in favor of two kind-typed loaders: `PolicyEngine::load_graph(path, graph_id)` for per-graph policies and `PolicyEngine::load_server(path)` for server-level policies. Each loader rejects rules whose action `resource_kind()` doesn't match the engine kind — operators who put a `graph_list` rule in a per-graph file (or a `read` rule in a server file) now get a load-time error instead of a silently-never-matching rule. +- **`PolicyRequest::actor_id` field removed.** Actor identity is now a separate parameter on `PolicyEngine::authorize(actor_id, &request)`. The type system enforces the server-authoritative-actor invariant: actor identity is always sourced from the bearer-token match resolved at the auth boundary; handlers cannot smuggle identity through the request body. +- **`Omnigraph::init` is strict by default.** Initialization at a URI that already holds schema files now errors with `OmniError::AlreadyInitialized` instead of silently overwriting. Operators who actually want to overwrite use `InitOptions { force: true }` (CLI: `omnigraph init --force`). Closes the destructive-cleanup footgun where a failed re-init would delete an existing graph's schema files. +- **Top-level `policy.file` is rejected in multi-graph server mode.** It remains valid for single-graph / CLI-local policy. 
Multi-graph deployments must move graph rules to `graphs.<graph_id>.policy.file` and server-scoped `graph_list` rules to `server.policy.file`. +- **Open server startup requires explicit opt-in.** A server with no bearer tokens and no policy now refuses to start unless passed `--unauthenticated` or `OMNIGRAPH_UNAUTHENTICATED=1`. +- **Policy requires bearer tokens.** Configuring any policy file without bearer tokens now refuses startup; otherwise every protected request would 401 before Cedar could evaluate it. +- **Tokens without policy default-deny non-read actions.** Existing authenticated deployments that relied on writes or admin routes without Cedar policy must add policy rules for those actions. +- **`GET /graphs` requires `server.policy.file` in every runtime state.** Even `--unauthenticated` mode keeps server topology closed until the operator explicitly authorizes `graph_list`. + +### Query / mutation rename + +- **`ChangeRequest` field rename**: `query_source` → `query`, `query_name` → `name`. Both legacy names continue to deserialize via `#[serde(alias = "...")]`, so existing clients sending the old JSON keys keep working. CLI remote calls against `/change` still emit the legacy keys verbatim through the `legacy_change_request_body` helper so a newer CLI talking to an older server keeps working byte-for-byte. +- **CLI `omnigraph query lint` / `omnigraph query check`** are now top-level — canonical name is **`omnigraph lint`**. The three deprecated invocations (`omnigraph query lint`, `omnigraph query check`, and bare `omnigraph check`) remain as argv-level shims that rewrite to `omnigraph lint` and print a one-line stderr deprecation warning. `check` is deliberately **not** a clap `visible_alias` on `lint` — two equivalent canonical names would split agent emissions between them depending on training-data drift, so the deprecation pattern (rewrite + warn) gives one unambiguous canonical name in `omnigraph --help`. + +## New + +- **Multi-graph mode**.
Invoke with `omnigraph-server --config omnigraph.yaml` where the YAML has a non-empty `graphs:` map and no single-mode selector (no `server.graph`, no CLI `<URI>` or `--target`). At startup the server opens every configured graph in parallel (bounded concurrency, fail-fast). +- **`GET /graphs`**. Lists every registered graph, sorted alphabetically by `graph_id`. Auth-required when bearer tokens are configured; Cedar-gated by `PolicyAction::GraphList` against `Omnigraph::Server::"root"`. Returns 405 in single mode. Server-scoped actions require an explicit `server.policy.file` in every runtime state — the management surface is closed by default even in `--unauthenticated` mode so that server topology is never exposed without operator opt-in. +- **CLI `omnigraph graphs list`**. Mirrors the HTTP surface. Rejects local URI targets with a clear message — for remote multi-graph servers only. +- **CLI `omnigraph init --force`**. Bypasses the strict-init preflight when an operator deliberately wants to recover from orphan schema files. Does NOT purge existing Lance datasets; recursive deletion needs `StorageAdapter::delete_prefix` (deferred — see below). +- **Per-graph Cedar policy**. Each entry in the `graphs:` map can carry a `policy.file` path, loaded at startup via `PolicyEngine::load_graph`. Cedar's `Omnigraph::Graph::"<graph_id>"` resource is per-graph; the new `Omnigraph::Server::"root"` resource governs server-level actions. +- **Server-level Cedar policy**. `server.policy.file` in the config governs the `graph_list` action on `Omnigraph::Server::"root"`. Required to expose `GET /graphs` in every runtime state — without a server policy the default-deny posture rejects `graph_list`, including in `--unauthenticated` mode. +- **Cedar action vocabulary**: `graph_list` (server-scoped). Runtime `graph_create` / `graph_delete` are reserved but not shipped — see "Deferred."
+- **Canonical graph URI identity.** Server startup normalizes graph root URIs before registry insertion and response output, so aliases such as `/tmp/g`, `/tmp/g/`, and `file:///tmp/g` cannot register as distinct graphs that actually share one Lance root. +- **`POST /query`** and **`POST /mutate`**. Canonical inline endpoints. `/query` rejects mutations with a typed 400 (the D2 rule lives at the URL — read-only contract enforced before execution); body uses the clean `{ query, name, params, branch, snapshot }` shape. `/mutate` accepts the same shape for mutations. Both available in single mode and per-graph multi mode (`/graphs/{id}/query`, `/graphs/{id}/mutate`). Internal call sites share two helpers (`run_query`, `run_mutate`) that take decoupled args, not request bodies — the seam MR-969's future stored-query handler plugs into. +- **CLI `omnigraph query` / `omnigraph mutate`** as top-level canonical subcommands. Pairs with new top-level **`omnigraph lint` (alias `check`)** so query validation no longer sits under `omnigraph query`. +- **CLI `-e, --query-string <QUERY>`** on both `omnigraph query` and `omnigraph mutate`. 3-way mutex with `--query <PATH>` and `--alias <NAME>` — exactly one is required. Empty string rejected. Suits ad-hoc exploration, REPL workflows, and agent tool-use without temp files. +- **Three-channel deprecation signal on `/read` and `/change`**: OpenAPI `deprecated: true` on the operation (every codegen flags the generated SDK method), RFC 9745 `Deprecation: true` response header, and RFC 8288 `Link: </query>; rel="successor-version"` (or `</mutate>`) response header. Auto-discoverable; no SDK breakage. +- **`omnigraph.yaml` `aliases.<name>.command`** now accepts `query` and `mutate` as canonical values alongside the legacy `read` and `change`. The internal `AliasCommand` enum retains the legacy variant names so serialized configs stay byte-stable.
+ +## Configuration + +`omnigraph.yaml` schema additions (all optional, single-mode unaffected): + +```yaml +server: + bind: 0.0.0.0:8080 + policy: + file: ./server-policy.yaml # server-level Cedar (graph_list) + +graphs: + alpha: + uri: s3://tenant-bucket/alpha + policy: + file: ./policies/alpha.yaml # per-graph Cedar + beta: + uri: s3://tenant-bucket/beta + # no per-graph policy → engine-layer enforcement is a no-op +``` + +## Deferred + +- **`POST /graphs` runtime graph creation** and **CLI `omnigraph graphs create`**. Pulled before release after the YAML-rewrite design's correctness story didn't survive review. A future release will add a managed cluster catalog (Lance-backed reserve → init → publish with recovery sidecars) and re-expose runtime creation on top of it. Until then, operators add graphs by editing `omnigraph.yaml` and restarting. +- **`DELETE /graphs/{id}`**. Never shipped in v0.6.0; deferred with the same cluster-catalog work. +- **`StorageAdapter::delete_prefix`**. The substrate primitive a managed catalog would need. Will land alongside runtime mutation. +- **`omnigraph init --force` purging Lance state.** Today `--force` only bypasses the schema-file preflight; recursive deletion of existing Lance datasets needs `delete_prefix`. +- **`X-Actor-Id` service delegation forwarding**. Needs durable both-actor audit on `_graph_commits.lance` — out of scope. +- **Hot policy reload**. Restart is cheap at N≤10 graphs. + +## User Impact + +- **No on-disk migration is required.** Existing `.omni` graphs from v0.5.0 (and earlier) open cleanly under v0.6.0 — Lance datasets, `__manifest`, `_schema.pg`, `_schema.ir.json`, `__schema_state.json`, `_graph_commits.lance`, `_graph_commit_recoveries.lance` all use unchanged formats. No conversion step. 
+- **Existing single-graph storage upgrades without migration.** Server deployments may need auth/policy config changes: explicitly pass `--unauthenticated` for local open mode, configure tokens when using policy, and add Cedar policy for non-read authenticated actions. +- **Multi-graph adoption is opt-in.** Add a `graphs:` map to `omnigraph.yaml` (and remove `server.graph`) to switch a deployment to multi mode. +- **Cluster routes are breaking for client SDKs targeting multi mode.** Generated clients from previous v0.5.0 OpenAPI specs will hit 404 on flat paths against a multi-mode server. Regenerate against the v0.6.0 `openapi.json`. +- **Supported YAML policy authoring is unchanged.** The Cedar `Omnigraph::Graph` and `Omnigraph::Server` entities are internally generated by `compile_policy_source` — operator YAML only references actions and groups. +- **Operators with unsupported raw Cedar policy files** should update `Omnigraph::Repo` resource references to `Omnigraph::Graph`. +- **Endpoint and CLI rename is cosmetic on the client side.** Existing callers on `/read`, `/change`, `omnigraph read`, `omnigraph change`, and `omnigraph query lint` keep working — they pick up the `Deprecation` + `Link` headers (or stderr deprecation warning on the CLI) so SDKs and proxies can surface the successor name automatically. New integrations should target the canonical names. ChangeRequest field names migrate at the caller's pace — both `query_source`/`query_name` and `query`/`name` accepted indefinitely. 
+ +## Migration: single → multi + +```yaml +# Before (v0.5.0 single-mode invocation) +server: + graph: my-graph +graphs: + my-graph: + uri: /var/lib/omnigraph/my-graph +policy: + file: ./policy.yaml +``` + +```yaml +# After (v0.6.0 multi-mode — drop `server.graph` and the top-level `policy`) +server: + policy: + file: ./server-policy.yaml # NEW: governs GET /graphs +graphs: + my-graph: + uri: /var/lib/omnigraph/my-graph + policy: + file: ./policy.yaml # MOVED: was top-level +``` + +Same `omnigraph.yaml` file; restart the server. Clients targeting the old flat routes (`/snapshot`, `/read`, …) must update to `/graphs/my-graph/snapshot`, etc. + +To add a new graph after rollout: stop the server, append a new `graphs.` entry, restart. + +## Documentation + +- Public docs, CLI help, examples, server docs, and test helpers now consistently use "graph" for the OmniGraph data artifact. +- GitHub/source repository terminology remains spelled out as "repository" where needed. +- New: `docs/user/cli.md` documents `omnigraph graphs list`; `docs/user/server.md` documents the multi-graph mode and the cluster route convention; `docs/user/policy.md` documents the per-graph vs server-scoped action distinction. +- New: `docs/user/server.md` documents `POST /query` / `POST /mutate` and the three-channel deprecation signal on `/read` / `/change`. `docs/user/cli.md` documents the `-e/--query-string` flag with examples. `docs/user/cli-reference.md` shows the canonical CLI verbs (`query`, `mutate`, `lint`, `check`) with legacy spellings as visible aliases. +- New: `docs/dev/rfc-001-queries-envelope-mcp.md` is the cross-cutting design doc for the inline / stored query work that started landing in this release. It sequences the v0.6.x patch series (request/response envelope hardening) and the v0.7.0 stored-query + MCP work. + +## Test coverage + +- `GraphId` newtype validation, registry race tests, init failpoints (still reachable from `omnigraph init` CLI). 
+- Mode-inference four-rule matrix, parallel multi-graph startup, cluster routing. +- Cedar `Server` resource refactor, backwards-compat for graph-only policies, kind-alignment rejection (server actions in graph files / vice versa). +- `GET /graphs` enumeration, 405-in-single-mode, 403-in-Open-mode-without-server-policy, Cedar admin/viewer authorization. +- Cluster routes with inner path params (`/branches/{branch}`, `/commits/{commit_id}`) deserialize correctly under axum 0.8 nested routing. +- Policy-requires-tokens startup invariant enforced uniformly across single and multi mode. +- The bearer-auth-derived-actor-identity regression test (client-supplied identity headers are ignored; the server-resolved actor is the only identity Cedar sees) stays green across the entire refactor. + diff --git a/docs/server.md b/docs/server.md deleted file mode 100644 index 6904e99..0000000 --- a/docs/server.md +++ /dev/null @@ -1,101 +0,0 @@ -# HTTP Server (`omnigraph-server`) - -Axum 0.8 + tokio + utoipa-generated OpenAPI. Single repo per process; deploy multiple processes for multi-tenant. 
- -## Endpoint inventory - -| Method | Path | Auth | Action | Handler | -|---|---|---|---|---| -| GET | `/healthz` | none | — | `server_health` | -| GET | `/openapi.json` | none | — | `server_openapi` (strips security if auth disabled) | -| GET | `/snapshot?branch=` | bearer + `read` | snapshot of branch | `server_snapshot` | -| POST | `/read` | bearer + `read` | run named query | `server_read` | -| POST | `/export` | bearer + `export` | NDJSON stream | `server_export` | -| POST | `/change` | bearer + `change` | mutation | `server_change` | -| GET | `/schema` | bearer + `read` | get current `.pg` source | `server_schema_get` | -| POST | `/schema/apply` | bearer + `schema_apply` (target=`main`) | migrate | `server_schema_apply` | -| POST | `/ingest` | bearer + `branch_create` (if new) + `change` | bulk load | `server_ingest` (32 MB body limit) | -| GET | `/branches` | bearer + `read` | list branches | `server_branch_list` | -| POST | `/branches` | bearer + `branch_create` | create | `server_branch_create` | -| DELETE | `/branches/{branch}` | bearer + `branch_delete` | delete | `server_branch_delete` | -| POST | `/branches/merge` | bearer + `branch_merge` | merge `source → target` | `server_branch_merge` | -| GET | `/commits?branch=` | bearer + `read` | list | `server_commit_list` | -| GET | `/commits/{commit_id}` | bearer + `read` | show | `server_commit_show` | - -## Streaming - -Only `/export` streams (`application/x-ndjson`, MPSC channel + `Body::from_stream`). Everything else is buffered JSON. - -## Error model - -Uniform `ErrorOutput { error, code?, merge_conflicts[], manifest_conflict? }` with `code ∈ unauthorized | forbidden | bad_request | not_found | conflict | too_many_requests | internal`. Merge conflicts attach structured `MergeConflictOutput { table_key, row_id?, kind, message }`. - -`manifest_conflict` is set on **publisher CAS rejections** (HTTP 409): the -caller's pre-write view of one table's manifest version was stale. 
-`ManifestConflictOutput { table_key, expected, actual }` tells the client -which table to refresh and retry. This is the conflict shape produced by -concurrent `/change` or `/ingest` calls landing the same `(table, branch)` -race. - -HTTP status codes used: 200, 400, 401, 403, 404, 409, 429, 500. - -## Per-actor admission control - -Disjoint -`(table, branch)` writes from different actors now run concurrently, -guarded only by the engine's per-(table, branch) write queue. To keep -one heavy actor from exhausting shared capacity (Lance I/O, manifest -churn, network), the server gates mutating handlers through a -`WorkloadController` configured per-process from environment variables: - -| Env var | Default | Purpose | -|---|---|---| -| `OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX` | 16 | Concurrent in-flight mutations per actor | -| `OMNIGRAPH_PER_ACTOR_BYTES_MAX` | 4 GiB | In-flight estimated bytes per actor | - -When an actor exceeds its in-flight count or byte budget, the server -returns **HTTP 429 Too Many Requests** with `code: too_many_requests` -and a `Retry-After` header (seconds). The actor should back off; other -actors are unaffected. - -Cedar policy authorization runs **before** admission accounting so -denied requests don't consume admission slots. - -Today admission gates every mutating handler: `/change`, `/ingest`, -`/branches/{create,delete,merge}`, and `/schema/apply`. Read-only -endpoints (`/snapshot`, `/read`, `/export`, `/branches` GET, `/commits`, -`/schema` GET) are not admission-gated. - -## Body limits - -- Default: 1 MB -- `/ingest`: 32 MB - -## Auth model (`bearer + SHA-256`) - -- Tokens are SHA-256 hashed on startup; plaintext is never persisted in memory. -- Constant-time comparison via `subtle::ConstantTimeEq`. -- Three sources, in precedence: - 1. `OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET` — AWS Secrets Manager (build with `--features aws`) - 2. 
`OMNIGRAPH_SERVER_BEARER_TOKENS_FILE` or `OMNIGRAPH_SERVER_BEARER_TOKENS_JSON` — JSON `{actor_id: token, …}` - 3. `OMNIGRAPH_SERVER_BEARER_TOKEN` — single legacy token, actor `default` -- If no tokens configured, server runs unauthenticated (local dev) and `/openapi.json` strips the security scheme. - -See [deployment.md](deployment.md) for token-source operational details. - -## Tracing & observability - -- `tower_http::TraceLayer::new_for_http()` -- Policy decisions logged at INFO level with actor, action, branch, decision, matched rule -- Startup logs: token source name, repo URI, bind address -- Graceful SIGINT shutdown - -## Not implemented (by design or "TBD") - -- CORS — not configured; add `tower_http::cors` if needed. -- Rate limiting — per-actor admission control gates `/change`, `/ingest`, - `/branches/{create,delete,merge}`, `/schema/apply` (see "Per-actor - admission control" above). No global rate limiter is configured; - add `tower_http::limit` if a graph-wide cap is needed. -- Pagination — none (commits/branches return everything; export streams). -- Multi-tenant routing — one repo per process. diff --git a/docs/audit.md b/docs/user/audit.md similarity index 70% rename from docs/audit.md rename to docs/user/audit.md index 80ac137..e8abe5b 100644 --- a/docs/audit.md +++ b/docs/user/audit.md @@ -4,4 +4,4 @@ - `_as` variants of every write API let callers override the actor: `mutate_as`, `ingest_as`, `branch_merge_as`, `apply_schema_as`, etc. - Actor IDs are persisted on `GraphCommit.actor_id` with split storage in `_graph_commit_actors.lance` (the commit graph is split into `_graph_commits.lance` for the linkage and `_graph_commit_actors.lance` for the actor map). - HTTP server uses the bearer-token actor automatically; CLI uses the local user / explicit env (no implicit actor). -- Pre-v0.4.0 repos also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. 
The Run state machine was removed in MR-771; those files are inert post-v0.4.0 and reclaimed by MR-770's production sweep. +- Pre-v0.4.0 graphs also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. The Run state machine was removed in MR-771; those files are inert post-v0.4.0 and reclaimed by MR-770's production sweep. diff --git a/docs/branches-commits.md b/docs/user/branches-commits.md similarity index 100% rename from docs/branches-commits.md rename to docs/user/branches-commits.md diff --git a/docs/changes.md b/docs/user/changes.md similarity index 100% rename from docs/changes.md rename to docs/user/changes.md diff --git a/docs/cli-reference.md b/docs/user/cli-reference.md similarity index 71% rename from docs/cli-reference.md rename to docs/user/cli-reference.md index 599ee13..0326e64 100644 --- a/docs/cli-reference.md +++ b/docs/user/cli-reference.md @@ -8,18 +8,18 @@ A reference for the `omnigraph` binary's command surface and `omnigraph.yaml` sc | Command | Purpose | |---|---| -| `init` | `--schema ` → initialize a repo (also scaffolds `omnigraph.yaml` if missing) | +| `init` | `--schema ` → initialize a graph (also scaffolds `omnigraph.yaml` if missing) | | `load` | bulk load a branch (`--mode overwrite\|append\|merge`) | | `ingest` | branch-creating transactional load (`--from `) | -| `read` | run named query (params via `--params`, `--params-file`, or alias args) | -| `change` | run mutation query | +| `query` (alias: `read`) | run named read query; source via `--query `, `-e`/`--query-string `, or `--alias ` (exactly one). `read` is the deprecated previous name and prints a one-line warning to stderr | +| `mutate` (alias: `change`) | run mutation query; same `--query` / `-e` / `--alias` mutual-exclusion as `query`. 
`change` is the deprecated previous name and prints a one-line warning to stderr | | `snapshot` | print current snapshot (per-table version + row count) | | `export` | dump to JSONL on stdout (`--type T`, `--table K` filters) | | `branch create \| list \| delete \| merge` | branching ops | | `commit list \| show` | inspect commit graph | | `run list \| show \| publish \| abort` | transactional run ops | | `schema plan \| apply \| show (alias: get)` | migrations | -| `query lint \| check` | offline / repo-backed validation | +| `lint` (alias: `check`) | offline / graph-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` | | `optimize` | non-destructive Lance compaction | | `cleanup --keep N --older-than 7d --confirm` | destructive version GC | | `embed` | offline JSONL embedding pipeline | @@ -49,7 +49,10 @@ auth: env_file: ./.env.omni aliases: : - command: read|change + # accepted values: `read` / `query` (read alias), `change` / `mutate` + # (write alias). `query` and `mutate` are recommended; `read` and + # `change` remain accepted forever for back-compat. 
+ command: read|change|query|mutate query: name: args: [, …] @@ -60,7 +63,7 @@ policy: file: ./policy.yaml ``` -## Output formats (read command) +## Output formats (`query` command, alias: `read`) - `json` — pretty-printed object with metadata + rows - `jsonl` — one metadata line then one JSON object per row diff --git a/docs/user/cli.md b/docs/user/cli.md new file mode 100644 index 0000000..b6f2c09 --- /dev/null +++ b/docs/user/cli.md @@ -0,0 +1,164 @@ +# CLI Guide + +## Core Graph Flow + +```bash +omnigraph init --schema ./schema.pg ./graph.omni +omnigraph load --data ./data.jsonl --mode overwrite ./graph.omni +omnigraph snapshot ./graph.omni --branch main --json +omnigraph query --uri ./graph.omni --query ./queries.gq --name get_person --params '{"name":"Alice"}' +omnigraph mutate --uri ./graph.omni --query ./queries.gq --name insert_person --params '{"name":"Mina","age":28}' +``` + +`omnigraph query` is the canonical read command (pairs with `POST /query`); +`omnigraph mutate` is the canonical write command (pairs with `POST /mutate`). +The previous names `omnigraph read` and `omnigraph change` keep working as +visible aliases — invocations emit a one-line deprecation warning to stderr +and otherwise behave identically. See [Deprecated names](#deprecated-names) +for the migration table. + +For ad-hoc reads and mutations (REPLs, AI agents, one-off scripts), pass the +GQ source inline with `-e` / `--query-string` instead of a file path: + +```bash +omnigraph query --uri ./graph.omni \ + -e 'query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }' \ + --params '{"name":"Alice"}' + +omnigraph mutate --uri ./graph.omni \ + -e 'query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }' \ + --params '{"name":"Inline","age":42}' +``` + +`-e` is mutually exclusive with `--query ` and `--alias `; exactly +one of the three must be provided. 
The inline source travels through the same +parser, lint, params binding, and commit machinery as a file-based query — +only the source loader changes. + +## Branching And Reviewable Data Flows + +```bash +omnigraph branch create --uri ./graph.omni --from main feature-x +omnigraph branch list --uri ./graph.omni +omnigraph branch merge --uri ./graph.omni feature-x --into main + +omnigraph ingest --data ./batch.jsonl --branch review/import-2026-04-09 ./graph.omni +omnigraph export ./graph.omni --branch main --type Person > people.jsonl +omnigraph commit list ./graph.omni --branch main --json +omnigraph commit show --uri ./graph.omni --json +``` + +## Remote Server Mode + +Serve a graph: + +```bash +omnigraph-server ./graph.omni --bind 127.0.0.1:8080 +``` + +Read through the HTTP API: + +```bash +omnigraph query \ + --target http://127.0.0.1:8080 \ + --query ./queries.gq \ + --name get_person \ + --params '{"name":"Alice"}' +``` + +If the server requires auth, set `OMNIGRAPH_SERVER_BEARER_TOKEN` on the server +and configure the matching `bearer_token_env` in `omnigraph.yaml`. + +## Multi-graph servers (v0.6.0+) + +Against a multi-graph server (started with `--config omnigraph.yaml` referencing a non-empty `graphs:` map), use `omnigraph graphs list` to enumerate the registered graphs. The server must configure bearer tokens and `server.policy.file` with a rule that allows `graph_list`; `/graphs` is closed by default even when the server runs with `--unauthenticated`. + +```bash +OMNIGRAPH_BEARER_TOKEN=admin-token \ + omnigraph graphs list --uri http://server.example.com --json +``` + +For config-driven clients, set the remote graph's `bearer_token_env` to an environment variable containing a token whose actor is authorized by `server.policy.file`. + +`list` rejects local URI targets — it's for remote multi-graph servers only. + +Runtime add/remove is **not** in v0.6.0. To add a graph, stop the server, add a `graphs.` entry to `omnigraph.yaml`, then restart. 
To remove, stop the server, delete the entry, restart. + +Per-graph URLs: hit a graph's cluster route from any subcommand by pointing `--uri` at it: + +```bash +omnigraph query --uri http://server.example.com/graphs/beta --query ./q.gq ... +``` + +## Runs, Policy, And Diagnostics + +```bash +omnigraph lint --query ./queries.gq --schema ./schema.pg --json +omnigraph check --query ./queries.gq ./graph.omni --json + +omnigraph schema plan --schema ./next.pg ./graph.omni --json +omnigraph schema apply --schema ./next.pg ./graph.omni --json +omnigraph policy validate --config ./omnigraph.yaml +omnigraph policy test --config ./omnigraph.yaml +omnigraph policy explain --config ./omnigraph.yaml --actor act-alice --action read --branch main + +omnigraph commit list ./graph.omni --json +omnigraph commit show --uri ./graph.omni --json +``` + +(The legacy `omnigraph run list/show/publish/abort` subcommands were removed in MR-771; mutations and loads publish atomically and the commit graph (`omnigraph commit list`) is the audit surface.) + +`lint` and `check` are the same command surface. In v1, graph-backed +lint uses local or `s3://` graph URIs; HTTP targets are only supported when you +also pass `--schema`. + +## Config + +`omnigraph.yaml` lets the CLI and server share named graphs, defaults, and +query roots: + +```yaml +graphs: + local: + uri: ./demo.omni + dev: + uri: http://127.0.0.1:8080 + bearer_token_env: OMNIGRAPH_BEARER_TOKEN + +cli: + graph: local + branch: main + +query: + roots: + - queries + - . +``` + +The config file can also define: + +- server bind defaults +- auth env files +- query aliases for common `query` and `mutate` commands +- `policy.file` for Cedar authorization rules + +When policy is enabled, `schema apply` is authorized through the +`schema_apply` action and is typically limited to admins on protected `main`. 
+ +## Deprecated names + +The CLI was renamed to align with the HTTP server's canonical endpoint +names (`POST /query`, `POST /mutate`) and the `query` keyword in the GQ +language. The previous spellings keep working forever; invocations emit a +one-line warning to stderr and otherwise behave identically. + +| Old (deprecated) | New (canonical) | Migration | +|--------------------------|---------------------|----------------------------------------------------------| +| `omnigraph read` | `omnigraph query` | Same flags and behavior. `read` is a visible clap alias. | +| `omnigraph change` | `omnigraph mutate` | Same flags and behavior. `change` is a visible clap alias. | +| `omnigraph query lint` | `omnigraph lint` | Same flags. The argv-level shim rewrites `query lint` to `lint`. | +| `omnigraph query check` | `omnigraph check` | `check` is a visible alias of `omnigraph lint`. | + +The `command:` field in `aliases.<name>` in `omnigraph.yaml` accepts both +`read` / `change` (legacy) and `query` / `mutate` (canonical); the two +spellings are interchangeable on the wire via serde aliases. diff --git a/docs/constants.md b/docs/user/constants.md similarity index 100% rename from docs/constants.md rename to docs/user/constants.md diff --git a/docs/deployment.md b/docs/user/deployment.md similarity index 77% rename from docs/deployment.md rename to docs/user/deployment.md index e611245..8613dec 100644 --- a/docs/deployment.md +++ b/docs/user/deployment.md @@ -8,8 +8,8 @@ internal deploy automation. Omnigraph supports two broad deployment shapes: -- local directory repos -- `s3://` repos on AWS S3 or S3-compatible object stores +- local directory graphs +- `s3://` graphs on AWS S3 or S3-compatible object stores The server binary and container image expose the same HTTP surface. @@ -20,18 +20,20 @@ Build or install: - `omnigraph` - `omnigraph-server` -Run against a local repo: +On Windows, the binaries are `omnigraph.exe` and `omnigraph-server.exe`. 
+ +Run against a local graph: ```bash -omnigraph-server ./repo.omni --bind 0.0.0.0:8080 +omnigraph-server ./graph.omni --bind 0.0.0.0:8080 ``` -Run against an object-store-backed repo: +Run against an object-store-backed graph: ```bash OMNIGRAPH_SERVER_BEARER_TOKEN="change-me" \ AWS_REGION="us-east-1" \ -omnigraph-server s3://my-bucket/repos/example/releases/2026-04-10-v0.1.0 \ +omnigraph-server s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0 \ --bind 0.0.0.0:8080 ``` @@ -46,7 +48,7 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/ The bootstrap: - starts a local RustFS-backed object store -- creates a bucket and S3-backed Omnigraph repo +- creates a bucket and S3-backed Omnigraph graph - loads the checked-in context fixture - starts `omnigraph-server` on `127.0.0.1:8080` @@ -60,8 +62,8 @@ Useful overrides: - `WORKDIR=/path/to/state` - `BUCKET=omnigraph-local` -- `PREFIX=repos/context` -- `RESET_REPO=1` to delete an existing partially initialized repo prefix before recreating it +- `PREFIX=graphs/context` +- `RESET_REPO=1` to delete an existing partially initialized graph prefix before recreating it - `BIND=127.0.0.1:8080` - `RUSTFS_CONTAINER_NAME=omnigraph-rustfs-demo` @@ -76,7 +78,7 @@ If `aws` is not installed, the script attempts a user-local AWS CLI install via running. If a previous bootstrap left objects behind under the selected `PREFIX` but did -not finish initializing the repo, rerun with `RESET_REPO=1` or choose a new +not finish initializing the graph, rerun with `RESET_REPO=1` or choose a new `PREFIX`. ## Container Deployment @@ -87,29 +89,30 @@ Build the image: docker build -t omnigraph-server:local . 
``` -Run against a local repo: +Run against a local graph: ```bash docker run --rm -p 8080:8080 \ - -v "$PWD/repo.omni:/data/repo.omni" \ + -v "$PWD/graph.omni:/data/graph.omni" \ omnigraph-server:local \ - /data/repo.omni --bind 0.0.0.0:8080 + /data/graph.omni --bind 0.0.0.0:8080 ``` -Run against an S3-backed repo: +Run against an S3-backed graph: ```bash docker run --rm -p 8080:8080 \ -e OMNIGRAPH_SERVER_BEARER_TOKEN="change-me" \ -e AWS_REGION="us-east-1" \ omnigraph-server:local \ - s3://my-bucket/repos/example/releases/2026-04-10-v0.1.0 \ + s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0 \ --bind 0.0.0.0:8080 ``` ## Auth -The server can run unauthenticated for local development, but any shared or +The server can run unauthenticated for local development only when explicitly +started with `--unauthenticated` or `OMNIGRAPH_UNAUTHENTICATED=1`. Any shared or internet-facing deployment should set a bearer token source. ### Token sources @@ -139,9 +142,11 @@ The server binary ships in two flavors: | **Default** (on-prem / local dev) | `cargo build --release` | Core server, no AWS SDK | | **AWS** | `cargo build --release --features aws` | Adds AWS Secrets Manager backend for bearer tokens | -Release artifacts are published with matching suffixes — -`omnigraph-server--.tar.gz` for the default build and -`omnigraph-server---aws.tar.gz` for the AWS-enabled build. +Tagged release archives contain the default `omnigraph` and +`omnigraph-server` binaries on macOS / Linux, and `omnigraph.exe` plus +`omnigraph-server.exe` on Windows. AWS-enabled server binaries are built from +source with `cargo build --release --features aws -p omnigraph-server` when +needed. The AWS build adds ~150 transitive deps and ~30-60s of first-build compile time. Default builds don't pay that cost. @@ -154,7 +159,7 @@ Manager secret whose `SecretString` is a JSON object of `{"actor_id": "token", ...}`: ```bash -omnigraph-server-aws s3://my-bucket/repos/example ... 
+omnigraph-server-aws s3://my-bucket/graphs/example ... # Environment: # OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET=arn:aws:secretsmanager:us-east-1:123456789012:secret:omnigraph-tokens-AbCdEf ``` diff --git a/docs/embeddings.md b/docs/user/embeddings.md similarity index 95% rename from docs/embeddings.md rename to docs/user/embeddings.md index 596a6a0..382e683 100644 --- a/docs/embeddings.md +++ b/docs/user/embeddings.md @@ -22,7 +22,7 @@ Mark a Vector property with `@embed("source_text_property")`. At ingest, the eng ## CLI `omnigraph embed` (offline file pipeline) -Operates on **JSONL files** (not on a repo). Three modes (mutually exclusive): +Operates on **JSONL files** (not on a graph). Three modes (mutually exclusive): - (default) `fill_missing` — only embed rows whose target field is empty - `--reembed-all` — overwrite all diff --git a/docs/errors.md b/docs/user/errors.md similarity index 92% rename from docs/errors.md rename to docs/user/errors.md index ad79e66..fd047eb 100644 --- a/docs/errors.md +++ b/docs/user/errors.md @@ -9,7 +9,7 @@ - `Manifest(ManifestError { kind: BadRequest|NotFound|Conflict|Internal, details: Option, … })` - `ManifestConflictDetails::ExpectedVersionMismatch { table_key, expected, actual }` — caller's `expected_table_versions` did not match the manifest's current latest non-tombstoned version (set by `OmniError::manifest_expected_version_mismatch`). - `ManifestConflictDetails::RowLevelCasContention` — Lance row-level CAS rejected the publish because a concurrent writer landed the same `object_id`. Retried internally by the publisher; only surfaces if the retry budget exhausts. - - **D₂ parse-time rejection** (MR-794): a single mutation query that mixes inserts/updates with deletes errors out *before any I/O* with kind `BadRequest`. Message: `mutation '' on the same query mixes inserts/updates and deletes; split into separate mutations: (1) inserts and updates, then (2) deletes`. 
See [docs/query-language.md](query-language.md) for the rule and [docs/runs.md](runs.md) for the underlying staged-write rationale. + - **D₂ parse-time rejection** (MR-794): a single mutation query that mixes inserts/updates with deletes errors out *before any I/O* with kind `BadRequest`. Message: `mutation '' on the same query mixes inserts/updates and deletes; split into separate mutations: (1) inserts and updates, then (2) deletes`. See [docs/user/query-language.md](query-language.md) for the rule and [docs/dev/runs.md](../dev/runs.md) for the underlying staged-write rationale. - `MergeConflicts(Vec)` Compiler-side `NanoError` covers parse / catalog / type / storage / plan / execution / arrow / lance / IO / manifest / unique-constraint, each with structured spans (`SourceSpan { start, end }`) for ariadne-style diagnostics. diff --git a/docs/user/index.md b/docs/user/index.md new file mode 100644 index 0000000..1b93efa --- /dev/null +++ b/docs/user/index.md @@ -0,0 +1,52 @@ +# User Docs + +**Audience:** users, CLI users, HTTP clients, and self-hosting operators + +This is the public-facing entry point. These docs should describe behavior, +commands, configuration, and operational contracts without requiring knowledge +of MRs, internal recovery mechanics, or contributor-only invariants. 
+ +## Start Here + +| Goal | Read | +|---|---| +| Install OmniGraph | [install.md](install.md) | +| Run the CLI locally | [cli.md](cli.md) | +| Look up every CLI flag and config field | [cli-reference.md](cli-reference.md) | +| Write schemas | [schema-language.md](schema-language.md) | +| Read schema-lint diagnostic codes | [schema-lint.md](schema-lint.md) | +| Write queries and mutations | [query-language.md](query-language.md) | +| Use embeddings | [embeddings.md](embeddings.md) | + +## Operate A Graph + +| Goal | Read | +|---|---| +| Understand graph layout and URI support | [storage.md](storage.md) | +| Work with branches, commits, and snapshots | [branches-commits.md](branches-commits.md) | +| Coordinate multi-query workflows | [transactions.md](transactions.md) | +| Read diffs and change feeds | [changes.md](changes.md) | +| Build and use indexes | [indexes.md](indexes.md) | +| Compact and clean old versions | [maintenance.md](maintenance.md) | +| Interpret errors and output formats | [errors.md](errors.md) | + +## Run The Server + +| Goal | Read | +|---|---| +| Deploy the binary or container | [deployment.md](deployment.md) | +| Use HTTP endpoints | [server.md](server.md) | +| Configure Cedar authorization | [policy.md](policy.md) | +| Track actors and audit behavior | [audit.md](audit.md) | + +## Releases + +Release notes live in [releases/](../releases/). Use them for user-visible +changes between versions, not for contributor design history. + +## Boundary + +User docs should focus on stable behavior. If a paragraph needs to explain +internal sidecars, Lance API blockers, MR numbers, test strategy, or review +rules, it probably belongs in [docs/dev/index.md](../dev/index.md) or a developer-area document +instead. 
diff --git a/docs/indexes.md b/docs/user/indexes.md similarity index 100% rename from docs/indexes.md rename to docs/user/indexes.md diff --git a/docs/install.md b/docs/user/install.md similarity index 57% rename from docs/install.md rename to docs/user/install.md index 725961e..4a11372 100644 --- a/docs/install.md +++ b/docs/user/install.md @@ -2,16 +2,29 @@ ## Quick Install +macOS / Linux: + ```bash curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | bash ``` +Windows PowerShell: + +```powershell +powershell -NoProfile -ExecutionPolicy Bypass -Command "iwr -UseBasicParsing https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.ps1 | iex" +``` + By default the installer places: - `omnigraph` - `omnigraph-server` -in `~/.local/bin`. +in `~/.local/bin` on macOS / Linux, or: + +- `omnigraph.exe` +- `omnigraph-server.exe` + +in `%USERPROFILE%\.local\bin` on Windows. The default installer is binary-only. It downloads a published release asset, verifies the SHA256 checksum, and unpacks it. It does not build from source. 
@@ -39,6 +52,13 @@ Rolling edge binaries from `main`: curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | RELEASE_CHANNEL=edge bash ``` +Windows rolling edge binaries: + +```powershell +iwr -UseBasicParsing https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.ps1 -OutFile install.ps1 +powershell -NoProfile -ExecutionPolicy Bypass -File .\install.ps1 -ReleaseChannel edge +``` + Install from source: ```bash @@ -53,12 +73,24 @@ Install to a different directory: curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | INSTALL_DIR="$HOME/bin" bash ``` +Windows: + +```powershell +powershell -NoProfile -ExecutionPolicy Bypass -File .\install.ps1 -InstallDir "$env:USERPROFILE\bin" +``` + Install a specific tag: ```bash curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | VERSION=v0.1.0 bash ``` +Windows: + +```powershell +powershell -NoProfile -ExecutionPolicy Bypass -File .\install.ps1 -Version v0.1.0 +``` + Build from a specific git ref: ```bash @@ -67,28 +99,53 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/ ## Manual Source Build +macOS / Linux: + ```bash cargo build --release --locked -p omnigraph-cli -p omnigraph-server install -m 0755 target/release/omnigraph ~/.local/bin/omnigraph install -m 0755 target/release/omnigraph-server ~/.local/bin/omnigraph-server ``` +Windows: + +```powershell +cargo build --release --locked -p omnigraph-cli -p omnigraph-server +New-Item -ItemType Directory -Force "$env:USERPROFILE\.local\bin" | Out-Null +Copy-Item target\release\omnigraph.exe "$env:USERPROFILE\.local\bin\omnigraph.exe" +Copy-Item target\release\omnigraph-server.exe "$env:USERPROFILE\.local\bin\omnigraph-server.exe" +``` + ## Release Assets Tagged releases are expected to publish: - `omnigraph-linux-x86_64.tar.gz` -- `omnigraph-macos-x86_64.tar.gz` - `omnigraph-macos-arm64.tar.gz` +- 
`omnigraph-windows-x86_64.zip` -Each archive contains both binaries: +The macOS / Linux archives contain both binaries: - `omnigraph` - `omnigraph-server` +The Windows archive contains: + +- `omnigraph.exe` +- `omnigraph-server.exe` + ## Verify The Install +macOS / Linux: + ```bash omnigraph version omnigraph-server --help ``` + +Windows: + +```powershell +omnigraph.exe version +omnigraph-server.exe --help +``` diff --git a/docs/maintenance.md b/docs/user/maintenance.md similarity index 100% rename from docs/maintenance.md rename to docs/user/maintenance.md diff --git a/docs/user/policy.md b/docs/user/policy.md new file mode 100644 index 0000000..749d3be --- /dev/null +++ b/docs/user/policy.md @@ -0,0 +1,164 @@ +# Authorization (Cedar policy) + +OmniGraph integrates AWS Cedar (`cedar-policy = 4.9`) for ABAC. + +## Policy actions + +Per-graph actions (bind to `Omnigraph::Graph::""`): + +1. `read` — query / snapshot / list branches & commits +2. `export` — NDJSON export +3. `change` — mutations +4. `schema_apply` — apply schema migrations +5. `branch_create` +6. `branch_delete` +7. `branch_merge` +8. `admin` — reserved for policy-management surfaces (hot reload, audit log, approvals). No call site today; see MR-724 for the reservation rationale. + +Server-scoped action (v0.6.0+; binds to `Omnigraph::Server::"root"`): + +9. `graph_list` — `GET /graphs` registry enumeration (multi-graph mode) + +Server-scoped actions cannot use `branch_scope` or `target_branch_scope` — they operate on the registry, not on a graph's branches. A rule cannot mix server-scoped and per-graph actions; split into separate rules. (Runtime `graph_create` / `graph_delete` are reserved but not shipped in v0.6.0; operators add/remove graphs by editing `omnigraph.yaml` and restarting.) 
+ +## Scope kinds + +- `branch_scope` — applied to source branch (`read`, `export`, `change`) +- `target_branch_scope` — applied to destination (`schema_apply`, branch ops, run ops) +- `protected_branches` — named list with special rules; rule scopes are `any | protected | unprotected` + +## Per-graph vs. server-level policy (multi-graph mode) + +In multi mode (`omnigraph.yaml` with a non-empty `graphs:` map), policy files attach at two levels: + +```yaml +server: + policy: + file: ./server-policy.yaml # server-level: graph_list + +graphs: + alpha: + uri: s3://tenant-bucket/alpha + policy: + file: ./policies/alpha.yaml # per-graph: read, change, branch_*, schema_apply + beta: + uri: s3://tenant-bucket/beta + # no per-graph policy → no engine-layer Cedar enforcement on beta +``` + +Top-level `policy.file` is single-graph / CLI-local policy only. Multi-graph +server startup rejects it because applying one graph policy to every configured +graph is ambiguous. Move per-graph rules to `graphs.<graph_id>.policy.file` and +move `graph_list` rules to `server.policy.file`. + +Each HTTP request addressed to a graph flows through that graph's own per-graph policy. The management endpoint (`GET /graphs`) flows through the server-level policy. When `server.policy.file` is unset, `GET /graphs` is denied in every runtime state, including `--unauthenticated`; with bearer tokens configured, it returns 403 after admission control because `graph_list` is not a `read`-equivalent action. The operator must explicitly authorize via `server-policy.yaml` to expose `/graphs`. 
+ +Example server-level policy: + +```yaml +version: 1 +groups: + admins: [act-andrew] +rules: + - id: admins-can-list-graphs + allow: + actors: { group: admins } + actions: [graph_list] +``` + +## Configuration + +`omnigraph.yaml`: + +```yaml +policy: + file: ./policy.yaml # Cedar rules + groups + tests: ./policy.tests.yaml # declarative test cases + +cli: + actor: act-andrew # default actor for CLI direct-engine writes +``` + +Each per-graph rule may use at most one of `branch_scope` or `target_branch_scope`. Server-scoped rules (`graph_list`) take neither — they have no branch context. + +`cli.actor` is the default actor identity for CLI direct-engine writes +when `policy.file` is configured. Override per-invocation with `--as +<actor>` (top-level flag) — `--as` wins, otherwise `cli.actor` is used, +otherwise no actor. With policy configured and neither set, the +engine-layer footgun guard intentionally denies the write (silent bypass +via "I forgot the actor" is exactly what the guard prevents). Remote +HTTP writes ignore both — they resolve their actor server-side from the +bearer token. + +## CLI + +- `omnigraph policy validate` — parse + count actors, exit 1 on parse error. +- `omnigraph policy test` — run cases in `policy.tests.yaml`, exit 1 on any expectation mismatch. +- `omnigraph policy explain --actor … --action … [--branch …] [--target-branch …]` — show decision and matched rule. +- `omnigraph --as <actor> <command>` — set the actor for the duration of one invocation. Effective for `change`, `load`, `ingest`, `branch create|delete|merge`, and `schema apply` against local URIs. No-op against remote HTTP URIs (actor is bearer-token-resolved server-side). + +## Enforcement + +Policy is a property of the **engine**, not the transport. Every mutating +write — `mutate_as`, `load_as`, `ingest_as`, `apply_schema_as`, +`branch_create_as`, `branch_create_from_as`, `branch_delete_as`, +`branch_merge_as` — calls `Omnigraph::enforce(action, scope, actor)` at +the head of the method. 
The gate fires identically whether the call +originates from the HTTP server, the CLI, or an embedded SDK consumer. +When no `PolicyChecker` is installed (the dev/embedded default) the gate +is a strict no-op; when one is installed and the call site forgets to +thread an actor through, the gate fails closed rather than silently +bypassing. + +## Server runtime states (MR-723) + +The HTTP server classifies its startup configuration into one of three +states based on whether bearer tokens are configured and whether a +policy file is set. The state determines what happens to a request that +reaches `authorize_request()` without a matching policy permit. + +| State | Tokens | Policy file | Behavior | +|---|---|---|---| +| **Open** | no | no | Every request is permitted. The server refuses to start unless `--unauthenticated` or `OMNIGRAPH_UNAUTHENTICATED=1` is set — the operator must explicitly opt in. | +| **DefaultDeny** | yes | no | Every authenticated request for an action other than `read` is rejected with HTTP 403. Closes the "tokens but forgot the policy file" trap — an operator who set up auth but forgot to point at a policy file used to ship the illusion of protection. | +| **PolicyEnabled** | yes | yes | Authenticated requests that reach a configured policy engine are evaluated by Cedar. Server-scoped actions still require `server.policy.file`. | + +The classifier is `classify_server_runtime_state` in +`crates/omnigraph-server/src/lib.rs`; it returns `Err` for the "no +tokens, no policy, no flag" cell and for "policy file, no tokens" so the +server refuses to start instead of silently shipping an open instance or +a policy-protected server that can only 401. Tests pin every cell of the +matrix and the State-2 (DefaultDeny) deny path. + +Server-side, `authorize_request()` still runs at the HTTP boundary — +that's where actor identity is resolved from the bearer token and where +admission control / per-actor rate limits live. 
Engine-layer enforcement +is the **defense in depth** layer: it catches CLI direct-engine writes, +embedded SDK consumers, and any future transport that hasn't (or won't) +re-implement HTTP's authorize_request. Both layers consult the same +Cedar policy via the same `PolicyChecker` trait, so decisions cannot +disagree. + +## Coarse vs. fine enforcement + +There are two enforcement points, each with non-overlapping +responsibilities: + +| Layer | Question it answers | Where it fires | +|---|---|---| +| **Engine-layer (coarse)** | Can this actor invoke this action against this branch / branch-transition? | `Omnigraph::enforce(action, scope, actor)` at the head of every `_as` writer; one Cedar decision per call. | +| **Query-layer (fine)** | For the rows / types this action actually touches, which can the actor see or modify? | Per-row predicates pushed into DataFusion at plan time. **Not yet implemented — see MR-725.** | + +The engine-layer gate keeps `ResourceScope` deliberately at branch +granularity (`Graph`, `Branch`, `TargetBranch`, `BranchTransition`). +Per-type and per-row authority is the query-layer's job; conflating them +in `ResourceScope` would create two places per-type policy could be +evaluated and a drift surface between them. + +## Actor identity (signed-claim-only) + +The actor identity used for every policy decision comes from the matched bearer token — never from a client-supplied request header, query parameter, or body field. The server resolves the token at the auth middleware boundary, looks up the actor it was minted for, and overwrites whatever the handler may have placed in the policy request. Clients cannot set `actor_id` directly. + +This is intentional. Trusting client-supplied identity for authorization is "asking the attacker if they're an admin" — Supabase's RLS history names the same footgun. The chokepoint lives in `authorize_request` in `crates/omnigraph-server/src/lib.rs` and is named in `docs/dev/invariants.md` Hard Invariant 11. 
A regression test asserts the contract: a request with `Authorization: Bearer ` plus `X-Actor-Id: actor-B` always evaluates as actor A, never as actor B. + +If you find yourself wanting to let clients override `actor_id` for impersonation, delegation, or service-account flows — that's a feature, but it needs explicit design (e.g., signed delegation claims, an `On-Behalf-Of` audit trail). It is not a convenience knob. diff --git a/docs/query-language.md b/docs/user/query-language.md similarity index 98% rename from docs/query-language.md rename to docs/user/query-language.md index 5c98959..94528af 100644 --- a/docs/query-language.md +++ b/docs/user/query-language.md @@ -70,7 +70,7 @@ A single mutation query must be **either insert/update-only or delete-only**. Mi > `mutation '' on the same query mixes inserts/updates and deletes; split into separate mutations: (1) inserts and updates, then (2) deletes. This restriction lifts when Lance exposes a two-phase delete API (tracked: MR-793 / Lance-upstream).` -Reason: under the staged-write rewire (MR-794), inserts and updates accumulate in memory and commit at end-of-query, while deletes still inline-commit (Lance 4.0.0 has no public two-phase delete). Mixing creates ordering hazards (same-row insert→delete becomes a no-op because the staged insert isn't visible to delete; cascading deletes of just-inserted edges break referential integrity by silent design). Until Lance exposes `DeleteJob::execute_uncommitted`, the parse-time rejection keeps both paths atomic and correct. See [docs/runs.md](runs.md) and [docs/invariants.md §VI.25](invariants.md). +Reason: under the staged-write rewire (MR-794), inserts and updates accumulate in memory and commit at end-of-query, while deletes still inline-commit (Lance 4.0.0 has no public two-phase delete). 
Mixing creates ordering hazards (same-row insert→delete becomes a no-op because the staged insert isn't visible to delete; cascading deletes of just-inserted edges break referential integrity by silent design). Until Lance exposes `DeleteJob::execute_uncommitted`, the parse-time rejection keeps both paths atomic and correct. See [docs/dev/runs.md](../dev/runs.md) and [docs/dev/invariants.md](../dev/invariants.md). ## IR (Intermediate Representation) diff --git a/docs/schema-language.md b/docs/user/schema-language.md similarity index 74% rename from docs/schema-language.md rename to docs/user/schema-language.md index 8ef1285..4250676 100644 --- a/docs/schema-language.md +++ b/docs/user/schema-language.md @@ -60,7 +60,8 @@ Edge bodies only allow `@unique` and `@index`. ## Schema IR & stable type IDs - `SCHEMA_IR_VERSION = 1` (`catalog/schema_ir.rs`). -- Each interface/node/edge gets a `stable_type_id` (kind+name hashed) so renames can be tracked. +- Each interface/node/edge currently gets a `stable_type_id` from a kind+name hash. +- Rename-preserving accepted IDs are an architectural invariant, but the current hash-on-name implementation is a known gap until migration carries IDs across `@rename_from`. - Serialized as JSON for diff/migration plans. ## Schema migration planning @@ -77,3 +78,11 @@ Edge bodies only allow `@unique` and `@index`. - `UnsupportedChange { entity, reason }` (forces `supported=false`) `apply_schema()` returns `SchemaApplyResult { supported, applied, manifest_version, steps }` and is gated by an internal `__schema_apply_lock__` system branch so concurrent schema applies serialize. + +## Destructive drops — `--allow-data-loss` + +`DropProperty` and `DropType` steps default to `Soft` mode: the catalog tombstones the entry but the prior column / dataset remains time-travel-reachable via `snapshot_at_version(prev)` until `omnigraph cleanup` runs. Soft drops are reversible. 
+ +Pass `--allow-data-loss` (CLI) or `allow_data_loss: true` (HTTP `POST /schema/apply` body, SDK `SchemaApplyOptions`) to promote every drop in the plan to `Hard` mode. Hard drops run `cleanup_old_versions` on the affected dataset immediately after the manifest publish, making the prior column / dataset unreachable. **Irreversible.** + +The flag is honored uniformly across transports — `omnigraph schema apply --allow-data-loss`, `POST /schema/apply { schema_source, allow_data_loss: true }`, and `apply_schema_with_options(.., SchemaApplyOptions { allow_data_loss: true })` produce identical plans and identical effects. diff --git a/docs/schema-lint.md b/docs/user/schema-lint.md similarity index 100% rename from docs/schema-lint.md rename to docs/user/schema-lint.md diff --git a/docs/user/server.md b/docs/user/server.md new file mode 100644 index 0000000..6f55e16 --- /dev/null +++ b/docs/user/server.md @@ -0,0 +1,192 @@ +# HTTP Server (`omnigraph-server`) + +Axum 0.8 + tokio + utoipa-generated OpenAPI. **Two modes** (v0.6.0+): single-graph (legacy) and multi-graph (MR-668). Mode is inferred from CLI args + config shape. + +## Modes + +### Single-graph mode (legacy) + +`omnigraph-server <URI>` or `omnigraph-server --target <name> --config omnigraph.yaml`. Routes are flat — `/snapshot`, `/read`, `/branches`, etc. Behavior unchanged from v0.6.0. + +### Multi-graph mode (v0.6.0+) + +`omnigraph-server --config omnigraph.yaml` with a non-empty `graphs:` map and **no** single-mode selector (no `server.graph`, no `<URI>`, no `--target`). The server opens every configured graph in parallel at startup (bounded concurrency = 4, fail-fast on the first open error). Routes are nested under `/graphs/{graph_id}/...`. Bare flat paths return 404 in multi mode. + +Mode inference (four rules plus an error fallback): + +1. CLI positional `<URI>` → single +2. CLI `--target <name>` → single +3. `server.graph` in config → single +4. `--config` + non-empty `graphs:` + no single-mode selector → **multi** +5. 
otherwise → error with migration hint + +## Endpoint inventory + +Per-graph endpoints — same body shape across modes; URLs differ: + +| Method | Single-mode path | Multi-mode path | Auth | Action | Handler | +|---|---|---|---|---|---| +| GET | `/healthz` | `/healthz` | none | — | `server_health` | +| GET | `/openapi.json` | `/openapi.json` | none | — | `server_openapi` (strips security if auth disabled; in multi mode emits cluster paths with `cluster_` operation-id prefix) | +| GET | `/snapshot?branch=` | `/graphs/{id}/snapshot?branch=` | bearer + `read` | snapshot of branch | `server_snapshot` | +| POST | `/query` | `/graphs/{id}/query` | bearer + `read` | inline read query (canonical; clean field names `query`/`name`; mutations → 400) | `server_query` | +| POST | `/read` | `/graphs/{id}/read` | bearer + `read` | **deprecated** alias of `/query` (legacy field names `query_source`/`query_name`, byte-stable response; carries `Deprecation: true` + `Link: </query>; rel="successor-version"`) | `server_read` | +| POST | `/export` | `/graphs/{id}/export` | bearer + `export` | NDJSON stream | `server_export` | +| POST | `/mutate` | `/graphs/{id}/mutate` | bearer + `change` | mutation (canonical; `query`/`name`; accepts legacy `query_source`/`query_name` as serde aliases) | `server_mutate` | +| POST | `/change` | `/graphs/{id}/change` | bearer + `change` | **deprecated** alias of `/mutate` (carries `Deprecation: true` + `Link: </mutate>; rel="successor-version"`) | `server_change` | +| GET | `/schema` | `/graphs/{id}/schema` | bearer + `read` | get current `.pg` source | `server_schema_get` | +| POST | `/schema/apply` | `/graphs/{id}/schema/apply` | bearer + `schema_apply` (target=`main`) | migrate | `server_schema_apply` | +| POST | `/ingest` | `/graphs/{id}/ingest` | bearer + `branch_create` (if new) + `change` | bulk load | `server_ingest` (32 MB body limit) | +| GET | `/branches` | `/graphs/{id}/branches` | bearer + `read` | list branches | `server_branch_list` | +| POST | `/branches` 
| `/graphs/{id}/branches` | bearer + `branch_create` | create | `server_branch_create` | +| DELETE | `/branches/{branch}` | `/graphs/{id}/branches/{branch}` | bearer + `branch_delete` | delete | `server_branch_delete` | +| POST | `/branches/merge` | `/graphs/{id}/branches/merge` | bearer + `branch_merge` | merge `source → target` | `server_branch_merge` | +| GET | `/commits?branch=` | `/graphs/{id}/commits?branch=` | bearer + `read` | list | `server_commit_list` | +| GET | `/commits/{commit_id}` | `/graphs/{id}/commits/{commit_id}` | bearer + `read` | show | `server_commit_show` | + +Server-level management endpoints (v0.6.0+): + +| Method | Path | Auth | Action | Handler | +|---|---|---|---|---| +| GET | `/graphs` | bearer + `graph_list` on `Server::"root"` | list registered graphs | `server_graphs_list` (405 in single mode) | + +## Adding and removing graphs (multi mode) + +Runtime add/remove via API is **not** exposed in v0.6.0 — neither +`POST /graphs` nor `DELETE /graphs/{id}` is implemented. Operators add +or remove graphs by stopping the server, editing the `graphs:` map in +`omnigraph.yaml`, then restarting. The server treats `omnigraph.yaml` +as operator-owned configuration and never writes it. + +A future release may introduce a managed registry (Lance-backed, +catalog-style: reserve → init → publish with recovery sidecars) and +re-expose runtime mutation on top of it. + +## Inline read queries (`POST /query`) + +`POST /query` is the read-only, agent-friendly twin of `POST /read`. The +request body uses clean field names that match the CLI `-e` flag and the GQ +`query` keyword: + +```json +{ + "query": "query find($n: String) { match { $p: Person { name: $n } } return { $p.name } }", + "name": "find", + "params": { "n": "Alice" }, + "branch": "main", + "snapshot": null +} +``` + +Response shape is identical to `/read` (`ReadOutput`). 
If the inline source +contains mutations (`insert` / `update` / `delete`), the request is rejected +with HTTP 400 and an error pointing the caller at `POST /mutate` — the +read-only contract is enforced at the URL. + +`POST /mutate` is the canonical mutation endpoint. It accepts the same clean +field names (`query`, `name`); the legacy field names `query_source` and +`query_name` continue to deserialize as serde aliases so existing clients keep +working without changes. + +## Deprecated names (`/read`, `/change`) + +`POST /read` and `POST /change` are kept for back-compat indefinitely — they +are byte-stable on the request side and otherwise behave identically to +`/query` / `/mutate`. They are flagged as deprecated through three independent +channels: + +- **OpenAPI**: the operations carry `deprecated: true` in `openapi.json`, so + every OpenAPI codegen (typescript-fetch, openapi-generator, oapi-codegen, + …) emits a `@deprecated` marker on the generated SDK method. +- **Response headers (RFC 9745)**: every response carries `Deprecation: true`. +- **Response headers (RFC 8288)**: every response carries a `Link` header + pointing at the canonical successor: + `Link: </query>; rel="successor-version"` for `/read`, and + `Link: </mutate>; rel="successor-version"` for `/change`. SDKs and HTTP + proxies can pick the successor up automatically. + +Migration is purely cosmetic on the client side — swap the URL path, leave +the request body and response handling alone. + +## Streaming + +Only `/export` streams (`application/x-ndjson`, MPSC channel + `Body::from_stream`). Everything else is buffered JSON. + +## Error model + +Uniform `ErrorOutput { error, code?, merge_conflicts[], manifest_conflict? }` with `code ∈ unauthorized | forbidden | bad_request | not_found | conflict | too_many_requests | internal`. Merge conflicts attach structured `MergeConflictOutput { table_key, row_id?, kind, message }`. 
+ +`manifest_conflict` is set on **publisher CAS rejections** (HTTP 409): the +caller's pre-write view of one table's manifest version was stale. +`ManifestConflictOutput { table_key, expected, actual }` tells the client +which table to refresh and retry. This is the conflict shape produced by +concurrent `/mutate` (or its `/change` alias) or `/ingest` calls landing +the same `(table, branch)` race. + +HTTP status codes used: 200, 400, 401, 403, 404, 409, 429, 500. + +## Per-actor admission control + +Disjoint +`(table, branch)` writes from different actors now run concurrently, +guarded only by the engine's per-(table, branch) write queue. To keep +one heavy actor from exhausting shared capacity (Lance I/O, manifest +churn, network), the server gates mutating handlers through a +`WorkloadController` configured per-process from environment variables: + +| Env var | Default | Purpose | +|---|---|---| +| `OMNIGRAPH_PER_ACTOR_INFLIGHT_MAX` | 16 | Concurrent in-flight mutations per actor | +| `OMNIGRAPH_PER_ACTOR_BYTES_MAX` | 4 GiB | In-flight estimated bytes per actor | + +When an actor exceeds its in-flight count or byte budget, the server +returns **HTTP 429 Too Many Requests** with `code: too_many_requests` +and a `Retry-After` header (seconds). The actor should back off; other +actors are unaffected. + +Cedar policy authorization runs **before** admission accounting so +denied requests don't consume admission slots. + +Today admission gates every mutating handler: `/mutate` (and its +deprecated alias `/change`), `/ingest`, `/branches/{create,delete,merge}`, +and `/schema/apply`. Read-only endpoints (`/snapshot`, `/query`, `/read`, +`/export`, `/branches` GET, `/commits`, `/schema` GET) are not +admission-gated. + +## Body limits + +- Default: 1 MB +- `/ingest`: 32 MB + +## Auth model (`bearer + SHA-256`) + +- Tokens are SHA-256 hashed on startup; plaintext is never persisted in memory. +- Constant-time comparison via `subtle::ConstantTimeEq`. 
+- Three sources, in precedence: + 1. `OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET` — AWS Secrets Manager (build with `--features aws`) + 2. `OMNIGRAPH_SERVER_BEARER_TOKENS_FILE` or `OMNIGRAPH_SERVER_BEARER_TOKENS_JSON` — JSON `{actor_id: token, …}` + 3. `OMNIGRAPH_SERVER_BEARER_TOKEN` — single legacy token, actor `default` +- If no tokens are configured, startup refuses unless `--unauthenticated` or + `OMNIGRAPH_UNAUTHENTICATED=1` explicitly opts into open local-dev mode. A + policy file without tokens is also rejected at startup. In open mode + `/openapi.json` strips the security scheme. + +See [deployment.md](deployment.md) for token-source operational details. + +## Tracing & observability + +- `tower_http::TraceLayer::new_for_http()` +- Policy decisions logged at INFO level with actor, action, branch, decision, matched rule +- Startup logs: token source name, graph URI, bind address +- Graceful SIGINT shutdown + +## Not implemented (by design or "TBD") + +- CORS — not configured; add `tower_http::cors` if needed. +- Rate limiting — per-actor admission control gates `/mutate` (alias + `/change`), `/ingest`, `/branches/{create,delete,merge}`, + `/schema/apply` (see "Per-actor + admission control" above). No global rate limiter is configured; + add `tower_http::limit` if a graph-wide cap is needed. +- Pagination — none (commits/branches return everything; export streams). +- Runtime graph add/remove — edit `omnigraph.yaml` and restart. diff --git a/docs/storage.md b/docs/user/storage.md similarity index 87% rename from docs/storage.md rename to docs/user/storage.md index b284bc2..c22d4d6 100644 --- a/docs/storage.md +++ b/docs/user/storage.md @@ -7,7 +7,7 @@ Every node type and every edge type is its own Lance dataset: - **Columnar Arrow storage**: each property is a column; nullable per Arrow schema. - **Fragments**: data is partitioned into fragments; new writes create new fragments. 
- **Manifest versioning**: every commit produces a new dataset version; old versions remain readable. -- **Stable row IDs**: `enable_stable_row_ids: true` is set on every Lance dataset OmniGraph creates — node and edge data tables, `__manifest`, `_graph_commits.lance`, `_graph_commit_recoveries.lance`, and any future system tables. This is an architectural invariant: the flag is one-way at dataset create per Lance's row-id-lineage spec, so a future change that introduces a Lance dataset must preserve it. Consequences: `_row_created_at_version` and `_row_last_updated_at_version` are available on every dataset (load-bearing for change-feed validators); `CreateIndex × Rewrite` is not a retryable conflict, so indices survive `omnigraph optimize` without needing the Fragment Reuse Index; readers must use a Lance build that recognises the flag (our pinned 4.0.0 is fine). Pre-0.4.x repos created before this code path settled may have datasets without the flag and cannot be retrofitted in place — the supported path is dump-and-reload. The `stage_overwrite` rewrite path (used by `schema_apply`) preserves the flag through `Operation::Overwrite`; pinned by `stage_overwrite_preserves_stable_row_ids` in `crates/omnigraph/tests/staged_writes.rs`. +- **Stable row IDs**: `enable_stable_row_ids: true` is set on every Lance dataset OmniGraph creates — node and edge data tables, `__manifest`, `_graph_commits.lance`, `_graph_commit_recoveries.lance`, and any future system tables. This is an architectural invariant: the flag is one-way at dataset create per Lance's row-id-lineage spec, so a future change that introduces a Lance dataset must preserve it. 
Consequences: `_row_created_at_version` and `_row_last_updated_at_version` are available on every dataset (load-bearing for change-feed validators); `CreateIndex × Rewrite` is not a retryable conflict, so indices survive `omnigraph optimize` without needing the Fragment Reuse Index; readers must use a Lance build that recognises the flag (our pinned 4.0.0 is fine). Pre-0.4.x graphs created before this code path settled may have datasets without the flag and cannot be retrofitted in place — the supported path is dump-and-reload. The `stage_overwrite` rewrite path (used by `schema_apply`) preserves the flag through `Operation::Overwrite`; pinned by `stage_overwrite_preserves_stable_row_ids` in `crates/omnigraph/tests/staged_writes.rs`. - **Append / delete / `merge_insert`**: native Lance write modes. - **Per-dataset branches** (Lance native): copy-on-write at the dataset level. - **Object-store agnostic**: file://, s3://, gs://, az://, http (read-only via Lance) — OmniGraph wires file:// and s3:// (`storage.rs`). 
@@ -22,7 +22,7 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin - `edges/{fnv1a64-hex(edge_type_name)}` — one Lance dataset per edge type - `__manifest/` — the catalog of all sub-tables and their published versions - `_graph_commits.lance` / `_graph_commit_actors.lance` — the commit graph and its actor map - - (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 repos are inert; the run state machine was removed in MR-771 and these files are cleaned up via MR-770's production sweep) + - (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 graphs are inert; the run state machine was removed in MR-771 and these files are cleaned up via MR-770's production sweep) - **Manifest row schema** (`object_id, object_type, location, metadata, base_objects, table_key, table_version, table_branch, row_count`): - `object_type` ∈ `table | table_version | table_tombstone` - `table_key` ∈ `node: | edge:` @@ -36,7 +36,7 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin The on-disk shape of `__manifest` is reconciled with the binary via a single stamp + dispatcher. `INTERNAL_MANIFEST_SCHEMA_VERSION` declares the shape this binary writes; the on-disk stamp `omnigraph:internal_schema_version` lives in the manifest dataset's schema-level metadata (Lance `update_schema_metadata`). -- **`init_manifest_repo`** stamps the current version at creation, so newly initialized repos never need migration. +- **`init_manifest_graph`** stamps the current version at creation, so newly initialized graphs never need migration. - **Publisher open-for-write path** (`load_publish_state`) calls `migrate_internal_schema(&mut dataset)` before reading state. When the on-disk stamp matches the binary, this is a single metadata read with no writes; otherwise the dispatcher walks `match`-arm steps forward (1→2, 2→3, …) until the stamp matches, then proceeds with the publish. Reads stay side-effect-free. 
- **Forward-version protection**: a stamp *higher* than the binary's known version triggers a clear "upgrade omnigraph first" error. An old binary cannot clobber a newer schema by silently treating "unknown stamp" as "missing stamp". - **Idempotency**: each migration step is safe to re-run. A crash between two metadata updates inside a single step leaves the partial state; the next open re-runs the step and the second update lands. The dispatcher itself is a cheap stamp-read on the steady-state path. @@ -50,14 +50,14 @@ Adding a new on-disk shape change is one constant bump (`INTERNAL_MANIFEST_SCHEM ## On-disk layout -A repo on disk is a directory tree of Lance datasets. Each dataset follows the standard Lance layout (`_versions/`, `data/`, `_indices/`, `_refs/`); OmniGraph adds the multi-dataset coordination by keeping `__manifest/` alongside the per-type datasets. +A graph on disk is a directory tree of Lance datasets. Each dataset follows the standard Lance layout (`_versions/`, `data/`, `_indices/`, `_refs/`); OmniGraph adds the multi-dataset coordination by keeping `__manifest/` alongside the per-type datasets. ```mermaid flowchart TB classDef l1 fill:#fef3e8,stroke:#c46900,color:#000 classDef l2 fill:#e8f4fd,stroke:#1e6aa8,color:#000 - repo["repo URI
file:// or s3://bucket/prefix"]:::l2 + root["graph URI
file:// or s3://bucket/prefix"]:::l2 manifest["__manifest/
L2 catalog of sub-tables"]:::l2 nodes["nodes/{fnv1a64-hex}/
one dataset per node type"]:::l2 @@ -66,12 +66,12 @@ flowchart TB recovery["__recovery/{ulid}.json
recovery sidecars (transient)"]:::l2 refs["_refs/branches/{name}.json
graph-level branches"]:::l2 - repo --> manifest - repo --> nodes - repo --> edges - repo --> cgraph - repo --> recovery - repo --> refs + root --> manifest + root --> nodes + root --> edges + root --> cgraph + root --> recovery + root --> refs subgraph dataset[Inside each Lance dataset — L1] ds_v["_versions/{n}.manifest
per-dataset versions"]:::l1 @@ -88,10 +88,10 @@ flowchart TB **What's where:** -- **Repo root** is one directory (or S3 prefix). Everything below is part of one OmniGraph repo. +- **Graph root** is one directory (or S3 prefix). Everything below is part of one OmniGraph graph. - **`__manifest/`** is a Lance dataset whose rows describe which sub-table version is published at which graph-branch. Reading a snapshot starts here. - **`nodes/`** and **`edges/`** are sibling directories holding one Lance dataset per declared type. Names are `fnv1a64-hex` of the type name to keep paths fixed-length and case-safe. -- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 repos also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; MR-770 sweeps these in production.) +- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 graphs also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; MR-770 sweeps these in production.) - **`_graph_commit_recoveries.lance`** — one row per recovery sweep action. Joined to `_graph_commits.lance` by `graph_commit_id`; the linked commit row carries `actor_id=omnigraph:recovery`. Operators correlate recoveries with the original mutations they rolled forward / back via this join. See `crates/omnigraph/src/db/recovery_audit.rs`. - **`__recovery/{ulid}.json`** — transient sidecar files written by the four migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) before Phase B begins, deleted after Phase C succeeds. A sidecar persisting after process exit means the writer crashed in the Phase B → Phase C window; the next `Omnigraph::open` recovery sweep processes it. Steady-state directory is empty. 
See `crates/omnigraph/src/db/manifest/recovery.rs`. - **`_refs/branches/{name}.json`** is graph-level branch metadata — pointers from a branch name to the manifest version it heads. diff --git a/docs/transactions.md b/docs/user/transactions.md similarity index 80% rename from docs/transactions.md rename to docs/user/transactions.md index a86694f..e4ed485 100644 --- a/docs/transactions.md +++ b/docs/user/transactions.md @@ -2,9 +2,11 @@ OmniGraph does not have `BEGIN` / `COMMIT` / `ROLLBACK`. Branches do that job. This page explains the model, when to use which primitive, and shows worked examples for the patterns that come up most. -The architectural rule lives in [`docs/invariants.md`](invariants.md) §VI.23: +The architectural rule lives in [`docs/dev/invariants.md`](../dev/invariants.md): -> **Atomicity is per-query.** Every `.gq` query is atomic via the substrate's atomic-commit primitive. **No cross-query `BEGIN`/`COMMIT`; branches and merges fill that role for agent workflows.** +> **Mutations publish at one boundary.** A `mutate_as` or `load` operation +> accumulates constructive writes, commits each touched table at the end, then +> publishes one manifest update. If you need to coordinate multiple queries atomically, you fork a branch, run mutations on it, and merge when you're satisfied. If something goes wrong, you delete the branch. @@ -46,7 +48,7 @@ query register_employee_with_team($name: String, $age: I32, $team: String) { ```bash omnigraph change --query ./mutations.gq --name register_employee_with_team \ - --params '{"name":"Alice","age":30,"team":"Acme"}' ./repo.omni + --params '{"name":"Alice","age":30,"team":"Acme"}' ./graph.omni ``` If the second statement fails (e.g. `Acme` doesn't exist), the publisher never publishes; `Alice` is not in the database. Atomic. @@ -55,10 +57,10 @@ If the second statement fails (e.g. 
`Acme` doesn't exist), the publisher never p ```bash # Query 1 -omnigraph change --query ./mutations.gq --name register_employee --params '{"name":"Alice","age":30}' ./repo.omni +omnigraph change --query ./mutations.gq --name register_employee --params '{"name":"Alice","age":30}' ./graph.omni # Query 2 — runs after Query 1 has already published -omnigraph change --query ./mutations.gq --name link_to_team --params '{"name":"Alice","team":"Acme"}' ./repo.omni +omnigraph change --query ./mutations.gq --name link_to_team --params '{"name":"Alice","team":"Acme"}' ./graph.omni ``` These are **two publishes** on `main`. If Query 2 fails, Query 1's effects are already visible. There is no `ROLLBACK` for Query 1. @@ -73,32 +75,32 @@ The pattern when you need to run multiple queries — possibly across multiple c ```bash # Fork a working branch from main. -omnigraph branch create --from main onboarding/2026-04-25 ./repo.omni +omnigraph branch create --from main onboarding/2026-04-25 ./graph.omni # Run any number of mutations on the branch — each one is its own publish on the branch. # Concurrent reads of `main` are unaffected. omnigraph change --branch onboarding/2026-04-25 \ --query ./mutations.gq --name register_employee \ - --params '{"name":"Alice","age":30}' ./repo.omni + --params '{"name":"Alice","age":30}' ./graph.omni omnigraph change --branch onboarding/2026-04-25 \ --query ./mutations.gq --name register_employee \ - --params '{"name":"Bob","age":25}' ./repo.omni + --params '{"name":"Bob","age":25}' ./graph.omni omnigraph change --branch onboarding/2026-04-25 \ --query ./mutations.gq --name link_to_team \ - --params '{"name":"Alice","team":"Acme"}' ./repo.omni + --params '{"name":"Alice","team":"Acme"}' ./graph.omni # Inspect the branch — read queries work just like on main. 
omnigraph read --branch onboarding/2026-04-25 \ - --query ./queries.gq --name list_employees ./repo.omni + --query ./queries.gq --name list_employees ./graph.omni # Happy with what's on the branch? Merge it. This is one atomic publish: # `main` flips to include every commit on the branch. -omnigraph branch merge onboarding/2026-04-25 --into main ./repo.omni +omnigraph branch merge onboarding/2026-04-25 --into main ./graph.omni # OR: not happy? Throw it away. `main` is untouched. -# omnigraph branch delete onboarding/2026-04-25 ./repo.omni +# omnigraph branch delete onboarding/2026-04-25 ./graph.omni ``` Properties: @@ -113,16 +115,16 @@ Two agents writing to the same graph independently: ```bash # Agent A -omnigraph branch create --from main agent-a/work ./repo.omni -omnigraph change --branch agent-a/work … ./repo.omni +omnigraph branch create --from main agent-a/work ./graph.omni +omnigraph change --branch agent-a/work … ./graph.omni # … many mutations … -omnigraph branch merge agent-a/work --into main ./repo.omni +omnigraph branch merge agent-a/work --into main ./graph.omni # Agent B (running concurrently) -omnigraph branch create --from main agent-b/work ./repo.omni -omnigraph change --branch agent-b/work … ./repo.omni +omnigraph branch create --from main agent-b/work ./graph.omni +omnigraph change --branch agent-b/work … ./graph.omni # … many mutations … -omnigraph branch merge agent-b/work --into main ./repo.omni +omnigraph branch merge agent-b/work --into main ./graph.omni ``` Each agent sees a consistent snapshot of `main` at the time it forked. The first merge to `main` lands as a fast-forward (or a no-op if no concurrent change). The second merge runs three-way: rows touched by both branches surface as `MergeConflict`s for the caller to resolve. 
@@ -136,7 +138,7 @@ This is the workflow MR-797 / agentic loops are designed around: **branches are | Single query fails mid-flight | Publisher never publishes; target unchanged | Read the error, decide whether to retry | | Concurrent writers race the same `(table, branch)` | Publisher CAS rejects the loser with `ManifestConflictDetails::ExpectedVersionMismatch` | Refresh handle, retry the query | | Branch with N successful mutations, then merge fails (three-way conflict) | Each individual mutation already committed on the branch; merge surfaces `MergeConflicts` | Inspect, decide whether to keep working on the branch, abandon it (`branch_delete`), or resolve and re-merge | -| Process crashes mid-branch-workflow | Each completed mutation on the branch is durable | Re-open the repo, continue where you left off | +| Process crashes mid-branch-workflow | Each completed mutation on the branch is durable | Re-open the graph, continue where you left off | ## When to use what @@ -154,13 +156,13 @@ This is the workflow MR-797 / agentic loops are designed around: **branches are - **Cross-query atomicity on `main` without a branch.** If you don't want to fork a branch, multiple queries on `main` publish independently. There is no implicit transaction. - **Long-running interactive transactions.** No `BEGIN` over a connection. Branches are the durable equivalent. -- **Cross-graph (cross-repo) transactions.** Each repo is its own atomicity domain. +- **Cross-graph transactions.** Each graph is its own atomicity domain. - **"Pessimistic" locks** that serialize writers before they reach the storage layer. Snapshot-MVCC + publisher CAS handles concurrency optimistically; the loser retries. ## See also -- [`docs/branches-commits.md`](branches-commits.md) — branch and commit-graph mechanics. -- [`docs/merge.md`](merge.md) — three-way merge details and conflict kinds. -- [`docs/query-language.md`](query-language.md) — `.gq` syntax for the multi-statement queries used above. 
-- [`docs/runs.md`](runs.md) — the per-query commit pipeline that gives single-query atomicity. -- [`docs/invariants.md`](invariants.md) §VI.23 — the architectural rule. +- [`docs/user/branches-commits.md`](branches-commits.md) — branch and commit-graph mechanics. +- [`docs/dev/merge.md`](../dev/merge.md) — three-way merge details and conflict kinds. +- [`docs/user/query-language.md`](query-language.md) — `.gq` syntax for the multi-statement queries used above. +- [`docs/dev/runs.md`](../dev/runs.md) — the per-query commit pipeline that gives single-query atomicity. +- [`docs/dev/invariants.md`](../dev/invariants.md) — the architectural rule. diff --git a/og-cheet-sheet.md b/og-cheet-sheet.md index 8ae6f5c..2cb4d76 100644 --- a/og-cheet-sheet.md +++ b/og-cheet-sheet.md @@ -5,23 +5,27 @@ Use an explicit schema file: ```bash -omnigraph query lint --query ./queries.gq --schema ./schema.pg --json -omnigraph query check --query ./queries.gq --schema ./schema.pg +omnigraph lint --query ./queries.gq --schema ./schema.pg --json +omnigraph check --query ./queries.gq --schema ./schema.pg ``` Use a local or `s3://` repo target: ```bash -omnigraph query lint --query ./queries.gq ./repo.omni --json -omnigraph query check --query ./queries.gq s3://bucket/repo +omnigraph lint --query ./queries.gq ./repo.omni --json +omnigraph check --query ./queries.gq s3://bucket/repo ``` Use `omnigraph.yaml` target resolution: ```bash -omnigraph query lint --query ./queries.gq --target local --config ./omnigraph.yaml +omnigraph lint --query ./queries.gq --target local --config ./omnigraph.yaml ``` +> The previous `omnigraph query lint` / `omnigraph query check` spellings +> are kept as deprecated argv shims that print a one-line warning to +> stderr and rewrite to the canonical `omnigraph lint` / `omnigraph check`. 
+ ## What It Checks - parses every query in the file diff --git a/openapi.json b/openapi.json index ea62e31..d1fa337 100644 --- a/openapi.json +++ b/openapi.json @@ -7,7 +7,7 @@ "name": "MIT", "identifier": "MIT" }, - "version": "0.4.2" + "version": "0.6.0" }, "paths": { "/branches": { @@ -312,8 +312,8 @@ "tags": [ "mutations" ], - "summary": "Apply a GQ mutation to a branch.", - "description": "Writes to the named `branch` (defaults to `main`). Mutations are atomic\nper call and produce a new commit. Returns counts of nodes and edges\naffected. **Destructive**: on success the branch is updated; rejected\nmutations may still acquire locks briefly. Returns 409 on merge conflict.", + "summary": "**Deprecated** — use [`POST /mutate`](#tag/mutations/operation/mutate) instead.", + "description": "Apply a GQ mutation to a branch. Behavior is unchanged; the route is\nkept indefinitely for back-compat. New integrations should target\n`POST /mutate`, which has identical semantics and a name that pairs\ncleanly with `POST /query`. Responses from this route include\n`Deprecation: true` and `Link: </mutate>; rel=\"successor-version\"`\nheaders per RFC 9745 / RFC 8288 so SDKs and proxies can surface the\nsignal.", "operationId": "change", "requestBody": { "content": { @@ -327,7 +327,7 @@ }, "responses": { "200": { - "description": "Mutation results", + "description": "Mutation results (response includes `Deprecation: true` + `Link: </mutate>; rel=\"successor-version\"`)", "content": { "application/json": { "schema": { @@ -387,6 +387,7 @@ } } }, + "deprecated": true, "security": [ { "bearer_token": [] @@ -585,6 +586,63 @@ ] } }, + "/graphs": { + "get": { + "tags": [ + "management" + ], + "summary": "List every graph currently registered with this server (MR-668).", + "description": "Multi-graph mode only. In single mode, the route returns 405 — there's\nno registry to enumerate. 
Cedar-gated by the server-level policy via\nthe `graph_list` action against `Omnigraph::Server::\"root\"`.\n\nOrder: alphabetical by `graph_id` (server-sorted so clients see\ndeterministic output across requests).", + "operationId": "listGraphs", + "responses": { + "200": { + "description": "List of registered graphs", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GraphListResponse" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "403": { + "description": "Forbidden", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "405": { + "description": "Method not allowed (single-graph mode)", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + } + }, + "security": [ + { + "bearer_token": [] + } + ] + } + }, "/healthz": { "get": { "tags": [ @@ -684,13 +742,167 @@ ] } }, + "/mutate": { + "post": { + "tags": [ + "mutations" + ], + "summary": "Apply a GQ mutation to a branch (canonical mutation endpoint).", + "description": "Writes to the named `branch` (defaults to `main`). Mutations are atomic\nper call and produce a new commit. Returns counts of nodes and edges\naffected. **Destructive**: on success the branch is updated; rejected\nmutations may still acquire locks briefly. Returns 409 on merge conflict.\n\nPairs with `POST /query` (read-only). 
The legacy `POST /change` route\nhas identical semantics and is kept as a deprecated alias.", + "operationId": "mutate", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChangeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Mutation results", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChangeOutput" + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "403": { + "description": "Forbidden", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "409": { + "description": "Merge conflict", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "429": { + "description": "Per-actor admission cap exceeded; honor `Retry-After` header", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + } + }, + "security": [ + { + "bearer_token": [] + } + ] + } + }, + "/query": { + "post": { + "tags": [ + "queries" + ], + "summary": "Execute an inline read query (friendlier-named alternative to `POST /read`).", + "description": "Designed for ad-hoc exploration and AI-agent tool-use: short field\nnames (`query`, `name`) match the CLI `-e` flag and the GQ `query`\nkeyword. Mutations (`insert`/`update`/`delete`) are rejected with 400\n-- use `POST /mutate` (or its deprecated alias `POST /change`) for\nwrite queries. 
Otherwise behaves identically to `POST /read`: same\ntarget semantics (branch xor snapshot), same Cedar action (Read),\nsame response shape.", + "operationId": "query", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueryRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Query results", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ReadOutput" + } + } + } + }, + "400": { + "description": "Bad request - also returned when the query body contains mutations; use POST /mutate (or its deprecated alias POST /change) for write queries", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + }, + "403": { + "description": "Forbidden", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorOutput" + } + } + } + } + }, + "security": [ + { + "bearer_token": [] + } + ] + } + }, "/read": { "post": { "tags": [ "queries" ], - "summary": "Execute a GQ read query.", - "description": "Runs the query in `query_source` against either a branch or a frozen\nsnapshot (mutually exclusive). When `query_source` defines multiple named\nqueries, pick one with `query_name`. `params` is a JSON object whose keys\nmatch the parameters declared by the query. Returns rows as a JSON array\nplus a `columns` list. Read-only.", + "summary": "**Deprecated** — use [`POST /query`](#tag/queries/operation/query) instead.", + "description": "Execute a GQ read query. Behavior is unchanged from prior releases; the\nroute is kept indefinitely for byte-stable back-compat. New integrations\nshould target `POST /query`, which has clean field names (`query` /\n`name`) and a 400-on-mutation guard. 
Responses from this route include\n`Deprecation: true` and `Link: </query>; rel=\"successor-version\"`\nheaders per RFC 9745 / RFC 8288 so SDKs and proxies can surface the\nsignal.", "operationId": "read", "requestBody": { "content": { @@ -704,7 +916,7 @@ }, "responses": { "200": { - "description": "Query results", + "description": "Query results (response includes `Deprecation: true` + `Link: </query>; rel=\"successor-version\"`)", "content": { "application/json": { "schema": { @@ -744,6 +956,7 @@ } } }, + "deprecated": true, "security": [ { "bearer_token": [] @@ -1103,7 +1316,7 @@ "ChangeRequest": { "type": "object", "required": [ - "query_source" + "query" ], "properties": { "branch": { @@ -1113,19 +1326,19 @@ ], "description": "Target branch. Defaults to `main`." }, - "params": { - "description": "JSON object whose keys match the mutation's declared parameters." - }, - "query_name": { + "name": { "type": [ "string", "null" ], "description": "Name of the mutation to run when `query` declares multiple.\n\nAccepts the legacy field name `query_name` as a deserialization alias." }, - "query_source": { + "params": { + "description": "JSON object whose keys match the mutation's declared parameters." 
+ }, + "query": { "type": "string", - "description": "GQ mutation source containing `insert`, `update`, or `delete` statements.\nMay declare multiple named mutations; pick one with `query_name`.", + "description": "GQ mutation source containing `insert`, `update`, or `delete` statements.\nMay declare multiple named mutations; pick one with `name`.\n\nAccepts the legacy field name `query_source` as a deserialization alias.", "example": "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}" } } @@ -1199,6 +1412,7 @@ "forbidden", "bad_request", "not_found", + "method_not_allowed", "conflict", "too_many_requests", "internal" @@ -1268,6 +1482,37 @@ } } }, + "GraphInfo": { + "type": "object", + "description": "One entry in the response from `GET /graphs`. Cluster operators\nconsume this list to discover which graphs the server is currently\nserving. The shape is intentionally minimal — `graph_id` and `uri`\nare the only fields a routing client needs.", + "required": [ + "graph_id", + "uri" + ], + "properties": { + "graph_id": { + "type": "string" + }, + "uri": { + "type": "string" + } + } + }, + "GraphListResponse": { + "type": "object", + "description": "Response from `GET /graphs`. Lists every graph registered with the\nserver in alphabetical order by `graph_id` (sorted server-side so\nclients get deterministic output across requests).", + "required": [ + "graphs" + ], + "properties": { + "graphs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/GraphInfo" + } + } + } + }, "HealthOutput": { "type": "object", "required": [ @@ -1453,6 +1698,44 @@ } } }, + "QueryRequest": { + "type": "object", + "description": "Inline read-query request for `POST /query`.\n\nFriendlier-named alternative to [`ReadRequest`] for ad-hoc reads and\nAI-agent integration. 
Mutations are rejected with 400 — use `POST\n/mutate` (or its deprecated alias `POST /change`) for write queries.\nField names are deliberately short (`query`, `name`) to match the GQ\nkeyword and the CLI `-e` flag.", + "required": [ + "query" + ], + "properties": { + "branch": { + "type": [ + "string", + "null" + ], + "description": "Branch to read from. Mutually exclusive with `snapshot`. Defaults to `main`." + }, + "name": { + "type": [ + "string", + "null" + ], + "description": "Name of the query to run when `query` declares multiple. Optional when\nonly one query is declared." + }, + "params": { + "description": "JSON object whose keys match the query's declared parameters." + }, + "query": { + "type": "string", + "description": "GQ read-query source. May declare one or more named queries; pick one\nwith `name` when more than one is declared. Mutations\n(`insert`/`update`/`delete`) get 400 — use `POST /mutate` (or its\ndeprecated alias `POST /change`) instead.", + "example": "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}" + }, + "snapshot": { + "type": [ + "string", + "null" + ], + "description": "Snapshot id to read from. Mutually exclusive with `branch`." + } + } + }, "ReadOutput": { "type": "object", "required": [ @@ -1576,6 +1859,10 @@ "schema_source" ], "properties": { + "allow_data_loss": { + "type": "boolean", + "description": "When true, promote every `DropMode::Soft` step in the plan to\n`DropMode::Hard`, making the prior column data unreachable\nafter the apply. Matches the CLI's `--allow-data-loss` flag.\nDefaults to `false` (drops remain reversible via time travel)." + }, "schema_source": { "type": "string", "description": "Project schema in `.pg` source form. 
The diff against the current\nschema produces the migration steps that will be applied.", diff --git a/scripts/apply-branch-protection.sh b/scripts/apply-branch-protection.sh index 910d5b6..25e93ee 100755 --- a/scripts/apply-branch-protection.sh +++ b/scripts/apply-branch-protection.sh @@ -3,7 +3,7 @@ # # Requires: # - `gh` CLI authenticated. -# - Repo-admin or org-admin permissions on ModernRelay/omnigraph. +# - Repository-admin or org-admin permissions on ModernRelay/omnigraph. # # This script is idempotent: re-running applies whatever is currently # declared in .github/branch-protection.json. The JSON file is the diff --git a/scripts/check-agents-md.sh b/scripts/check-agents-md.sh index 95ae05f..abc6469 100755 --- a/scripts/check-agents-md.sh +++ b/scripts/check-agents-md.sh @@ -1,77 +1,116 @@ #!/usr/bin/env bash -# Verify that AGENTS.md and docs/ stay in sync. +# Verify that AGENTS.md and the docs audience indexes stay in sync. # -# Two checks: -# 1. Every docs/*.md path linked from AGENTS.md exists on disk. -# 2. Every doc in the canonical set is linked from AGENTS.md. +# Checks: +# 1. Every docs/ link from AGENTS.md, docs/user/index.md, and +# docs/dev/index.md exists. +# 2. Every canonical docs file is discoverable from those indexes. # -# Exit non-zero on any drift. +# Release notes are represented by the docs/releases/ directory entry instead +# of requiring every per-version release note to be linked individually. set -euo pipefail repo_root="$(cd "$(dirname "$0")/.." && pwd)" cd "$repo_root" -agents_file="AGENTS.md" -if [[ ! -f "$agents_file" ]]; then - echo "error: $agents_file not found" >&2 - exit 1 -fi +index_files=(AGENTS.md docs/user/index.md docs/dev/index.md) +for index_file in "${index_files[@]}"; do + if [[ ! 
-f "$index_file" ]]; then + echo "error: $index_file not found" >&2 + exit 1 + fi +done + +normalize_path() { + python3 - "$1" <<'PY' +import os +import sys + +print(os.path.normpath(sys.argv[1]).replace(os.sep, "/")) +PY +} -# Canonical set: every docs/*.md (top-level), plus the releases/ index dir if present. canonical=() while IFS= read -r line; do canonical+=("$line") -done < <(find docs -mindepth 1 -maxdepth 1 -type f -name '*.md' | sort) +done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' ! -path 'docs/internal/*' | sort) if [[ -d docs/releases ]]; then canonical+=("docs/releases/") fi -# Extract docs/ links from AGENTS.md (markdown link form: (docs/...)) linked=() +for index_file in "${index_files[@]}"; do + base_dir="$(dirname "$index_file")" + + # Markdown links. + while IFS= read -r raw_link; do + link="${raw_link%%#*}" + [[ -z "$link" ]] && continue + [[ "$link" =~ ^[a-zA-Z][a-zA-Z0-9+.-]*: ]] && continue + [[ "$link" == /* ]] && continue + + if [[ "$link" == docs/* ]]; then + normalized="$(normalize_path "$link")" + else + normalized="$(normalize_path "$base_dir/$link")" + fi + if [[ "$link" == */ ]]; then + normalized="${normalized%/}/" + fi + linked+=("$normalized") + done < <( + grep -oE '\[[^]]+\]\([^)]+\)' "$index_file" \ + | sed -E 's/.*\(([^)]+)\).*/\1/' || true + ) + + # Agent import directives in AGENTS.md. + while IFS= read -r raw_link; do + link="${raw_link#@}" + linked+=("$(normalize_path "$link")") + done < <(grep -oE '^@docs/[^[:space:]]+' "$index_file" || true) +done + +deduped=() while IFS= read -r line; do - linked+=("$line") -done < <(grep -oE '\(docs/[^)]+\)' "$agents_file" | sed -E 's/^\(|\)$//g' | sort -u) + deduped+=("$line") +done < <(printf '%s\n' "${linked[@]}" | sort -u) +linked=("${deduped[@]}") fail=0 -# Check 1: every linked path exists. for link in "${linked[@]}"; do - # Strip in-page anchors like #foo - path="${link%%#*}" - if [[ "$path" == */ ]]; then - if [[ ! 
-d "$path" ]]; then - echo "error: AGENTS.md links to missing directory: $path" >&2 + if [[ "$link" == */ ]]; then + if [[ ! -d "$link" ]]; then + echo "error: docs index links to missing directory: $link" >&2 fail=1 fi else - if [[ ! -f "$path" ]]; then - echo "error: AGENTS.md links to missing file: $path" >&2 + if [[ ! -f "$link" ]]; then + echo "error: docs index links to missing file: $link" >&2 fail=1 fi fi done -# Check 2: every canonical doc is linked at least once. for doc in "${canonical[@]}"; do found=0 for link in "${linked[@]}"; do - path="${link%%#*}" - if [[ "$path" == "$doc" ]]; then + if [[ "$link" == "$doc" ]]; then found=1 break fi done if [[ "$found" -eq 0 ]]; then - echo "error: doc not linked from AGENTS.md: $doc" >&2 + echo "error: doc not linked from AGENTS.md or audience indexes: $doc" >&2 fail=1 fi done if [[ "$fail" -ne 0 ]]; then echo >&2 - echo "AGENTS.md / docs/ are out of sync. Either update AGENTS.md links or rename/remove the doc." >&2 + echo "AGENTS.md / docs indexes are out of sync. Update AGENTS.md, docs/user/index.md, or docs/dev/index.md." >&2 exit 1 fi -echo "AGENTS.md ↔ docs/ links OK (${#linked[@]} links, ${#canonical[@]} docs)." +echo "AGENTS.md ↔ docs indexes OK (${#linked[@]} links, ${#canonical[@]} docs)." 
diff --git a/scripts/install.ps1 b/scripts/install.ps1
new file mode 100644
index 0000000..3bfd0f1
--- /dev/null
+++ b/scripts/install.ps1
@@ -0,0 +1,196 @@
+# Installs the Omnigraph Windows release binaries (omnigraph.exe and
+# omnigraph-server.exe) from a GitHub release into $InstallDir.
+#
+#   RepoSlug        GitHub "owner/repo" whose releases hold the assets.
+#   InstallDir      Directory that receives the two executables.
+#   ReleaseChannel  "stable" or "edge"; a failed stable download falls back
+#                   to edge.
+#   Version         Explicit release tag; when non-empty it takes precedence
+#                   over ReleaseChannel.
+param(
+    [string]$RepoSlug = "ModernRelay/omnigraph",
+    [string]$InstallDir = "$env:USERPROFILE\.local\bin",
+    [ValidateSet("stable", "edge")]
+    [string]$ReleaseChannel = "stable",
+    [string]$Version = ""
+)
+
+$ErrorActionPreference = "Stop"
+
+# Windows PowerShell 5.1 can default to TLS versions older than 1.2, which
+# makes HTTPS downloads fail before any asset is fetched. Opt in to TLS 1.2
+# there; PowerShell 6+ does not route Invoke-WebRequest through
+# ServicePointManager, so it is left untouched.
+if ($PSVersionTable.PSVersion.Major -lt 6) {
+    [Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12
+}
+
+$assetName = "omnigraph-windows-x86_64.zip"
+$assetStem = "omnigraph-windows-x86_64"
+# Per-run scratch directory under the temp path; removed in the finally block.
+$workDir = Join-Path ([System.IO.Path]::GetTempPath()) ("omnigraph-install-" + [System.Guid]::NewGuid().ToString("N"))
+# Release tag or channel that was actually used; reported by Print-Summary.
+$selectedChannel = ""
+
+# Writes a progress message prefixed with "==>".
+function Write-Log {
+    param([string]$Message)
+    Write-Host "==> $Message"
+}
+
+# Returns the download base URL: the tagged release when $Version is set,
+# otherwise the latest release (stable) or the "edge" tag. Throws on any other
+# channel name.
+function Get-ReleaseBaseUrl {
+    param([string]$Channel)
+
+    if ($Version -ne "") {
+        return "https://github.com/$RepoSlug/releases/download/$Version"
+    }
+
+    if ($Channel -eq "stable") {
+        return "https://github.com/$RepoSlug/releases/latest/download"
+    }
+
+    if ($Channel -eq "edge") {
+        return "https://github.com/$RepoSlug/releases/download/edge"
+    }
+
+    throw "unsupported ReleaseChannel '$Channel' (expected stable or edge)"
+}
+
+# Downloads the archive and its .sha256 file from $BaseUrl. Returns $true when
+# both downloads succeed and $false otherwise, so callers can try another
+# channel.
+function Download-ReleaseFiles {
+    param(
+        [string]$BaseUrl,
+        [string]$ArchivePath,
+        [string]$ChecksumPath
+    )
+
+    try {
+        Invoke-WebRequest -UseBasicParsing -Uri "$BaseUrl/$assetName" -OutFile $ArchivePath
+        Invoke-WebRequest -UseBasicParsing -Uri "$BaseUrl/$assetStem.sha256" -OutFile $ChecksumPath
+        return $true
+    } catch {
+        # Failure is an expected outcome (asset not published), but keep the
+        # cause visible when $VerbosePreference enables verbose output so that
+        # proxy or TLS problems are not mistaken for a missing release.
+        Write-Verbose "download from $BaseUrl failed: $($_.Exception.Message)"
+        return $false
+    }
+}
+
+# Verifies that the SHA256 of $ArchivePath equals the first whitespace-separated
+# token of $ChecksumPath. Throws when the file holds no well-formed digest or
+# when the digests differ.
+function Verify-Checksum {
+    param(
+        [string]$ArchivePath,
+        [string]$ChecksumPath
+    )
+
+    # Cast before Trim(): Get-Content -Raw can yield $null for an empty file, and
+    # calling a method on $null would raise an unrelated error.
+    $checksumText = ([string](Get-Content -Path $ChecksumPath -Raw)).Trim()
+    $expected = ($checksumText -split "\s+")[0].ToLowerInvariant()
+    # A SHA256 digest is exactly 64 hex characters; anything else (empty file,
+    # error page, truncated download) is rejected as malformed.
+    if ($expected -notmatch '^[0-9a-f]{64}$') {
+        throw "checksum file did not contain a SHA256 digest"
+    }
+
+    $actual = (Get-FileHash -Path $ArchivePath -Algorithm SHA256).Hash.ToLowerInvariant()
+    if ($actual -ne $expected) {
+        throw "checksum verification failed for $assetName"
+    }
+}
+
+# Copies omnigraph.exe and omnigraph-server.exe from $SourceDir into
+# $InstallDir, creating the directory if needed and overwriting existing files.
+function Install-FromDirectory {
+    param([string]$SourceDir)
+
+    New-Item -ItemType Directory -Force -Path $InstallDir | Out-Null
+    Copy-Item -Path (Join-Path $SourceDir "omnigraph.exe") -Destination (Join-Path $InstallDir "omnigraph.exe") -Force
+    Copy-Item -Path (Join-Path $SourceDir "omnigraph-server.exe") -Destination (Join-Path $InstallDir "omnigraph-server.exe") -Force
+}
+
+# Downloads the release archive into $workDir, verifies its checksum, extracts
+# it and installs the binaries. With $Version set only that tag is tried; a
+# failed "stable" download falls back to "edge"; "edge" never falls back.
+function Install-FromRelease {
+    New-Item -ItemType Directory -Force -Path $workDir | Out-Null
+
+    $archivePath = Join-Path $workDir $assetName
+    $checksumPath = Join-Path $workDir "$assetStem.sha256"
+
+    if ($Version -ne "") {
+        $script:selectedChannel = $Version
+        $baseUrl = Get-ReleaseBaseUrl -Channel $ReleaseChannel
+        Write-Log "Downloading $assetName from $Version"
+        if (!(Download-ReleaseFiles -BaseUrl $baseUrl -ArchivePath $archivePath -ChecksumPath $checksumPath)) {
+            throw "no published binary found for $assetName at release $Version"
+        }
+    } else {
+        $script:selectedChannel = $ReleaseChannel
+        $baseUrl = Get-ReleaseBaseUrl -Channel $selectedChannel
+        Write-Log "Downloading $assetName from $selectedChannel"
+        if (!(Download-ReleaseFiles -BaseUrl $baseUrl -ArchivePath $archivePath -ChecksumPath $checksumPath)) {
+            if ($ReleaseChannel -ne "stable") {
+                throw "no published binary found for $assetName on channel $ReleaseChannel"
+            }
+
+            Write-Log "Stable release binaries are not published yet; falling back to edge"
+            $script:selectedChannel = "edge"
+            $baseUrl = Get-ReleaseBaseUrl -Channel $selectedChannel
+            if (!(Download-ReleaseFiles -BaseUrl $baseUrl -ArchivePath $archivePath -ChecksumPath $checksumPath)) {
+                throw "no published binary found for $assetName on stable or edge; build from source"
+            }
+        }
+    }
+
+    Verify-Checksum -ArchivePath $archivePath -ChecksumPath $checksumPath
+
+    $extractDir = Join-Path $workDir "extract"
+    New-Item -ItemType Directory -Force -Path $extractDir | Out-Null
+    Expand-Archive -Path $archivePath -DestinationPath $extractDir -Force
+    Install-FromDirectory -SourceDir $extractDir
+}
+
+# Prints the installed paths, suggested verification commands, the release
+# that was used, and a PATH hint when $InstallDir is not already listed.
+function Print-Summary {
+    $omnigraphPath = Join-Path $InstallDir "omnigraph.exe"
+    $serverPath = Join-Path $InstallDir "omnigraph-server.exe"
+
+    Write-Host ""
+    Write-Host "Installed:"
+    Write-Host " $omnigraphPath"
+    Write-Host " $serverPath"
+    Write-Host ""
+    Write-Host "Verify:"
+    Write-Host " $omnigraphPath version"
+    Write-Host " $serverPath --help"
+    Write-Host ""
+
+    if ($selectedChannel -ne "") {
+        Write-Host "Installed from release channel: $selectedChannel"
+    }
+
+    $pathParts = $env:Path -split [System.IO.Path]::PathSeparator
+    if ($pathParts -notcontains $InstallDir) {
+        Write-Host "Add $InstallDir to PATH if needed."
+    }
+}
+
+# Entry point: install, report, and always remove the scratch directory.
+try {
+    Install-FromRelease
+    Print-Summary
+} finally {
+    if (Test-Path $workDir) {
+        Remove-Item -Path $workDir -Recurse -Force
+    }
+}
diff --git a/scripts/local-rustfs-bootstrap.sh b/scripts/local-rustfs-bootstrap.sh index a314ebd..29427de 100755 --- a/scripts/local-rustfs-bootstrap.sh +++ b/scripts/local-rustfs-bootstrap.sh @@ -74,9 +74,6 @@ platform_asset_name() { Linux/x86_64) printf 'omnigraph-linux-x86_64.tar.gz\n' ;; - Darwin/x86_64) - printf 'omnigraph-macos-x86_64.tar.gz\n' - ;; Darwin/arm64) printf 'omnigraph-macos-arm64.tar.gz\n' ;; @@ -291,7 +288,7 @@ ensure_bucket() { s3api create-bucket --bucket "$BUCKET" >/dev/null 2>&1 || true } -repo_prefix_has_objects() { +graph_prefix_has_objects() { local key_count key_count="$("$AWS_BIN" --endpoint-url "$AWS_ENDPOINT_URL_S3" \ s3api list-objects-v2 \ @@ -304,27 +301,27 @@ repo_prefix_has_objects() { [ -n "$key_count" ] && [ "$key_count" != "None" ] && [ "$key_count" != "0" ] } -reset_repo_prefix() { +reset_graph_prefix() { log "Removing existing objects under $REPO_URI" "$AWS_BIN" --endpoint-url "$AWS_ENDPOINT_URL_S3" \ s3 rm "s3://$BUCKET/$PREFIX" --recursive >/dev/null } -initialize_repo() { +initialize_graph() { if "$BIN_DIR/omnigraph" snapshot "$REPO_URI" --json >/dev/null 2>&1; then - log "Reusing existing repo at $REPO_URI" + log "Reusing existing graph at $REPO_URI" return fi - if repo_prefix_has_objects; then + if graph_prefix_has_objects; then if [ "$RESET_REPO" = "1" ]; then - reset_repo_prefix + reset_graph_prefix else - die "found existing objects under $REPO_URI but could not open an Omnigraph repo there. This usually means a previous bootstrap left a partially initialized prefix. Rerun with RESET_REPO=1 to delete that prefix and recreate it, or set PREFIX to a new value." + die "found existing objects under $REPO_URI but could not open an Omnigraph graph there. This usually means a previous bootstrap left a partially initialized prefix. Rerun with RESET_REPO=1 to delete that prefix and recreate it, or set PREFIX to a new value." fi fi - log "Initializing repo at $REPO_URI" + log "Initializing graph at $REPO_URI" "$BIN_DIR/omnigraph" init --schema "$FIXTURE_DIR/context.pg" "$REPO_URI" log "Loading context fixture into $REPO_URI" @@ -377,7 +374,7 @@ Omnigraph local RustFS demo is up. Server: $base_url -Repo URI: +Graph URI: $REPO_URI RustFS console: @@ -414,7 +411,7 @@ main() { start_rustfs wait_for_rustfs ensure_bucket - initialize_repo + initialize_graph start_server print_summary "$(wait_for_server)" } diff --git a/scripts/update-homebrew-formula.sh b/scripts/update-homebrew-formula.sh index 6b3984c..90a5dea 100755 --- a/scripts/update-homebrew-formula.sh +++ b/scripts/update-homebrew-formula.sh @@ -6,7 +6,7 @@ usage() { Usage: update-homebrew-formula.sh [formula_path] Environment: - REPO_SLUG GitHub repo that owns the Omnigraph release + REPO_SLUG GitHub repository that owns the Omnigraph release default: ModernRelay/omnigraph EOF }