mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-18 02:24:27 +02:00
Merge remote-tracking branch 'origin/main' into ragnorc/omnigraph-mcp-crate
This commit is contained in:
commit
c08e8dbac4
173 changed files with 20828 additions and 10366 deletions
1
.github/branch-protection.json
vendored
1
.github/branch-protection.json
vendored
|
|
@ -5,7 +5,6 @@
|
|||
"contexts": [
|
||||
"Classify Changes",
|
||||
"Check AGENTS.md Links",
|
||||
"Test Workspace",
|
||||
"Test omnigraph-server --features aws",
|
||||
"CODEOWNERS matches source",
|
||||
"CODEOWNERS not hand-edited"
|
||||
|
|
|
|||
20
.github/workflows/ci.yml
vendored
20
.github/workflows/ci.yml
vendored
|
|
@ -128,6 +128,23 @@ jobs:
|
|||
test:
|
||||
name: Test Workspace
|
||||
needs: classify_changes
|
||||
# PR latency: the full workspace + failpoints build/test is the slowest
|
||||
# gate (~15min warm, up to the 75min ceiling cold) and dominated PR
|
||||
# turnaround. It now runs only on push to `main` (post-merge), on tags,
|
||||
# and on manual `workflow_dispatch` — NOT on pull_request. Trade-off
|
||||
# accepted deliberately: a regression is caught on the `main` run after
|
||||
# merge rather than before it, so `main` can briefly go red. Mitigations:
|
||||
# (1) `Test Workspace` is removed from required PR checks in
|
||||
# `.github/branch-protection.json` (a required check that never
|
||||
# reports would leave every PR permanently pending);
|
||||
# (2) run the full suite locally before merging risky changes
|
||||
# (`cargo test --workspace --locked`), or trigger this workflow via
|
||||
# the Actions "Run workflow" button (workflow_dispatch) on your branch;
|
||||
# (3) openapi.json is no longer auto-regenerated on PRs (that step lived
|
||||
# here) — regenerate it locally for server/API changes
|
||||
# (`OMNIGRAPH_UPDATE_OPENAPI=1 cargo test -p omnigraph-server --test openapi`)
|
||||
# or the strict drift check fails the post-merge `main` run.
|
||||
if: github.event_name != 'pull_request'
|
||||
runs-on: ubuntu-latest
|
||||
# 75, not 45: a cold rust-cache (every Cargo.lock change) costs a full
|
||||
# workspace + failpoints-feature build on a 2-core runner, which now
|
||||
|
|
@ -274,6 +291,9 @@ jobs:
|
|||
|
||||
rustfs_integration:
|
||||
name: RustFS S3 Integration
|
||||
# `needs: test` means this is push-/dispatch-only too: on pull_request the
|
||||
# `test` job is skipped, so this dependent is skipped with it. S3
|
||||
# integration runs post-merge on `main`, alongside the workspace suite.
|
||||
needs:
|
||||
- classify_changes
|
||||
- test
|
||||
|
|
|
|||
10
.github/workflows/publish-crates.yml
vendored
10
.github/workflows/publish-crates.yml
vendored
|
|
@ -1,6 +1,6 @@
|
|||
name: Publish to crates.io
|
||||
|
||||
# Publishes the four workspace crates to crates.io in dependency order.
|
||||
# Publishes the publishable workspace crates to crates.io in dependency order.
|
||||
#
|
||||
# Triggers:
|
||||
# - push of any v* tag (future releases auto-publish alongside release.yml)
|
||||
|
|
@ -115,10 +115,14 @@ jobs:
|
|||
|
||||
# Order matters: each crate must precede anything that depends on it.
|
||||
# omnigraph-compiler and omnigraph-policy have no internal deps;
|
||||
# omnigraph-engine depends on both; server depends on engine + the
|
||||
# two leaf crates; cli depends on everything.
|
||||
# omnigraph-engine depends on both; omnigraph-api-types and
|
||||
# omnigraph-cluster depend on engine (+ compiler); server depends on
|
||||
# engine + api-types + cluster + the two leaf crates; cli depends on
|
||||
# everything.
|
||||
publish_if_new omnigraph-compiler
|
||||
publish_if_new omnigraph-policy
|
||||
publish_if_new omnigraph-engine
|
||||
publish_if_new omnigraph-api-types
|
||||
publish_if_new omnigraph-cluster
|
||||
publish_if_new omnigraph-server
|
||||
publish_if_new omnigraph-cli
|
||||
|
|
|
|||
71
.github/workflows/release.yml
vendored
71
.github/workflows/release.yml
vendored
|
|
@ -1,17 +1,34 @@
|
|||
name: Release
|
||||
|
||||
# Build per-platform binaries in a matrix, then publish the GitHub release ONCE
|
||||
# from a single job. The matrix used to call `softprops/action-gh-release`
|
||||
# concurrently — three jobs racing to create/finalize the same release, which
|
||||
# exhausted the action's finalize retries and dropped whole platforms' assets.
|
||||
# The matrix now only uploads workflow artifacts; `publish_release` is the sole
|
||||
# writer of the release (no race).
|
||||
#
|
||||
# Triggers:
|
||||
# - push of a v* tag (normal release)
|
||||
# - workflow_dispatch with an explicit `tag` (re-publish a past tag without
|
||||
# re-cutting it; resolves the same `${{ inputs.tag || github.ref_name }}`)
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: "Tag to (re)publish (e.g. v0.7.0). Required for manual dispatches."
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
build_release:
|
||||
name: Build ${{ matrix.asset_name }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
permissions:
|
||||
contents: write
|
||||
contents: read
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
|
|
@ -27,6 +44,8 @@ jobs:
|
|||
steps:
|
||||
- name: Checkout source
|
||||
uses: actions/checkout@v5.0.1
|
||||
with:
|
||||
ref: ${{ inputs.tag || github.ref_name }}
|
||||
|
||||
- name: Install Linux dependencies
|
||||
if: runner.os == 'Linux'
|
||||
|
|
@ -81,20 +100,46 @@ jobs:
|
|||
throw "Windows release archive is missing expected binaries"
|
||||
}
|
||||
|
||||
- name: Publish GitHub release assets
|
||||
# Upload artifacts only — the single `publish_release` job attaches them to
|
||||
# the release, so no two jobs ever write the release concurrently.
|
||||
- name: Upload build artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.asset_name }}
|
||||
path: |
|
||||
${{ matrix.asset_name }}.*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
publish_release:
|
||||
name: Publish GitHub release
|
||||
needs: build_release
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Download all build artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: dist
|
||||
merge-multiple: true
|
||||
|
||||
- name: Publish release (single writer — no matrix race)
|
||||
uses: softprops/action-gh-release@v2.5.0
|
||||
with:
|
||||
files: |
|
||||
${{ matrix.asset_name }}.*
|
||||
tag_name: ${{ inputs.tag || github.ref_name }}
|
||||
files: dist/**
|
||||
overwrite_files: true
|
||||
|
||||
update_homebrew_tap:
|
||||
name: Update Homebrew tap
|
||||
needs: build_release
|
||||
needs: publish_release
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
HOMEBREW_TAP_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}
|
||||
RELEASE_TAG: ${{ inputs.tag || github.ref_name }}
|
||||
steps:
|
||||
- name: Skip if HOMEBREW_TAP_TOKEN is not configured
|
||||
if: env.HOMEBREW_TAP_TOKEN == ''
|
||||
|
|
@ -105,6 +150,8 @@ jobs:
|
|||
- name: Checkout source
|
||||
if: env.HOMEBREW_TAP_SKIP != '1'
|
||||
uses: actions/checkout@v5.0.1
|
||||
with:
|
||||
ref: ${{ env.RELEASE_TAG }}
|
||||
|
||||
- name: Checkout Homebrew tap
|
||||
if: env.HOMEBREW_TAP_SKIP != '1'
|
||||
|
|
@ -119,7 +166,7 @@ jobs:
|
|||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
./scripts/update-homebrew-formula.sh "${GITHUB_REF_NAME}" homebrew-tap/Formula/omnigraph.rb
|
||||
./scripts/update-homebrew-formula.sh "${RELEASE_TAG}" homebrew-tap/Formula/omnigraph.rb
|
||||
|
||||
# Diagnostic only: brew is not on PATH on the ubuntu runner by default, so
|
||||
# set it up explicitly. Both this setup and the audit below are best-effort
|
||||
|
|
@ -158,22 +205,26 @@ jobs:
|
|||
git config user.name "github-actions[bot]"
|
||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git add Formula/omnigraph.rb
|
||||
git commit -m "Update Omnigraph formula to ${GITHUB_REF_NAME}"
|
||||
git commit -m "Update Omnigraph formula to ${RELEASE_TAG}"
|
||||
git push origin HEAD:main
|
||||
|
||||
smoke_windows_installer:
|
||||
name: Smoke Windows installer
|
||||
needs: build_release
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
needs: publish_release
|
||||
if: ${{ inputs.tag != '' || startsWith(github.ref, 'refs/tags/v') }}
|
||||
runs-on: windows-latest
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
RELEASE_TAG: ${{ inputs.tag || github.ref_name }}
|
||||
steps:
|
||||
- name: Checkout source
|
||||
uses: actions/checkout@v5.0.1
|
||||
with:
|
||||
ref: ${{ env.RELEASE_TAG }}
|
||||
|
||||
- name: Install from tagged release
|
||||
run: ./scripts/install.ps1 -Version "$env:GITHUB_REF_NAME" -InstallDir "$env:RUNNER_TEMP/omnigraph-bin"
|
||||
run: ./scripts/install.ps1 -Version "$env:RELEASE_TAG" -InstallDir "$env:RUNNER_TEMP/omnigraph-bin"
|
||||
|
||||
- name: Smoke installed binaries
|
||||
run: |
|
||||
|
|
|
|||
84
AGENTS.md
84
AGENTS.md
|
|
@ -17,8 +17,8 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th
|
|||
`CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`.
|
||||
|
||||
**Version surveyed:** 0.7.0
|
||||
**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-policy`, `omnigraph-cluster`, `omnigraph-cli`, `omnigraph-server`
|
||||
**Storage substrate:** Lance 6.x (columnar, versioned, branchable)
|
||||
**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-policy`, `omnigraph-api-types` (shared HTTP wire DTOs), `omnigraph-cluster`, `omnigraph-cli`, `omnigraph-server`
|
||||
**Storage substrate:** Lance 7.x (columnar, versioned, branchable)
|
||||
**License:** MIT
|
||||
**Toolchain:** Rust stable, edition 2024
|
||||
|
||||
|
|
@ -33,8 +33,8 @@ OmniGraph is a typed property-graph engine built as a coordination layer over ma
|
|||
- **Multi-modal querying**: vector ANN (`nearest`), full-text (`search`/`fuzzy`/`match_text`/`bm25`), Reciprocal Rank Fusion (`rrf`), and graph traversal (`Expand`, anti-join `not { … }`) in one runtime.
|
||||
- **Branches and commits across the whole graph**: Git-style — every successful publish appends to a commit DAG; merges are three-way at the row level.
|
||||
- **Atomic per-query writes**: `mutate_as` and `load` accumulate insert/update batches into an in-memory `MutationStaging.pending` per touched table; one `stage_*` + `commit_staged` per table runs at end-of-query, then `ManifestBatchPublisher::publish` commits the manifest atomically with per-table `expected_table_versions` CAS. A mid-query failure leaves Lance HEAD untouched on staged tables — no drift, no run state machine, no staging branches. Deletes still inline-commit; D₂ at parse time prevents inserts/updates and deletes from coexisting in one query.
|
||||
- **HTTP server**: Axum + utoipa OpenAPI, bearer auth (SHA-256 hashed, optional AWS Secrets Manager). Cedar policy enforcement is engine-wide — every `_as` writer calls `Omnigraph::enforce(action, scope, actor)`, so HTTP, CLI, and embedded SDK consumers all hit the same gate. **Two modes** (v0.6.0+): single-graph (legacy flat routes) and multi-graph (`/graphs/{graph_id}/...` cluster routes + read-only `GET /graphs` enumeration). Per-graph + server-level Cedar policies. Multi-graph mode boots from a cluster directory (`--cluster <dir | s3://…>`, RFC-005) or the legacy `omnigraph.yaml` `graphs:` map. Runtime add/remove (`POST /graphs`, `DELETE /graphs/{id}`) is not exposed — operators run `cluster apply` (or edit the legacy file) and restart.
|
||||
- **CLI** with two-surface config (RFC-008): the team-owned cluster directory (`cluster.yaml`) plus the per-operator `~/.omnigraph/config.yaml` (servers, credentials, actor, aliases). The legacy combined `omnigraph.yaml` still loads with per-key deprecation warnings — `config migrate` proposes the split, `OMNIGRAPH_NO_LEGACY_CONFIG=1` enforces strict mode. **Never extend `omnigraph.yaml`.** Multi-format output (json/jsonl/csv/kv/table).
|
||||
- **HTTP server**: Axum + utoipa OpenAPI, bearer auth (SHA-256 hashed, optional AWS Secrets Manager). Cedar policy enforcement is engine-wide — every `_as` writer calls `Omnigraph::enforce(action, scope, actor)`, so HTTP, CLI, and embedded SDK consumers all hit the same gate. **Cluster-only boot** (RFC-011): the server always boots from a cluster directory (`--cluster <dir | s3://…>`, RFC-005) and serves N graphs (N ≥ 1) under multi-graph routes (`/graphs/{graph_id}/...` + read-only `GET /graphs` enumeration); there are no single-graph flat routes and no positional-URI boot. Per-graph + server-level Cedar policies. Runtime add/remove (`POST /graphs`, `DELETE /graphs/{id}`) is not exposed — operators run `cluster apply` and restart.
|
||||
- **CLI** with two-surface config (RFC-007/008): the team-owned cluster directory (`cluster.yaml`) plus the per-operator `~/.omnigraph/config.yaml` (servers, clusters, credentials, actor, profiles, aliases, defaults). Graphs are addressed via `--store`/`--server`/`--cluster`/`--profile`/operator defaults (RFC-011). Multi-format output (json/jsonl/csv/kv/table).
|
||||
|
||||
Throughout the docs, capabilities are split into **L1 — Inherited from Lance** vs **L2 — Added by OmniGraph**.
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ CLI (omnigraph) HTTP Server (omnigraph-server, Axum)
|
|||
omnigraph (engine) ── ManifestCoordinator, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec
|
||||
│
|
||||
▼
|
||||
Lance 6.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes
|
||||
Lance 7.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes
|
||||
│
|
||||
▼
|
||||
Object store (file / s3 / RustFS / MinIO / S3-compat)
|
||||
|
|
@ -73,32 +73,38 @@ Full diagram and concurrency model: [docs/dev/architecture.md](docs/dev/architec
|
|||
| **Lance docs index — fetch upstream Lance docs by problem domain** | **[docs/dev/lance.md](docs/dev/lance.md)** |
|
||||
| **Test coverage map — what's covered, what helpers to reuse, before-every-task checklist** | **[docs/dev/testing.md](docs/dev/testing.md)** |
|
||||
| Architecture, L1/L2 framing, concurrency model | [docs/dev/architecture.md](docs/dev/architecture.md) |
|
||||
| Storage layout, `__manifest` schema, URI schemes, S3 env vars | [docs/user/storage.md](docs/user/storage.md) |
|
||||
| `.pg` schema language, types, constraints, annotations, migration planning | [docs/user/schema-language.md](docs/user/schema-language.md) |
|
||||
| Schema-lint codes (`OG-XXX-NNN`), families, severity, suppression | [docs/user/schema-lint.md](docs/user/schema-lint.md) |
|
||||
| `.gq` query language, MATCH/RETURN/ORDER, search funcs, mutations, IR ops, lint codes | [docs/user/query-language.md](docs/user/query-language.md) |
|
||||
| Indexes (BTREE / inverted / vector / graph topology) | [docs/user/indexes.md](docs/user/indexes.md) |
|
||||
| Embeddings (compiler + engine clients, env vars, `@embed`) | [docs/user/embeddings.md](docs/user/embeddings.md) |
|
||||
| Branches, commit graph, snapshots, system branches | [docs/user/branches-commits.md](docs/user/branches-commits.md) |
|
||||
| Transactions and atomicity (per-query atomic; branches as multi-query transactions) | [docs/user/transactions.md](docs/user/transactions.md) |
|
||||
| Storage layout, `__manifest` schema, URI schemes, S3 env vars | [docs/user/concepts/storage.md](docs/user/concepts/storage.md) |
|
||||
| `.pg` schema language, types, constraints, annotations, migration planning | [docs/user/schema/index.md](docs/user/schema/index.md) |
|
||||
| Schema-lint codes (`OG-XXX-NNN`), families, severity, suppression | [docs/user/schema/lint.md](docs/user/schema/lint.md) |
|
||||
| `.gq` query language, MATCH/RETURN/ORDER, IR ops, lint codes | [docs/user/queries/index.md](docs/user/queries/index.md) |
|
||||
| Mutations — insert/update/delete, D2, atomicity | [docs/user/mutations/index.md](docs/user/mutations/index.md) |
|
||||
| Search funcs (`nearest`/`bm25`/`rrf`), hybrid ranking | [docs/user/search/index.md](docs/user/search/index.md) |
|
||||
| Indexes (BTREE / inverted / vector / graph topology) | [docs/user/search/indexes.md](docs/user/search/indexes.md) |
|
||||
| Embeddings (engine client, env vars, `@embed`) | [docs/user/search/embeddings.md](docs/user/search/embeddings.md) |
|
||||
| Concepts — what OmniGraph is, L1/L2 framing | [docs/user/concepts/index.md](docs/user/concepts/index.md) |
|
||||
| Quickstart — init → load → query → branch | [docs/user/quickstart.md](docs/user/quickstart.md) |
|
||||
| Branches, commit graph, system branches | [docs/user/branching/index.md](docs/user/branching/index.md) |
|
||||
| Snapshots & time travel | [docs/user/branching/time-travel.md](docs/user/branching/time-travel.md) |
|
||||
| Three-way merge and conflict kinds (user-facing) | [docs/user/branching/merge.md](docs/user/branching/merge.md) |
|
||||
| Transactions and atomicity (per-query atomic; branches as multi-query transactions) | [docs/user/branching/transactions.md](docs/user/branching/transactions.md) |
|
||||
| Direct-publish write path (staging, D2, recovery sidecars; the former Run state machine) | [docs/dev/writes.md](docs/dev/writes.md) |
|
||||
| Three-way merge and conflict kinds | [docs/dev/merge.md](docs/dev/merge.md) |
|
||||
| Diff / change feed (`diff_between`, `diff_commits`) | [docs/user/changes.md](docs/user/changes.md) |
|
||||
| Diff / change feed (`diff_between`, `diff_commits`) | [docs/user/branching/changes.md](docs/user/branching/changes.md) |
|
||||
| Query execution, mutation execution, bulk loader, `load` vs `ingest` | [docs/dev/execution.md](docs/dev/execution.md) |
|
||||
| `optimize` (compaction) and `cleanup` (version GC) | [docs/user/maintenance.md](docs/user/maintenance.md) |
|
||||
| Cluster operator guide (deploy/manage clusters, approvals, recovery, serving) | [docs/user/cluster.md](docs/user/cluster.md) |
|
||||
| Cedar policy actions, scopes, CLI | [docs/user/policy.md](docs/user/policy.md) |
|
||||
| HTTP server endpoints, auth, error model, body limits | [docs/user/server.md](docs/user/server.md) |
|
||||
| CLI quick-start | [docs/user/cli.md](docs/user/cli.md) |
|
||||
| CLI command surface and config schemas (`~/.omnigraph/config.yaml`, legacy `omnigraph.yaml`) | [docs/user/cli-reference.md](docs/user/cli-reference.md) |
|
||||
| Audit / actor tracking | [docs/user/audit.md](docs/user/audit.md) |
|
||||
| Error taxonomy and result serialization | [docs/user/errors.md](docs/user/errors.md) |
|
||||
| `optimize` (compaction) and `cleanup` (version GC) | [docs/user/operations/maintenance.md](docs/user/operations/maintenance.md) |
|
||||
| Cluster operator guide (deploy/manage clusters, approvals, recovery, serving) | [docs/user/clusters/index.md](docs/user/clusters/index.md) |
|
||||
| Cedar policy actions, scopes, CLI | [docs/user/operations/policy.md](docs/user/operations/policy.md) |
|
||||
| HTTP server endpoints, auth, error model, body limits | [docs/user/operations/server.md](docs/user/operations/server.md) |
|
||||
| CLI quick-start | [docs/user/cli/index.md](docs/user/cli/index.md) |
|
||||
| CLI command surface and config schema (`~/.omnigraph/config.yaml`) | [docs/user/cli/reference.md](docs/user/cli/reference.md) |
|
||||
| Audit / actor tracking | [docs/user/operations/audit.md](docs/user/operations/audit.md) |
|
||||
| Error taxonomy and result serialization | [docs/user/operations/errors.md](docs/user/operations/errors.md) |
|
||||
| Install (binary / Homebrew / source / channels) | [docs/user/install.md](docs/user/install.md) |
|
||||
| Deployment (binary / container / RustFS bootstrap / auth / build variants) | [docs/user/deployment.md](docs/user/deployment.md) |
|
||||
| Deployment (binary / container / S3-local testing / auth / build variants) | [docs/user/deployment.md](docs/user/deployment.md) |
|
||||
| CI / release workflows | [docs/dev/ci.md](docs/dev/ci.md) |
|
||||
| Code ownership (CODEOWNERS source of truth, roles, regeneration) | [docs/dev/codeowners.md](docs/dev/codeowners.md) |
|
||||
| Branch protection policy (declarative, applied via `scripts/apply-branch-protection.sh`) | [docs/dev/branch-protection.md](docs/dev/branch-protection.md) |
|
||||
| Constants & tunables cheat sheet | [docs/user/constants.md](docs/user/constants.md) |
|
||||
| Constants & tunables cheat sheet | [docs/user/reference/constants.md](docs/user/reference/constants.md) |
|
||||
| Per-version release notes | [docs/releases/](docs/releases/) |
|
||||
|
||||
---
|
||||
|
|
@ -138,6 +144,7 @@ These are architectural rules that need to be in scope on every change. They're
|
|||
4. **Bearer-token plaintext never persists in process memory.** Tokens are hashed at startup; auth uses constant-time comparison; the actor id is server-resolved from the hash match and must not be settable by the client.
|
||||
5. **Reads always see the current index state for the branch they're reading.** Indexes track the branch head, not historical snapshots. If you change index lifecycle, preserve this guarantee.
|
||||
6. **Stable type IDs survive renames.** Schema migration relies on identity that's stable across rename — don't mint new IDs on rename.
|
||||
7. **Logical contract over physical state.** Physical state (index coverage, fragment layout, compaction versions, staged writes) is derived and rebuildable; it must never fail a logical operation. Check preconditions against logical state and let reconciliation converge the physical state idempotently — genuine logical conflicts still fail loudly. This is the general principle that rules 1–6 instantiate; the full statement and its applications are in [docs/dev/invariants.md](docs/dev/invariants.md).
|
||||
|
||||
### Deny-list (fast-pass review filter — full reasoning in [docs/dev/invariants.md](docs/dev/invariants.md))
|
||||
|
||||
|
|
@ -173,7 +180,7 @@ Rust stable workspace (edition 2024). `protoc` is a build dependency (`brew inst
|
|||
cargo build --workspace --locked # build everything
|
||||
cargo test --workspace --locked # the canonical CI gate (matches CI exactly)
|
||||
cargo run -p omnigraph-cli -- <args> # run the `omnigraph` CLI from source
|
||||
cargo run -p omnigraph-server -- <uri> --bind 0.0.0.0:8080 # run the server from source
|
||||
cargo run -p omnigraph-server -- --cluster <dir|s3://...> --bind 0.0.0.0:8080 # run the server from source
|
||||
|
||||
# Run one crate / one test file / one test fn
|
||||
cargo test -p omnigraph-engine --test traversal # one integration-test file (see docs/dev/testing.md)
|
||||
|
|
@ -185,7 +192,7 @@ cargo test -p omnigraph-engine --features failpoints --test failpoints # fault
|
|||
cargo build -p omnigraph-server --features aws # AWS Secrets Manager bearer-token source
|
||||
```
|
||||
|
||||
S3-backed tests (`s3_storage`, and the S3 paths in server/CLI system tests) **skip** unless `OMNIGRAPH_S3_TEST_BUCKET` + `AWS_*` (incl. `AWS_ENDPOINT_URL_S3` for non-AWS) are set; CI runs them against containerized RustFS. `scripts/local-rustfs-bootstrap.sh` stands up a local S3 environment.
|
||||
S3-backed tests (`s3_storage`, and the S3 paths in server/CLI system tests) **skip** unless `OMNIGRAPH_S3_TEST_BUCKET` + `AWS_*` (incl. `AWS_ENDPOINT_URL_S3` for non-AWS) are set; CI runs them against containerized RustFS. To run RustFS/MinIO yourself, see [docs/user/deployment.md](docs/user/deployment.md) → *Testing against S3 locally*.
|
||||
|
||||
CI does **not** run `clippy` or `rustfmt` as gates — but `cargo test --workspace --locked` is the exact gate, so run it before pushing. Two non-test CI checks: `scripts/check-agents-md.sh` (doc cross-link integrity — run it after moving/renaming docs) and OpenAPI drift (`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json`; set `OMNIGRAPH_UPDATE_OPENAPI=1` to update the checked-in copy when a server/API change is intentional).
|
||||
|
||||
|
|
@ -203,9 +210,9 @@ omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/graph.omni
|
|||
# Load a review batch onto its own branch (--from forks it if missing)
|
||||
omnigraph load --branch review/2026-04-25 --from main --mode merge --data ./batch.jsonl s3://my-bucket/graph.omni
|
||||
|
||||
# Run a hybrid (vector + BM25) query
|
||||
omnigraph read --query ./queries.gq --name find_similar \
|
||||
--params '{"q":"trends in AI safety"}' --format table s3://my-bucket/graph.omni
|
||||
# Run a hybrid (vector + BM25) query — ad-hoc .gq against a store (positional = query name)
|
||||
omnigraph query --query ./queries.gq find_similar \
|
||||
--params '{"q":"trends in AI safety"}' --format table --store s3://my-bucket/graph.omni
|
||||
|
||||
# Plan + apply schema migration
|
||||
omnigraph schema plan --schema ./next.pg s3://my-bucket/graph.omni
|
||||
|
|
@ -225,10 +232,10 @@ omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/graph.omni
|
|||
|
||||
# Stand up the HTTP server (token from env)
|
||||
OMNIGRAPH_SERVER_BEARER_TOKEN=xxxx \
|
||||
omnigraph-server s3://my-bucket/graph.omni --bind 0.0.0.0:8080
|
||||
omnigraph-server --cluster s3://my-bucket/cluster --bind 0.0.0.0:8080
|
||||
|
||||
# Cedar policy explain
|
||||
omnigraph policy explain --actor act-alice --action change --branch main
|
||||
omnigraph policy explain --cluster ./company-brain --graph knowledge --actor act-alice --action change --branch main
|
||||
```
|
||||
|
||||
---
|
||||
|
|
@ -241,10 +248,10 @@ omnigraph policy explain --actor act-alice --action change --branch main
|
|||
| Per-dataset versioning + time travel | ✅ | `snapshot_at_version`, `entity_at`, snapshot-pinned reads across many tables |
|
||||
| Per-dataset branches | ✅ | **Graph-level** branches (atomic across all sub-tables), lazy fork, system branch filtering |
|
||||
| Atomic single-dataset commits | ✅ | **Multi-table publish via three layers**, NOT a single Lance primitive: (1) per-table Lance `commit_staged` for the data write, (2) `__manifest` row-level CAS via `ManifestBatchPublisher` for cross-table ordering, (3) the open-time recovery sweep for the residual gap between (1) and (2). All three layers ship; the five migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`, `optimize_all_tables`) write a `__recovery/{ulid}.json` sidecar before Phase B and delete it after Phase C. The next `Omnigraph::open` (gated on `OpenMode::ReadWrite`) runs the sweep in `db/manifest/recovery.rs`: classify, decide all-or-nothing per sidecar, roll forward via single `ManifestBatchPublisher::publish` or roll back via `Dataset::restore` followed by a manifest publish of the restored version (so both directions converge to `manifest == HEAD` — no residual drift), and record an audit row in `_graph_commit_recoveries.lance` (queryable via `omnigraph commit list --filter actor=omnigraph:recovery`). The write entry points (`load_as`, `mutate_as`, `apply_schema_as`, `branch_merge_as`) and `refresh` additionally run an in-process roll-forward-only heal (serialized against live writers via the per-table write queues), so a long-lived server converges on its next write without restart; only rollback-eligible sidecars still defer to the next read-write open (a future background reconciler's goal). Engine writes route through a sealed `TableStorage` trait (`db.storage()`) exposing only `stage_*` + `commit_staged` + reads; the inline-commit residuals (`delete_where`, `create_vector_index`) are split onto a separate sealed `InlineCommitResidual` trait reached via `db.storage_inline_residual()` (MR-854), so the default surface cannot couple a write with a HEAD advance — §1 holds by construction. `delete_where` and `create_vector_index` stay inline until upstream Lance ships a public two-phase API ([#6658](https://github.com/lance-format/lance/issues/6658), [#6666](https://github.com/lance-format/lance/issues/6666)); `LoadMode::Overwrite` uses Lance `Overwrite` staged transactions. |
|
||||
| Compaction (`compact_files`) | ✅ | `omnigraph optimize` orchestrates over all node/edge tables, bounded concurrency; **publishes each compacted table's new version to `__manifest`** (so the manifest tracks the Lance HEAD — required for reads to observe compaction and for schema apply / strict writes to pass their HEAD-vs-manifest precondition), under the per-`(table, main)` write queue with `SidecarKind::Optimize` recovery coverage; **refuses on an unrecovered graph** (errors if a `__recovery` sidecar is pending); **skips uncovered HEAD > manifest drift** with `DriftNeedsRepair` instead of interpreting it; **skips blob-bearing tables** (reported via `TableOptimizeStats.skipped`, not silent), gated on `LANCE_SUPPORTS_BLOB_COMPACTION` until the upstream blob-v2 compaction-decode bug is fixed (see [docs/dev/invariants.md](docs/dev/invariants.md) Known Gaps) |
|
||||
| Compaction (`compact_files`) + reindex (`optimize_indices`) | ✅ | `omnigraph optimize` orchestrates over all node/edge tables, bounded concurrency; per table runs `compact_files` **then Lance `optimize_indices`** (folds appended/rewritten fragments back into existing indexes — incremental merge, not retrain) and **publishes the resulting version to `__manifest`** (so the manifest tracks the Lance HEAD — required for reads to observe the work and for schema apply / strict writes to pass their HEAD-vs-manifest precondition), under the per-`(table, main)` write queue with `SidecarKind::Optimize` recovery coverage spanning both ops; **commits even with no compaction work if index coverage is stale**; **refuses on an unrecovered graph**; **skips uncovered HEAD > manifest drift** with `DriftNeedsRepair`; **skips blob-bearing tables** (reported via `TableOptimizeStats.skipped`, not silent; reindex is skipped for them too today), gated on `LANCE_SUPPORTS_BLOB_COMPACTION` until the upstream blob-v2 compaction-decode bug is fixed (see [docs/dev/invariants.md](docs/dev/invariants.md) Known Gaps) |
|
||||
| Repair uncovered drift | — | `omnigraph repair` explicitly classifies uncovered table `HEAD > manifest` drift: verified maintenance drift (`ReserveFragments`/`Rewrite`) can be published with `--confirm`; suspicious or unverifiable drift requires `--force --confirm`. Sidecar-covered crash residuals still recover automatically on open. |
|
||||
| Cleanup (`cleanup_old_versions`) | ✅ | `omnigraph cleanup` with `--keep` / `--older-than` policy |
|
||||
| BTREE / inverted (FTS) / vector indexes | ✅ | `ensure_indices` builds them on every relevant column; idempotent; lazy across branches |
|
||||
| BTREE / inverted (FTS) / vector indexes | ✅ | `@index`/`@key` declares intent; the physical index is derived state that never fails a logical op. Built per column through one chokepoint (`build_indices_on_dataset_for_catalog`, type-dispatched by `node_prop_index_kind`: enum + orderable scalar → BTREE, free-text String → FTS, Vector → vector); idempotent; lazy across branches. **Schema apply builds nothing** (records intent only); `load`/`mutate` build inline but **defer an untrainable Vector column** (no trainable vectors yet) as *pending* rather than aborting. `ensure_indices`/`optimize` is the reconciler that materializes declared-but-missing indexes and restores coverage of appended/rewritten fragments (`optimize_indices`), reporting still-pending columns (see Compaction row). |
|
||||
| `merge_insert` upsert | ✅ | `LoadMode::Merge`, mutation `update`/`insert`/`delete` lowering |
|
||||
| Vector search | ✅ | `nearest()` query op; embedding pipeline (Gemini / OpenAI clients); `@embed` in schema |
|
||||
| Full-text search | ✅ | `search/fuzzy/match_text/bm25` query ops |
|
||||
|
|
@ -257,11 +264,12 @@ omnigraph policy explain --actor act-alice --action change --branch main
|
|||
| Per-query atomic writes | — | In-memory `MutationStaging.pending` accumulator + `stage_*` / `commit_staged` per touched table at end-of-query + publisher CAS via `commit_with_expected` (single manifest commit per `mutate_as` / `load`); D₂ parse-time rule keeps inserts/updates and deletes from mixing |
|
||||
| Three-way row-level merge | — | `OrderedTableCursor` + `StagedTableWriter`, structured `MergeConflictKind` |
|
||||
| Change feeds | — | `diff_between` / `diff_commits` with manifest fast path + ID streaming |
|
||||
| Cedar policy | — | Per-graph actions plus server-scoped actions (see [docs/user/policy.md](docs/user/policy.md) for the current list), branch / target_branch / protected scopes, validate/test/explain CLI. **Engine-wide enforcement** (MR-722): every `_as` writer (`apply_schema_as`, `mutate_as`, `load_as` — the deprecated `ingest_as` shims route through it — `branch_create_as` / `branch_create_from_as`, `branch_delete_as`, `branch_merge_as`) calls `Omnigraph::enforce(action, scope, actor)` — HTTP, CLI, embedded SDK all hit the same gate. |
|
||||
| HTTP server | — | Axum, OpenAPI via utoipa, bearer auth (SHA-256, AWS Secrets Manager option), `authorize_request` at the HTTP boundary (resolves bearer→actor, applies admission control), NDJSON streaming export, **multi-graph mode (v0.6.0+) with cluster routes + read-only `GET /graphs` enumeration + per-graph + server-level Cedar policies. Multi-graph boots from a cluster directory (`--cluster`) or the legacy `omnigraph.yaml`; add/remove graphs via `cluster apply` (or by editing the legacy file) and restarting.** |
|
||||
| CLI with config | — | two-surface config (team `cluster.yaml` dir + per-operator `~/.omnigraph/config.yaml`; legacy `omnigraph.yaml` deprecated per RFC-008), aliases, multi-format output (json/jsonl/csv/kv/table) |
|
||||
| Cedar policy | — | Per-graph actions plus server-scoped actions (see [docs/user/operations/policy.md](docs/user/operations/policy.md) for the current list), branch / target_branch / protected scopes, validate/test/explain CLI. **Engine-wide enforcement** (MR-722): every `_as` writer (`apply_schema_as`, `mutate_as`, `load_as` — the deprecated `ingest_as` shims route through it — `branch_create_as` / `branch_create_from_as`, `branch_delete_as`, `branch_merge_as`) calls `Omnigraph::enforce(action, scope, actor)` — HTTP, CLI, embedded SDK all hit the same gate. |
|
||||
| HTTP server | — | Axum, OpenAPI via utoipa, bearer auth (SHA-256, AWS Secrets Manager option), `authorize_request` at the HTTP boundary (resolves bearer→actor, applies admission control), NDJSON streaming export, **cluster-only boot (RFC-011): always `--cluster <dir | s3://…>`, serving N graphs (N ≥ 1) under multi-graph routes + read-only `GET /graphs` enumeration + per-graph + server-level Cedar policies. Add/remove graphs via `cluster apply` and restart.** |
|
||||
| CLI with config | — | two-surface config (team `cluster.yaml` dir + per-operator `~/.omnigraph/config.yaml`), scope addressing (`--store`/`--server`/`--cluster`/`--profile`/defaults, RFC-011), aliases, multi-format output (json/jsonl/csv/kv/table) |
|
||||
| Audit / actor tracking | — | `_as` write APIs + actor map in commit graph |
|
||||
| Local RustFS bootstrap | — | `scripts/local-rustfs-bootstrap.sh` one-shot S3-backed dev environment |
|
||||
| Local S3 testing | — | run RustFS/MinIO + the `AWS_*` env; see [docs/user/deployment.md](docs/user/deployment.md) → *Testing against S3 locally* |
|
||||
| Agent skill | — | `skills/omnigraph` — operational playbook for driving Omnigraph; install with `npx skills add ModernRelay/omnigraph@omnigraph` |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -282,7 +290,7 @@ Rules:
|
|||
7. **Re-verify before recommending.** If you cite a flag, env var, endpoint, or constant to the user or in code, grep for it in source first. Memory and docs go stale; the code is authoritative.
|
||||
8. **Keep AGENTS.md short.** This file is always loaded into agent context, so every added line has a recurring context-window cost. Prefer pointers and terse invariants here; put detail in `docs/`.
|
||||
9. **Keep AGENTS.md a map, not an encyclopedia.** New deep content goes into `docs/`. Add an entry to "Where to find each topic" instead of pasting prose into this file. The "Always-on rules" section is the exception — it's for invariants that should always be in scope.
|
||||
10. **Re-read on schema/query/IR changes.** Edits to `schema.pest`, `query.pest`, `ir/lower.rs`, `query/typecheck.rs`, or `query/lint.rs` should trigger a re-read of [docs/user/schema-language.md](docs/user/schema-language.md), [docs/user/query-language.md](docs/user/query-language.md), and [docs/dev/execution.md](docs/dev/execution.md) to confirm they still describe reality.
|
||||
10. **Re-read on schema/query/IR changes.** Edits to `schema.pest`, `query.pest`, `ir/lower.rs`, `query/typecheck.rs`, or `query/lint.rs` should trigger a re-read of [docs/user/schema/index.md](docs/user/schema/index.md), [docs/user/queries/index.md](docs/user/queries/index.md), and [docs/dev/execution.md](docs/dev/execution.md) to confirm they still describe reality.
|
||||
11. **Always make smaller commits.** Each commit does one thing, compiles, and passes tests; mechanical refactors land separately from the behavior changes they enable.
|
||||
12. **Test-first for bug fixes.** When fixing an identified bug, write a regression test that reproduces the failure first. Confirm it fails against the current code with the predicted symptom (not an unrelated error). Then land the fix in a separate commit and confirm the test turns green. The test commit lands just before the fix commit so the red → green pair is visible in `git log` and a reviewer can check out the test commit alone and reproduce the failure.
|
||||
13. **Correct by design over symptomatic patches.** When a bug surfaces, identify the root cause and make the fix correct by construction. Don't patch the symptom. If the design admits the bug class, the fix is to close the class, not to add a guard around the latest instance. A symptomatic patch is acceptable only as a stop-gap, with an explicit note in the commit message and a follow-up issue tracking the design fix.
|
||||
|
|
|
|||
1650
Cargo.lock
generated
1650
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
21
Cargo.toml
21
Cargo.toml
|
|
@ -4,6 +4,7 @@ members = [
|
|||
"crates/omnigraph-compiler",
|
||||
"crates/omnigraph",
|
||||
"crates/omnigraph-cli",
|
||||
"crates/omnigraph-api-types",
|
||||
"crates/omnigraph-cluster",
|
||||
"crates/omnigraph-policy",
|
||||
"crates/omnigraph-server",
|
||||
|
|
@ -30,14 +31,14 @@ datafusion-common = "53"
|
|||
datafusion-expr = "53"
|
||||
datafusion-functions-aggregate = "53"
|
||||
|
||||
lance = { version = "6.0.1", default-features = false, features = ["aws"] }
|
||||
lance-datafusion = "6.0.1"
|
||||
lance-file = "6.0.1"
|
||||
lance-index = "6.0.1"
|
||||
lance-linalg = "6.0.1"
|
||||
lance-namespace = "6.0.1"
|
||||
lance-namespace-impls = "6.0.1"
|
||||
lance-table = "6.0.1"
|
||||
lance = { version = "7.0.0", default-features = false, features = ["aws"] }
|
||||
lance-datafusion = "7.0.0"
|
||||
lance-file = "7.0.0"
|
||||
lance-index = "7.0.0"
|
||||
lance-linalg = "7.0.0"
|
||||
lance-namespace = "7.0.0"
|
||||
lance-namespace-impls = "7.0.0"
|
||||
lance-table = "7.0.0"
|
||||
|
||||
ulid = "1"
|
||||
futures = "0.3"
|
||||
|
|
@ -47,7 +48,7 @@ pest = "2"
|
|||
pest_derive = "2"
|
||||
thiserror = "2"
|
||||
tokio = { version = "1", features = ["rt-multi-thread", "macros", "time", "net", "signal", "sync"] }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
clap = { version = "4.6", features = ["derive"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_yaml = "0.9"
|
||||
|
|
@ -63,7 +64,7 @@ base64 = "0.22"
|
|||
ariadne = "0.4"
|
||||
regex = "1"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
object_store = { version = "0.12.5", default-features = false, features = ["aws", "fs"] }
|
||||
object_store = { version = "0.13.2", default-features = false, features = ["aws", "fs"] }
|
||||
fail = "0.5"
|
||||
time = { version = "0.3", features = ["formatting"] }
|
||||
axum = { version = "0.8", features = ["json", "macros"] }
|
||||
|
|
|
|||
163
README.md
163
README.md
|
|
@ -3,15 +3,15 @@
|
|||
[](LICENSE)
|
||||
[](rust-toolchain.toml)
|
||||
[](https://crates.io/crates/omnigraph-cli)
|
||||
[](https://github.com/ModernRelay/omnigraph/actions/workflows/ci.yml)
|
||||
|
||||
**Lakehouse native graph engine built for context assembly**
|
||||
|
||||
Omnigraph acts as operational state & coordination layer for agents
|
||||
Omnigraph acts as the operational state & coordination layer for agents.
|
||||
Hundreds of agents can enrich the graph on parallel isolated branches and changes can be reviewed and merged safely.
|
||||
|
||||
- Git-style versioning & branching
|
||||
- Multimodal retrieval (graph+vector/fts+filters) optimized for context assembly
|
||||
- Object storage native (S3, RustFS)
|
||||
- Runs on the local filesystem or any S3-compatible object store (AWS S3, R2, MinIO, RustFS)
|
||||
- Native blob-as-data support (docs, images, videos, etc)
|
||||
- VPC, On-prem, hybrid deployment
|
||||
- [`Lance`](https://github.com/lance-format/lance) format as open storage layer
|
||||
|
|
@ -51,62 +51,138 @@ brew tap ModernRelay/tap
|
|||
brew install ModernRelay/tap/omnigraph
|
||||
```
|
||||
|
||||
For starter graphs and agent skills to bootstrap and operate Omnigraph, see [`ModernRelay/omnigraph-cookbooks`](https://github.com/ModernRelay/omnigraph-cookbooks).
|
||||
## Quick start
|
||||
|
||||
## One-Command Local RustFS Bootstrap
|
||||
The fastest path is an **embedded, local file-backed graph** — no server, no
|
||||
object store, no Docker:
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/local-rustfs-bootstrap.sh | bash
|
||||
# A schema and one row of data
|
||||
cat > schema.pg <<'PG'
|
||||
node Person {
|
||||
slug: String @key
|
||||
name: String
|
||||
title: String?
|
||||
}
|
||||
PG
|
||||
echo '{"type":"Person","data":{"slug":"alice","name":"Alice","title":"Engineer"}}' > people.jsonl
|
||||
|
||||
# Create → load (--mode is required) → query
|
||||
omnigraph init --schema schema.pg ./graph.omni
|
||||
omnigraph load --data people.jsonl --mode overwrite --store ./graph.omni
|
||||
omnigraph query find_people --store ./graph.omni --params '{"t":"Engineer"}' \
|
||||
-e 'query find_people($t: String) { match { $p: Person { title: $t } } return { $p.name } }'
|
||||
|
||||
# Branch, write in isolation, merge — Git-style across the whole graph
|
||||
omnigraph branch create --from main review/new-hires --store ./graph.omni
|
||||
omnigraph branch merge review/new-hires --into main --store ./graph.omni
|
||||
```
|
||||
|
||||
That bootstrap:
|
||||
**Storage backends** — the same flow runs on any backend; only the graph address changes:
|
||||
|
||||
- starts RustFS on `127.0.0.1:9000`
|
||||
- creates a bucket and S3-backed graph
|
||||
- loads the checked-in context fixture
|
||||
- launches `omnigraph-server` on `127.0.0.1:8080`
|
||||
| Backend | Use it for | Graph address |
|
||||
|---|---|---|
|
||||
| **Embedded** (local filesystem) | dev, demos, single machine — the default | `./graph.omni` |
|
||||
| **Object storage** (AWS S3, R2, GCS-S3) | shared, multi-host, durable | `s3://bucket/graph.omni` (+ the `AWS_*` env) |
|
||||
| **RustFS / MinIO** | rehearse the S3 path locally, no cloud account | `s3://…` against a local endpoint → [deployment guide](docs/user/deployment.md#testing-against-s3-locally) |
|
||||
|
||||
Docker must be installed and running first.
|
||||
`init` takes the address as its positional argument (`omnigraph init --schema schema.pg <address>`); `load`, `query`, and `branch` take it via `--store <address>`.
|
||||
|
||||
The RustFS bootstrap prefers the rolling `edge` binaries and only falls back to
|
||||
source builds when release assets are unavailable.
|
||||
For a **served, multi-graph deployment** (the cluster model), see [Common Commands](#common-commands) below.
|
||||
|
||||
If a previous run left objects under the same graph prefix but did not finish
|
||||
initializing the graph, rerun with `RESET_REPO=1` or set `PREFIX` to a new
|
||||
value.
|
||||
## Set it up with an AI agent
|
||||
|
||||
Omnigraph is built to be set up by coding agents. Paste this into Claude Code,
|
||||
Cursor, or any agent that can read a URL, install a package, and run a shell
|
||||
command — it installs the skill, reads the docs, and walks you through setup for
|
||||
your use case:
|
||||
|
||||
```text
|
||||
Help me set up Omnigraph (a lakehouse-native graph engine for agents).
|
||||
|
||||
1. Install the Omnigraph skill so you operate it correctly:
|
||||
npx skills add ModernRelay/omnigraph@omnigraph
|
||||
2. Read the docs at https://github.com/ModernRelay/omnigraph — start with
|
||||
docs/user/quickstart.md, then docs/user/clusters/index.md.
|
||||
3. Skim the starter graphs and seed data in the cookbooks:
|
||||
https://github.com/ModernRelay/omnigraph-cookbooks
|
||||
4. Ask me what I want to build (company brain, agent memory, dev graph,
|
||||
research / R&D layer, …). Then install the CLI, stand up a first graph for
|
||||
that use case, load a little data, and run a query so I can see it working.
|
||||
```
|
||||
|
||||
Works with any agent that can browse a URL, install a package, and run a shell command.
|
||||
|
||||
## Agent skill & starter graphs
|
||||
|
||||
This repo ships the [**`omnigraph` agent skill**](skills/omnigraph) — the
|
||||
operational playbook (cluster mode, the two config surfaces, schema evolution,
|
||||
query linting, data writes, branches, Cedar policy, and common gotchas) that
|
||||
teaches a coding agent to drive Omnigraph correctly. Install it with:
|
||||
|
||||
```bash
|
||||
npx skills add ModernRelay/omnigraph@omnigraph
|
||||
```
|
||||
|
||||
For ready-to-run graphs with real seed data (company brain, VC operating system,
|
||||
pharma & industry intel),
|
||||
[`ModernRelay/omnigraph-cookbooks`](https://github.com/ModernRelay/omnigraph-cookbooks)
|
||||
is the fastest way to see Omnigraph shaped to a real domain. To rehearse the S3
|
||||
path locally, see [deployment.md → Testing against S3 locally](docs/user/deployment.md#testing-against-s3-locally).
|
||||
|
||||
## Common Commands
|
||||
|
||||
The same URI works for local paths, `s3://…`, or `http://host:port`.
|
||||
A deployment is a **cluster**. A `cluster.yaml` declares its graphs, schemas,
|
||||
stored queries, and policies; you converge it with `cluster apply` and serve it.
|
||||
The server is cluster-first — it boots only from a cluster and serves every graph
|
||||
under `/graphs/{id}/…`. Day-to-day work goes through that server: graphs are
|
||||
addressed with `--server <name|url>` (+ `--graph <id>`), and `query`/`mutate`
|
||||
invoke a stored query from the catalog **by name**.
|
||||
|
||||
```bash
|
||||
omnigraph init --schema ./schema.pg ./graph.omni
|
||||
omnigraph load --data ./data.jsonl ./graph.omni
|
||||
omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./graph.omni
|
||||
omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./graph.omni
|
||||
omnigraph branch create --from main feature-x ./graph.omni
|
||||
omnigraph branch merge feature-x --into main ./graph.omni
|
||||
# 1. Converge the declared cluster, then serve it (--as attributes the apply)
|
||||
omnigraph cluster apply --config ./company-brain --as you
|
||||
omnigraph-server --cluster ./company-brain --bind 0.0.0.0:8080
|
||||
# or config-free from object storage — the bucket IS the deployment:
|
||||
# omnigraph-server --cluster s3://my-bucket/company-brain --bind 0.0.0.0:8080
|
||||
|
||||
# 2. Work against the served graph — stored queries invoked by name
|
||||
omnigraph query find_people --server prod --graph knowledge --params '{"q":"AI safety"}'
|
||||
omnigraph mutate add_person --server prod --graph knowledge --params '{"name":"Mina"}'
|
||||
omnigraph load --data ./data.jsonl --mode merge --server prod --graph knowledge
|
||||
|
||||
# 3. Branch and merge, Git-style across the whole graph
|
||||
omnigraph branch create --from main review/2026-06 --server prod --graph knowledge
|
||||
omnigraph branch merge review/2026-06 --into main --server prod --graph knowledge
|
||||
```
|
||||
|
||||
See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, data loading, commits, and policy commands.
|
||||
Set a default scope (or a `--profile`) in `~/.omnigraph/config.yaml` — operator
|
||||
identity, named servers/clusters, credentials — and the `--server`/`--graph`
|
||||
flags drop away (`omnigraph query find_people --params …`).
|
||||
|
||||
**Local / ad-hoc.** For quick iteration on a standalone graph (no cluster, no
|
||||
server), address storage directly with `--store` (or a positional `file://` /
|
||||
`s3://` URI) and run ad-hoc `.gq` with `--query` (the positional then selects
|
||||
which query in the file):
|
||||
|
||||
```bash
|
||||
omnigraph init --schema ./schema.pg ./graph.omni
|
||||
omnigraph load --data ./data.jsonl --mode merge --store ./graph.omni
|
||||
omnigraph query --query ./queries.gq get_person --params '{"name":"Alice"}' --store ./graph.omni
|
||||
```
|
||||
|
||||
See [docs/user/cli/index.md](docs/user/cli/index.md), the
|
||||
[CLI reference](docs/user/cli/reference.md), the
|
||||
[cluster guide](docs/user/clusters/index.md), and the
|
||||
[deployment guide](docs/user/deployment.md) for schema apply, snapshots, commits,
|
||||
profiles, and policy/queries tooling.
|
||||
|
||||
## Clients
|
||||
|
||||
For programmatic access to a running `omnigraph-server`:
|
||||
|
||||
- **TypeScript SDK** — [`@modernrelay/omnigraph`](https://www.npmjs.com/package/@modernrelay/omnigraph) ([source](https://github.com/ModernRelay/omnigraph-ts/tree/main/packages/sdk)). Instance-per-client, typed errors, camelCase types, async-iterator streaming export.
|
||||
|
||||
```bash
|
||||
npm install @modernrelay/omnigraph
|
||||
```
|
||||
|
||||
- **Model Context Protocol server** — [`@modernrelay/omnigraph-mcp`](https://www.npmjs.com/package/@modernrelay/omnigraph-mcp) ([source](https://github.com/ModernRelay/omnigraph-ts/tree/main/packages/mcp)). Bridges Omnigraph to LLM hosts (Claude Desktop, Claude Code, …) over stdio. Exposes tools and resources for schema, branches, queries, mutations, ingest, and bundles curated best-practices guidance from the cookbook.
|
||||
|
||||
```bash
|
||||
npm install -g @modernrelay/omnigraph-mcp
|
||||
```
|
||||
|
||||
Both packages are versioned in lockstep with `omnigraph-server` on major.minor: `@modernrelay/omnigraph@X.Y.*` targets `omnigraph-server@X.Y.*`. See [`ModernRelay/omnigraph-ts`](https://github.com/ModernRelay/omnigraph-ts) for the monorepo.
|
||||
- **TypeScript SDK + MCP server** — [`@modernrelay/omnigraph`](https://www.npmjs.com/package/@modernrelay/omnigraph) and [`@modernrelay/omnigraph-mcp`](https://www.npmjs.com/package/@modernrelay/omnigraph-mcp), versioned in lockstep with `omnigraph-server`. Source, docs, and examples: [`ModernRelay/omnigraph-ts`](https://github.com/ModernRelay/omnigraph-ts).
|
||||
- **Python SDK** — coming soon.
|
||||
|
||||
## Docs
|
||||
|
||||
|
|
@ -130,10 +206,13 @@ Notes:
|
|||
|
||||
## Workspace Crates
|
||||
|
||||
- `crates/omnigraph-compiler`: shared schema/query parser, typechecker, catalog, and IR lowering
|
||||
- `crates/omnigraph`: storage/runtime, branching, merge, change detection, and query execution
|
||||
- `crates/omnigraph-cli`: CLI for graph lifecycle (init/load), query/mutate, branch/commit/merge, schema/lint, snapshot/export, policy, and maintenance (optimize/cleanup)
|
||||
- `crates/omnigraph-server`: Axum HTTP server for remote reads, changes, ingest, export, branches, and commits
|
||||
- `crates/omnigraph-compiler`: shared schema/query parser, typechecker, catalog, and IR lowering (zero Lance dependency)
|
||||
- `crates/omnigraph` (package `omnigraph-engine`): storage/runtime, branching, merge, change detection, query execution, and embeddings
|
||||
- `crates/omnigraph-policy`: Cedar policy compilation and enforcement
|
||||
- `crates/omnigraph-api-types`: shared HTTP wire DTOs used by both the server and the CLI
|
||||
- `crates/omnigraph-cluster`: cluster config validation, planning, and apply (the control plane)
|
||||
- `crates/omnigraph-server`: Axum HTTP server — cluster-first, serving N graphs under `/graphs/{id}/…`
|
||||
- `crates/omnigraph-cli`: CLI for graph lifecycle (init/load), query/mutate, branch/commit/merge, schema/lint, snapshot/export, cluster control, policy/queries, profiles, and maintenance (optimize/repair/cleanup)
|
||||
|
||||
## Contributing
|
||||
|
||||
|
|
|
|||
16
crates/omnigraph-api-types/Cargo.toml
Normal file
16
crates/omnigraph-api-types/Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "omnigraph-api-types"
|
||||
version = "0.7.0"
|
||||
edition = "2024"
|
||||
description = "Shared HTTP wire DTOs for Omnigraph — request/response types and engine-result → DTO mappings used by both omnigraph-server and omnigraph-cli (RFC-009). Plain serde/utoipa types; no transport or server internals."
|
||||
license = "MIT"
|
||||
repository = "https://github.com/ModernRelay/omnigraph"
|
||||
homepage = "https://github.com/ModernRelay/omnigraph"
|
||||
documentation = "https://docs.rs/omnigraph-api-types"
|
||||
|
||||
[dependencies]
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.7.0" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.7.0" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
utoipa = { workspace = true }
|
||||
704
crates/omnigraph-api-types/src/lib.rs
Normal file
704
crates/omnigraph-api-types/src/lib.rs
Normal file
|
|
@ -0,0 +1,704 @@
|
|||
//! Shared HTTP wire DTOs (RFC-009 Phase 2) — moved from
|
||||
//! omnigraph-server's api module so server and CLI share one definition
|
||||
//! and one engine-result -> DTO mapping per verb. Plain serde/utoipa
|
||||
//! types; no transport, no server internals.
|
||||
|
||||
use omnigraph::db::{GraphCommit, MergeOutcome, ReadTarget, SchemaApplyResult, Snapshot};
|
||||
use omnigraph::error::{MergeConflict, MergeConflictKind};
|
||||
use omnigraph::loader::{LoadMode, LoadResult};
|
||||
use omnigraph_compiler::SchemaMigrationStep;
|
||||
use omnigraph_compiler::query::ast::Param;
|
||||
use omnigraph_compiler::result::QueryResult;
|
||||
use omnigraph_compiler::types::{PropType, ScalarType};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use utoipa::{IntoParams, ToSchema};
|
||||
|
||||
/// Shadow enum for documenting [`LoadMode`] in the OpenAPI schema.
|
||||
#[derive(ToSchema)]
|
||||
#[schema(as = LoadMode)]
|
||||
#[allow(dead_code)]
|
||||
enum LoadModeSchema {
|
||||
/// Overwrite existing data.
|
||||
#[schema(rename = "overwrite")]
|
||||
Overwrite,
|
||||
/// Append to existing data.
|
||||
#[schema(rename = "append")]
|
||||
Append,
|
||||
/// Merge by id key (upsert).
|
||||
#[schema(rename = "merge")]
|
||||
Merge,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SnapshotTableOutput {
|
||||
pub table_key: String,
|
||||
pub table_path: String,
|
||||
pub table_version: u64,
|
||||
pub table_branch: Option<String>,
|
||||
pub row_count: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SnapshotOutput {
|
||||
pub branch: String,
|
||||
pub manifest_version: u64,
|
||||
pub tables: Vec<SnapshotTableOutput>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchCreateRequest {
|
||||
/// Parent branch to fork from. Defaults to `main`.
|
||||
pub from: Option<String>,
|
||||
/// Name of the new branch. Must not already exist.
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchCreateOutput {
|
||||
pub uri: String,
|
||||
pub from: String,
|
||||
pub name: String,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchListOutput {
|
||||
pub branches: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchDeleteOutput {
|
||||
pub uri: String,
|
||||
pub name: String,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchMergeRequest {
|
||||
/// Source branch whose commits will be merged.
|
||||
pub source: String,
|
||||
/// Target branch that will receive the merge. Defaults to `main`.
|
||||
pub target: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum BranchMergeOutcome {
|
||||
AlreadyUpToDate,
|
||||
FastForward,
|
||||
Merged,
|
||||
}
|
||||
|
||||
impl From<MergeOutcome> for BranchMergeOutcome {
|
||||
fn from(value: MergeOutcome) -> Self {
|
||||
match value {
|
||||
MergeOutcome::AlreadyUpToDate => Self::AlreadyUpToDate,
|
||||
MergeOutcome::FastForward => Self::FastForward,
|
||||
MergeOutcome::Merged => Self::Merged,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BranchMergeOutcome {
|
||||
pub fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::AlreadyUpToDate => "already_up_to_date",
|
||||
Self::FastForward => "fast_forward",
|
||||
Self::Merged => "merged",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchMergeOutput {
|
||||
pub source: String,
|
||||
pub target: String,
|
||||
pub outcome: BranchMergeOutcome,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum MergeConflictKindOutput {
|
||||
DivergentInsert,
|
||||
DivergentUpdate,
|
||||
DeleteVsUpdate,
|
||||
OrphanEdge,
|
||||
UniqueViolation,
|
||||
CardinalityViolation,
|
||||
ValueConstraintViolation,
|
||||
}
|
||||
|
||||
impl MergeConflictKindOutput {
|
||||
pub fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::DivergentInsert => "divergent_insert",
|
||||
Self::DivergentUpdate => "divergent_update",
|
||||
Self::DeleteVsUpdate => "delete_vs_update",
|
||||
Self::OrphanEdge => "orphan_edge",
|
||||
Self::UniqueViolation => "unique_violation",
|
||||
Self::CardinalityViolation => "cardinality_violation",
|
||||
Self::ValueConstraintViolation => "value_constraint_violation",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<MergeConflictKind> for MergeConflictKindOutput {
|
||||
fn from(value: MergeConflictKind) -> Self {
|
||||
match value {
|
||||
MergeConflictKind::DivergentInsert => Self::DivergentInsert,
|
||||
MergeConflictKind::DivergentUpdate => Self::DivergentUpdate,
|
||||
MergeConflictKind::DeleteVsUpdate => Self::DeleteVsUpdate,
|
||||
MergeConflictKind::OrphanEdge => Self::OrphanEdge,
|
||||
MergeConflictKind::UniqueViolation => Self::UniqueViolation,
|
||||
MergeConflictKind::CardinalityViolation => Self::CardinalityViolation,
|
||||
MergeConflictKind::ValueConstraintViolation => Self::ValueConstraintViolation,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct MergeConflictOutput {
|
||||
pub table_key: String,
|
||||
pub row_id: Option<String>,
|
||||
pub kind: MergeConflictKindOutput,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
impl From<&MergeConflict> for MergeConflictOutput {
|
||||
fn from(value: &MergeConflict) -> Self {
|
||||
Self {
|
||||
table_key: value.table_key.clone(),
|
||||
row_id: value.row_id.clone(),
|
||||
kind: value.kind.into(),
|
||||
message: value.message.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ReadTargetOutput {
|
||||
pub branch: Option<String>,
|
||||
pub snapshot: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ReadOutput {
|
||||
pub query_name: String,
|
||||
pub target: ReadTargetOutput,
|
||||
pub row_count: usize,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub columns: Vec<String>,
|
||||
pub rows: Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ChangeOutput {
|
||||
pub branch: String,
|
||||
pub query_name: String,
|
||||
pub affected_nodes: usize,
|
||||
pub affected_edges: usize,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct IngestTableOutput {
|
||||
pub table_key: String,
|
||||
pub rows_loaded: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct IngestOutput {
|
||||
pub uri: String,
|
||||
pub branch: String,
|
||||
/// Base branch a fork was requested from (the request's `from`), echoed
|
||||
/// even when the branch already existed. `null` when `from` was absent.
|
||||
pub base_branch: Option<String>,
|
||||
pub branch_created: bool,
|
||||
#[schema(value_type = LoadModeSchema)]
|
||||
pub mode: LoadMode,
|
||||
pub tables: Vec<IngestTableOutput>,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct CommitOutput {
|
||||
pub graph_commit_id: String,
|
||||
pub manifest_branch: Option<String>,
|
||||
pub manifest_version: u64,
|
||||
pub parent_commit_id: Option<String>,
|
||||
pub merged_parent_commit_id: Option<String>,
|
||||
pub actor_id: Option<String>,
|
||||
/// Commit creation time as Unix epoch microseconds.
|
||||
#[schema(example = 1714000000000000i64)]
|
||||
pub created_at: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct CommitListOutput {
|
||||
pub commits: Vec<CommitOutput>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ReadRequest {
|
||||
/// GQ query source. May declare one or more named queries; pick one with
|
||||
/// `query_name` if there is more than one.
|
||||
#[schema(
|
||||
example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}"
|
||||
)]
|
||||
pub query_source: String,
|
||||
/// Name of the query to run when `query_source` declares multiple. Optional
|
||||
/// when only one query is declared.
|
||||
pub query_name: Option<String>,
|
||||
/// JSON object whose keys match the query's declared parameters.
|
||||
pub params: Option<Value>,
|
||||
/// Branch to read from. Mutually exclusive with `snapshot`. Defaults to `main`.
|
||||
pub branch: Option<String>,
|
||||
/// Snapshot id to read from. Mutually exclusive with `branch`.
|
||||
pub snapshot: Option<String>,
|
||||
}
|
||||
|
||||
// Carries the same five inputs as `ReadRequest`, with `query_source`
// renamed to `query` and `query_name` renamed to `name`.
/// Inline read-query request for `POST /query`.
|
||||
///
|
||||
/// Friendlier-named alternative to [`ReadRequest`] for ad-hoc reads and
|
||||
/// AI-agent integration. Mutations are rejected with 400 — use `POST
|
||||
/// /mutate` (or its deprecated alias `POST /change`) for write queries.
|
||||
/// Field names are deliberately short (`query`, `name`) to match the GQ
|
||||
/// keyword and the CLI `-e` flag.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct QueryRequest {
|
||||
/// GQ read-query source. May declare one or more named queries; pick one
|
||||
/// with `name` when more than one is declared. Mutations
|
||||
/// (`insert`/`update`/`delete`) get 400 — use `POST /mutate` (or its
|
||||
/// deprecated alias `POST /change`) instead.
|
||||
#[schema(example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}")]
|
||||
pub query: String,
|
||||
/// Name of the query to run when `query` declares multiple. Optional when
|
||||
/// only one query is declared.
|
||||
pub name: Option<String>,
|
||||
/// JSON object whose keys match the query's declared parameters.
|
||||
pub params: Option<Value>,
|
||||
/// Branch to read from. Mutually exclusive with `snapshot`. Defaults to `main`.
|
||||
pub branch: Option<String>,
|
||||
/// Snapshot id to read from. Mutually exclusive with `branch`.
|
||||
pub snapshot: Option<String>,
|
||||
}
|
||||
|
||||
// Mutation request body. The serde `alias` attributes only widen what is
// accepted on deserialize (`query_source`, `query_name`); serialization
// always emits the current names `query` / `name`.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ChangeRequest {
|
||||
/// GQ mutation source containing `insert`, `update`, or `delete` statements.
|
||||
/// May declare multiple named mutations; pick one with `name`.
|
||||
///
|
||||
/// Accepts the legacy field name `query_source` as a deserialization alias.
|
||||
#[schema(
|
||||
example = "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}"
|
||||
)]
|
||||
#[serde(alias = "query_source")]
|
||||
pub query: String,
|
||||
/// Name of the mutation to run when `query` declares multiple.
|
||||
///
|
||||
/// Accepts the legacy field name `query_name` as a deserialization alias.
|
||||
#[serde(default, alias = "query_name")]
|
||||
pub name: Option<String>,
|
||||
/// JSON object whose keys match the mutation's declared parameters.
|
||||
#[serde(default)]
|
||||
pub params: Option<Value>,
|
||||
/// Target branch. Defaults to `main`.
|
||||
#[serde(default)]
|
||||
pub branch: Option<String>,
|
||||
}
|
||||
|
||||
// Every field is an `Option` marked `#[serde(default)]` and the type derives
// `Default`, so an empty JSON object deserializes to the all-`None` value.
/// Body for `POST /queries/{name}` — invokes the server-side stored query
|
||||
/// named in the path. The query source and name come from the registry,
|
||||
/// never the body; only the runtime inputs are supplied here.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
pub struct InvokeStoredQueryRequest {
|
||||
/// JSON object whose keys match the stored query's declared parameters.
|
||||
#[serde(default)]
|
||||
pub params: Option<Value>,
|
||||
/// Branch to run against. Defaults to `main`; for a stored mutation the
|
||||
/// write targets this branch.
|
||||
#[serde(default)]
|
||||
pub branch: Option<String>,
|
||||
/// Snapshot id to read from (read queries only — rejected for a stored
|
||||
/// mutation). Mutually exclusive with `branch`.
|
||||
#[serde(default)]
|
||||
pub snapshot: Option<String>,
|
||||
/// The kind the caller expects (RFC-011 Decision 3): `Some(false)` for
|
||||
/// `omnigraph query <name>`, `Some(true)` for `omnigraph mutate <name>`.
|
||||
/// When set and it disagrees with the stored query's actual kind, the
|
||||
/// server rejects the call (400) so the verb asserts the kind. `None`
|
||||
/// (the default) skips the check — preserving older clients and aliases.
|
||||
#[serde(default)]
|
||||
pub expect_mutation: Option<bool>,
|
||||
}
|
||||
|
||||
// Serialize-only: unlike the neighbouring request/response types, this enum
// derives neither `Deserialize` nor `Clone`.
/// Response for `POST /queries/{name}`: the read envelope for a stored
|
||||
/// read, or the mutation envelope for a stored mutation. Serialized
|
||||
/// **untagged**, so the wire shape is exactly [`ReadOutput`] or
|
||||
/// [`ChangeOutput`] — classification follows the stored query, not a
|
||||
/// wrapper field.
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
#[serde(untagged)]
|
||||
pub enum InvokeStoredQueryResponse {
|
||||
Read(ReadOutput),
|
||||
|
||||
Change(ChangeOutput),
|
||||
}
|
||||
|
||||
/// The kind of a stored-query parameter, decomposed so a client (e.g. an
|
||||
/// MCP server) can build a typed input schema with a closed `match` and
|
||||
/// never re-parse omnigraph's type spelling. `bigint`/`date`/`datetime`/
|
||||
/// `blob` are carried as JSON strings on the wire: a 64-bit integer past
|
||||
/// 2^53 loses precision as a JSON number, and Date/DateTime are ISO
|
||||
/// strings, Blob a blob-URI string.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ParamKind {
|
||||
String,
|
||||
Bool,
|
||||
Int,
|
||||
// `rename_all = "snake_case"` alone would spell this `big_int`; the
// explicit rename pins the single-word wire name.
#[serde(rename = "bigint")]
|
||||
BigInt,
|
||||
Float,
|
||||
Date,
|
||||
// Same reason as `bigint`: avoids the derived spelling `date_time`.
#[serde(rename = "datetime")]
|
||||
DateTime,
|
||||
Blob,
|
||||
Vector,
|
||||
List,
|
||||
}
|
||||
|
||||
// Produced by `param_descriptor`. `item_kind` and `vector_dim` are left out
// of the serialized JSON when `None`, so a plain scalar parameter serializes
// as just `name`, `kind` and `nullable`.
/// One declared parameter of a stored query, projected for the catalog.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ParamDescriptor {
|
||||
pub name: String,
|
||||
pub kind: ParamKind,
|
||||
/// Element kind when `kind == list` (always a scalar — the grammar
|
||||
/// forbids lists of vectors or nested lists).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub item_kind: Option<ParamKind>,
|
||||
/// Dimension when `kind == vector`.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub vector_dim: Option<u32>,
|
||||
/// `false` → the caller must supply it; `true` → optional.
|
||||
pub nullable: bool,
|
||||
}
|
||||
|
||||
/// One entry in the stored-query catalog (`GET /queries`).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct QueryCatalogEntry {
|
||||
/// Registry key / invoke path segment (`POST /queries/{name}`).
|
||||
pub name: String,
|
||||
/// MCP tool id (the `tool_name` override, else `name`).
|
||||
pub tool_name: String,
|
||||
// Omitted from the serialized JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub description: Option<String>,
|
||||
// Omitted from the serialized JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub instruction: Option<String>,
|
||||
/// `true` for a stored mutation → an MCP read-only hint of `false`.
|
||||
pub mutation: bool,
|
||||
// Always serialized, even when the query declares no parameters.
pub params: Vec<ParamDescriptor>,
|
||||
}
|
||||
|
||||
/// Response for `GET /queries`: the `mcp.expose` subset of a graph's
|
||||
/// stored-query registry, each with typed parameters.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct QueriesCatalogOutput {
|
||||
// Single-field envelope: the entries live under the `queries` key.
pub queries: Vec<QueryCatalogEntry>,
|
||||
}
|
||||
|
||||
/// Total map from a resolved scalar to its catalog kind. Exhaustive on
|
||||
/// purpose: a new `ScalarType` is a compile error here until catalogued.
|
||||
fn scalar_kind(scalar: ScalarType) -> ParamKind {
|
||||
match scalar {
|
||||
ScalarType::String => ParamKind::String,
|
||||
ScalarType::Bool => ParamKind::Bool,
|
||||
ScalarType::I32 | ScalarType::U32 => ParamKind::Int,
|
||||
ScalarType::I64 | ScalarType::U64 => ParamKind::BigInt,
|
||||
ScalarType::F32 | ScalarType::F64 => ParamKind::Float,
|
||||
ScalarType::Date => ParamKind::Date,
|
||||
ScalarType::DateTime => ParamKind::DateTime,
|
||||
ScalarType::Blob => ParamKind::Blob,
|
||||
ScalarType::Vector(_) => ParamKind::Vector,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn param_descriptor(param: &Param) -> ParamDescriptor {
|
||||
match PropType::from_param_type_name(¶m.type_name, param.nullable) {
|
||||
Some(pt) if pt.list => ParamDescriptor {
|
||||
name: param.name.clone(),
|
||||
kind: ParamKind::List,
|
||||
item_kind: Some(scalar_kind(pt.scalar)),
|
||||
vector_dim: None,
|
||||
nullable: param.nullable,
|
||||
},
|
||||
Some(pt) => {
|
||||
let (kind, vector_dim) = match pt.scalar {
|
||||
ScalarType::Vector(dim) => (ParamKind::Vector, Some(dim)),
|
||||
other => (scalar_kind(other), None),
|
||||
};
|
||||
ParamDescriptor {
|
||||
name: param.name.clone(),
|
||||
kind,
|
||||
item_kind: None,
|
||||
vector_dim,
|
||||
nullable: param.nullable,
|
||||
}
|
||||
}
|
||||
// Unreachable for a parsed query (every declared param type is
|
||||
// grammatical); fall back to an opaque string so the field is still
|
||||
// usable rather than dropped.
|
||||
None => ParamDescriptor {
|
||||
name: param.name.clone(),
|
||||
kind: ParamKind::String,
|
||||
item_kind: None,
|
||||
vector_dim: None,
|
||||
nullable: param.nullable,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Request body for a schema apply. `schema_source` has no serde default and
// is therefore required; `allow_data_loss` may be omitted. The derived
// `Default` value has an empty `schema_source` and `allow_data_loss = false`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SchemaApplyRequest {
|
||||
/// Project schema in `.pg` source form. The diff against the current
|
||||
/// schema produces the migration steps that will be applied.
|
||||
#[schema(
|
||||
example = "node Person {\n name: String @key\n age: I32?\n}\n\nedge Knows: Person -> Person"
|
||||
)]
|
||||
pub schema_source: String,
|
||||
/// When true, promote every `DropMode::Soft` step in the plan to
|
||||
/// `DropMode::Hard`, making the prior column data unreachable
|
||||
/// after the apply. Matches the CLI's `--allow-data-loss` flag.
|
||||
/// Defaults to `false` (drops remain reversible via time travel).
|
||||
#[serde(default)]
|
||||
pub allow_data_loss: bool,
|
||||
}
|
||||
|
||||
// Result envelope built by `schema_apply_output`, which copies `supported`,
// `applied`, `manifest_version` and `steps` from a `SchemaApplyResult` and
// sets `step_count` to `steps.len()`.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SchemaApplyOutput {
|
||||
pub uri: String,
|
||||
pub supported: bool,
|
||||
pub applied: bool,
|
||||
pub step_count: usize,
|
||||
pub manifest_version: u64,
|
||||
// The OpenAPI schema describes this field as `Vec<Value>` instead of
// deriving a schema from `SchemaMigrationStep`.
#[schema(value_type = Vec<Value>)]
|
||||
pub steps: Vec<SchemaMigrationStep>,
|
||||
}
|
||||
|
||||
// Single-field envelope carrying a schema's source as a string.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SchemaOutput {
|
||||
pub schema_source: String,
|
||||
}
|
||||
|
||||
// Ingest request body. `branch`, `from` and `mode` may be omitted (a missing
// `Option` field deserializes as `None`); `data` is required.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct IngestRequest {
|
||||
/// Target branch. Defaults to `main`. Without `from`, the branch must
|
||||
/// already exist — a missing branch is a 404, never an implicit fork.
|
||||
pub branch: Option<String>,
|
||||
/// Parent branch used to create `branch` if it does not exist. Branch
|
||||
/// creation is opt-in by presence of this field; omit it to require an
|
||||
/// existing branch.
|
||||
pub from: Option<String>,
|
||||
/// How existing rows are handled. Defaults to `merge`.
|
||||
// The OpenAPI schema uses `LoadModeSchema` in place of `LoadMode` here.
#[schema(value_type = Option<LoadModeSchema>)]
|
||||
pub mode: Option<LoadMode>,
|
||||
/// NDJSON payload: one record per line, each shaped
|
||||
/// `{"type": "<TypeName>", "data": {...}}`.
|
||||
#[schema(
|
||||
example = "{\"type\": \"Person\", \"data\": {\"name\": \"Alice\", \"age\": 30}}\n{\"type\": \"Person\", \"data\": {\"name\": \"Bob\", \"age\": 25}}"
|
||||
)]
|
||||
pub data: String,
|
||||
}
|
||||
|
||||
// Export request body. Every field may be omitted: `branch` is an `Option`,
// and both filter lists default to empty vectors via `#[serde(default)]`.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ExportRequest {
|
||||
/// Branch to export. Defaults to `main`.
|
||||
pub branch: Option<String>,
|
||||
/// Restrict the export to these node/edge type names. Empty exports all types.
|
||||
#[serde(default)]
|
||||
pub type_names: Vec<String>,
|
||||
/// Restrict the export to these table keys. Empty exports all tables.
|
||||
#[serde(default)]
|
||||
pub table_keys: Vec<String>,
|
||||
}
|
||||
|
||||
// Deserialize-only parameter struct (`IntoParams`) with one optional
// `branch` value.
// NOTE(review): presumably the query string of the snapshot endpoint —
// confirm against the handler that extracts it.
#[derive(Debug, Clone, Deserialize, IntoParams)]
|
||||
pub struct SnapshotQuery {
|
||||
pub branch: Option<String>,
|
||||
}
|
||||
|
||||
// Deserialize-only parameter struct (`IntoParams`) with one optional
// `branch` value.
// NOTE(review): presumably the query string of the commit-list endpoint —
// confirm against the handler that extracts it.
#[derive(Debug, Clone, Deserialize, IntoParams)]
|
||||
pub struct CommitListQuery {
|
||||
pub branch: Option<String>,
|
||||
}
|
||||
|
||||
// Health payload: `status` and `version` are always present;
// `source_version` is omitted from the JSON when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct HealthOutput {
|
||||
pub status: String,
|
||||
pub version: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_version: Option<String>,
|
||||
}
|
||||
|
||||
// Machine-readable error code carried in `ErrorOutput::code`. Variants are
// serialized in snake_case, e.g. `BadRequest` → `bad_request` and
// `TooManyRequests` → `too_many_requests`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ErrorCode {
|
||||
Unauthorized,
|
||||
Forbidden,
|
||||
BadRequest,
|
||||
NotFound,
|
||||
/// 405 Method Not Allowed — the route exists but the active server
|
||||
/// mode doesn't serve this method (e.g. `GET /graphs` in single-graph
|
||||
/// mode). Distinct from 404 so clients can tell "wrong context" from
|
||||
/// "no such resource."
|
||||
MethodNotAllowed,
|
||||
Conflict,
|
||||
/// 429 Too Many Requests — per-actor admission cap exceeded.
|
||||
/// Clients should respect the `Retry-After` header.
|
||||
TooManyRequests,
|
||||
Internal,
|
||||
}
|
||||
|
||||
// Carried in `ErrorOutput::manifest_conflict`.
/// Structured details for a publisher-level OCC failure. Surfaces alongside
|
||||
/// HTTP 409 when a write was rejected because the caller's pre-write view of
|
||||
/// one table's manifest version was stale relative to the current head. The
|
||||
/// expected/actual fields tell the client which table to refresh.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ManifestConflictOutput {
|
||||
pub table_key: String,
|
||||
pub expected: u64,
|
||||
pub actual: u64,
|
||||
}
|
||||
|
||||
// Error response body. Only `error` is always serialized: `code` and
// `manifest_conflict` are omitted when `None`, and `merge_conflicts` is
// omitted when empty.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ErrorOutput {
|
||||
pub error: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub code: Option<ErrorCode>,
|
||||
// `default` lets payloads without this key deserialize to an empty list,
// mirroring the skip-when-empty rule on the serialize side.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub merge_conflicts: Vec<MergeConflictOutput>,
|
||||
/// Set when the conflict is a publisher CAS rejection
|
||||
/// (`ManifestConflictDetails::ExpectedVersionMismatch`). The caller's
|
||||
/// pre-write view of `table_key` was at version `expected` but the
|
||||
/// manifest is now at `actual`. Refresh and retry.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub manifest_conflict: Option<ManifestConflictOutput>,
|
||||
}
|
||||
|
||||
pub fn snapshot_payload(branch: &str, snapshot: &Snapshot) -> SnapshotOutput {
|
||||
let mut entries: Vec<_> = snapshot.entries().cloned().collect();
|
||||
entries.sort_by(|a, b| a.table_key.cmp(&b.table_key));
|
||||
let tables = entries
|
||||
.iter()
|
||||
.map(|entry| SnapshotTableOutput {
|
||||
table_key: entry.table_key.clone(),
|
||||
table_path: entry.table_path.clone(),
|
||||
table_version: entry.table_version,
|
||||
table_branch: entry.table_branch.clone(),
|
||||
row_count: entry.row_count,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
SnapshotOutput {
|
||||
branch: branch.to_string(),
|
||||
manifest_version: snapshot.version(),
|
||||
tables,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn schema_apply_output(uri: &str, result: SchemaApplyResult) -> SchemaApplyOutput {
|
||||
SchemaApplyOutput {
|
||||
uri: uri.to_string(),
|
||||
supported: result.supported,
|
||||
applied: result.applied,
|
||||
step_count: result.steps.len(),
|
||||
manifest_version: result.manifest_version,
|
||||
steps: result.steps,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn commit_output(commit: &GraphCommit) -> CommitOutput {
|
||||
CommitOutput {
|
||||
graph_commit_id: commit.graph_commit_id.clone(),
|
||||
manifest_branch: commit.manifest_branch.clone(),
|
||||
manifest_version: commit.manifest_version,
|
||||
parent_commit_id: commit.parent_commit_id.clone(),
|
||||
merged_parent_commit_id: commit.merged_parent_commit_id.clone(),
|
||||
actor_id: commit.actor_id.clone(),
|
||||
created_at: commit.created_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_output(query_name: String, target: &ReadTarget, result: QueryResult) -> ReadOutput {
|
||||
let columns = result
|
||||
.schema()
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|field| field.name().clone())
|
||||
.collect();
|
||||
ReadOutput {
|
||||
query_name,
|
||||
target: read_target_output(target),
|
||||
row_count: result.num_rows(),
|
||||
columns,
|
||||
rows: result.to_rust_json(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ingest_output(
|
||||
uri: &str,
|
||||
result: &LoadResult,
|
||||
mode: LoadMode,
|
||||
actor_id: Option<String>,
|
||||
) -> IngestOutput {
|
||||
IngestOutput {
|
||||
uri: uri.to_string(),
|
||||
branch: result.branch.clone(),
|
||||
base_branch: result.base_branch.clone(),
|
||||
branch_created: result.branch_created,
|
||||
mode,
|
||||
tables: result
|
||||
.to_ingest_tables()
|
||||
.into_iter()
|
||||
.map(|table| IngestTableOutput {
|
||||
table_key: table.table_key,
|
||||
rows_loaded: table.rows_loaded,
|
||||
})
|
||||
.collect(),
|
||||
actor_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_target_output(target: &ReadTarget) -> ReadTargetOutput {
|
||||
match target {
|
||||
ReadTarget::Branch(branch) => ReadTargetOutput {
|
||||
branch: Some(branch.clone()),
|
||||
snapshot: None,
|
||||
},
|
||||
ReadTarget::Snapshot(snapshot) => ReadTargetOutput {
|
||||
branch: None,
|
||||
snapshot: Some(snapshot.as_str().to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ─── MR-668 — management endpoint shapes ──────────────────────────────────
|
||||
|
||||
// Element type of `GraphListResponse::graphs`.
/// One entry in the response from `GET /graphs`. Cluster operators
|
||||
/// consume this list to discover which graphs the server is currently
|
||||
/// serving. The shape is intentionally minimal — `graph_id` and `uri`
|
||||
/// are the only fields a routing client needs.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct GraphInfo {
|
||||
pub graph_id: String,
|
||||
pub uri: String,
|
||||
}
|
||||
|
||||
/// Response from `GET /graphs`. Lists every graph registered with the
|
||||
/// server in alphabetical order by `graph_id` (sorted server-side so
|
||||
/// clients get deterministic output across requests).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct GraphListResponse {
|
||||
// The ordering described above is established by whoever builds this
// value; nothing in the type itself sorts or enforces it.
pub graphs: Vec<GraphInfo>,
|
||||
}
|
||||
|
|
@ -15,6 +15,7 @@ path = "src/main.rs"
|
|||
[dependencies]
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.7.0" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.7.0" }
|
||||
omnigraph-api-types = { path = "../omnigraph-api-types", version = "0.7.0" }
|
||||
omnigraph-cluster = { path = "../omnigraph-cluster", version = "0.7.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.7.0" }
|
||||
omnigraph-server = { path = "../omnigraph-server", version = "0.7.0" }
|
||||
|
|
|
|||
|
|
@ -9,90 +9,159 @@ pub(crate) const DEFAULT_BEARER_TOKEN_ENV: &str = "OMNIGRAPH_BEARER_TOKEN";
|
|||
#[command(name = "omnigraph")]
|
||||
#[command(about = "Omnigraph graph database CLI")]
|
||||
#[command(version = env!("CARGO_PKG_VERSION"), disable_version_flag = true)]
|
||||
// Subcommands render in declaration order (clap can't print labeled headings
|
||||
// between groups), so this legend names the capability each command needs —
|
||||
// the user-facing vocabulary (RFC-011). `Plane` stays the internal classifier.
|
||||
#[command(after_help = "\
|
||||
COMMANDS BY CAPABILITY:\n \
|
||||
any — run against a graph, served (--server / --profile) or embedded (--store / a \
|
||||
URI): query, mutate, load, branch, snapshot, export, commit, schema show/apply.\n \
|
||||
served — require a server: graphs.\n \
|
||||
direct — direct storage access; reject --server (init, optimize, repair, cleanup, \
|
||||
schema plan, lint).\n \
|
||||
control — manage or inspect a cluster (cluster via --config; policy & queries via \
|
||||
--cluster).\n \
|
||||
local — no explicit graph scope; local config & tooling: alias, embed, login, logout, profile, version.\n\
|
||||
See the 'Command capabilities' section of the CLI reference for which flags apply where.")]
|
||||
pub(crate) struct Cli {
|
||||
/// Actor identity for direct-engine writes (MR-722). Overrides
|
||||
/// `cli.actor` from `omnigraph.yaml`. When the configured policy
|
||||
/// is in effect, Cedar evaluates this actor against the requested
|
||||
/// action and scope; with policy configured but neither this flag
|
||||
/// nor `cli.actor` set, the engine-layer footgun guard fires and
|
||||
/// the write is denied (no silent bypass). Has no effect on remote
|
||||
/// HTTP writes — those resolve their actor server-side from the
|
||||
/// bearer token.
|
||||
/// Actor id for direct-engine writes; overrides `cli.actor`. No effect on
|
||||
/// remote writes (the server resolves the actor from the bearer token).
|
||||
/// With a policy configured but no actor set, the write is denied — see
|
||||
/// docs/user/operations/policy.md.
|
||||
#[arg(long = "as", global = true, value_name = "ACTOR")]
|
||||
pub(crate) as_actor: Option<String>,
|
||||
|
||||
/// Target an operator-defined server by name (RFC-007): resolves to
|
||||
/// its `url` from `servers:` in ~/.omnigraph/config.yaml. Exclusive
|
||||
/// with a positional URI or `--target`.
|
||||
#[arg(long, global = true, value_name = "NAME")]
|
||||
/// Address a server by name (resolves to its `url` from `servers:` in
|
||||
/// ~/.omnigraph/config.yaml) or by a literal `http(s)://` URL. Exclusive
|
||||
/// with a positional URI.
|
||||
#[arg(long, global = true, value_name = "NAME|URL")]
|
||||
pub(crate) server: Option<String>,
|
||||
|
||||
/// Graph id on a multi-graph `--server` (appends `/graphs/<id>` to
|
||||
/// the server url). Requires --server.
|
||||
#[arg(long, global = true, value_name = "GRAPH_ID", requires = "server")]
|
||||
/// Select a graph within a multi-graph scope: on a `--server` it appends
|
||||
/// `/graphs/<id>` to the server url; on a `--cluster` it picks which
|
||||
/// cluster graph to maintain. Rejected on a single-graph address (a
|
||||
/// positional URI / `--store`).
|
||||
#[arg(long, global = true, value_name = "GRAPH_ID")]
|
||||
pub(crate) graph: Option<String>,
|
||||
|
||||
/// Select a named scope bundle (RFC-011) from `profiles:` in
|
||||
/// ~/.omnigraph/config.yaml: fills in this command's omitted addressing
|
||||
/// (server/cluster/store + default graph). Falls back to
|
||||
/// $OMNIGRAPH_PROFILE. Config data, not state — every command resolves
|
||||
/// scope fresh.
|
||||
#[arg(long, global = true, value_name = "NAME")]
|
||||
pub(crate) profile: Option<String>,
|
||||
|
||||
/// Address a single graph's storage directly (RFC-011): a `file://` /
|
||||
/// `s3://` store URI. Explicit, ad-hoc direct access — bypasses any
|
||||
/// server. Exclusive with a positional URI / `--server`.
|
||||
#[arg(long, global = true, value_name = "URI")]
|
||||
pub(crate) store: Option<String>,
|
||||
|
||||
/// Address a cluster-managed graph's storage for maintenance (RFC-011):
|
||||
/// a cluster directory or storage-root URI — named via `clusters:` in
|
||||
/// ~/.omnigraph/config.yaml, or a literal `file://`/`s3://` root. Pair
|
||||
/// with `--graph <id>` to select the graph. Used by optimize / repair /
|
||||
/// cleanup; exclusive with a positional URI / `--store` / `--server`.
|
||||
#[arg(long, global = true, value_name = "DIR|URI")]
|
||||
pub(crate) cluster: Option<String>,
|
||||
|
||||
/// Skip the confirmation prompt for a destructive write (`cleanup`,
|
||||
/// overwrite `load`, `branch delete`) against a non-local scope (RFC-011
|
||||
/// Decision 9). Without it, a non-local destructive write prompts on a TTY
|
||||
/// and refuses (errors) when there is no TTY or `--json` is set.
|
||||
#[arg(long, global = true)]
|
||||
pub(crate) yes: bool,
|
||||
|
||||
/// Suppress the one-line resolved-write-target diagnostic that write
|
||||
/// commands echo to stderr (RFC-011 Decision 9).
|
||||
#[arg(long, global = true)]
|
||||
pub(crate) quiet: bool,
|
||||
|
||||
#[command(subcommand)]
|
||||
pub(crate) command: Command,
|
||||
}
|
||||
|
||||
#[derive(Debug, Subcommand)]
|
||||
pub(crate) enum Command {
|
||||
/// Print the CLI version
|
||||
Version,
|
||||
/// Store a bearer token for a named server in ~/.omnigraph/credentials
|
||||
/// (0600). Token from --token or one line on stdin:
|
||||
/// `echo $TOKEN | omnigraph login prod`. The keyed token applies to
|
||||
/// requests whose URL matches the server's `url` in the operator
|
||||
/// config's `servers:` map.
|
||||
Login {
|
||||
/// Server name (keys the credential; declare its url under
|
||||
/// `servers:` in ~/.omnigraph/config.yaml)
|
||||
name: String,
|
||||
/// The token. Prefer piping via stdin over this flag (shell
|
||||
/// history).
|
||||
// ── Data plane ── run against a graph (embedded or via --server).
|
||||
/// Execute a read query against a branch or snapshot.
|
||||
///
|
||||
/// Canonical read endpoint. The previous name `omnigraph read` is
|
||||
/// kept as a visible alias and prints a one-line deprecation warning
|
||||
/// when used. Pairs with `omnigraph mutate` on the write side.
|
||||
#[command(visible_alias = "read")]
|
||||
Query {
|
||||
/// Query name. With no `--query`/`-e`, the stored query to invoke from
|
||||
/// the catalog (served — addressed via --server/--profile). With
|
||||
/// `--query`/`-e`, selects which query in that ad-hoc source to run.
|
||||
name: Option<String>,
|
||||
/// Ad-hoc query file (a `.gq` you're authoring / break-glass).
|
||||
#[arg(long, conflicts_with = "query_string")]
|
||||
query: Option<PathBuf>,
|
||||
/// Inline ad-hoc GQ source — alternative to `--query <path>`.
|
||||
#[arg(short = 'e', long = "query-string", value_name = "GQ", conflicts_with = "query")]
|
||||
query_string: Option<String>,
|
||||
#[command(flatten)]
|
||||
params: ParamsArgs,
|
||||
#[arg(long, conflicts_with = "snapshot")]
|
||||
branch: Option<String>,
|
||||
#[arg(long, conflicts_with = "branch")]
|
||||
snapshot: Option<String>,
|
||||
#[arg(long, conflicts_with = "json")]
|
||||
format: Option<ReadOutputFormat>,
|
||||
#[arg(long, conflicts_with = "format")]
|
||||
json: bool,
|
||||
},
|
||||
/// Execute a graph mutation query against a branch.
|
||||
///
|
||||
/// Canonical mutation endpoint. The previous name `omnigraph change`
|
||||
/// is kept as a visible alias and prints a one-line deprecation
|
||||
/// warning when used. Pairs with `omnigraph query` on the read side.
|
||||
#[command(visible_alias = "change")]
|
||||
Mutate {
|
||||
/// Query name. With no `--query`/`-e`, the stored mutation to invoke
|
||||
/// from the catalog (served — addressed via --server/--profile). With
|
||||
/// `--query`/`-e`, selects which query in that ad-hoc source to run.
|
||||
name: Option<String>,
|
||||
/// Ad-hoc mutation file (a `.gq` you're authoring / break-glass).
|
||||
#[arg(long, conflicts_with = "query_string")]
|
||||
query: Option<PathBuf>,
|
||||
/// Inline ad-hoc GQ source — alternative to `--query <path>`.
|
||||
#[arg(short = 'e', long = "query-string", value_name = "GQ", conflicts_with = "query")]
|
||||
query_string: Option<String>,
|
||||
#[command(flatten)]
|
||||
params: ParamsArgs,
|
||||
#[arg(long)]
|
||||
token: Option<String>,
|
||||
branch: Option<String>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Legacy-config tooling (RFC-008): split omnigraph.yaml into its
|
||||
/// two destinations.
|
||||
Config {
|
||||
#[command(subcommand)]
|
||||
command: ConfigCommand,
|
||||
},
|
||||
/// Remove a named server's stored credential. Idempotent.
|
||||
Logout {
|
||||
/// Invoke an operator alias (RFC-011 Decision 4).
|
||||
///
|
||||
/// An alias is a personal binding under `aliases:` in
|
||||
/// ~/.omnigraph/config.yaml — name → (server, graph, stored-query name,
|
||||
/// default params). `omnigraph alias <name> [args]` invokes the bound
|
||||
/// stored query on its server. Living in its own namespace, an alias can
|
||||
/// never shadow or be shadowed by a built-in verb. Replaces the removed
|
||||
/// `--alias` flag on `query`/`mutate`.
|
||||
Alias {
|
||||
/// Alias name (a key under `aliases:` in ~/.omnigraph/config.yaml).
|
||||
name: String,
|
||||
#[arg(long)]
|
||||
/// Positional args bound to the alias's declared `args` params, in order.
|
||||
args: Vec<String>,
|
||||
#[command(flatten)]
|
||||
params: ParamsArgs,
|
||||
#[arg(long, conflicts_with = "json")]
|
||||
format: Option<ReadOutputFormat>,
|
||||
#[arg(long, conflicts_with = "format")]
|
||||
json: bool,
|
||||
},
|
||||
/// Generate, clean, or refresh explicit seed embeddings
|
||||
Embed(EmbedArgs),
|
||||
/// Initialize a new graph from a schema
|
||||
Init {
|
||||
#[arg(long)]
|
||||
schema: PathBuf,
|
||||
/// Graph URI (local path or s3://)
|
||||
uri: String,
|
||||
/// Overwrite existing schema artifacts at the URI. Without
|
||||
/// this flag, init refuses to touch a URI that already holds
|
||||
/// `_schema.pg`, `_schema.ir.json`, or `__schema_state.json`
|
||||
/// — closes the re-init footgun (MR-668 follow-up). With the
|
||||
/// flag, the operator opts in to destructive semantics.
|
||||
#[arg(long)]
|
||||
force: bool,
|
||||
},
|
||||
/// Load data into a graph (local or remote)
|
||||
Load {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
data: PathBuf,
|
||||
/// Target branch (defaults to main). Without --from it must exist.
|
||||
#[arg(long)]
|
||||
|
|
@ -109,14 +178,11 @@ pub(crate) enum Command {
|
|||
json: bool,
|
||||
},
|
||||
/// Deprecated alias of `load --from <base>` (defaults: --mode merge, --from main)
|
||||
#[command(hide = true)]
|
||||
Ingest {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
data: PathBuf,
|
||||
#[arg(long)]
|
||||
branch: Option<String>,
|
||||
|
|
@ -132,11 +198,99 @@ pub(crate) enum Command {
|
|||
#[command(subcommand)]
|
||||
command: BranchCommand,
|
||||
},
|
||||
/// Show graph snapshot
|
||||
Snapshot {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
branch: Option<String>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Export a full graph snapshot as JSONL
|
||||
Export {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
branch: Option<String>,
|
||||
#[arg(long, hide = true)]
|
||||
jsonl: bool,
|
||||
#[arg(long = "type")]
|
||||
type_names: Vec<String>,
|
||||
#[arg(long = "table")]
|
||||
table_keys: Vec<String>,
|
||||
},
|
||||
/// Commit history operations
|
||||
Commit {
|
||||
#[command(subcommand)]
|
||||
command: CommitCommand,
|
||||
},
|
||||
/// Schema planning operations
|
||||
Schema {
|
||||
#[command(subcommand)]
|
||||
command: SchemaCommand,
|
||||
},
|
||||
/// Manage graphs on a multi-graph server (MR-668)
|
||||
Graphs {
|
||||
#[command(subcommand)]
|
||||
command: GraphsCommand,
|
||||
},
|
||||
|
||||
// ── Storage / local graph ops ── direct storage or local files; reject --server.
|
||||
/// Initialize a new graph from a schema
|
||||
Init {
|
||||
#[arg(long)]
|
||||
schema: PathBuf,
|
||||
/// Graph URI (local path or s3://)
|
||||
uri: String,
|
||||
/// Overwrite existing schema artifacts at the URI. Without
|
||||
/// this flag, init refuses to touch a URI that already holds
|
||||
/// `_schema.pg`, `_schema.ir.json`, or `__schema_state.json`
|
||||
/// — closes the re-init footgun (MR-668 follow-up). With the
|
||||
/// flag, the operator opts in to destructive semantics.
|
||||
#[arg(long)]
|
||||
force: bool,
|
||||
},
|
||||
/// Compact small Lance fragments in every table of the graph
|
||||
Optimize {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Classify and explicitly repair manifest/head drift
|
||||
Repair {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
/// Publish verified maintenance drift. Without this flag, repair only
|
||||
/// previews what it would do.
|
||||
#[arg(long)]
|
||||
confirm: bool,
|
||||
/// Also publish suspicious or unverifiable drift. Requires
|
||||
/// `--confirm`; use only after operator review.
|
||||
#[arg(long, requires = "confirm")]
|
||||
force: bool,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Remove old Lance versions from every table of the graph (destructive)
|
||||
Cleanup {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
/// Number of recent versions to keep per table. Either `--keep` or
|
||||
/// `--older-than` (or both) must be set.
|
||||
#[arg(long)]
|
||||
keep: Option<u32>,
|
||||
/// Only remove versions older than this duration. Accepts Go-style
|
||||
/// durations: `7d`, `24h`, `90m`. At least one of --keep / --older-than.
|
||||
#[arg(long)]
|
||||
older_than: Option<String>,
|
||||
/// Required to actually run; without it, prints what would be removed
|
||||
#[arg(long)]
|
||||
confirm: bool,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Validate queries against a schema (offline) or repo (repo-backed).
|
||||
///
|
||||
/// Canonical name is `lint` (matches the `omnigraph_compiler::lint`
|
||||
|
|
@ -152,10 +306,6 @@ pub(crate) enum Command {
|
|||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
query: PathBuf,
|
||||
#[arg(long)]
|
||||
schema: Option<PathBuf>,
|
||||
|
|
@ -167,179 +317,63 @@ pub(crate) enum Command {
|
|||
#[command(subcommand)]
|
||||
command: QueriesCommand,
|
||||
},
|
||||
/// Show graph snapshot
|
||||
Snapshot {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
branch: Option<String>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Export a full graph snapshot as JSONL
|
||||
Export {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
branch: Option<String>,
|
||||
#[arg(long, hide = true)]
|
||||
jsonl: bool,
|
||||
#[arg(long = "type")]
|
||||
type_names: Vec<String>,
|
||||
#[arg(long = "table")]
|
||||
table_keys: Vec<String>,
|
||||
},
|
||||
/// Commit history operations
|
||||
Commit {
|
||||
#[command(subcommand)]
|
||||
command: CommitCommand,
|
||||
},
|
||||
/// Execute a read query against a branch or snapshot.
|
||||
///
|
||||
/// Canonical read endpoint. The previous name `omnigraph read` is
|
||||
/// kept as a visible alias and prints a one-line deprecation warning
|
||||
/// when used. Pairs with `omnigraph mutate` on the write side.
|
||||
#[command(visible_alias = "read")]
|
||||
Query {
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(hide = true)]
|
||||
legacy_uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long, conflicts_with_all = ["query", "query_string"])]
|
||||
alias: Option<String>,
|
||||
#[arg(long, conflicts_with_all = ["alias", "query_string"])]
|
||||
query: Option<PathBuf>,
|
||||
/// Inline GQ source — alternative to `--query <path>` and `--alias <name>`.
|
||||
#[arg(short = 'e', long = "query-string", value_name = "GQ", conflicts_with_all = ["query", "alias"])]
|
||||
query_string: Option<String>,
|
||||
#[arg(long)]
|
||||
name: Option<String>,
|
||||
#[command(flatten)]
|
||||
params: ParamsArgs,
|
||||
#[arg(long, conflicts_with = "snapshot")]
|
||||
branch: Option<String>,
|
||||
#[arg(long, conflicts_with = "branch")]
|
||||
snapshot: Option<String>,
|
||||
#[arg(long, conflicts_with = "json")]
|
||||
format: Option<ReadOutputFormat>,
|
||||
#[arg(long, conflicts_with = "format")]
|
||||
json: bool,
|
||||
#[arg()]
|
||||
alias_args: Vec<String>,
|
||||
},
|
||||
/// Execute a graph mutation query against a branch.
|
||||
///
|
||||
/// Canonical mutation endpoint. The previous name `omnigraph change`
|
||||
/// is kept as a visible alias and prints a one-line deprecation
|
||||
/// warning when used. Pairs with `omnigraph query` on the read side.
|
||||
#[command(visible_alias = "change")]
|
||||
Mutate {
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(hide = true)]
|
||||
legacy_uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long, conflicts_with_all = ["query", "query_string"])]
|
||||
alias: Option<String>,
|
||||
#[arg(long, conflicts_with_all = ["alias", "query_string"])]
|
||||
query: Option<PathBuf>,
|
||||
/// Inline GQ source — alternative to `--query <path>` and `--alias <name>`.
|
||||
#[arg(short = 'e', long = "query-string", value_name = "GQ", conflicts_with_all = ["query", "alias"])]
|
||||
query_string: Option<String>,
|
||||
#[arg(long)]
|
||||
name: Option<String>,
|
||||
#[command(flatten)]
|
||||
params: ParamsArgs,
|
||||
#[arg(long)]
|
||||
branch: Option<String>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
#[arg()]
|
||||
alias_args: Vec<String>,
|
||||
},
|
||||
/// Policy administration and diagnostics
|
||||
Policy {
|
||||
#[command(subcommand)]
|
||||
command: PolicyCommand,
|
||||
},
|
||||
/// Compact small Lance fragments in every table of the graph
|
||||
Optimize {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Classify and explicitly repair manifest/head drift
|
||||
Repair {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
/// Publish verified maintenance drift. Without this flag, repair only
|
||||
/// previews what it would do.
|
||||
#[arg(long)]
|
||||
confirm: bool,
|
||||
/// Also publish suspicious or unverifiable drift. Requires
|
||||
/// `--confirm`; use only after operator review.
|
||||
#[arg(long, requires = "confirm")]
|
||||
force: bool,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Remove old Lance versions from every table of the graph (destructive)
|
||||
Cleanup {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
/// Number of recent versions to keep per table. Either `--keep` or
|
||||
/// `--older-than` (or both) must be set.
|
||||
#[arg(long)]
|
||||
keep: Option<u32>,
|
||||
/// Only remove versions older than this duration. Accepts Go-style
|
||||
/// durations: `7d`, `24h`, `90m`. At least one of --keep / --older-than.
|
||||
#[arg(long)]
|
||||
older_than: Option<String>,
|
||||
/// Required to actually run; without it, prints what would be removed
|
||||
#[arg(long)]
|
||||
confirm: bool,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
|
||||
// ── Control plane ── manage a cluster directory (--config <dir>).
|
||||
/// Validate and plan read-only cluster configuration.
|
||||
Cluster {
|
||||
#[command(subcommand)]
|
||||
command: ClusterCommand,
|
||||
},
|
||||
/// Manage graphs on a multi-graph server (MR-668)
|
||||
Graphs {
|
||||
|
||||
/// Policy administration and diagnostics against a cluster's applied bundles
|
||||
Policy {
|
||||
#[command(subcommand)]
|
||||
command: GraphsCommand,
|
||||
command: PolicyCommand,
|
||||
},
|
||||
/// Generate, clean, or refresh explicit seed embeddings
|
||||
Embed(EmbedArgs),
|
||||
/// Store a bearer token for a named server (0600 credentials file). Token
|
||||
/// via --token or piped on stdin; see the CLI reference for token resolution.
|
||||
Login {
|
||||
/// Server name (keys the credential; declare its url under
|
||||
/// `servers:` in ~/.omnigraph/config.yaml)
|
||||
name: String,
|
||||
/// The token. Prefer piping via stdin over this flag (shell
|
||||
/// history).
|
||||
#[arg(long)]
|
||||
token: Option<String>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Remove a named server's stored credential. Idempotent.
|
||||
Logout {
|
||||
name: String,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Inspect the scope profiles in ~/.omnigraph/config.yaml (read-only).
|
||||
Profile {
|
||||
#[command(subcommand)]
|
||||
command: ProfileCommand,
|
||||
},
|
||||
/// Print the CLI version
|
||||
Version,
|
||||
}
|
||||
|
||||
// Subcommands of the profile-inspection command group. Reviewer notes in this
// enum are plain `//` comments on purpose: `///` doc comments on clap-derived
// items are rendered as CLI help text, so editing them changes user output.
#[derive(Debug, Subcommand)]
pub(crate) enum ProfileCommand {
    /// List the profiles defined in ~/.omnigraph/config.yaml.
    List {
        // Boolean `--json` switch, off unless passed.
        // NOTE(review): presumably selects machine-readable output — confirm in the handler.
        #[arg(long)]
        json: bool,
    },
    /// Show a profile's resolved scope. With no name, shows the active
    /// (`$OMNIGRAPH_PROFILE`) profile, else the flat operator defaults.
    Show {
        /// Profile name (optional).
        // Positional argument (no `#[arg(long)]`); `Option` lets it be omitted.
        name: Option<String>,
        // Boolean `--json` switch, off unless passed.
        #[arg(long)]
        json: bool,
    },
}
|
||||
|
||||
|
|
@ -439,10 +473,6 @@ pub(crate) enum GraphsCommand {
|
|||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
}
|
||||
|
|
@ -455,10 +485,6 @@ pub(crate) enum BranchCommand {
|
|||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
from: Option<String>,
|
||||
name: String,
|
||||
#[arg(long)]
|
||||
|
|
@ -470,10 +496,6 @@ pub(crate) enum BranchCommand {
|
|||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Delete a branch
|
||||
|
|
@ -481,10 +503,6 @@ pub(crate) enum BranchCommand {
|
|||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
name: String,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
|
|
@ -494,10 +512,6 @@ pub(crate) enum BranchCommand {
|
|||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
source: String,
|
||||
#[arg(long)]
|
||||
into: Option<String>,
|
||||
|
|
@ -513,10 +527,6 @@ pub(crate) enum SchemaCommand {
|
|||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
schema: PathBuf,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
|
|
@ -531,10 +541,6 @@ pub(crate) enum SchemaCommand {
|
|||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
schema: PathBuf,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
|
|
@ -556,10 +562,6 @@ pub(crate) enum SchemaCommand {
|
|||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
}
|
||||
|
|
@ -572,10 +574,6 @@ pub(crate) enum CommitCommand {
|
|||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
branch: Option<String>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
|
|
@ -585,10 +583,6 @@ pub(crate) enum CommitCommand {
|
|||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
commit_id: String,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
|
|
@ -597,20 +591,24 @@ pub(crate) enum CommitCommand {
|
|||
|
||||
#[derive(Debug, Subcommand)]
|
||||
pub(crate) enum PolicyCommand {
|
||||
/// Validate policy YAML and compiled Cedar policy state
|
||||
Validate {
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
},
|
||||
/// Run declarative policy tests from policy.tests.yaml
|
||||
/// Compile and validate the Cedar policy bundle(s) applied in a cluster.
|
||||
///
|
||||
/// Sources the bundle(s) from the cluster's applied policies
|
||||
/// (`--cluster <dir>`); pass the global `--graph <id>` to pick one
|
||||
/// graph's bundle when several apply.
|
||||
Validate {},
|
||||
/// Run declarative policy tests against a cluster's applied bundle.
|
||||
///
|
||||
/// The cluster model has no per-bundle tests file, so the cases are
|
||||
/// supplied explicitly with `--tests <file>` and checked against the
|
||||
/// bundle selected by `--cluster` (+ optional `--graph`).
|
||||
Test {
|
||||
/// Path to a policy.tests.yaml file.
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
tests: PathBuf,
|
||||
},
|
||||
/// Explain one policy decision locally
|
||||
/// Explain one policy decision against a cluster's applied bundle.
|
||||
Explain {
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
actor: String,
|
||||
#[arg(long)]
|
||||
|
|
@ -624,28 +622,19 @@ pub(crate) enum PolicyCommand {
|
|||
|
||||
#[derive(Debug, Subcommand)]
|
||||
pub(crate) enum QueriesCommand {
|
||||
/// Type-check the stored-query registry against the live schema.
|
||||
/// Type-check a cluster's stored-query registry against its schemas.
|
||||
///
|
||||
/// Distinct from `omnigraph lint` (which lints one `.gq` file):
|
||||
/// this validates the whole `queries:` registry — opening the graph
|
||||
/// to read its schema and confirming every stored query still
|
||||
/// type-checks. Exits non-zero on any breakage.
|
||||
/// Distinct from `omnigraph lint` (which lints one `.gq` file): this
|
||||
/// validates the whole `queries:` registry of a cluster (`--cluster
|
||||
/// <dir>`, optional `--graph <id>`) by reading each graph's applied
|
||||
/// schema and confirming every stored query still type-checks. Exits
|
||||
/// non-zero on any breakage.
|
||||
Validate {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// List the registered stored queries (name, MCP exposure, params).
|
||||
/// List a cluster's registered stored queries (name, params).
|
||||
List {
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
|
|
@ -676,7 +665,6 @@ impl From<CliLoadMode> for LoadMode {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CliLoadMode {
|
||||
pub(crate) fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
|
|
@ -686,21 +674,3 @@ impl CliLoadMode {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Subcommands for configuration maintenance; `Migrate` is the only verb.
// Reviewer notes are plain `//` comments because `///` doc comments on
// clap-derived items are rendered as CLI help text.
#[derive(Debug, Subcommand)]
pub(crate) enum ConfigCommand {
    /// Propose (and with --write, apply) the RFC-008 split of a legacy
    /// omnigraph.yaml: team half -> a ready-to-review cluster.yaml,
    /// personal half -> ~/.omnigraph/config.yaml (key-level merge,
    /// existing entries always win). Touches nothing without --write.
    Migrate {
        /// Path to the legacy omnigraph.yaml (default: ./omnigraph.yaml)
        // No clap default is declared on this field, so the documented
        // `./omnigraph.yaml` fallback has to be applied by the consumer of `None`.
        #[arg(long)]
        config: Option<PathBuf>,
        /// Apply the split instead of only printing it
        #[arg(long)]
        write: bool,
        // Boolean `--json` switch, off unless passed.
        // NOTE(review): presumably selects machine-readable output — confirm in the handler.
        #[arg(long)]
        json: bool,
    },
}
|
||||
|
|
|
|||
821
crates/omnigraph-cli/src/client.rs
Normal file
821
crates/omnigraph-cli/src/client.rs
Normal file
|
|
@ -0,0 +1,821 @@
|
|||
//! `GraphClient` — the one place the embedded-vs-remote split lives
|
||||
//! (RFC-009 Phase 3). A CLI command body calls a verb method; the
|
||||
//! enum routes to the engine (local URI) or HTTP (remote URI). The
|
||||
//! 15 per-command `if graph.is_remote { … } else { … }` forks collapse
|
||||
//! into two arms here.
|
||||
//!
|
||||
//! Phase 3a put the factory + the uniform read verbs in place. Phase 3b
//! adds the data-plane writes (`load`/`ingest`/`mutate`/`branch_*`/
//! `apply_schema`) and `query`. The wrinkle 3a deferred: writes carry a
//! resolved actor, while reads/`query` do not. So the `Embedded` variant
//! holds an optional `actor` and a second factory (`resolve_with_policy`)
//! fills it; `resolve()` leaves it empty. Direct-store access carries no
//! Cedar policy (RFC-011), so both the read and the write paths open the
//! local engine bare (`open_embedded`); the actor is attributed on writes
//! through the `_as` engine APIs. Export + graphs-list land in 3c.
//! Behavior is unchanged per verb — the Phase-1 parity matrix is the
//! referee and stays textually unchanged.
|
||||
//!
|
||||
//! Enum, not a trait (RFC sketch said "trait"): only two variants ever,
|
||||
//! and inherent async methods sidestep `async_trait` boxing plus the
|
||||
//! `apply_schema` catalog-validator closure that is not object-safe.
|
||||
//! Same one-body-two-impls collapse, less ceremony.
|
||||
|
||||
use std::io::Write;
|
||||
|
||||
use color_eyre::Result;
|
||||
use color_eyre::eyre::bail;
|
||||
use omnigraph::db::{Omnigraph, ReadTarget};
|
||||
use omnigraph_api_types::{
|
||||
BranchCreateOutput, BranchCreateRequest, BranchDeleteOutput, BranchListOutput,
|
||||
BranchMergeOutput, BranchMergeRequest, ChangeOutput, CommitListOutput, CommitOutput,
|
||||
ErrorOutput, ExportRequest, GraphListResponse, IngestOutput, IngestRequest,
|
||||
InvokeStoredQueryRequest, ReadOutput,
|
||||
ReadRequest, SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotOutput, commit_output,
|
||||
ingest_output, read_output, schema_apply_output, snapshot_payload,
|
||||
};
|
||||
use omnigraph_compiler::catalog::Catalog;
|
||||
use reqwest::Method;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::cli::CliLoadMode;
|
||||
use crate::helpers::{
|
||||
apply_bearer_token, apply_server_flag, build_http_client, is_remote_uri,
|
||||
legacy_change_request_body, query_params_from_json,
|
||||
remote_json, remote_url, resolve_cli_actor, resolve_cli_graph, resolve_remote_bearer_token,
|
||||
resolve_server_flag, select_named_query,
|
||||
};
|
||||
use crate::output::{LoadOutput, load_output_from_result, load_output_from_tables};
|
||||
|
||||
/// Handle to one graph: either the local engine opened in-process or a
/// remote server reached over HTTP. Each verb method matches on the
/// variant to pick its transport.
pub(crate) enum GraphClient {
    /// Local engine at `uri`. Reads (`resolve()`) leave `actor` empty;
    /// writes (`resolve_with_policy()`) attribute the resolved actor.
    /// Direct-store access carries no Cedar policy (RFC-011: policy lives
    /// in the cluster/server, not in per-operator addressing).
    Embedded {
        /// Location handed to `Omnigraph::open` by the embedded arms.
        uri: String,
        /// Actor forwarded to the `_as` engine write APIs; `resolve()`
        /// always stores `None` here.
        actor: Option<String>,
    },
    /// Remote HTTP server. The actor is resolved server-side from the
    /// token; the client never sets identity.
    Remote {
        /// HTTP client every request of this handle is sent through.
        http: reqwest::Client,
        /// Server base URL; `remote_url` appends the route segments to it.
        base_url: String,
        /// Bearer token passed along with each request, when one was resolved.
        token: Option<String>,
    },
}
|
||||
|
||||
/// RFC-011 Decision 7: a server scope that selects no graph (no `--graph`, no
|
||||
/// `default_graph`) must not silently fall through to the bare server URL when
|
||||
/// the server is multi-graph. Best-effort probe `GET /graphs`: a populated list
|
||||
/// forces `--graph` (listing the candidates); a single-graph/flat server (405),
|
||||
/// a policy-gated `/graphs`, or an unreachable server all proceed — the bare URL
|
||||
/// is then correct, or the real request surfaces the failure. Only fires on the
|
||||
/// no-graph path, so a `--graph`/`default_graph` happy path does no extra I/O.
|
||||
async fn require_graph_for_multi_graph_server(
|
||||
scope: &crate::scope::ResolvedScope,
|
||||
) -> Result<()> {
|
||||
let (Some(server), None) = (scope.server.as_deref(), scope.graph.as_deref()) else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(base) = resolve_server_flag(Some(server), None)? else {
|
||||
return Ok(());
|
||||
};
|
||||
let token = resolve_remote_bearer_token(Some(&base))?;
|
||||
let probe = GraphClient::Remote {
|
||||
http: build_http_client()?,
|
||||
base_url: base,
|
||||
token,
|
||||
};
|
||||
if let Ok(resp) = probe.list_graphs().await {
|
||||
if !resp.graphs.is_empty() {
|
||||
let ids: Vec<&str> = resp.graphs.iter().map(|g| g.graph_id.as_str()).collect();
|
||||
bail!(
|
||||
"server scope '{server}' has {} {}: [{}]; pass --graph <id> to select one \
|
||||
(or set `default_graph` in your operator config)",
|
||||
ids.len(),
|
||||
if ids.len() == 1 { "graph" } else { "graphs" },
|
||||
ids.join(", ")
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A remote graph must be addressed with `--server` (RFC-011): a positional or
|
||||
/// `--uri` `http(s)://` URL no longer auto-dispatches to a server. A remote URL
|
||||
/// produced by a server scope (`via_server`) is fine.
|
||||
fn reject_positional_remote(via_server: bool, uri: &str) -> Result<()> {
|
||||
if !via_server && is_remote_uri(uri) {
|
||||
bail!(
|
||||
"a remote graph must be addressed with `--server <url>` — a positional \
|
||||
(or `--uri`) http(s):// URL no longer dispatches to a server"
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl GraphClient {
|
||||
/// Resolve the addressing (positional URI / `--target` / `--server`)
|
||||
/// and credential once, then pick the variant by URI scheme — the
|
||||
/// single branch point that replaces every per-command `is_remote`
|
||||
/// fork. Mirrors the read verbs' current preamble (`resolve_uri`
|
||||
/// path, not the policy-bearing `resolve_cli_graph`). Used by reads
|
||||
/// and `query` (which opens without policy, like the reads).
|
||||
pub(crate) async fn resolve(
|
||||
server: Option<&str>,
|
||||
graph: Option<&str>,
|
||||
uri: Option<String>,
|
||||
profile: Option<&str>,
|
||||
store: Option<&str>,
|
||||
) -> Result<Self> {
|
||||
// RFC-011: a scope (profile / --store / operator defaults) may stand in
|
||||
// for omitted addressing. The explicit branch passes server/graph/uri
|
||||
// straight through, so existing invocations are unchanged.
|
||||
let scope = crate::scope::resolve_scope(
|
||||
&crate::operator::load_operator_config()?,
|
||||
crate::planes::Capability::Any,
|
||||
crate::scope::ScopeFlags { profile, store, server, cluster: None, graph, uri },
|
||||
)?;
|
||||
require_graph_for_multi_graph_server(&scope).await?;
|
||||
let (server, graph, uri) = (
|
||||
scope.server.as_deref(),
|
||||
scope.graph.as_deref(),
|
||||
scope.uri,
|
||||
);
|
||||
let via_server = server.is_some();
|
||||
let uri = apply_server_flag(server, graph, uri)?;
|
||||
let token = resolve_remote_bearer_token(uri.as_deref())?;
|
||||
let uri = crate::helpers::resolve_uri(uri)?;
|
||||
reject_positional_remote(via_server, &uri)?;
|
||||
if is_remote_uri(&uri) {
|
||||
Ok(GraphClient::Remote {
|
||||
http: build_http_client()?,
|
||||
base_url: uri,
|
||||
token,
|
||||
})
|
||||
} else {
|
||||
Ok(GraphClient::Embedded { uri, actor: None })
|
||||
}
|
||||
}
|
||||
|
||||
/// Write-path factory: the same addressing/credential resolution as
|
||||
/// `resolve()`, but through the stricter `resolve_cli_graph` (which
|
||||
/// carries `policy_file`/`graph_id`/`selected`), and with the actor
|
||||
/// resolved up front. The embedded arm then opens WITH policy. The
|
||||
/// resolution order matches the write arms exactly: server flag →
|
||||
/// bearer token → graph.
|
||||
pub(crate) async fn resolve_with_policy(
|
||||
server: Option<&str>,
|
||||
graph: Option<&str>,
|
||||
uri: Option<String>,
|
||||
cli_as: Option<&str>,
|
||||
profile: Option<&str>,
|
||||
store: Option<&str>,
|
||||
) -> Result<Self> {
|
||||
// RFC-011 scope translation (see `resolve`); explicit addressing passes
|
||||
// through unchanged.
|
||||
let scope = crate::scope::resolve_scope(
|
||||
&crate::operator::load_operator_config()?,
|
||||
crate::planes::Capability::Any,
|
||||
crate::scope::ScopeFlags { profile, store, server, cluster: None, graph, uri },
|
||||
)?;
|
||||
require_graph_for_multi_graph_server(&scope).await?;
|
||||
let (server, graph, uri) = (
|
||||
scope.server.as_deref(),
|
||||
scope.graph.as_deref(),
|
||||
scope.uri,
|
||||
);
|
||||
let via_server = server.is_some();
|
||||
let uri = apply_server_flag(server, graph, uri)?;
|
||||
let token = resolve_remote_bearer_token(uri.as_deref())?;
|
||||
let resolved = resolve_cli_graph(uri)?;
|
||||
reject_positional_remote(via_server, &resolved.uri)?;
|
||||
if resolved.is_remote {
|
||||
// A served write resolves the actor server-side from the bearer
|
||||
// token; `--as` cannot set identity here and is rejected.
|
||||
if cli_as.is_some() {
|
||||
bail!(
|
||||
"`--as` is not allowed on a served write — the server resolves the actor \
|
||||
from the bearer token. Remove `--as`, or run the write directly against \
|
||||
storage with `--store <uri>`."
|
||||
);
|
||||
}
|
||||
Ok(GraphClient::Remote {
|
||||
http: build_http_client()?,
|
||||
base_url: resolved.uri,
|
||||
token,
|
||||
})
|
||||
} else {
|
||||
let actor = resolve_cli_actor(cli_as)?;
|
||||
Ok(GraphClient::Embedded {
|
||||
uri: resolved.uri,
|
||||
actor,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// The graph URI (local path / remote base URL) this client addresses.
|
||||
pub(crate) fn uri(&self) -> &str {
|
||||
match self {
|
||||
GraphClient::Embedded { uri, .. } => uri,
|
||||
GraphClient::Remote { base_url, .. } => base_url,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_remote(&self) -> bool {
|
||||
matches!(self, GraphClient::Remote { .. })
|
||||
}
|
||||
|
||||
/// Open the local engine. Direct-store access carries no Cedar policy
|
||||
/// (RFC-011), so both read and write paths open bare; the actor is still
|
||||
/// attributed on the write via the `_as` engine APIs.
|
||||
async fn open_embedded(uri: &str) -> Result<Omnigraph> {
|
||||
Ok(Omnigraph::open(uri).await?)
|
||||
}
|
||||
|
||||
pub(crate) async fn branch_list(&self) -> Result<BranchListOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::GET,
|
||||
remote_url(base_url, &["branches"], &[])?,
|
||||
None,
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, .. } => {
|
||||
let db = Omnigraph::open(uri).await?;
|
||||
let mut branches = db.branch_list().await?;
|
||||
branches.sort();
|
||||
Ok(BranchListOutput { branches })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn snapshot(&self, branch: &str) -> Result<SnapshotOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::GET,
|
||||
remote_url(base_url, &["snapshot"], &[("branch", branch)])?,
|
||||
None,
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, .. } => {
|
||||
let db = Omnigraph::open(uri).await?;
|
||||
let snapshot = db.snapshot_of(ReadTarget::branch(branch)).await?;
|
||||
Ok(snapshot_payload(branch, &snapshot))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn schema_source(&self) -> Result<SchemaOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::GET,
|
||||
remote_url(base_url, &["schema"], &[])?,
|
||||
None,
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, .. } => {
|
||||
let db = Omnigraph::open(uri).await?;
|
||||
Ok(SchemaOutput {
|
||||
schema_source: db.schema_source().to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn list_commits(&self, branch: Option<&str>) -> Result<CommitListOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
let url = match branch {
|
||||
Some(branch) => remote_url(base_url, &["commits"], &[("branch", branch)])?,
|
||||
None => remote_url(base_url, &["commits"], &[])?,
|
||||
};
|
||||
remote_json(http, Method::GET, url, None, token.as_deref()).await
|
||||
}
|
||||
GraphClient::Embedded { uri, .. } => {
|
||||
let db = Omnigraph::open(uri).await?;
|
||||
let commits = db
|
||||
.list_commits(branch)
|
||||
.await?
|
||||
.iter()
|
||||
.map(commit_output)
|
||||
.collect::<Vec<_>>();
|
||||
Ok(CommitListOutput { commits })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_commit(&self, commit_id: &str) -> Result<CommitOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::GET,
|
||||
remote_url(base_url, &["commits", commit_id], &[])?,
|
||||
None,
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, .. } => {
|
||||
let db = Omnigraph::open(uri).await?;
|
||||
Ok(commit_output(&db.get_commit(commit_id).await?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `load` — bulk-load `data` (a file path) onto `branch`, forking from
|
||||
/// `from` if missing. Returns the CLI `LoadOutput`; each arm keeps its
|
||||
/// own mapping (remote sums the wire `IngestOutput.tables`, embedded
|
||||
/// reads the richer `LoadResult` directly) — preserved exactly.
|
||||
pub(crate) async fn load(
|
||||
&self,
|
||||
branch: &str,
|
||||
from: Option<&str>,
|
||||
data: &str,
|
||||
mode: CliLoadMode,
|
||||
) -> Result<LoadOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
let data = std::fs::read_to_string(data)?;
|
||||
// RFC-009 Phase 5: the canonical `load` verb targets the
|
||||
// canonical `/load` route (the deprecated `ingest` verb below
|
||||
// still rides `/ingest`).
|
||||
let output = remote_json::<IngestOutput>(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["load"], &[])?,
|
||||
Some(serde_json::to_value(IngestRequest {
|
||||
branch: Some(branch.to_string()),
|
||||
from: from.map(ToOwned::to_owned),
|
||||
mode: Some(mode.into()),
|
||||
data,
|
||||
})?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await?;
|
||||
Ok(load_output_from_tables(base_url, branch, mode.as_str(), &output))
|
||||
}
|
||||
GraphClient::Embedded { uri, actor } => {
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let result = db
|
||||
.load_file_as(branch, from, data, mode.into(), actor.as_deref())
|
||||
.await?;
|
||||
Ok(load_output_from_result(uri, branch, mode.as_str(), &result))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `ingest` — the deprecated alias of `load`. Same operation, but the
|
||||
/// surfaced shape is the wire `IngestOutput` (printed by
|
||||
/// `print_ingest_human`), so it is its own method. The embedded arm
|
||||
/// echoes `actor_id: None` in the output exactly as the legacy arm did
|
||||
/// (the actor is still attributed on the commit via `load_file_as`).
|
||||
pub(crate) async fn ingest(
|
||||
&self,
|
||||
branch: &str,
|
||||
from: &str,
|
||||
data: &str,
|
||||
mode: CliLoadMode,
|
||||
) -> Result<IngestOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
let data = std::fs::read_to_string(data)?;
|
||||
remote_json(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["ingest"], &[])?,
|
||||
Some(serde_json::to_value(IngestRequest {
|
||||
branch: Some(branch.to_string()),
|
||||
from: Some(from.to_string()),
|
||||
mode: Some(mode.into()),
|
||||
data,
|
||||
})?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, actor } => {
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let result = db
|
||||
.load_file_as(branch, Some(from), data, mode.into(), actor.as_deref())
|
||||
.await?;
|
||||
Ok(ingest_output(uri, &result, mode.into(), None))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `mutate` — run a change query against `branch`. Folds
|
||||
/// `execute_change` / `execute_change_remote` + the legacy request body.
|
||||
pub(crate) async fn mutate(
|
||||
&self,
|
||||
branch: &str,
|
||||
query_source: &str,
|
||||
query_name: Option<&str>,
|
||||
params_json: Option<&Value>,
|
||||
) -> Result<ChangeOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["change"], &[])?,
|
||||
Some(legacy_change_request_body(
|
||||
query_source,
|
||||
query_name,
|
||||
branch,
|
||||
params_json,
|
||||
)),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, actor } => {
|
||||
let (selected_name, query_params) = select_named_query(query_source, query_name)?;
|
||||
let params = query_params_from_json(&query_params, params_json)?;
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let actor = actor.as_deref();
|
||||
let result = db
|
||||
.mutate_as(branch, query_source, &selected_name, ¶ms, actor)
|
||||
.await?;
|
||||
Ok(ChangeOutput {
|
||||
branch: branch.to_string(),
|
||||
query_name: selected_name,
|
||||
affected_nodes: result.affected_nodes,
|
||||
affected_edges: result.affected_edges,
|
||||
actor_id: actor.map(String::from),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `query` — run a read query against `target`. Folds `execute_read` /
|
||||
/// `execute_read_remote`; the embedded arm opens WITHOUT policy (reads
|
||||
/// never attach one), so this verb resolves via `resolve()`.
|
||||
pub(crate) async fn query(
|
||||
&self,
|
||||
target: ReadTarget,
|
||||
query_source: &str,
|
||||
query_name: Option<&str>,
|
||||
params_json: Option<&Value>,
|
||||
) -> Result<ReadOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
let (branch, snapshot) = match &target {
|
||||
ReadTarget::Branch(branch) => (Some(branch.clone()), None),
|
||||
ReadTarget::Snapshot(snapshot) => (None, Some(snapshot.as_str().to_string())),
|
||||
};
|
||||
remote_json(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["read"], &[])?,
|
||||
Some(serde_json::to_value(ReadRequest {
|
||||
query_source: query_source.to_string(),
|
||||
query_name: query_name.map(ToOwned::to_owned),
|
||||
params: params_json.cloned(),
|
||||
branch,
|
||||
snapshot,
|
||||
})?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, .. } => {
|
||||
let (selected_name, query_params) = select_named_query(query_source, query_name)?;
|
||||
let params = query_params_from_json(&query_params, params_json)?;
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let result = db
|
||||
.query(target.clone(), query_source, &selected_name, ¶ms)
|
||||
.await?;
|
||||
Ok(read_output(selected_name, &target, result))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `invoke_named` — run a stored query **by catalog name** (RFC-011 D3).
|
||||
/// Served-only: the catalog is server-owned, so a `--store` (embedded)
|
||||
/// scope has nothing to resolve the name against. `expect_mutation` carries
|
||||
/// the verb's asserted kind; the server rejects a mismatch (400) before
|
||||
/// running, so the response is exactly the expected envelope — the caller
|
||||
/// deserializes it as the concrete `T` (`ReadOutput` for `query`,
|
||||
/// `ChangeOutput` for `mutate`), sidestepping the untagged wire enum.
|
||||
pub(crate) async fn invoke_named<T: serde::de::DeserializeOwned>(
|
||||
&self,
|
||||
name: &str,
|
||||
expect_mutation: bool,
|
||||
params_json: Option<&Value>,
|
||||
branch: Option<String>,
|
||||
snapshot: Option<String>,
|
||||
) -> Result<T> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
let body = InvokeStoredQueryRequest {
|
||||
params: params_json.cloned(),
|
||||
branch,
|
||||
snapshot,
|
||||
expect_mutation: Some(expect_mutation),
|
||||
};
|
||||
remote_json(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["queries", name], &[])?,
|
||||
Some(serde_json::to_value(body)?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { .. } => bail!(
|
||||
"by-name invocation needs a server (the stored-query catalog is \
|
||||
server-owned); use -e '<gq>' or --query <file> for an ad-hoc query \
|
||||
against --store, or address a server with --server / --profile"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn branch_create_from(
|
||||
&self,
|
||||
from: &str,
|
||||
name: &str,
|
||||
) -> Result<BranchCreateOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["branches"], &[])?,
|
||||
Some(serde_json::to_value(BranchCreateRequest {
|
||||
from: Some(from.to_string()),
|
||||
name: name.to_string(),
|
||||
})?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, actor } => {
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let actor = actor.as_deref();
|
||||
db.branch_create_from_as(ReadTarget::branch(from), name, actor)
|
||||
.await?;
|
||||
Ok(BranchCreateOutput {
|
||||
uri: uri.clone(),
|
||||
from: from.to_string(),
|
||||
name: name.to_string(),
|
||||
actor_id: actor.map(String::from),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn branch_delete(&self, name: &str) -> Result<BranchDeleteOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::DELETE,
|
||||
remote_url(base_url, &["branches", name], &[])?,
|
||||
None,
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, actor } => {
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let actor = actor.as_deref();
|
||||
db.branch_delete_as(name, actor).await?;
|
||||
Ok(BranchDeleteOutput {
|
||||
uri: uri.clone(),
|
||||
name: name.to_string(),
|
||||
actor_id: actor.map(String::from),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn branch_merge(&self, source: &str, into: &str) -> Result<BranchMergeOutput> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["branches", "merge"], &[])?,
|
||||
Some(serde_json::to_value(BranchMergeRequest {
|
||||
source: source.to_string(),
|
||||
target: Some(into.to_string()),
|
||||
})?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, actor } => {
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let actor = actor.as_deref();
|
||||
let outcome = db.branch_merge_as(source, into, actor).await?;
|
||||
Ok(BranchMergeOutput {
|
||||
source: source.to_string(),
|
||||
target: into.to_string(),
|
||||
outcome: outcome.into(),
|
||||
actor_id: actor.map(String::from),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `apply_schema` — apply `schema_source`. The embedded arm runs the
|
||||
/// caller's catalog validator (stored-query registry check) inside the
|
||||
/// engine's `apply_schema_as_with_catalog_check`; the remote arm runs
|
||||
/// the server's own check and IGNORES `validate`. The `impl FnOnce`
|
||||
/// validator is exactly why this is an enum, not a trait (non-object-
|
||||
/// safe).
|
||||
pub(crate) async fn apply_schema<F>(
|
||||
&self,
|
||||
schema_source: &str,
|
||||
allow_data_loss: bool,
|
||||
validate: F,
|
||||
) -> Result<SchemaApplyOutput>
|
||||
where
|
||||
F: FnOnce(&Catalog) -> omnigraph::error::Result<()>,
|
||||
{
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
// MR-694 PR B: SchemaApplyRequest carries allow_data_loss so
|
||||
// Hard-mode drops are no longer CLI-only; the server's
|
||||
// `server_schema_apply` honors it (and runs its own catalog
|
||||
// check, so `validate` does not apply here).
|
||||
remote_json::<SchemaApplyOutput>(
|
||||
http,
|
||||
Method::POST,
|
||||
remote_url(base_url, &["schema", "apply"], &[])?,
|
||||
Some(serde_json::to_value(SchemaApplyRequest {
|
||||
schema_source: schema_source.to_string(),
|
||||
allow_data_loss,
|
||||
})?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { uri, actor } => {
|
||||
let db = Self::open_embedded(uri).await?;
|
||||
let result = db
|
||||
.apply_schema_as_with_catalog_check(
|
||||
schema_source,
|
||||
omnigraph::db::SchemaApplyOptions { allow_data_loss },
|
||||
actor.as_deref(),
|
||||
validate,
|
||||
)
|
||||
.await?;
|
||||
Ok(schema_apply_output(uri, result))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `export` — stream the branch as JSONL into `writer`. The streaming
|
||||
/// shape (a `W: Write`, not a returned DTO) is why this lands in 3c
|
||||
/// rather than 3b. Opens WITHOUT policy (like reads), so it is reached
|
||||
/// via `resolve()`; the Embedded arm opens bare. The Remote arm streams
|
||||
/// the chunked response body straight through (no buffering the whole
|
||||
/// export in memory).
|
||||
pub(crate) async fn export<W: Write>(
|
||||
&self,
|
||||
branch: &str,
|
||||
type_names: &[String],
|
||||
table_keys: &[String],
|
||||
writer: &mut W,
|
||||
) -> Result<()> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
let request = apply_bearer_token(
|
||||
http.request(Method::POST, remote_url(base_url, &["export"], &[])?),
|
||||
token.as_deref(),
|
||||
)
|
||||
.json(&ExportRequest {
|
||||
branch: Some(branch.to_string()),
|
||||
type_names: type_names.to_vec(),
|
||||
table_keys: table_keys.to_vec(),
|
||||
});
|
||||
let mut response = request.send().await?;
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
let text = response.text().await?;
|
||||
if let Ok(error) = serde_json::from_str::<ErrorOutput>(&text) {
|
||||
bail!(error.error);
|
||||
}
|
||||
bail!("server returned {}: {}", status, text);
|
||||
}
|
||||
while let Some(chunk) = response.chunk().await? {
|
||||
writer.write_all(&chunk)?;
|
||||
}
|
||||
writer.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
GraphClient::Embedded { uri, .. } => {
|
||||
let db = Omnigraph::open(uri).await?;
|
||||
db.export_jsonl_to_writer(branch, type_names, table_keys, writer)
|
||||
.await?;
|
||||
writer.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `graphs list` — enumerate the graphs a remote multi-graph server
|
||||
/// serves (`GET /graphs`). Remote-only by design: there is no local
|
||||
/// enumeration endpoint, so the Embedded arm fails loudly. Routing it
|
||||
/// through the enum still buys the shared `resolve()` addressing/token
|
||||
/// preamble.
|
||||
pub(crate) async fn list_graphs(&self) -> Result<GraphListResponse> {
|
||||
match self {
|
||||
GraphClient::Remote {
|
||||
http,
|
||||
base_url,
|
||||
token,
|
||||
} => {
|
||||
remote_json(
|
||||
http,
|
||||
Method::GET,
|
||||
remote_url(base_url, &["graphs"], &[])?,
|
||||
None,
|
||||
token.as_deref(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
GraphClient::Embedded { .. } => bail!(
|
||||
"`omnigraph graphs list` requires a remote multi-graph server \
|
||||
(--server <url>). To enumerate the graphs in a cluster, run \
|
||||
`omnigraph cluster status --config <dir>`."
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -9,8 +9,6 @@ use omnigraph::embedding::EmbeddingClient;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value, json};
|
||||
|
||||
const DEFAULT_EMBED_MODEL: &str = "gemini-embedding-2-preview";
|
||||
|
||||
#[derive(Debug, Args, Clone)]
|
||||
pub(crate) struct EmbedArgs {
|
||||
/// Seed manifest path
|
||||
|
|
@ -85,8 +83,6 @@ impl EmbedMode {
|
|||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
struct EmbedSpec {
|
||||
#[serde(default = "default_embed_model")]
|
||||
model: String,
|
||||
dimension: usize,
|
||||
types: BTreeMap<String, EmbedTypeSpec>,
|
||||
}
|
||||
|
|
@ -180,13 +176,6 @@ pub(crate) fn resolve_embed_job(args: &EmbedArgs) -> Result<EmbedJob> {
|
|||
(input, output, spec)
|
||||
};
|
||||
|
||||
if spec.model != DEFAULT_EMBED_MODEL {
|
||||
bail!(
|
||||
"only {} is supported for explicit seed embeddings right now",
|
||||
DEFAULT_EMBED_MODEL
|
||||
);
|
||||
}
|
||||
|
||||
Ok(EmbedJob {
|
||||
input,
|
||||
output,
|
||||
|
|
@ -305,7 +294,14 @@ pub(crate) async fn run_embed_job(job: &EmbedJob) -> Result<EmbedOutput> {
|
|||
cleaned_rows,
|
||||
mode: job.mode.as_str(!job.selectors.is_empty()),
|
||||
dimension: job.spec.dimension,
|
||||
model: job.spec.model.clone(),
|
||||
// The embedding model is resolved solely from the provider config; the
|
||||
// spec carries no model field, so there is no second source of truth to
|
||||
// silently disagree with the API. Report what was actually used (empty
|
||||
// for `--clean`, which builds no client).
|
||||
model: client
|
||||
.as_ref()
|
||||
.map(|c| c.config().model.clone())
|
||||
.unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -315,10 +311,6 @@ fn temp_output_path(output: &Path) -> PathBuf {
|
|||
PathBuf::from(temp)
|
||||
}
|
||||
|
||||
fn default_embed_model() -> String {
|
||||
DEFAULT_EMBED_MODEL.to_string()
|
||||
}
|
||||
|
||||
fn load_embed_spec(path: &Path) -> Result<EmbedSpec> {
|
||||
Ok(serde_json::from_str(&fs::read_to_string(path)?)?)
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,22 +1,16 @@
|
|||
//! In-source test suite for the CLI binary (moved verbatim from
|
||||
//! main.rs; `use super::*` resolves through the #[path] declaration).
|
||||
|
||||
use std::fs;
|
||||
|
||||
use super::{
|
||||
DEFAULT_BEARER_TOKEN_ENV, apply_bearer_token, bearer_token_from_env_file,
|
||||
legacy_change_request_body, load_cli_config, load_env_file_into_process,
|
||||
normalize_bearer_token, parse_env_assignment, resolve_cli_graph, resolve_policy_context,
|
||||
resolve_remote_bearer_token,
|
||||
DEFAULT_BEARER_TOKEN_ENV, apply_bearer_token, legacy_change_request_body,
|
||||
normalize_bearer_token, resolve_remote_bearer_token,
|
||||
};
|
||||
use omnigraph_server::load_config;
|
||||
use reqwest::header::AUTHORIZATION;
|
||||
use serde_json::json;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[test]
|
||||
fn legacy_change_request_body_uses_legacy_field_names() {
|
||||
// `execute_change_remote` hits `POST /change`, which old
|
||||
// `mutate`'s remote arm hits `POST /change`, which old
|
||||
// `omnigraph-server` builds deserialize as `ChangeRequest` with
|
||||
// **required** `query_source` and optional `query_name` keys.
|
||||
// Newer servers accept both spellings via serde alias, but a
|
||||
|
|
@ -96,126 +90,20 @@
|
|||
}
|
||||
|
||||
#[test]
fn parse_env_assignment_supports_plain_and_exported_values() {
    // (input line, expected key/value pair — `None` when the line is not an
    // assignment).
    let cases: [(&str, Option<(&str, &str)>); 4] = [
        ("DEMO_TOKEN=demo-token", Some(("DEMO_TOKEN", "demo-token"))),
        (
            "export DEMO_TOKEN=\"quoted-token\"",
            Some(("DEMO_TOKEN", "quoted-token")),
        ),
        ("# comment", None),
        (" ", None),
    ];

    for (line, expected) in cases {
        let expected = expected.map(|(key, value)| (key.to_string(), value.to_string()));
        assert_eq!(parse_env_assignment(line), expected);
    }
}
|
||||
|
||||
#[test]
fn bearer_token_from_env_file_reads_named_value() {
    let dir = tempdir().unwrap();
    let env_path = dir.path().join(".env.omni");
    let contents = "FIRST=ignore\nexport DEMO_TOKEN=\" demo-token \"\n";
    fs::write(&env_path, contents).unwrap();

    // A present key yields its value with the padding inside the quotes
    // removed.
    let found = bearer_token_from_env_file(&env_path, "DEMO_TOKEN").unwrap();
    assert_eq!(found.as_deref(), Some("demo-token"));

    // An absent key is `None`, not an error.
    let missing = bearer_token_from_env_file(&env_path, "MISSING").unwrap();
    assert_eq!(missing, None);
}
|
||||
|
||||
#[test]
fn load_env_file_into_process_sets_missing_values_without_overriding_existing_ones() {
    const ONLY_IN_FILE: &str = "AUTOLOAD_ONLY";
    const ALREADY_SET: &str = "AUTOLOAD_PRESET";

    let dir = tempdir().unwrap();
    let env_path = dir.path().join(".env.omni");
    fs::write(
        &env_path,
        "AUTOLOAD_ONLY=from-file\nAUTOLOAD_PRESET=from-file\n",
    )
    .unwrap();

    // Remember whatever the surrounding process had so it can be put back.
    let saved_only = std::env::var_os(ONLY_IN_FILE);
    let saved_preset = std::env::var_os(ALREADY_SET);

    // NOTE(review): mutating the process environment is only sound when no
    // other thread touches it concurrently — confirm how these tests are
    // scheduled.
    unsafe {
        std::env::remove_var(ONLY_IN_FILE);
        std::env::set_var(ALREADY_SET, "from-env");
    }

    load_env_file_into_process(&env_path).unwrap();

    // The unset key is filled from the file; the pre-set key keeps its
    // process value.
    assert_eq!(std::env::var(ONLY_IN_FILE).unwrap(), "from-file");
    assert_eq!(std::env::var(ALREADY_SET).unwrap(), "from-env");

    // Restore the environment exactly as it was found.
    let restore = |key: &str, saved: Option<std::ffi::OsString>| unsafe {
        match saved {
            Some(value) => std::env::set_var(key, value),
            None => std::env::remove_var(key),
        }
    };
    restore(ONLY_IN_FILE, saved_only);
    restore(ALREADY_SET, saved_preset);
}
|
||||
|
||||
#[test]
|
||||
fn resolve_remote_bearer_token_uses_scoped_env_file_with_global_fallback() {
|
||||
let temp = tempdir().unwrap();
|
||||
fs::write(
|
||||
temp.path().join("omnigraph.yaml"),
|
||||
r#"
|
||||
graphs:
|
||||
demo:
|
||||
uri: https://example.com
|
||||
bearer_token_env: DEMO_TOKEN
|
||||
auth:
|
||||
env_file: .env.omni
|
||||
cli:
|
||||
graph: demo
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
temp.path().join(".env.omni"),
|
||||
"DEMO_TOKEN=scoped-token\nOMNIGRAPH_BEARER_TOKEN=global-token\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
fn resolve_remote_bearer_token_falls_back_to_default_env() {
|
||||
// RFC-011: with no operator server matching the URL, the only chain
|
||||
// left is the default `OMNIGRAPH_BEARER_TOKEN` env (no omnigraph.yaml
|
||||
// scoped chain). Hermetic: no operator config is read for a literal URL
|
||||
// that matches no `servers:` entry.
|
||||
let previous = std::env::var_os(DEFAULT_BEARER_TOKEN_ENV);
|
||||
let previous_home = std::env::var_os("OMNIGRAPH_HOME");
|
||||
unsafe {
|
||||
std::env::remove_var(DEFAULT_BEARER_TOKEN_ENV);
|
||||
// Hermetic: the keyed hop (RFC-007 PR 2) must not pick up a real
|
||||
// ~/.omnigraph on the developer's machine — and with no operator
|
||||
// servers defined, the legacy chain below must behave
|
||||
// byte-identically to pre-PR-2 (tested-as-untouched).
|
||||
std::env::set_var("OMNIGRAPH_HOME", temp.path().join("no-operator-config"));
|
||||
std::env::set_var(DEFAULT_BEARER_TOKEN_ENV, "global-token");
|
||||
std::env::set_var("OMNIGRAPH_HOME", "/nonexistent/omnigraph-test-home");
|
||||
}
|
||||
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
let config = load_config(Some(&config_path)).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
resolve_remote_bearer_token(&config, None, Some("demo"))
|
||||
.unwrap()
|
||||
.as_deref(),
|
||||
Some("scoped-token")
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_remote_bearer_token(&config, Some("https://override.example.com"), None)
|
||||
resolve_remote_bearer_token(Some("https://override.example.com"))
|
||||
.unwrap()
|
||||
.as_deref(),
|
||||
Some("global-token")
|
||||
|
|
@ -234,194 +122,3 @@ cli:
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn load_cli_config_autoloads_env_file_into_process() {
|
||||
let temp = tempdir().unwrap();
|
||||
fs::write(
|
||||
temp.path().join("omnigraph.yaml"),
|
||||
r#"
|
||||
auth:
|
||||
env_file: .env.omni
|
||||
graphs:
|
||||
demo:
|
||||
uri: s3://bucket/prefix
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
temp.path().join(".env.omni"),
|
||||
"AUTOLOAD_FROM_CONFIG=loaded\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let key = "AUTOLOAD_FROM_CONFIG";
|
||||
let previous = std::env::var_os(key);
|
||||
unsafe {
|
||||
std::env::remove_var(key);
|
||||
}
|
||||
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
let config = load_cli_config(Some(&config_path)).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
config.resolve_target_uri(None, Some("demo"), None).unwrap(),
|
||||
"s3://bucket/prefix"
|
||||
);
|
||||
assert_eq!(std::env::var(key).unwrap(), "loaded");
|
||||
|
||||
unsafe {
|
||||
if let Some(value) = previous {
|
||||
std::env::set_var(key, value);
|
||||
} else {
|
||||
std::env::remove_var(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn graph_identity_resolve_policy_context_named_cli_graph_uses_graph_key_not_project_name_or_uri()
|
||||
{
|
||||
let temp = tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
project:
|
||||
name: misleading-project
|
||||
graphs:
|
||||
local:
|
||||
uri: /tmp/local-policy-graph.omni
|
||||
policy:
|
||||
file: ./policy.yaml
|
||||
cli:
|
||||
graph: local
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let config = load_config(Some(&config_path)).unwrap();
|
||||
let context = resolve_policy_context(&config).unwrap();
|
||||
assert_eq!(context.graph_id, "local");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn graph_identity_resolve_policy_context_server_graph_uses_graph_key_when_cli_graph_absent() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
project:
|
||||
name: misleading-project
|
||||
graphs:
|
||||
local:
|
||||
uri: /tmp/local-policy-graph.omni
|
||||
policy:
|
||||
file: ./server-policy.yaml
|
||||
server:
|
||||
graph: local
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let config = load_config(Some(&config_path)).unwrap();
|
||||
let context = resolve_policy_context(&config).unwrap();
|
||||
assert_eq!(context.graph_id, "local");
|
||||
assert!(context.policy_file.ends_with("server-policy.yaml"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn graph_identity_resolve_policy_context_anonymous_uses_top_level_default_identity() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
project:
|
||||
name: misleading-project
|
||||
graphs:
|
||||
local:
|
||||
uri: /tmp/local-policy-graph.omni
|
||||
policy:
|
||||
file: ./top-policy.yaml
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let config = load_config(Some(&config_path)).unwrap();
|
||||
let context = resolve_policy_context(&config).unwrap();
|
||||
assert_eq!(context.graph_id, "default");
|
||||
assert!(context.policy_file.ends_with("top-policy.yaml"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn graph_identity_resolve_cli_graph_named_target_uses_graph_key_not_project_name_or_uri() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
project:
|
||||
name: misleading-project
|
||||
graphs:
|
||||
prod:
|
||||
uri: s3://bucket/prod-graph/
|
||||
policy:
|
||||
file: ./prod-policy.yaml
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let config = load_config(Some(&config_path)).unwrap();
|
||||
let graph = resolve_cli_graph(&config, None, Some("prod")).unwrap();
|
||||
assert_eq!(graph.selected(), Some("prod"));
|
||||
assert_eq!(graph.graph_id, "prod");
|
||||
assert_eq!(graph.uri, "s3://bucket/prod-graph/");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn graph_identity_resolve_cli_graph_positional_uri_uses_anonymous_normalized_uri() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
project:
|
||||
name: misleading-project
|
||||
graphs:
|
||||
local:
|
||||
uri: /tmp/configured-graph.omni
|
||||
policy:
|
||||
file: ./policy.yaml
|
||||
cli:
|
||||
graph: local
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let config = load_config(Some(&config_path)).unwrap();
|
||||
let local_graph_path = temp.path().join("explicit-graph.omni");
|
||||
let local_graph = resolve_cli_graph(
|
||||
&config,
|
||||
Some(format!("file://{}", local_graph_path.display())),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(local_graph.selected(), None);
|
||||
assert_eq!(
|
||||
local_graph.graph_id,
|
||||
local_graph_path.to_string_lossy().as_ref()
|
||||
);
|
||||
assert_eq!(local_graph.policy_file, None);
|
||||
|
||||
let s3_graph = resolve_cli_graph(
|
||||
&config,
|
||||
Some("s3://bucket/anonymous-graph/".to_string()),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(s3_graph.selected(), None);
|
||||
assert_eq!(s3_graph.graph_id, "s3://bucket/anonymous-graph");
|
||||
assert_eq!(s3_graph.policy_file, None);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,408 +0,0 @@
|
|||
//! `omnigraph config migrate` (RFC-008 stage 2): split a legacy
|
||||
//! `omnigraph.yaml` into its two destinations — the team half as a
|
||||
//! ready-to-review `cluster.yaml` proposal, the personal half merged into
|
||||
//! `~/.omnigraph/config.yaml` — and name what's obsolete. The command is
|
||||
//! the completeness test of RFC-008's migration map: any key it cannot
|
||||
//! place is a bug in the RFC.
|
||||
//!
|
||||
//! Touches nothing without `--write`. Referenced `.gq`/policy files are
|
||||
//! never moved; manual steps are printed instead.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use color_eyre::Result;
|
||||
use color_eyre::eyre::eyre;
|
||||
use omnigraph_server::OmnigraphConfig;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::operator;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct MigrateReport {
|
||||
pub(crate) source: String,
|
||||
/// The ready-to-review cluster.yaml text (None when the legacy file
|
||||
/// declares nothing team-shaped).
|
||||
pub(crate) cluster_yaml: Option<String>,
|
||||
/// Operator keys to merge: dotted key -> YAML value text.
|
||||
pub(crate) operator_merge: BTreeMap<String, String>,
|
||||
/// Keys with no destination, and why.
|
||||
pub(crate) dropped: Vec<DroppedKey>,
|
||||
/// Steps the command will not do for you.
|
||||
pub(crate) manual_steps: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct DroppedKey {
|
||||
pub(crate) key: String,
|
||||
pub(crate) reason: String,
|
||||
}
|
||||
|
||||
/// Classify a parsed legacy config into the report. Pure — no I/O.
|
||||
pub(crate) fn build_report(config: &OmnigraphConfig, source: &Path) -> MigrateReport {
|
||||
let mut dropped = Vec::new();
|
||||
let mut manual_steps = Vec::new();
|
||||
let mut operator_merge: BTreeMap<String, String> = BTreeMap::new();
|
||||
|
||||
// ---- personal half ----
|
||||
if let Some(actor) = &config.cli.actor {
|
||||
operator_merge.insert("operator.actor".into(), actor.clone());
|
||||
}
|
||||
if let Some(format) = config.cli.output_format {
|
||||
operator_merge.insert(
|
||||
"defaults.output".into(),
|
||||
serde_yaml::to_string(&format).unwrap_or_default().trim().to_string(),
|
||||
);
|
||||
}
|
||||
if let Some(width) = config.cli.table_max_column_width {
|
||||
operator_merge.insert("defaults.table_max_column_width".into(), width.to_string());
|
||||
}
|
||||
if let Some(layout) = config.cli.table_cell_layout {
|
||||
operator_merge.insert(
|
||||
"defaults.table_cell_layout".into(),
|
||||
serde_yaml::to_string(&layout).unwrap_or_default().trim().to_string(),
|
||||
);
|
||||
}
|
||||
if config.cli.graph.is_some() {
|
||||
dropped.push(DroppedKey {
|
||||
key: "cli.graph".into(),
|
||||
reason: "no operator default-target yet — address graphs explicitly via --target/--server (RFC-002 locator territory)".into(),
|
||||
});
|
||||
}
|
||||
if config.cli.branch.is_some() {
|
||||
dropped.push(DroppedKey {
|
||||
key: "cli.branch".into(),
|
||||
reason: "pass --branch explicitly".into(),
|
||||
});
|
||||
}
|
||||
|
||||
// Remote graphs with a token env become operator servers (the keyed
|
||||
// chain replaces invented env-var names).
|
||||
for (name, target) in &config.graphs {
|
||||
if target.uri.starts_with("http://") || target.uri.starts_with("https://") {
|
||||
operator_merge.insert(format!("servers.{name}.url"), target.uri.clone());
|
||||
if target.bearer_token_env.is_some() {
|
||||
manual_steps.push(format!(
|
||||
"store the '{name}' token in the keyed chain: echo $TOKEN | omnigraph login {name} (replaces bearer_token_env)"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
if config.auth.env_file.is_some() {
|
||||
manual_steps.push(
|
||||
"auth.env_file keeps working during the window; prefer `omnigraph login <server>` per server going forward".into(),
|
||||
);
|
||||
}
|
||||
|
||||
// Legacy aliases split: content -> catalog stored query, binding ->
|
||||
// operator alias referencing the name.
|
||||
for (name, alias) in &config.aliases {
|
||||
let query_name = alias.name.clone().unwrap_or_else(|| name.clone());
|
||||
operator_merge.insert(
|
||||
format!("aliases.{name}"),
|
||||
format!(
|
||||
"{{ server: TODO-server-name, graph: {}, query: {query_name}, args: [{}] }}",
|
||||
alias.graph.as_deref().unwrap_or("TODO-graph-id"),
|
||||
alias.args.join(", ")
|
||||
),
|
||||
);
|
||||
manual_steps.push(format!(
|
||||
"alias '{name}': move its query content ('{}') into the cluster checkout's queries/ so '{query_name}' becomes a catalog stored query",
|
||||
alias.query
|
||||
));
|
||||
}
|
||||
|
||||
// ---- team half ----
|
||||
let has_team_content = !config.graphs.is_empty()
|
||||
|| !config.queries.is_empty()
|
||||
|| config.policy.file.is_some()
|
||||
|| config.server.policy.file.is_some();
|
||||
let cluster_yaml = has_team_content.then(|| {
|
||||
let mut out = String::from("version: 1\n");
|
||||
if let Some(name) = &config.project.name {
|
||||
out.push_str(&format!("metadata:\n name: {name}\n"));
|
||||
}
|
||||
out.push_str("# storage: s3://bucket/prefix # or omit: this folder is the root\n");
|
||||
if !config.graphs.is_empty() || !config.queries.is_empty() {
|
||||
out.push_str("graphs:\n");
|
||||
}
|
||||
// Single-graph top-level queries belong to a graph the legacy file
|
||||
// never named; propose one.
|
||||
if !config.queries.is_empty() && config.graphs.is_empty() {
|
||||
out.push_str(" default: # TODO: pick the graph id\n schema: # TODO: path to this graph's .pg schema\n queries: queries/\n");
|
||||
}
|
||||
for (name, target) in &config.graphs {
|
||||
out.push_str(&format!(" {name}:\n"));
|
||||
out.push_str(" schema: # TODO: path to this graph's .pg schema\n");
|
||||
if !target.queries.is_empty() {
|
||||
out.push_str(" queries: queries/ # move the .gq files here\n");
|
||||
}
|
||||
out.push_str(&format!(
|
||||
" # legacy root: {} — the cluster manages graph roots under its storage; run `omnigraph cluster import` after reviewing\n",
|
||||
target.uri
|
||||
));
|
||||
}
|
||||
let mut policies: Vec<(String, String, String)> = Vec::new();
|
||||
if let Some(file) = &config.policy.file {
|
||||
policies.push(("default".into(), file.clone(), "graph.<id> # TODO: bind".into()));
|
||||
}
|
||||
if let Some(file) = &config.server.policy.file {
|
||||
policies.push(("server".into(), file.clone(), "cluster".into()));
|
||||
}
|
||||
for (name, target) in &config.graphs {
|
||||
if let Some(file) = &target.policy.file {
|
||||
policies.push((name.clone(), file.clone(), format!("graph.{name}")));
|
||||
}
|
||||
}
|
||||
if !policies.is_empty() {
|
||||
out.push_str("policies:\n");
|
||||
for (name, file, binding) in policies {
|
||||
out.push_str(&format!(
|
||||
" {name}:\n file: {file}\n applies_to: [{binding}]\n"
|
||||
));
|
||||
}
|
||||
}
|
||||
out
|
||||
});
|
||||
|
||||
if !config.query.roots.is_empty() {
|
||||
dropped.push(DroppedKey {
|
||||
key: "query.roots".into(),
|
||||
reason: "obsolete — cluster query discovery (queries: <dir>) replaced it".into(),
|
||||
});
|
||||
}
|
||||
if config.server.bind.is_some() || config.server.graph.is_some() {
|
||||
dropped.push(DroppedKey {
|
||||
key: "server.bind / server.graph".into(),
|
||||
reason: "deployment runtime — pass --bind / target flags or env".into(),
|
||||
});
|
||||
}
|
||||
if config.project.name.is_some() && cluster_yaml.is_none() {
|
||||
dropped.push(DroppedKey {
|
||||
key: "project.name".into(),
|
||||
reason: "the cluster's metadata.name is the deployment label".into(),
|
||||
});
|
||||
}
|
||||
|
||||
MigrateReport {
|
||||
source: source.display().to_string(),
|
||||
cluster_yaml,
|
||||
operator_merge,
|
||||
dropped,
|
||||
manual_steps,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn render_report(report: &MigrateReport) -> String {
|
||||
let mut out = format!("migration plan for {}\n", report.source);
|
||||
if let Some(cluster) = &report.cluster_yaml {
|
||||
out.push_str("\n== team half -> cluster.yaml (ready to review) ==\n");
|
||||
out.push_str(cluster);
|
||||
}
|
||||
if !report.operator_merge.is_empty() {
|
||||
out.push_str("\n== personal half -> ~/.omnigraph/config.yaml ==\n");
|
||||
for (key, value) in &report.operator_merge {
|
||||
out.push_str(&format!(" {key}: {value}\n"));
|
||||
}
|
||||
}
|
||||
if !report.dropped.is_empty() {
|
||||
out.push_str("\n== no destination ==\n");
|
||||
for dropped in &report.dropped {
|
||||
out.push_str(&format!(" {} — {}\n", dropped.key, dropped.reason));
|
||||
}
|
||||
}
|
||||
if !report.manual_steps.is_empty() {
|
||||
out.push_str("\n== manual steps ==\n");
|
||||
for step in &report.manual_steps {
|
||||
out.push_str(&format!(" - {step}\n"));
|
||||
}
|
||||
}
|
||||
out.push_str("\n(nothing written; pass --write to apply the operator merge and emit cluster.yaml)\n");
|
||||
out
|
||||
}
|
||||
|
||||
/// `--write`: merge the personal half into the operator config (key-level,
|
||||
/// existing entries always win; the prior file is backed up) and write the
|
||||
/// team half to cluster.yaml in the legacy config's directory (or
|
||||
/// cluster.yaml.proposed when one already exists).
|
||||
pub(crate) fn apply_report(report: &MigrateReport, legacy_dir: &Path) -> Result<Vec<String>> {
|
||||
let mut written = Vec::new();
|
||||
|
||||
if !report.operator_merge.is_empty() {
|
||||
let dir = operator::operator_dir()
|
||||
.ok_or_else(|| eyre!("no home directory resolvable for the operator config"))?;
|
||||
std::fs::create_dir_all(&dir)?;
|
||||
let path = dir.join(operator::OPERATOR_CONFIG_FILE);
|
||||
let existing_text = std::fs::read_to_string(&path).unwrap_or_default();
|
||||
let mut mapping: serde_yaml::Mapping = if existing_text.trim().is_empty() {
|
||||
serde_yaml::Mapping::new()
|
||||
} else {
|
||||
serde_yaml::from_str(&existing_text)
|
||||
.map_err(|err| eyre!("operator config '{}' does not parse: {err}", path.display()))?
|
||||
};
|
||||
let mut merged_any = false;
|
||||
for (dotted, value_text) in &report.operator_merge {
|
||||
if merge_dotted_if_absent(&mut mapping, dotted, value_text)? {
|
||||
merged_any = true;
|
||||
}
|
||||
}
|
||||
if merged_any {
|
||||
if !existing_text.is_empty() {
|
||||
let backup = path.with_extension("yaml.bak");
|
||||
std::fs::write(&backup, &existing_text)?;
|
||||
written.push(format!("backed up prior operator config to {}", backup.display()));
|
||||
}
|
||||
let rendered = serde_yaml::to_string(&mapping)?;
|
||||
let tmp = path.with_extension(format!("yaml.tmp.{}", std::process::id()));
|
||||
std::fs::write(&tmp, &rendered)?;
|
||||
std::fs::rename(&tmp, &path)?;
|
||||
written.push(format!("merged personal keys into {}", path.display()));
|
||||
} else {
|
||||
written.push("operator config already carries every personal key (nothing merged)".into());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(cluster) = &report.cluster_yaml {
|
||||
let target = legacy_dir.join("cluster.yaml");
|
||||
let target = if target.exists() {
|
||||
legacy_dir.join("cluster.yaml.proposed")
|
||||
} else {
|
||||
target
|
||||
};
|
||||
std::fs::write(&target, cluster)?;
|
||||
written.push(format!("wrote team-half proposal to {}", target.display()));
|
||||
}
|
||||
|
||||
Ok(written)
|
||||
}
|
||||
|
||||
/// Set `a.b.c` in the mapping only when absent; returns whether it wrote.
|
||||
fn merge_dotted_if_absent(
|
||||
mapping: &mut serde_yaml::Mapping,
|
||||
dotted: &str,
|
||||
value_text: &str,
|
||||
) -> Result<bool> {
|
||||
let value: serde_yaml::Value =
|
||||
serde_yaml::from_str(value_text).unwrap_or(serde_yaml::Value::String(value_text.into()));
|
||||
let parts: Vec<&str> = dotted.split('.').collect();
|
||||
let mut current = mapping;
|
||||
for part in &parts[..parts.len() - 1] {
|
||||
let key = serde_yaml::Value::String((*part).into());
|
||||
let entry = current
|
||||
.entry(key)
|
||||
.or_insert_with(|| serde_yaml::Value::Mapping(serde_yaml::Mapping::new()));
|
||||
current = entry
|
||||
.as_mapping_mut()
|
||||
.ok_or_else(|| eyre!("operator config key '{dotted}' collides with a non-mapping"))?;
|
||||
}
|
||||
let leaf = serde_yaml::Value::String(parts[parts.len() - 1].into());
|
||||
if current.contains_key(&leaf) {
|
||||
return Ok(false);
|
||||
}
|
||||
current.insert(leaf, value);
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Path of the legacy config to migrate: the explicitly supplied path when
/// there is one, otherwise the relative path `omnigraph.yaml`.
pub(crate) fn legacy_config_path(explicit: Option<&PathBuf>) -> PathBuf {
    match explicit {
        Some(path) => path.clone(),
        None => PathBuf::from("omnigraph.yaml"),
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use omnigraph_server::config::load_config;
|
||||
|
||||
fn full_legacy_fixture(dir: &Path) -> PathBuf {
|
||||
let path = dir.join("omnigraph.yaml");
|
||||
std::fs::write(
|
||||
&path,
|
||||
r#"
|
||||
project: { name: brain }
|
||||
graphs:
|
||||
prod:
|
||||
uri: https://graph.example.com
|
||||
bearer_token_env: PROD_TOKEN
|
||||
policy: { file: ./prod.policy.yaml }
|
||||
queries:
|
||||
find: { file: ./find.gq }
|
||||
local:
|
||||
uri: /tmp/local.omni
|
||||
server: { bind: "0.0.0.0:9999", policy: { file: ./server.policy.yaml } }
|
||||
auth: { env_file: .env.omni }
|
||||
cli:
|
||||
graph: prod
|
||||
branch: main
|
||||
actor: act-me
|
||||
output_format: json
|
||||
table_max_column_width: 40
|
||||
query: { roots: ["."] }
|
||||
aliases:
|
||||
triage: { command: query, query: ./triage.gq, name: weekly_triage, args: [since], graph: prod }
|
||||
policy: { file: ./top.policy.yaml }
|
||||
queries:
|
||||
top_q: { file: ./top.gq }
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
path
|
||||
}
|
||||
|
||||
/// RFC-008 completeness contract: each top-level key of the legacy schema
/// has to show up somewhere in the report — the team half, the operator
/// merge, the dropped list, or the manual steps.
#[test]
fn every_legacy_key_is_classified() {
    let dir = tempfile::tempdir().unwrap();
    let path = full_legacy_fixture(dir.path());
    let config = load_config(Some(&path)).unwrap();
    let report = build_report(&config, &path);
    let rendered = render_report(&report);

    let serialized =
        serde_yaml::to_value(OmnigraphConfig::default()).expect("default serializes");
    let has_team_half = report.cluster_yaml.is_some();
    for raw_key in serialized.as_mapping().unwrap().keys() {
        let key = raw_key.as_str().unwrap();
        let in_rendered = rendered.contains(key);
        let in_operator_merge = report.operator_merge.keys().any(|k| k.contains(key));
        let in_team_half =
            matches!(key, "graphs" | "queries" | "policy" | "project") && has_team_half;
        assert!(
            in_rendered || in_operator_merge || in_team_half,
            "legacy key '{key}' is unclassified — fix the RFC-008 map: {rendered}"
        );
    }

    // spot checks on each section
    for (key, expected) in [
        ("operator.actor", "act-me"),
        ("defaults.output", "json"),
        ("servers.prod.url", "https://graph.example.com"),
    ] {
        assert_eq!(report.operator_merge[key], expected);
    }
    assert!(report.operator_merge["aliases.triage"].contains("query: weekly_triage"));

    let cluster = report.cluster_yaml.as_deref().unwrap();
    for needle in [
        "version: 1",
        "name: brain",
        " prod:",
        "applies_to: [cluster]",
        "applies_to: [graph.prod]",
    ] {
        assert!(cluster.contains(needle));
    }

    assert!(report.dropped.iter().any(|d| d.key == "query.roots"));
    assert!(report.dropped.iter().any(|d| d.key.contains("server.bind")));
    assert!(
        report
            .manual_steps
            .iter()
            .any(|step| step.contains("omnigraph login prod"))
    );
}
|
||||
|
||||
/// An existing leaf is never overwritten, while an absent one is added.
#[test]
fn merge_dotted_never_clobbers_existing() {
    let mut mapping: serde_yaml::Mapping =
        serde_yaml::from_str("operator:\n actor: keep-me\n").unwrap();

    let overwrote = merge_dotted_if_absent(&mut mapping, "operator.actor", "new").unwrap();
    assert!(!overwrote);
    let added = merge_dotted_if_absent(&mut mapping, "defaults.output", "json").unwrap();
    assert!(added);

    let text = serde_yaml::to_string(&mapping).unwrap();
    let kept_original = text.contains("keep-me");
    let leaked_new = text.contains("new");
    assert!(kept_original && !leaked_new);
    assert!(text.contains("output: json"));
}
|
||||
}
|
||||
|
|
@ -18,10 +18,10 @@ use std::env;
|
|||
use std::path::{Path, PathBuf};
|
||||
|
||||
use color_eyre::Result;
|
||||
use color_eyre::eyre::eyre;
|
||||
use color_eyre::eyre::{bail, eyre};
|
||||
use serde::Deserialize;
|
||||
|
||||
use omnigraph_server::config::ReadOutputFormat;
|
||||
use crate::read_format::{ReadOutputFormat, TableCellLayout};
|
||||
|
||||
pub(crate) const OPERATOR_HOME_ENV: &str = "OMNIGRAPH_HOME";
|
||||
pub(crate) const OPERATOR_DIR: &str = ".omnigraph";
|
||||
|
|
@ -41,6 +41,17 @@ pub(crate) struct OperatorConfig {
|
|||
/// Personal alias bindings (RFC-007 PR 3); see OperatorAlias.
|
||||
#[serde(default)]
|
||||
pub(crate) aliases: BTreeMap<String, OperatorAlias>,
|
||||
/// Named scope bundles (RFC-011): each binds exactly one of
|
||||
/// {server, cluster, store} plus an optional default graph. Config data,
|
||||
/// not state — selecting one (`--profile`/`OMNIGRAPH_PROFILE`) fills in a
|
||||
/// command's omitted addressing; it never puts you "in" a mode.
|
||||
#[serde(default)]
|
||||
pub(crate) profiles: BTreeMap<String, OperatorProfile>,
|
||||
/// Managed-cluster storage roots (RFC-011): name → root URI. The ONLY
|
||||
/// place a storage root appears in operator config — admin-only and
|
||||
/// opt-in; a normal operator's file has none.
|
||||
#[serde(default)]
|
||||
pub(crate) clusters: BTreeMap<String, OperatorCluster>,
|
||||
/// Everything this CLI version doesn't know. Warned once at load,
|
||||
/// otherwise ignored (forward compatibility within the operator layer).
|
||||
#[serde(flatten)]
|
||||
|
|
@ -80,8 +91,7 @@ pub(crate) struct OperatorServer {
|
|||
#[derive(Debug, Default, Deserialize)]
|
||||
pub(crate) struct OperatorIdentity {
|
||||
/// Default actor for every `--as` cascade (CLI direct-engine writes and
|
||||
/// cluster commands alike): `--as` > legacy config actor (RFC-008
|
||||
/// window) > this > none.
|
||||
/// cluster commands alike): `--as` > this > none.
|
||||
pub(crate) actor: Option<String>,
|
||||
#[serde(flatten)]
|
||||
unknown: serde_yaml::Mapping,
|
||||
|
|
@ -91,14 +101,67 @@ pub(crate) struct OperatorIdentity {
|
|||
pub(crate) struct OperatorDefaults {
|
||||
/// Default read output format, below every more-specific source.
|
||||
pub(crate) output: Option<ReadOutputFormat>,
|
||||
/// Table rendering preferences (below the legacy cli.table_* keys
|
||||
/// during the RFC-008 window).
|
||||
/// Table rendering preferences for `--format table`.
|
||||
pub(crate) table_max_column_width: Option<usize>,
|
||||
pub(crate) table_cell_layout: Option<omnigraph_server::config::TableCellLayout>,
|
||||
pub(crate) table_cell_layout: Option<TableCellLayout>,
|
||||
/// Default server scope (RFC-011): the everyday addressing when no
|
||||
/// `--profile` / primitive / legacy address is given. Names an entry
|
||||
/// under `servers:`. Mutually exclusive with `store` — a scope binds one
|
||||
/// entity.
|
||||
pub(crate) server: Option<String>,
|
||||
/// Default **store** scope (RFC-011): a `file://` / `s3://` graph storage
|
||||
/// URI used as the zero-flag local default for graph commands when no
|
||||
/// `--profile` / primitive address is given. The local-dev counterpart of
|
||||
/// `server`; mutually exclusive with it.
|
||||
pub(crate) store: Option<String>,
|
||||
/// Default graph selected within a server/cluster scope when no
|
||||
/// `--graph` is passed (RFC-011).
|
||||
pub(crate) default_graph: Option<String>,
|
||||
#[serde(flatten)]
|
||||
unknown: serde_yaml::Mapping,
|
||||
}
|
||||
|
||||
/// A named scope bundle (RFC-011): exactly one of {server, cluster, store}
|
||||
/// plus an optional default graph. Validated on use (`binding()`), not at
|
||||
/// parse time, so an unknown CLI's profile still loads.
|
||||
#[derive(Debug, Default, Deserialize)]
|
||||
pub(crate) struct OperatorProfile {
|
||||
/// Names an entry under `servers:` — a served scope.
|
||||
pub(crate) server: Option<String>,
|
||||
/// Names an entry under `clusters:` — a privileged direct cluster scope.
|
||||
pub(crate) cluster: Option<String>,
|
||||
/// A single graph's storage URI — a direct store scope.
|
||||
pub(crate) store: Option<String>,
|
||||
/// Default graph within a server/cluster scope (ignored for a store,
|
||||
/// which is already one graph).
|
||||
pub(crate) default_graph: Option<String>,
|
||||
#[serde(flatten)]
|
||||
unknown: serde_yaml::Mapping,
|
||||
}
|
||||
|
||||
/// A managed-cluster storage root (RFC-011).
|
||||
#[derive(Debug, Default, Deserialize)]
|
||||
pub(crate) struct OperatorCluster {
|
||||
/// The cluster's storage-root URI (`file://` / `s3://`).
|
||||
pub(crate) root: String,
|
||||
#[serde(flatten)]
|
||||
unknown: serde_yaml::Mapping,
|
||||
}
|
||||
|
||||
/// The single entity bound by a profile (or by the flat defaults).
///
/// The scope resolver consumes this value. Being an enum, it always holds
/// exactly one variant; the "exactly one of server/cluster/store" rule is
/// enforced by whatever code constructs it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum ScopeBinding {
    /// Served scope — either a server name to resolve against `servers:` or
    /// a literal URL.
    Server(String),
    /// Direct cluster scope — either a cluster name to resolve against
    /// `clusters:` or a literal root URI.
    Cluster(String),
    /// Direct store scope — the storage URI of a single graph.
    Store(String),
}
|
||||
|
||||
impl OperatorConfig {
|
||||
pub(crate) fn actor(&self) -> Option<&str> {
|
||||
self.operator.actor.as_deref()
|
||||
|
|
@ -127,6 +190,83 @@ impl OperatorConfig {
|
|||
}
|
||||
best.map(|(name, _)| name)
|
||||
}
|
||||
|
||||
/// A named profile, if defined (RFC-011).
|
||||
pub(crate) fn profile(&self, name: &str) -> Option<&OperatorProfile> {
|
||||
self.profiles.get(name)
|
||||
}
|
||||
|
||||
/// The storage root of a named cluster, if defined (RFC-011).
|
||||
pub(crate) fn cluster_root(&self, name: &str) -> Option<&str> {
|
||||
self.clusters.get(name).map(|c| c.root.as_str())
|
||||
}
|
||||
|
||||
/// The flat-default server scope name, if set (RFC-011).
|
||||
pub(crate) fn default_server(&self) -> Option<&str> {
|
||||
self.defaults.server.as_deref()
|
||||
}
|
||||
|
||||
/// The flat-default store scope URI, if set (RFC-011) — the zero-flag
|
||||
/// local-dev default.
|
||||
pub(crate) fn default_store(&self) -> Option<&str> {
|
||||
self.defaults.store.as_deref()
|
||||
}
|
||||
|
||||
/// The flat-default graph within a server/cluster scope, if set (RFC-011).
|
||||
pub(crate) fn default_graph(&self) -> Option<&str> {
|
||||
self.defaults.default_graph.as_deref()
|
||||
}
|
||||
|
||||
/// A scope binds one entity (Decision 6): `defaults.server` and
|
||||
/// `defaults.store` are mutually exclusive, and a `store` (already a single
|
||||
/// graph) cannot carry a `default_graph`. Both are refused loudly rather
|
||||
/// than silently dropped.
|
||||
fn validate_defaults(&self) -> Result<()> {
|
||||
if self.defaults.server.is_some() && self.defaults.store.is_some() {
|
||||
bail!(
|
||||
"operator config `defaults` sets both `server` and `store` — a default scope \
|
||||
binds one entity; keep one (use a `profile` if you need both)"
|
||||
);
|
||||
}
|
||||
if self.defaults.store.is_some() && self.defaults.default_graph.is_some() {
|
||||
bail!(
|
||||
"operator config `defaults` sets both `store` and `default_graph` — a store is \
|
||||
already a single graph; drop `default_graph` (it applies only to a server/cluster scope)"
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl OperatorProfile {
|
||||
/// The single entity this profile binds, or a loud error if it binds zero
|
||||
/// or more than one of {server, cluster, store} (Decision 6: a scope binds
|
||||
/// exactly one entity). Validated here, on use, rather than at parse time.
|
||||
pub(crate) fn binding(&self, profile_name: &str) -> Result<ScopeBinding> {
|
||||
let set: Vec<&str> = [
|
||||
self.server.as_ref().map(|_| "server"),
|
||||
self.cluster.as_ref().map(|_| "cluster"),
|
||||
self.store.as_ref().map(|_| "store"),
|
||||
]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect();
|
||||
match set.as_slice() {
|
||||
["server"] => Ok(ScopeBinding::Server(self.server.clone().unwrap())),
|
||||
["cluster"] => Ok(ScopeBinding::Cluster(self.cluster.clone().unwrap())),
|
||||
["store"] => Ok(ScopeBinding::Store(self.store.clone().unwrap())),
|
||||
[] => Err(eyre!(
|
||||
"profile '{profile_name}' binds no scope; set exactly one of \
|
||||
`server`, `cluster`, or `store`"
|
||||
)),
|
||||
many => Err(eyre!(
|
||||
"profile '{profile_name}' binds {} scopes ({}); a profile must \
|
||||
bind exactly one of `server`, `cluster`, or `store`",
|
||||
many.len(),
|
||||
many.join(", ")
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The operator dir: `$OMNIGRAPH_HOME` if set (tilde-expanded), else
|
||||
|
|
@ -172,6 +312,7 @@ pub(crate) fn load_operator_config_at(path: &Path) -> Result<OperatorConfig> {
|
|||
for warning in config.unknown_key_warnings() {
|
||||
eprintln!("warning: {warning} in operator config '{}'", path.display());
|
||||
}
|
||||
config.validate_defaults()?;
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
|
|
@ -196,6 +337,12 @@ impl OperatorConfig {
|
|||
for (name, alias) in &self.aliases {
|
||||
collect(&alias.unknown, &format!("aliases.{name}."));
|
||||
}
|
||||
for (name, profile) in &self.profiles {
|
||||
collect(&profile.unknown, &format!("profiles.{name}."));
|
||||
}
|
||||
for (name, cluster) in &self.clusters {
|
||||
collect(&cluster.unknown, &format!("clusters.{name}."));
|
||||
}
|
||||
warnings
|
||||
}
|
||||
}
|
||||
|
|
@ -444,6 +591,42 @@ mod tests {
|
|||
assert_eq!(config.output(), Some(ReadOutputFormat::Json));
|
||||
}
|
||||
|
||||
/// A `defaults.store` entry loads and is exposed through `default_store()`,
/// leaving `default_server()` unset.
#[test]
fn defaults_store_parses_and_is_accessible() {
    let tmp = tempfile::tempdir().unwrap();
    let config_path = tmp.path().join("config.yaml");
    fs::write(&config_path, "defaults:\n store: file:///tmp/dev.omni\n").unwrap();

    let loaded = load_operator_config_at(&config_path).unwrap();

    assert_eq!(loaded.default_store(), Some("file:///tmp/dev.omni"));
    assert_eq!(loaded.default_server(), None);
}
|
||||
|
||||
/// Setting both `defaults.server` and `defaults.store` makes loading fail.
#[test]
fn defaults_server_and_store_together_is_a_loud_error() {
    let tmp = tempfile::tempdir().unwrap();
    let config_path = tmp.path().join("config.yaml");
    let yaml = "defaults:\n server: prod\n store: file:///tmp/dev.omni\n";
    fs::write(&config_path, yaml).unwrap();

    let failure = load_operator_config_at(&config_path).unwrap_err();
    let err = failure.to_string();

    assert!(err.contains("binds one entity"), "{err}");
}
|
||||
|
||||
/// Setting `defaults.default_graph` next to `defaults.store` makes loading fail.
#[test]
fn defaults_store_with_default_graph_is_a_loud_error() {
    let tmp = tempfile::tempdir().unwrap();
    let config_path = tmp.path().join("config.yaml");
    let yaml = "defaults:\n store: file:///tmp/dev.omni\n default_graph: knowledge\n";
    fs::write(&config_path, yaml).unwrap();

    let failure = load_operator_config_at(&config_path).unwrap_err();
    let err = failure.to_string();

    assert!(err.contains("already a single graph"), "{err}");
}
|
||||
|
||||
#[test]
|
||||
fn unknown_keys_warn_but_load() {
|
||||
// A file written for a later slice (servers/aliases) must load
|
||||
|
|
@ -464,6 +647,82 @@ mod tests {
|
|||
assert_eq!(config.servers["prod"].url, "https://example.com");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_profiles_clusters_and_scope_defaults() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let path = dir.path().join("config.yaml");
|
||||
let yaml = "\
|
||||
defaults:
|
||||
server: prod
|
||||
default_graph: knowledge
|
||||
servers:
|
||||
prod:
|
||||
url: https://example.com
|
||||
clusters:
|
||||
brain:
|
||||
root: s3://acme/clusters/brain
|
||||
profiles:
|
||||
staging:
|
||||
server: staging
|
||||
default_graph: knowledge
|
||||
brain-admin:
|
||||
cluster: brain
|
||||
default_graph: knowledge
|
||||
";
|
||||
fs::write(&path, yaml).unwrap();
|
||||
let config = load_operator_config_at(&path).unwrap();
|
||||
assert_eq!(config.default_server(), Some("prod"));
|
||||
assert_eq!(config.default_graph(), Some("knowledge"));
|
||||
assert_eq!(config.cluster_root("brain"), Some("s3://acme/clusters/brain"));
|
||||
assert_eq!(
|
||||
config.profile("staging").unwrap().binding("staging").unwrap(),
|
||||
ScopeBinding::Server("staging".into())
|
||||
);
|
||||
assert_eq!(
|
||||
config
|
||||
.profile("brain-admin")
|
||||
.unwrap()
|
||||
.binding("brain-admin")
|
||||
.unwrap(),
|
||||
ScopeBinding::Cluster("brain".into())
|
||||
);
|
||||
// No unknown-key warnings for the new blocks.
|
||||
assert!(config.unknown_key_warnings().is_empty(), "{:?}", config.unknown_key_warnings());
|
||||
}
|
||||
|
||||
/// `binding()` refuses a profile that sets none of the scope keys, and one
/// that sets two of them.
#[test]
fn profile_binding_rejects_zero_or_multiple_entities() {
    let empty = OperatorProfile::default();
    let err = empty.binding("p").unwrap_err().to_string();
    assert!(err.contains("binds no scope"), "{err}");

    let doubled = OperatorProfile {
        server: Some("prod".into()),
        store: Some("graph.omni".into()),
        ..Default::default()
    };
    let err = doubled.binding("p").unwrap_err().to_string();
    assert!(err.contains("binds 2 scopes"), "{err}");
    let names_both = err.contains("server") && err.contains("store");
    assert!(names_both, "{err}");
}
|
||||
|
||||
/// An undeclared key inside a profile is reported by `unknown_key_warnings()`
/// under its `profiles.<name>.` prefix.
#[test]
fn unknown_keys_in_a_profile_warn() {
    let tmp = tempfile::tempdir().unwrap();
    let config_path = tmp.path().join("config.yaml");
    let yaml = "profiles:\n p:\n server: prod\n flavour: spicy\n";
    fs::write(&config_path, yaml).unwrap();

    let loaded = load_operator_config_at(&config_path).unwrap();
    let warnings = loaded.unknown_key_warnings();
    let mentions_flavour = warnings.iter().any(|w| w.contains("`profiles.p.flavour`"));

    assert!(mentions_flavour, "{warnings:?}");
}
|
||||
|
||||
#[test]
|
||||
fn malformed_yaml_is_a_loud_error() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ pub(crate) struct LoadOutput {
|
|||
pub(crate) fn load_output_from_tables(
|
||||
uri: &str,
|
||||
branch: &str,
|
||||
mode: CliLoadMode,
|
||||
mode: &'static str,
|
||||
output: &IngestOutput,
|
||||
) -> LoadOutput {
|
||||
let mut nodes_loaded = 0;
|
||||
|
|
@ -40,7 +40,7 @@ pub(crate) fn load_output_from_tables(
|
|||
LoadOutput {
|
||||
uri: uri.to_string(),
|
||||
branch: branch.to_string(),
|
||||
mode: mode.as_str(),
|
||||
mode,
|
||||
base_branch: output.base_branch.clone(),
|
||||
branch_created: output.branch_created,
|
||||
nodes_loaded,
|
||||
|
|
@ -50,6 +50,31 @@ pub(crate) fn load_output_from_tables(
|
|||
}
|
||||
}
|
||||
|
||||
/// The local arm's twin of `load_output_from_tables`: build the same
|
||||
/// `LoadOutput` from the engine `LoadResult` directly (the remote arm only
|
||||
/// has the wire `IngestOutput`'s table list; the local arm has the full
|
||||
/// result). Both load mappings live here, next to the struct — RFC-009
|
||||
/// Phase 2's "one place" for the `-> LoadOutput` mapping that used to fork
|
||||
/// between this file and main.rs's inline construction.
|
||||
pub(crate) fn load_output_from_result(
|
||||
uri: &str,
|
||||
branch: &str,
|
||||
mode: &'static str,
|
||||
result: &omnigraph::loader::LoadResult,
|
||||
) -> LoadOutput {
|
||||
LoadOutput {
|
||||
uri: uri.to_string(),
|
||||
branch: branch.to_string(),
|
||||
mode,
|
||||
base_branch: result.base_branch.clone(),
|
||||
branch_created: result.branch_created,
|
||||
nodes_loaded: result.nodes_loaded.values().sum(),
|
||||
edges_loaded: result.edges_loaded.values().sum(),
|
||||
node_types_loaded: result.nodes_loaded.len(),
|
||||
edge_types_loaded: result.edges_loaded.len(),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct SchemaPlanOutput<'a> {
|
||||
pub(crate) uri: &'a str,
|
||||
|
|
@ -671,9 +696,24 @@ pub(crate) fn render_constraint(constraint: &omnigraph_compiler::schema::ast::Co
|
|||
pub(crate) fn render_annotations(annotations: &[omnigraph_compiler::schema::ast::Annotation]) -> String {
|
||||
annotations
|
||||
.iter()
|
||||
.map(|annotation| match &annotation.value {
|
||||
Some(value) => format!("@{}({})", annotation.name, value),
|
||||
None => format!("@{}", annotation.name),
|
||||
.map(|annotation| {
|
||||
let mut args: Vec<String> = Vec::new();
|
||||
// Values are parsed via `decode_string_literal` (quotes stripped), so
|
||||
// re-quote them as string literals on render — otherwise a value with
|
||||
// non-ident chars (e.g. `model=openai/text-embedding-3-large`) fails to
|
||||
// round-trip back through the schema parser (`annotation_kwarg` wants a
|
||||
// quoted `literal`, not a bare token).
|
||||
if let Some(value) = &annotation.value {
|
||||
args.push(format!("\"{}\"", value));
|
||||
}
|
||||
for (key, val) in &annotation.kwargs {
|
||||
args.push(format!("{}=\"{}\"", key, val));
|
||||
}
|
||||
if args.is_empty() {
|
||||
format!("@{}", annotation.name)
|
||||
} else {
|
||||
format!("@{}({})", annotation.name, args.join(", "))
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
|
|
@ -709,15 +749,10 @@ pub(crate) fn print_snapshot_human(branch: &str, manifest_version: u64, entries:
|
|||
pub(crate) fn print_read_output(
|
||||
output: &ReadOutput,
|
||||
format: ReadOutputFormat,
|
||||
config: &OmnigraphConfig,
|
||||
) -> Result<()> {
|
||||
println!(
|
||||
"{}",
|
||||
render_read(
|
||||
output,
|
||||
format,
|
||||
&resolve_table_render_options(config),
|
||||
)?
|
||||
render_read(output, format, &resolve_table_render_options())?
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -787,10 +822,6 @@ pub(crate) fn print_policy_explain(decision: &PolicyDecision, actor_id: &str, re
|
|||
println!("message: {}", decision.message);
|
||||
}
|
||||
|
||||
pub(crate) fn yaml_string(value: &str) -> String {
|
||||
format!("'{}'", value.replace('\'', "''"))
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
pub(crate) struct QueriesIssue {
|
||||
pub(crate) query: String,
|
||||
|
|
@ -871,20 +902,126 @@ pub(crate) fn finish_logout(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Table prefs cascade (RFC-007/008): legacy cli.table_* (window) >
|
||||
/// operator defaults.table_* > built-in.
|
||||
pub(crate) fn resolve_table_render_options(config: &OmnigraphConfig) -> ReadRenderOptions {
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct ProfileListItem {
|
||||
pub(crate) name: String,
|
||||
/// `server: <n>` / `cluster: <n>` / `store: <uri>` / `invalid: <reason>`.
|
||||
pub(crate) binding: String,
|
||||
/// `server` | `cluster` | `store` | `invalid`.
|
||||
pub(crate) scope_kind: String,
|
||||
/// The bound server/cluster name, or the store URI. `None` when invalid.
|
||||
pub(crate) target: Option<String>,
|
||||
pub(crate) valid: bool,
|
||||
pub(crate) error: Option<String>,
|
||||
pub(crate) default_graph: Option<String>,
|
||||
pub(crate) active: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct ProfileDetail {
|
||||
/// Profile name, or `(defaults)` for the no-name flat-defaults view.
|
||||
pub(crate) name: String,
|
||||
/// `server` | `cluster` | `store` | `none`.
|
||||
pub(crate) scope_kind: String,
|
||||
/// The bound server/cluster name, or the store URI.
|
||||
pub(crate) target: Option<String>,
|
||||
/// Resolved endpoint: a server's URL / a cluster's root / the store URI;
|
||||
/// `None` if a named server/cluster isn't defined in this config.
|
||||
pub(crate) endpoint: Option<String>,
|
||||
pub(crate) default_graph: Option<String>,
|
||||
pub(crate) output_format: Option<String>,
|
||||
}
|
||||
|
||||
pub(crate) fn print_profile_list(items: &[ProfileListItem], json: bool) -> Result<()> {
|
||||
if json {
|
||||
return print_json(&items);
|
||||
}
|
||||
if items.is_empty() {
|
||||
println!("no profiles defined in the operator config");
|
||||
return Ok(());
|
||||
}
|
||||
for item in items {
|
||||
let active = if item.active { " (active)" } else { "" };
|
||||
let graph = item
|
||||
.default_graph
|
||||
.as_deref()
|
||||
.map(|g| format!(" · graph: {g}"))
|
||||
.unwrap_or_default();
|
||||
println!("{}{active} {}{graph}", item.name, item.binding);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn print_profile_detail(detail: &ProfileDetail, json: bool) -> Result<()> {
|
||||
if json {
|
||||
return print_json(detail);
|
||||
}
|
||||
println!("profile: {}", detail.name);
|
||||
let target = detail
|
||||
.target
|
||||
.as_deref()
|
||||
.map(|t| format!(" {t}"))
|
||||
.unwrap_or_default();
|
||||
println!(" scope: {}{target}", detail.scope_kind);
|
||||
if let Some(endpoint) = &detail.endpoint {
|
||||
println!(" endpoint: {endpoint}");
|
||||
} else if matches!(detail.scope_kind.as_str(), "server" | "cluster") {
|
||||
println!(" endpoint: (undefined — name not in this config)");
|
||||
}
|
||||
if let Some(graph) = &detail.default_graph {
|
||||
println!(" default graph: {graph}");
|
||||
}
|
||||
if let Some(format) = &detail.output_format {
|
||||
println!(" output: {format}");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Table prefs cascade (RFC-011): operator defaults.table_* > built-in.
|
||||
pub(crate) fn resolve_table_render_options() -> ReadRenderOptions {
|
||||
let operator = crate::operator::load_operator_config().unwrap_or_default();
|
||||
ReadRenderOptions {
|
||||
max_column_width: config
|
||||
.cli
|
||||
.table_max_column_width
|
||||
.or(operator.defaults.table_max_column_width)
|
||||
.unwrap_or(80),
|
||||
cell_layout: config
|
||||
.cli
|
||||
.table_cell_layout
|
||||
.or(operator.defaults.table_cell_layout)
|
||||
.unwrap_or_default(),
|
||||
max_column_width: operator.defaults.table_max_column_width.unwrap_or(80),
|
||||
cell_layout: operator.defaults.table_cell_layout.unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use omnigraph_compiler::schema::ast::Annotation;
    use omnigraph_compiler::schema::parser::parse_schema;
    use std::collections::BTreeMap;

    use super::render_annotations;

    /// Rendered annotation arguments are quoted, so the output of
    /// `render_annotations` parses again as schema source.
    #[test]
    fn render_annotations_quotes_values_so_embed_round_trips() {
        let kwargs = BTreeMap::from([(
            "model".to_string(),
            "openai/text-embedding-3-large".to_string(),
        )]);
        let embed = Annotation {
            name: "embed".to_string(),
            value: Some("title".to_string()),
            kwargs,
        };

        let rendered = render_annotations(std::slice::from_ref(&embed));
        let expected = r#"@embed("title", model="openai/text-embedding-3-large")"#;
        assert_eq!(rendered, expected);

        // Regression: a bare `model=openai/text-embedding-3-large` is not a
        // valid `annotation_kwarg` literal, which made `schema show` output
        // impossible to re-parse. The rendered form has to survive the grammar.
        let schema = format!("node Doc {{\ntitle: String\nembedding: Vector(3) {rendered}\n}}\n");
        let parsed = parse_schema(&schema);
        assert!(
            parsed.is_ok(),
            "rendered @embed must re-parse: {:?}",
            parsed.err()
        );
    }
}
|
||||
|
|
|
|||
357
crates/omnigraph-cli/src/planes.rs
Normal file
357
crates/omnigraph-cli/src/planes.rs
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
//! Declared CLI "planes" (RFC-010 Slice 1).
|
||||
//!
|
||||
//! Every subcommand belongs to exactly one plane. This classification is the
|
||||
//! single source of truth the wrong-plane guard consumes — and that later
|
||||
//! RFC-010 slices (the capability surface, plane-grouped help) will consume
|
||||
//! too. The `command_plane` match is **exhaustive on purpose**: adding a
|
||||
//! `Command` variant is a compile error until its plane is declared, so the
|
||||
//! surface cannot silently drift from the command set.
|
||||
//!
|
||||
//! See [docs/dev/rfc-010-cli-planes-restructure.md].
|
||||
|
||||
use color_eyre::Result;
|
||||
use color_eyre::eyre::bail;
|
||||
|
||||
use crate::cli::{Cli, Command, QueriesCommand, SchemaCommand};
|
||||
|
||||
/// Internal classification of a CLI subcommand; every command maps to exactly
/// one plane.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Plane {
    /// Runs against a graph, embedded **or** via `--server` (the `GraphClient`
    /// axis). This is the only plane where the data-plane addressing flags
    /// (`--server`/`--graph`) apply.
    Data,
    /// Direct storage access with no server involved: maintenance and
    /// local-only inspection that must keep working while the server is down.
    Storage,
    /// Operates on a cluster directory rather than a graph URI.
    Control,
    /// Touches no graph at all — session / config / local tooling.
    Session,
}

impl std::fmt::Display for Plane {
    /// Writes the lowercase plane name (`data`, `storage`, `control`,
    /// `session`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Plane::Data => "data",
            Plane::Storage => "storage",
            Plane::Control => "control",
            Plane::Session => "session",
        };
        f.write_str(name)
    }
}
|
||||
|
||||
/// What a command *needs*, expressed in the user-facing vocabulary (RFC-011).
/// CLI errors and `--help` speak this language, while `Plane` remains the
/// internal classifier (`Capability` is derived from it, so the two cannot
/// drift).
///
/// - `any` — graph-scoped data; served via a server scope, or direct against a
///   store scope. Accepts `--server`/`--graph`.
/// - `served` — requires a server. Accepts `--server`/`--graph`.
/// - `direct` — storage-native; opens storage directly, never through a server.
/// - `control` — operates on a cluster (control plane).
/// - `local` — addresses no graph at all.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Capability {
    Any,
    Served,
    Direct,
    Control,
    Local,
}

impl Capability {
    /// Human-readable phrase used in error messages
    /// (`` `optimize` is a {…} command ``).
    pub(crate) fn describe(self) -> &'static str {
        match self {
            Self::Any => "data",
            Self::Served => "served",
            Self::Direct => "direct (storage-native)",
            Self::Control => "cluster control",
            Self::Local => "local",
        }
    }

    /// Whether `--server`/`--graph` (served-graph addressing) apply: true only
    /// for the capabilities that reach a graph through a server.
    fn accepts_server_addressing(self) -> bool {
        match self {
            Self::Any | Self::Served => true,
            Self::Direct | Self::Control | Self::Local => false,
        }
    }
}
|
||||
|
||||
/// The capability a subcommand needs, derived from its `Plane` (the exhaustive
|
||||
/// classifier) plus the one Data→Served refinement: `graphs` is remote-only.
|
||||
///
|
||||
/// This reflects *current enforced behavior*, so messages stay truthful:
|
||||
/// `queries`/`policy` read a cluster's applied state (`Control`).
|
||||
pub(crate) fn command_capability(cmd: &Command) -> Capability {
|
||||
if let Command::Graphs { .. } = cmd {
|
||||
return Capability::Served;
|
||||
}
|
||||
match command_plane(cmd) {
|
||||
Plane::Data => Capability::Any,
|
||||
Plane::Storage => Capability::Direct,
|
||||
Plane::Control => Capability::Control,
|
||||
Plane::Session => Capability::Local,
|
||||
}
|
||||
}
|
||||
|
||||
/// The plane a subcommand belongs to. Exhaustive — a new `Command` variant
|
||||
/// will not compile until classified. Descends into the nested enums where
|
||||
/// the plane differs per subcommand (`schema plan` is storage while `schema
|
||||
/// show`/`apply` are data; `queries`/`policy` read cluster applied state).
|
||||
pub(crate) fn command_plane(cmd: &Command) -> Plane {
|
||||
match cmd {
|
||||
Command::Query { .. }
|
||||
| Command::Mutate { .. }
|
||||
| Command::Load { .. }
|
||||
| Command::Ingest { .. }
|
||||
| Command::Branch { .. }
|
||||
| Command::Snapshot { .. }
|
||||
| Command::Export { .. }
|
||||
| Command::Commit { .. }
|
||||
| Command::Graphs { .. } => Plane::Data,
|
||||
Command::Schema {
|
||||
command: SchemaCommand::Show { .. } | SchemaCommand::Apply { .. },
|
||||
} => Plane::Data,
|
||||
Command::Schema {
|
||||
command: SchemaCommand::Plan { .. },
|
||||
} => Plane::Storage,
|
||||
// `queries` and `policy` tooling now source their inputs from a
|
||||
// cluster's applied state (`--cluster`), so they live on the control
|
||||
// plane (RFC-011 — omnigraph.yaml excised from the CLI).
|
||||
Command::Queries { .. } => Plane::Control,
|
||||
Command::Policy { .. } => Plane::Control,
|
||||
Command::Init { .. }
|
||||
| Command::Optimize { .. }
|
||||
| Command::Repair { .. }
|
||||
| Command::Cleanup { .. }
|
||||
| Command::Lint { .. } => Plane::Storage,
|
||||
Command::Cluster { .. } => Plane::Control,
|
||||
Command::Alias { .. }
|
||||
| Command::Embed(_)
|
||||
| Command::Login { .. }
|
||||
| Command::Logout { .. }
|
||||
| Command::Profile { .. }
|
||||
| Command::Version => Plane::Session,
|
||||
}
|
||||
}
|
||||
|
||||
/// User-facing label for a subcommand (descends one level for the nested
|
||||
/// families so messages read `schema plan`, `queries validate`, etc.).
|
||||
pub(crate) fn command_label(cmd: &Command) -> &'static str {
|
||||
match cmd {
|
||||
Command::Version => "version",
|
||||
Command::Login { .. } => "login",
|
||||
Command::Logout { .. } => "logout",
|
||||
Command::Profile { .. } => "profile",
|
||||
Command::Embed(_) => "embed",
|
||||
Command::Init { .. } => "init",
|
||||
Command::Load { .. } => "load",
|
||||
Command::Ingest { .. } => "ingest",
|
||||
Command::Branch { .. } => "branch",
|
||||
Command::Schema { command } => match command {
|
||||
SchemaCommand::Plan { .. } => "schema plan",
|
||||
SchemaCommand::Apply { .. } => "schema apply",
|
||||
SchemaCommand::Show { .. } => "schema show",
|
||||
},
|
||||
Command::Lint { .. } => "lint",
|
||||
Command::Queries { command } => match command {
|
||||
QueriesCommand::Validate { .. } => "queries validate",
|
||||
QueriesCommand::List { .. } => "queries list",
|
||||
},
|
||||
Command::Snapshot { .. } => "snapshot",
|
||||
Command::Export { .. } => "export",
|
||||
Command::Commit { .. } => "commit",
|
||||
Command::Query { .. } => "query",
|
||||
Command::Mutate { .. } => "mutate",
|
||||
Command::Alias { .. } => "alias",
|
||||
Command::Policy { .. } => "policy",
|
||||
Command::Optimize { .. } => "optimize",
|
||||
Command::Repair { .. } => "repair",
|
||||
Command::Cleanup { .. } => "cleanup",
|
||||
Command::Cluster { .. } => "cluster",
|
||||
Command::Graphs { .. } => "graphs",
|
||||
}
|
||||
}
|
||||
|
||||
/// The verbs that consume a cluster scope. Maintenance/lint select a graph with
|
||||
/// `--cluster <root> --graph <id>`; policy/queries inspect the cluster's
|
||||
/// applied control-plane state and may optionally use `--graph` to select one
|
||||
/// bundle/registry. `init` is storage-plane too but *creates* a graph (cluster
|
||||
/// graphs are born from `cluster apply`, not `init`), and `schema plan` takes a
|
||||
/// positional URI, so the guard rejects `--cluster`/`--graph` there rather than
|
||||
/// silently dropping the flag.
|
||||
pub(crate) fn accepts_cluster_addressing(cmd: &Command) -> bool {
|
||||
matches!(
|
||||
cmd,
|
||||
Command::Optimize { .. }
|
||||
| Command::Repair { .. }
|
||||
| Command::Cleanup { .. }
|
||||
// `lint` can type-check a `.gq` against a cluster graph's schema
|
||||
// (RFC-011): `--cluster <dir> --graph <id>`.
|
||||
| Command::Lint { .. }
|
||||
// The policy/queries tooling addresses a cluster's applied state
|
||||
// (RFC-011): `--cluster <dir>` selects the cluster, `--graph <id>`
|
||||
// picks a graph's bundle/registry within it.
|
||||
| Command::Policy { .. }
|
||||
| Command::Queries { .. }
|
||||
)
|
||||
}
|
||||
|
||||
/// Reject a scope-addressing flag (`--server`/`--cluster`/`--graph`) on a verb
|
||||
/// that cannot consume it, rather than silently dropping it (the old behavior:
|
||||
/// e.g. `optimize --server prod` dropped `--server` and failed later with an
|
||||
/// unrelated message). `alias` gets an extra guard because its binding owns all
|
||||
/// addressing and several ignored globals sit outside this three-flag guard.
|
||||
/// Each flag has a distinct valid surface:
|
||||
/// - `--server` → served-graph scopes (`any`/`served`);
|
||||
/// - `--cluster` → cluster-scoped direct/control verbs;
|
||||
/// - `--graph` → any multi-graph scope: a served scope *or* a cluster one.
|
||||
/// RFC-010 Slice 1, generalized for RFC-011 cluster addressing.
|
||||
pub(crate) fn guard_addressing(cli: &Cli) -> Result<()> {
|
||||
if let Command::Alias { .. } = &cli.command {
|
||||
let mut flags = Vec::new();
|
||||
if cli.server.is_some() {
|
||||
flags.push("--server");
|
||||
}
|
||||
if cli.graph.is_some() {
|
||||
flags.push("--graph");
|
||||
}
|
||||
if cli.store.is_some() {
|
||||
flags.push("--store");
|
||||
}
|
||||
if cli.cluster.is_some() {
|
||||
flags.push("--cluster");
|
||||
}
|
||||
if cli.profile.is_some() {
|
||||
flags.push("--profile");
|
||||
}
|
||||
if cli.as_actor.is_some() {
|
||||
flags.push("--as");
|
||||
}
|
||||
if !flags.is_empty() {
|
||||
bail!(
|
||||
"`alias` uses the server, graph, and stored query declared in \
|
||||
`aliases.<name>` in ~/.omnigraph/config.yaml; remove global scope \
|
||||
flag(s): {}",
|
||||
flags.join(", ")
|
||||
);
|
||||
}
|
||||
}
|
||||
if cli.server.is_none() && cli.cluster.is_none() && cli.graph.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
let capability = command_capability(&cli.command);
|
||||
let label = command_label(&cli.command);
|
||||
let cluster_ok = accepts_cluster_addressing(&cli.command);
|
||||
|
||||
if cli.server.is_some() && !capability.accepts_server_addressing() {
|
||||
bail!(
|
||||
"`{label}` is a {} command; --server addresses a served graph and does not apply.{}",
|
||||
capability.describe(),
|
||||
remediation(capability, &cli.command),
|
||||
);
|
||||
}
|
||||
if cli.cluster.is_some() && !cluster_ok {
|
||||
bail!(
|
||||
"`{label}` is a {} command; --cluster addresses a cluster-scoped command \
|
||||
and does not apply.{}",
|
||||
capability.describe(),
|
||||
remediation(capability, &cli.command),
|
||||
);
|
||||
}
|
||||
if cli.graph.is_some() && !(capability.accepts_server_addressing() || cluster_ok) {
|
||||
bail!(
|
||||
"`{label}` is a {} command; --graph selects a graph within a server or cluster \
|
||||
scope and does not apply.{}",
|
||||
capability.describe(),
|
||||
remediation(capability, &cli.command),
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// The "what to do instead" tail for a wrong-address error, by capability.
|
||||
/// Includes its own leading space when non-empty so the caller appends it
|
||||
/// directly — an empty tail (the served-addressing capabilities, which only
|
||||
/// reach this fn for a misplaced `--cluster`/`--graph`) leaves no trailing space.
|
||||
fn remediation(capability: Capability, cmd: &Command) -> &'static str {
|
||||
match capability {
|
||||
Capability::Direct => match cmd {
|
||||
Command::Init { .. } => " Pass a storage URI.",
|
||||
Command::Optimize { .. } | Command::Repair { .. } | Command::Cleanup { .. } => {
|
||||
" Pass a storage URI, or --cluster <dir> --graph <id>."
|
||||
}
|
||||
_ => " Pass a storage URI.",
|
||||
},
|
||||
Capability::Control => match cmd {
|
||||
Command::Cluster { .. } => {
|
||||
" It operates on a cluster config directory (pass --config <dir>)."
|
||||
}
|
||||
Command::Policy { .. } | Command::Queries { .. } => {
|
||||
" It operates on a cluster (pass --cluster <dir|uri>, or select a cluster profile)."
|
||||
}
|
||||
_ => " It operates on a cluster.",
|
||||
},
|
||||
Capability::Local => " It does not address a graph.",
|
||||
Capability::Any | Capability::Served => "",
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use clap::Parser;

    use super::*;

    /// The behavior-preservation contract: `--server`/`--graph` apply to the
    /// served-graph capabilities (`any`, `served`) and nothing else. This is
    /// the old "Data plane only" allow set, re-expressed — graphs (the one
    /// Data→Served verb) was already allowed.
    #[test]
    fn server_addressing_allowed_exactly_on_any_and_served() {
        for allowed in [Capability::Any, Capability::Served] {
            assert!(allowed.accepts_server_addressing());
        }
        for rejected in [Capability::Direct, Capability::Control, Capability::Local] {
            assert!(!rejected.accepts_server_addressing());
        }
    }

    #[test]
    fn command_capability_classifies_representative_verbs() {
        // Parse a full command line and classify the resulting command.
        let cap =
            |args: &[&str]| command_capability(&Cli::try_parse_from(args).unwrap().command);

        // The one Data→Served refinement — if the `graphs` guard were deleted,
        // every other assertion here would still pass.
        assert_eq!(cap(&["omnigraph", "graphs", "list"]), Capability::Served);
        assert_eq!(cap(&["omnigraph", "alias", "who"]), Capability::Local);
        assert_eq!(
            cap(&["omnigraph", "optimize", "graph.omni"]),
            Capability::Direct
        );
        assert_eq!(
            cap(&["omnigraph", "schema", "plan", "--schema", "s.pg", "graph.omni"]),
            Capability::Direct
        );
        assert_eq!(
            cap(&["omnigraph", "cluster", "status", "--config", "."]),
            Capability::Control
        );
        assert_eq!(cap(&["omnigraph", "version"]), Capability::Local);
        // `queries`/`policy` tooling reads cluster state now (control plane).
        assert_eq!(cap(&["omnigraph", "queries", "list"]), Capability::Control);
        assert_eq!(
            cap(&["omnigraph", "policy", "validate"]),
            Capability::Control
        );
    }

    #[test]
    fn every_capability_describes_distinctly() {
        let phrases = [
            Capability::Any.describe(),
            Capability::Served.describe(),
            Capability::Direct.describe(),
            Capability::Control.describe(),
            Capability::Local.describe(),
        ];
        for (idx, phrase) in phrases.iter().enumerate() {
            assert!(!phrase.is_empty());
            // Compare against every later phrase so each pair is checked once.
            for later in phrases.iter().skip(idx + 1) {
                assert_ne!(phrase, later);
            }
        }
    }
}
|
||||
|
|
@ -1,9 +1,31 @@
|
|||
use clap::ValueEnum;
|
||||
use color_eyre::eyre::Result;
|
||||
use omnigraph_server::ReadOutputFormat;
|
||||
use omnigraph_server::api::ReadOutput;
|
||||
use omnigraph_server::config::TableCellLayout;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
/// Output rendering format for read-shaped commands (`read`/`query`/`alias`).
|
||||
/// A CLI presentation concern — lives here, not in the server.
|
||||
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Serialize, Deserialize, ValueEnum)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ReadOutputFormat {
|
||||
#[default]
|
||||
Table,
|
||||
Kv,
|
||||
Csv,
|
||||
Jsonl,
|
||||
Json,
|
||||
}
|
||||
|
||||
/// How an over-wide table cell is laid out when rendering `--format table`.
|
||||
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Serialize, Deserialize, ValueEnum)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum TableCellLayout {
|
||||
#[default]
|
||||
Truncate,
|
||||
Wrap,
|
||||
}
|
||||
|
||||
pub struct ReadRenderOptions {
|
||||
pub max_column_width: usize,
|
||||
pub cell_layout: TableCellLayout,
|
||||
|
|
|
|||
529
crates/omnigraph-cli/src/scope.rs
Normal file
529
crates/omnigraph-cli/src/scope.rs
Normal file
|
|
@ -0,0 +1,529 @@
|
|||
//! RFC-011 Slice A scope resolution.
|
||||
//!
|
||||
//! Translates the new scope inputs (`--profile` / `--store` / operator-config
|
||||
//! `profiles`/`clusters`/`defaults`) into the SAME effective addressing tuple
|
||||
//! the existing `GraphClient` factories (`client.rs`) and the maintenance
|
||||
//! resolver (`helpers::resolve_storage_uri`) already consume. This is a
|
||||
//! translation layer that sits *in front* of those resolvers — it is purely
|
||||
//! additive: an explicit legacy address (`--uri`/`--target`/`--server`/
|
||||
//! `--store`) wins and reproduces today's behavior exactly, so existing
|
||||
//! invocations are unaffected.
|
||||
//!
|
||||
//! The access path (served vs direct) is never chosen here; it falls out of the
|
||||
//! scope's binding × the verb's capability. The capability→scope check rejects
|
||||
//! mismatches (e.g. a server scope on a maintenance verb) only on the *new*
|
||||
//! resolution paths.
|
||||
|
||||
use std::env;
|
||||
|
||||
use color_eyre::Result;
|
||||
use color_eyre::eyre::{bail, eyre};
|
||||
|
||||
use crate::operator::{OperatorConfig, ScopeBinding};
|
||||
use crate::planes::Capability;
|
||||
|
||||
pub(crate) const PROFILE_ENV: &str = "OMNIGRAPH_PROFILE";
|
||||
|
||||
/// Effective addressing for a command, expressed in the terms the existing
/// resolvers consume. Data/served verbs read `server`/`graph`/`uri`;
/// maintenance verbs read `cluster`/`cluster_graph`. Every field is optional,
/// and the `Default` value (all `None`) means "nothing resolved".
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct ResolvedScope {
    /// Server to address, when the scope is a served one.
    pub(crate) server: Option<String>,
    /// Graph selected within a server scope.
    pub(crate) graph: Option<String>,
    /// Store URI for direct access (a positional URI or `--store`).
    pub(crate) uri: Option<String>,
    /// Cluster root, when the scope is a cluster one.
    pub(crate) cluster: Option<String>,
    /// Graph selected within a cluster scope.
    pub(crate) cluster_graph: Option<String>,
}
|
||||
|
||||
/// Raw addressing inputs for a single command: the global scope flags, plus
/// the positional URI that belongs to the command itself.
pub(crate) struct ScopeFlags<'a> {
    /// `--profile`.
    pub(crate) profile: Option<&'a str>,
    /// `--store`.
    pub(crate) store: Option<&'a str>,
    /// `--server`.
    pub(crate) server: Option<&'a str>,
    /// `--cluster`.
    pub(crate) cluster: Option<&'a str>,
    /// `--graph`.
    pub(crate) graph: Option<&'a str>,
    /// The command's own positional URI (owned, unlike the borrowed flags).
    pub(crate) uri: Option<String>,
}
|
||||
|
||||
/// Resolve the scope for a command with `capability`. Precedence (RFC-011):
|
||||
/// 1. explicit primitive address (`uri`/`--server`/`--store`) → passthrough;
|
||||
/// 2. `--profile` / `OMNIGRAPH_PROFILE`;
|
||||
/// 3. flat `defaults.server` + `defaults.default_graph`;
|
||||
/// 4. nothing — downstream behaves as today.
|
||||
pub(crate) fn resolve_scope(
|
||||
op: &OperatorConfig,
|
||||
capability: Capability,
|
||||
flags: ScopeFlags<'_>,
|
||||
) -> Result<ResolvedScope> {
|
||||
// At most one explicit scope primitive may address a command — a positional
|
||||
// URI, `--store`, `--server`, or `--cluster` are mutually exclusive ways to
|
||||
// name the graph. Combining them is a contradiction, not a silent precedence.
|
||||
let primitives: Vec<&str> = [
|
||||
flags.uri.as_deref().map(|_| "a positional URI"),
|
||||
flags.store.map(|_| "--store"),
|
||||
flags.server.map(|_| "--server"),
|
||||
flags.cluster.map(|_| "--cluster"),
|
||||
]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect();
|
||||
if primitives.len() > 1 {
|
||||
bail!(
|
||||
"{} are mutually exclusive — pick one way to address the graph",
|
||||
primitives.join(" and ")
|
||||
);
|
||||
}
|
||||
|
||||
// 1a. `--cluster` is the cluster scope primitive (maintenance): resolve its
|
||||
// root + select the graph with `--graph`.
|
||||
if let Some(cluster) = flags.cluster {
|
||||
return scope_from_binding(
|
||||
op,
|
||||
capability,
|
||||
ScopeBinding::Cluster(cluster.to_string()),
|
||||
flags.graph.map(str::to_string),
|
||||
"--cluster",
|
||||
);
|
||||
}
|
||||
|
||||
// 1b. Any other explicit address wins; reproduce today's behavior untouched.
|
||||
// `--store` is an explicit store URI — fold it into `uri`.
|
||||
if flags.uri.is_some() || flags.server.is_some() || flags.store.is_some() {
|
||||
// `--graph` selects within a multi-graph scope; a bare positional URI /
|
||||
// `--store` is already a single graph, so a stray `--graph` is an error
|
||||
// rather than a silently-dropped flag.
|
||||
if flags.graph.is_some() && flags.server.is_none() {
|
||||
bail!(
|
||||
"--graph selects a graph within a server or cluster scope; a positional \
|
||||
URI / --store is already a single graph"
|
||||
);
|
||||
}
|
||||
return Ok(ResolvedScope {
|
||||
server: flags.server.map(str::to_string),
|
||||
graph: flags.graph.map(str::to_string),
|
||||
uri: flags.store.map(str::to_string).or(flags.uri),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
// 2. A named profile (flag, else env).
|
||||
let profile_name = flags
|
||||
.profile
|
||||
.map(str::to_string)
|
||||
.or_else(|| env::var(PROFILE_ENV).ok().filter(|s| !s.is_empty()));
|
||||
if let Some(name) = profile_name {
|
||||
let profile = op.profile(&name).ok_or_else(|| {
|
||||
eyre!("unknown profile '{name}' (not defined under `profiles:` in operator config)")
|
||||
})?;
|
||||
let binding = profile.binding(&name)?;
|
||||
let graph = flags
|
||||
.graph
|
||||
.map(str::to_string)
|
||||
.or_else(|| profile.default_graph.clone());
|
||||
return scope_from_binding(op, capability, binding, graph, &format!("profile '{name}'"));
|
||||
}
|
||||
|
||||
// 3. Flat default server scope.
|
||||
if let Some(server) = op.default_server() {
|
||||
let graph = flags
|
||||
.graph
|
||||
.map(str::to_string)
|
||||
.or_else(|| op.default_graph().map(str::to_string));
|
||||
return scope_from_binding(
|
||||
op,
|
||||
capability,
|
||||
ScopeBinding::Server(server.to_string()),
|
||||
graph,
|
||||
"operator defaults",
|
||||
);
|
||||
}
|
||||
|
||||
// 3b. Flat default store scope — the zero-flag local-dev default (RFC-011).
|
||||
// Mutually exclusive with `defaults.server` (enforced at config load).
|
||||
if let Some(store) = op.default_store() {
|
||||
return scope_from_binding(
|
||||
op,
|
||||
capability,
|
||||
ScopeBinding::Store(store.to_string()),
|
||||
flags.graph.map(str::to_string),
|
||||
"operator defaults",
|
||||
);
|
||||
}
|
||||
|
||||
// 4. Nothing resolved — leave the tuple empty; downstream falls through to
|
||||
// today's behavior (legacy `cli.graph` default or a no-address error).
|
||||
Ok(ResolvedScope::default())
|
||||
}
|
||||
|
||||
/// Map a resolved binding to the effective tuple, enforcing scope × capability
|
||||
/// capability (RFC-011): a server scope is served (data only); a cluster scope
|
||||
/// is privileged direct (maintenance/control only); a store scope is direct
|
||||
/// (either).
|
||||
fn scope_from_binding(
|
||||
op: &OperatorConfig,
|
||||
capability: Capability,
|
||||
binding: ScopeBinding,
|
||||
graph: Option<String>,
|
||||
source: &str,
|
||||
) -> Result<ResolvedScope> {
|
||||
match binding {
|
||||
ScopeBinding::Server(server) => {
|
||||
if capability == Capability::Direct {
|
||||
bail!(
|
||||
"this command needs direct storage access, but {source} resolves a \
|
||||
server scope; name storage explicitly with --store <uri> (or \
|
||||
--cluster <dir> --graph <id> for a managed graph)"
|
||||
);
|
||||
}
|
||||
Ok(ResolvedScope {
|
||||
server: Some(server),
|
||||
graph,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
ScopeBinding::Cluster(cluster) => {
|
||||
if capability == Capability::Any {
|
||||
bail!(
|
||||
"{source} resolves a cluster scope, which is not valid for graph data \
|
||||
commands; run data commands through a server, or use --store <uri> \
|
||||
for ad-hoc direct access"
|
||||
);
|
||||
}
|
||||
// A cluster value is a config name (resolved against `clusters:`)
|
||||
// or a literal root: an `s3://`/`file://` URI or a local cluster
|
||||
// directory. Only a configured name is rewritten; anything else is
|
||||
// passed through to the cluster-state resolver verbatim, so a bare
|
||||
// directory path keeps working as it did for per-command `--cluster`.
|
||||
let root = op
|
||||
.cluster_root(&cluster)
|
||||
.map(str::to_string)
|
||||
.unwrap_or(cluster);
|
||||
// A cluster holds many graphs; maintenance addresses one at a time.
|
||||
// When no `--graph`/`default_graph` is given, leave `cluster_graph`
|
||||
// empty and defer to the async storage-URI resolver (RFC-011 D7),
|
||||
// which enumerates the catalog: auto-use a sole graph, else error
|
||||
// and list the candidates.
|
||||
Ok(ResolvedScope {
|
||||
cluster: Some(root),
|
||||
cluster_graph: graph,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
ScopeBinding::Store(uri) => {
|
||||
if graph.is_some() {
|
||||
bail!(
|
||||
"--graph does not apply to a store scope ({source}): a store is already \
|
||||
a single graph"
|
||||
);
|
||||
}
|
||||
Ok(ResolvedScope {
|
||||
uri: Some(uri),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn cfg(yaml: &str) -> OperatorConfig {
|
||||
serde_yaml::from_str(yaml).unwrap()
|
||||
}
|
||||
|
||||
fn flags<'a>() -> ScopeFlags<'a> {
|
||||
ScopeFlags {
|
||||
profile: None,
|
||||
store: None,
|
||||
server: None,
|
||||
cluster: None,
|
||||
graph: None,
|
||||
uri: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn explicit_legacy_address_wins_unchanged() {
|
||||
let op = cfg("defaults:\n server: prod\nservers:\n prod:\n url: https://x\n");
|
||||
// A positional URI given → profile/defaults are ignored entirely.
|
||||
let scope = resolve_scope(
|
||||
&op,
|
||||
Capability::Any,
|
||||
ScopeFlags {
|
||||
uri: Some("graph.omni".into()),
|
||||
..flags()
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(scope.uri.as_deref(), Some("graph.omni"));
|
||||
assert_eq!(scope.server, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn store_flag_folds_into_uri_and_rejects_graph() {
|
||||
let op = OperatorConfig::default();
|
||||
let scope = resolve_scope(
|
||||
&op,
|
||||
Capability::Any,
|
||||
ScopeFlags {
|
||||
store: Some("s3://b/g.omni"),
|
||||
..flags()
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(scope.uri.as_deref(), Some("s3://b/g.omni"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scope_primitives_are_mutually_exclusive() {
|
||||
let op = OperatorConfig::default();
|
||||
for flags in [
|
||||
ScopeFlags {
|
||||
store: Some("s3://b/g.omni"),
|
||||
uri: Some("file://other.omni".into()),
|
||||
..flags()
|
||||
},
|
||||
ScopeFlags {
|
||||
store: Some("s3://b/g.omni"),
|
||||
server: Some("prod"),
|
||||
..flags()
|
||||
},
|
||||
ScopeFlags {
|
||||
cluster: Some("./brain"),
|
||||
uri: Some("file://other.omni".into()),
|
||||
..flags()
|
||||
},
|
||||
ScopeFlags {
|
||||
cluster: Some("./brain"),
|
||||
server: Some("prod"),
|
||||
..flags()
|
||||
},
|
||||
] {
|
||||
let err = resolve_scope(&op, Capability::Direct, flags)
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
assert!(err.contains("mutually exclusive"), "{err}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cluster_flag_resolves_root_and_graph_for_maintenance() {
|
||||
let op = cfg("clusters:\n brain:\n root: s3://acme/brain\n");
|
||||
let scope = resolve_scope(
|
||||
&op,
|
||||
Capability::Direct,
|
||||
ScopeFlags {
|
||||
cluster: Some("brain"),
|
||||
graph: Some("knowledge"),
|
||||
..flags()
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(scope.cluster.as_deref(), Some("s3://acme/brain"));
|
||||
assert_eq!(scope.cluster_graph.as_deref(), Some("knowledge"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cluster_flag_accepts_a_literal_root_uri() {
|
||||
let op = OperatorConfig::default();
|
||||
let scope = resolve_scope(
|
||||
&op,
|
||||
Capability::Direct,
|
||||
ScopeFlags {
|
||||
cluster: Some("s3://bucket/clusters/brain"),
|
||||
graph: Some("knowledge"),
|
||||
..flags()
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(scope.cluster.as_deref(), Some("s3://bucket/clusters/brain"));
|
||||
assert_eq!(scope.cluster_graph.as_deref(), Some("knowledge"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cluster_scope_without_a_graph_defers_to_catalog_enumeration() {
|
||||
// RFC-011 D7: with no `--graph`/`default_graph`, resolution no longer
|
||||
// bails here — it resolves the cluster root and leaves `cluster_graph`
|
||||
// empty, deferring to the async storage-URI resolver (which enumerates
|
||||
// the catalog: auto-use a sole graph, else error listing candidates).
|
||||
let op = cfg("clusters:\n brain:\n root: s3://acme/brain\n");
|
||||
let scope = resolve_scope(
|
||||
&op,
|
||||
Capability::Direct,
|
||||
ScopeFlags {
|
||||
cluster: Some("brain"),
|
||||
..flags()
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(scope.cluster.as_deref(), Some("s3://acme/brain"));
|
||||
assert_eq!(scope.cluster_graph, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn graph_on_a_bare_store_or_uri_is_rejected() {
|
||||
let op = OperatorConfig::default();
|
||||
for flags in [
|
||||
ScopeFlags {
|
||||
uri: Some("graph.omni".into()),
|
||||
graph: Some("knowledge"),
|
||||
..flags()
|
||||
},
|
||||
ScopeFlags {
|
||||
store: Some("s3://b/g.omni"),
|
||||
graph: Some("knowledge"),
|
||||
..flags()
|
||||
},
|
||||
] {
|
||||
let err = resolve_scope(&op, Capability::Any, flags)
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
assert!(err.contains("already a single graph"), "{err}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flat_default_store_drives_local_verbs() {
|
||||
// RFC-011: `defaults.store` is the zero-flag local default — no flags,
|
||||
// no profile → the store URI resolves as the (single-graph) store scope.
|
||||
let op = cfg("defaults:\n store: file:///tmp/dev.omni\n");
|
||||
let scope = resolve_scope(&op, Capability::Any, flags()).unwrap();
|
||||
assert_eq!(scope.uri.as_deref(), Some("file:///tmp/dev.omni"));
|
||||
assert_eq!(scope.server, None);
|
||||
}
|
||||
|
||||
/// A store is already a single graph, so passing `--graph` while the scope
/// comes from `defaults.store` must be a loud error.
#[test]
fn flat_default_store_rejects_graph() {
    let operator = cfg("defaults:\n store: file:///tmp/dev.omni\n");
    let request = ScopeFlags {
        graph: Some("knowledge"),
        ..flags()
    };

    let outcome = resolve_scope(&operator, Capability::Any, request);
    let err = outcome.unwrap_err().to_string();

    assert!(err.contains("does not apply to a store scope"), "{err}");
}
|
||||
|
||||
/// With no flags, `defaults.server` plus `defaults.default_graph` select the
/// served scope: the resolved scope names server `prod` and graph `knowledge`.
#[test]
fn flat_default_server_drives_data_verbs() {
    // The YAML is written one line per literal so the nesting is explicit:
    // `url` must sit under `servers.prod`, not beside it.
    let op = cfg(concat!(
        "defaults:\n",
        "  server: prod\n",
        "  default_graph: knowledge\n",
        "servers:\n",
        "  prod:\n",
        "    url: https://x\n",
    ));

    let scope = resolve_scope(&op, Capability::Any, flags()).unwrap();

    assert_eq!(scope.server.as_deref(), Some("prod"));
    assert_eq!(scope.graph.as_deref(), Some("knowledge"));
}
|
||||
|
||||
/// A profile that binds a server supplies the server scope, while an explicit
/// `graph` flag overrides the profile's `default_graph`.
#[test]
fn profile_server_scope_with_graph_override() {
    // Nesting is spelled out line by line: `url` belongs to `servers.staging`,
    // and `server` / `default_graph` belong to `profiles.staging`.
    let op = cfg(concat!(
        "servers:\n",
        "  staging:\n",
        "    url: https://s\n",
        "profiles:\n",
        "  staging:\n",
        "    server: staging\n",
        "    default_graph: knowledge\n",
    ));

    let scope = resolve_scope(
        &op,
        Capability::Any,
        ScopeFlags {
            profile: Some("staging"),
            graph: Some("archive"),
            ..flags()
        },
    )
    .unwrap();

    assert_eq!(scope.server.as_deref(), Some("staging"));
    assert_eq!(scope.graph.as_deref(), Some("archive")); // flag beats profile default
}
|
||||
|
||||
/// Under `Capability::Direct`, a profile that binds a cluster resolves to the
/// cluster's root URI, with the profile's `default_graph` as the cluster graph.
#[test]
fn profile_cluster_scope_resolves_root_for_maintenance() {
    // Nesting is spelled out line by line: `root` belongs to `clusters.brain`,
    // and `cluster` / `default_graph` belong to `profiles.admin`.
    let op = cfg(concat!(
        "clusters:\n",
        "  brain:\n",
        "    root: s3://acme/brain\n",
        "profiles:\n",
        "  admin:\n",
        "    cluster: brain\n",
        "    default_graph: knowledge\n",
    ));

    let scope = resolve_scope(
        &op,
        Capability::Direct,
        ScopeFlags {
            profile: Some("admin"),
            ..flags()
        },
    )
    .unwrap();

    assert_eq!(scope.cluster.as_deref(), Some("s3://acme/brain"));
    assert_eq!(scope.cluster_graph.as_deref(), Some("knowledge"));
}
|
||||
|
||||
/// A `--graph` flag overrides a profile cluster's `default_graph`, exactly as
/// it does for a server scope (the deferral closed by this slice).
#[test]
fn profile_cluster_scope_with_graph_override() {
    // Nesting is spelled out line by line: `root` belongs to `clusters.brain`,
    // and `cluster` / `default_graph` belong to `profiles.admin`.
    let op = cfg(concat!(
        "clusters:\n",
        "  brain:\n",
        "    root: s3://acme/brain\n",
        "profiles:\n",
        "  admin:\n",
        "    cluster: brain\n",
        "    default_graph: knowledge\n",
    ));

    let scope = resolve_scope(
        &op,
        Capability::Direct,
        ScopeFlags {
            profile: Some("admin"),
            graph: Some("archive"),
            ..flags()
        },
    )
    .unwrap();

    assert_eq!(scope.cluster.as_deref(), Some("s3://acme/brain"));
    assert_eq!(scope.cluster_graph.as_deref(), Some("archive")); // flag beats profile default
}
|
||||
|
||||
/// A server default resolved under `Capability::Direct` must fail with an
/// error mentioning "direct storage access".
#[test]
fn server_scope_on_maintenance_verb_errors() {
    // Nesting is spelled out line by line: `url` belongs to `servers.prod`.
    let op = cfg(concat!(
        "defaults:\n",
        "  server: prod\n",
        "servers:\n",
        "  prod:\n",
        "    url: https://x\n",
    ));

    let err = resolve_scope(&op, Capability::Direct, flags())
        .unwrap_err()
        .to_string();

    assert!(err.contains("direct storage access"), "{err}");
}
|
||||
|
||||
/// A profile that binds a cluster, resolved under `Capability::Any`, must fail
/// with an error stating the scope is "not valid for graph data commands".
#[test]
fn cluster_scope_on_data_verb_errors() {
    // Nesting is spelled out line by line: `root` belongs to `clusters.brain`,
    // and `cluster` belongs to `profiles.admin`.
    let op = cfg(concat!(
        "clusters:\n",
        "  brain:\n",
        "    root: s3://acme/brain\n",
        "profiles:\n",
        "  admin:\n",
        "    cluster: brain\n",
    ));

    let err = resolve_scope(
        &op,
        Capability::Any,
        ScopeFlags {
            profile: Some("admin"),
            ..flags()
        },
    )
    .unwrap_err()
    .to_string();

    assert!(err.contains("not valid for graph data commands"), "{err}");
}
|
||||
|
||||
/// Selecting a profile that the (default) operator config does not define
/// must fail, and the error must name the missing profile.
#[test]
fn unknown_profile_is_a_loud_error() {
    let operator = OperatorConfig::default();
    let request = ScopeFlags {
        profile: Some("nope"),
        ..flags()
    };

    let outcome = resolve_scope(&operator, Capability::Any, request);
    let err = outcome.unwrap_err().to_string();

    assert!(err.contains("unknown profile 'nope'"), "{err}");
}
|
||||
|
||||
/// With a default operator config and no flags at all, resolution succeeds
/// and yields the default `ResolvedScope`.
#[test]
fn no_address_resolves_empty_for_legacy_fallthrough() {
    let operator = OperatorConfig::default();
    let expected = ResolvedScope::default();

    let resolved = resolve_scope(&operator, Capability::Any, flags()).unwrap();

    assert_eq!(resolved, expected);
}
|
||||
}
|
||||
|
|
@ -683,51 +683,8 @@ fn cluster_apply_locked_exits_nonzero() {
|
|||
assert!(!temp.path().join("__cluster/resources").exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cluster_apply_uses_cli_actor_from_local_config() {
|
||||
let temp = tempdir().unwrap();
|
||||
write_cluster_config_fixture(temp.path());
|
||||
fs::write(
|
||||
temp.path().join("omnigraph.yaml"),
|
||||
"cli:\n actor: act-local\n",
|
||||
)
|
||||
.unwrap();
|
||||
// Phase 1: import once (setup, not under test).
|
||||
let output = cli()
|
||||
.current_dir(temp.path())
|
||||
.arg("cluster")
|
||||
.arg("import")
|
||||
.arg("--config")
|
||||
.arg(temp.path())
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(output.status.success(), "{output:?}");
|
||||
|
||||
// Phase 2: apply alone, capturing the echoed actor (idempotent re-runs).
|
||||
let apply = |extra: &[&str]| {
|
||||
let mut command = cli();
|
||||
command.current_dir(temp.path());
|
||||
for arg in extra {
|
||||
command.arg(arg);
|
||||
}
|
||||
let output = command
|
||||
.arg("cluster")
|
||||
.arg("apply")
|
||||
.arg("--config")
|
||||
.arg(temp.path())
|
||||
.arg("--json")
|
||||
.output()
|
||||
.unwrap();
|
||||
let json: serde_json::Value =
|
||||
serde_json::from_str(String::from_utf8_lossy(&output.stdout).trim()).unwrap();
|
||||
json["actor"].clone()
|
||||
};
|
||||
assert_eq!(apply(&[]), "act-local", "cli.actor is the no-flag default");
|
||||
assert_eq!(apply(&["--as", "andrew"]), "andrew", "--as overrides cli.actor");
|
||||
}
|
||||
|
||||
/// RFC-007 PR 1: the operator layer joins the actor chain —
|
||||
/// `--as` > legacy `cli.actor` (RFC-008 window) > `operator.actor` > none.
|
||||
/// RFC-011: the actor chain is `--as` > `operator.actor` > none. The CLI no
|
||||
/// longer reads omnigraph.yaml `cli.actor`.
|
||||
#[test]
|
||||
fn cluster_apply_uses_operator_actor_from_omnigraph_home() {
|
||||
let temp = tempdir().unwrap();
|
||||
|
|
@ -771,41 +728,31 @@ fn cluster_apply_uses_operator_actor_from_omnigraph_home() {
|
|||
json["actor"].clone()
|
||||
};
|
||||
|
||||
// No --as, no omnigraph.yaml: the operator identity applies.
|
||||
// No --as: the operator identity applies.
|
||||
assert_eq!(
|
||||
apply(&[]),
|
||||
"act-operator",
|
||||
"operator.actor is the no-flag, no-legacy-config default"
|
||||
"operator.actor is the no-flag default"
|
||||
);
|
||||
// --as still wins over everything.
|
||||
// --as still wins over the operator layer.
|
||||
assert_eq!(apply(&["--as", "andrew"]), "andrew");
|
||||
|
||||
// A legacy cli.actor (RFC-008 window) outranks the operator layer.
|
||||
fs::write(
|
||||
temp.path().join("omnigraph.yaml"),
|
||||
"cli:\n actor: act-legacy\n",
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
apply(&[]),
|
||||
"act-legacy",
|
||||
"legacy cli.actor wins over operator.actor during the deprecation window"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cluster_approve_uses_cli_actor_fallback() {
|
||||
fn cluster_approve_uses_operator_actor_fallback() {
|
||||
let temp = tempdir().unwrap();
|
||||
write_cluster_config_fixture(temp.path());
|
||||
let operator_home = tempdir().unwrap();
|
||||
fs::write(
|
||||
temp.path().join("omnigraph.yaml"),
|
||||
"cli:\n actor: act-local\n",
|
||||
operator_home.path().join("config.yaml"),
|
||||
"operator:\n actor: act-operator\n",
|
||||
)
|
||||
.unwrap();
|
||||
// Converge, then remove the graph so a gated delete is pending.
|
||||
for command in ["import", "apply"] {
|
||||
let output = cli()
|
||||
.current_dir(temp.path())
|
||||
.env("OMNIGRAPH_HOME", operator_home.path())
|
||||
.arg("cluster")
|
||||
.arg(command)
|
||||
.arg("--config")
|
||||
|
|
@ -818,6 +765,7 @@ fn cluster_approve_uses_cli_actor_fallback() {
|
|||
|
||||
let output = cli()
|
||||
.current_dir(temp.path())
|
||||
.env("OMNIGRAPH_HOME", operator_home.path())
|
||||
.arg("cluster")
|
||||
.arg("approve")
|
||||
.arg("graph.knowledge")
|
||||
|
|
@ -829,14 +777,17 @@ fn cluster_approve_uses_cli_actor_fallback() {
|
|||
assert!(output.status.success(), "{output:?}");
|
||||
let json: serde_json::Value =
|
||||
serde_json::from_str(String::from_utf8_lossy(&output.stdout).trim()).unwrap();
|
||||
assert_eq!(json["approved_by"], "act-local");
|
||||
assert_eq!(json["approved_by"], "act-operator");
|
||||
|
||||
// With neither flag nor config: refused with the actionable message.
|
||||
// With neither flag nor operator config: refused with the actionable
|
||||
// message (an approval without an approver is meaningless).
|
||||
let bare = tempdir().unwrap();
|
||||
write_cluster_config_fixture(bare.path());
|
||||
let bare_home = tempdir().unwrap();
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.current_dir(bare.path())
|
||||
.env("OMNIGRAPH_HOME", bare_home.path())
|
||||
.arg("cluster")
|
||||
.arg("approve")
|
||||
.arg("graph.knowledge")
|
||||
|
|
@ -845,11 +796,13 @@ fn cluster_approve_uses_cli_actor_fallback() {
|
|||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(stderr.contains("--as"), "{stderr}");
|
||||
assert!(stderr.contains("cli.actor"), "{stderr}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cluster_commands_ignore_malformed_local_config() {
|
||||
fn cluster_commands_ignore_legacy_omnigraph_yaml() {
|
||||
// RFC-011: the CLI never reads omnigraph.yaml for cluster commands — a
|
||||
// present (even malformed) legacy file is inert. The actor falls back to
|
||||
// `operator.actor`, then to none (no loud failure on absence).
|
||||
let temp = tempdir().unwrap();
|
||||
write_cluster_config_fixture(temp.path());
|
||||
fs::write(temp.path().join("omnigraph.yaml"), "{{{{ not yaml").unwrap();
|
||||
|
|
@ -873,14 +826,11 @@ fn cluster_commands_ignore_malformed_local_config() {
|
|||
"cluster {command} touched omnigraph.yaml"
|
||||
);
|
||||
}
|
||||
// import + apply with an explicit --as: the config is never loaded.
|
||||
for (command, args) in [("import", vec![]), ("apply", vec!["--as", "andrew"])] {
|
||||
let mut invocation = cli();
|
||||
invocation.current_dir(temp.path());
|
||||
for arg in &args {
|
||||
invocation.arg(arg);
|
||||
}
|
||||
let output = invocation
|
||||
// import + apply (no --as, no operator config): the legacy file is never
|
||||
// loaded and the no-actor apply succeeds (actor defaults to none).
|
||||
for command in ["import", "apply"] {
|
||||
let output = cli()
|
||||
.current_dir(temp.path())
|
||||
.arg("cluster")
|
||||
.arg(command)
|
||||
.arg("--config")
|
||||
|
|
@ -893,20 +843,6 @@ fn cluster_commands_ignore_malformed_local_config() {
|
|||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
// Only the no-flag actor lookup is allowed to fail, and loudly.
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.current_dir(temp.path())
|
||||
.arg("cluster")
|
||||
.arg("apply")
|
||||
.arg("--config")
|
||||
.arg(temp.path()),
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("omnigraph.yaml") && stderr.contains("--as"),
|
||||
"the actor-default config read must fail loudly and actionably: {stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -950,3 +886,240 @@ graphs:
|
|||
assert!(!leaked.contains("phantom") && !leaked.contains("9999"), "{leaked}");
|
||||
}
|
||||
|
||||
|
||||
// ── RFC-010 Slice 3: cluster-managed maintenance addressing + init signpost ──
|
||||
|
||||
/// Stand up an applied, served cluster with the `knowledge` graph and return
|
||||
/// its directory guard. Mirrors the e2e setup (fixture → init → import → apply).
|
||||
fn applied_knowledge_cluster() -> tempfile::TempDir {
|
||||
let temp = tempdir().unwrap();
|
||||
write_cluster_config_fixture(temp.path());
|
||||
init_cluster_derived_graph(temp.path());
|
||||
let import = cluster_json(temp.path(), "import");
|
||||
assert_eq!(import["ok"], true, "{import}");
|
||||
let apply = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(apply["converged"], true, "{apply}");
|
||||
temp
|
||||
}
|
||||
|
||||
/// `optimize --cluster <dir> --graph knowledge --json` addresses the graph by
/// cluster directory and id (no hand-typed storage path) and must emit a JSON
/// payload with a `tables` array.
#[test]
fn optimize_resolves_a_cluster_graph_by_id() {
    let cluster = applied_knowledge_cluster();

    let mut command = cli();
    command
        .args(["optimize", "--cluster"])
        .arg(cluster.path())
        .args(["--graph", "knowledge", "--json"]);
    let out = output_success(&mut command);

    let payload = parse_stdout_json(&out);
    let has_tables = payload["tables"].as_array().is_some();
    assert!(
        has_tables,
        "optimize did not run against the resolved cluster graph: {payload}"
    );
}
|
||||
|
||||
/// Addressing a graph id that the cluster has not applied must fail, and the
/// error must both say so and point at `cluster apply`.
#[test]
fn optimize_unknown_cluster_graph_id_errors() {
    let cluster = applied_knowledge_cluster();

    let mut command = cli();
    command
        .args(["optimize", "--cluster"])
        .arg(cluster.path())
        .args(["--graph", "does-not-exist", "--json"]);
    let out = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&out.stderr);
    let names_the_problem = stderr.contains("is not applied in cluster");
    let names_the_remedy = stderr.contains("cluster apply");
    assert!(
        names_the_problem && names_the_remedy,
        "expected an unapplied-graph error pointing at cluster apply; got: {stderr}"
    );
}
|
||||
|
||||
/// RFC-011 D7: when a cluster has exactly one applied graph, `--graph` may be
/// omitted; the resolver enumerates the catalog and uses the only candidate.
#[test]
fn optimize_auto_uses_the_sole_cluster_graph() {
    let cluster = applied_knowledge_cluster();

    let mut command = cli();
    command
        .args(["optimize", "--cluster"])
        .arg(cluster.path())
        .arg("--json");
    let out = output_success(&mut command);

    let payload = parse_stdout_json(&out);
    assert!(
        payload["tables"].as_array().is_some(),
        "optimize should auto-resolve the sole cluster graph"
    );
}
|
||||
|
||||
/// Stand up an applied cluster with two graphs (`knowledge`, `archive`).
|
||||
fn applied_two_graph_cluster() -> tempfile::TempDir {
|
||||
let temp = tempdir().unwrap();
|
||||
let root = temp.path();
|
||||
fs::write(
|
||||
root.join("people.pg"),
|
||||
"node Person {\n name: String @key\n age: I32?\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(root.join("base.policy.yaml"), "rules: []\n").unwrap();
|
||||
fs::write(
|
||||
root.join("cluster.yaml"),
|
||||
r#"
|
||||
version: 1
|
||||
metadata:
|
||||
name: two-graph
|
||||
state:
|
||||
backend: cluster
|
||||
lock: true
|
||||
graphs:
|
||||
knowledge:
|
||||
schema: ./people.pg
|
||||
archive:
|
||||
schema: ./people.pg
|
||||
policies:
|
||||
base:
|
||||
file: ./base.policy.yaml
|
||||
applies_to: [knowledge, archive]
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
init_named_cluster_graph(root, "knowledge", "people.pg");
|
||||
init_named_cluster_graph(root, "archive", "people.pg");
|
||||
assert_eq!(cluster_json(root, "import")["ok"], true);
|
||||
assert_eq!(cluster_json(root, "apply")["converged"], true);
|
||||
temp
|
||||
}
|
||||
|
||||
/// RFC-011 D7: with more than one graph and no `--graph`, the command must
/// fail with an error naming every candidate; it must never auto-pick.
#[test]
fn optimize_on_multi_graph_cluster_without_graph_lists_candidates() {
    let cluster = applied_two_graph_cluster();

    let mut command = cli();
    command
        .args(["optimize", "--cluster"])
        .arg(cluster.path())
        .arg("--json");
    let out = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&out.stderr);
    // Every fragment must be present: the count, both ids, and the remedy.
    let lists_candidates = ["2 graphs", "archive", "knowledge", "--graph <id>"]
        .iter()
        .all(|fragment| stderr.contains(fragment));
    assert!(
        lists_candidates,
        "expected a candidate-listing error; got: {stderr}"
    );
}
|
||||
|
||||
/// Hand-initializing a NEW graph inside an established cluster's storage
/// layout must be refused with a pointer to `cluster apply`, and must not
/// create anything at the target path.
#[test]
fn init_refuses_a_cluster_managed_path_and_signposts_cluster_apply() {
    let cluster = applied_knowledge_cluster();
    let root = cluster.path();
    let target = root.join("graphs").join("sneaky.omni");

    let mut command = cli();
    command
        .args(["init", "--schema"])
        .arg(root.join("people.pg"))
        .arg(&target);
    let out = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&out.stderr);
    assert!(
        stderr.contains("cluster apply"),
        "init into a cluster-managed path should signpost `cluster apply`; got: {stderr}"
    );
    // The refusal happened before anything was written.
    assert!(!target.exists());
}
|
||||
|
||||
/// RFC-011 Decision 10: a direct `schema apply` against a cluster-managed
/// graph's storage root would bypass the ledger/recovery/approvals, so it is
/// refused and points at `cluster apply` (mirroring `init`'s refusal).
#[test]
fn schema_apply_refuses_a_cluster_managed_graph_and_signposts_cluster_apply() {
    let cluster = applied_knowledge_cluster();
    let root = cluster.path();
    let managed_graph = root.join("graphs").join("knowledge.omni");
    let changed_schema = root.join("people_v2.pg");

    // The new schema adds `bio`, so it WOULD change the graph; that makes the
    // no-mutation check below meaningful rather than a no-op re-apply.
    fs::write(
        &changed_schema,
        "node Person {\n name: String @key\n age: I32?\n bio: String?\n}\n",
    )
    .unwrap();

    let mut apply = cli();
    apply
        .args(["schema", "apply", "--schema"])
        .arg(&changed_schema)
        .arg("--store")
        .arg(&managed_graph);
    let out = output_failure(&mut apply);

    let stderr = String::from_utf8_lossy(&out.stderr);
    assert!(
        stderr.contains("cluster apply"),
        "schema apply against a cluster-managed graph should signpost `cluster apply`; got: {stderr}"
    );

    // It bailed BEFORE mutating: the live schema still lacks `bio`.
    let mut show_command = cli();
    show_command.args(["schema", "show"]).arg(&managed_graph);
    let show = output_success(&mut show_command);
    let live_schema = stdout_string(&show);
    assert!(
        !live_schema.contains("bio"),
        "the refused apply must not have changed the live schema; got: {}",
        live_schema
    );
}
|
||||
|
||||
/// Regression guard: an ordinary `init` into a directory with no cluster
/// layout still succeeds and reports "initialized".
#[test]
fn init_outside_a_cluster_still_works() {
    let workdir = tempdir().unwrap();
    let schema = fixture("test.pg");
    let target = workdir.path().join("plain.omni");

    let mut command = cli();
    command.args(["init", "--schema"]).arg(&schema).arg(target);
    let out = output_success(&mut command);

    assert!(stdout_string(&out).contains("initialized"));
}
|
||||
|
||||
/// Robustness (Greptile, #221): maintenance resolves the graph URI from the
/// state ledger alone, so an unrelated corrupt/missing catalog payload (or a
/// pending recovery sweep) must not block it, unlike the full serving-snapshot
/// read. This is what keeps `repair --cluster` usable on a degraded cluster.
#[test]
fn optimize_by_cluster_works_when_catalog_payloads_are_degraded() {
    let cluster = applied_knowledge_cluster();

    // Drop the verified catalog payloads (queries/policies) when present. A
    // serving read would refuse with a catalog-payload diagnostic; the
    // ledger-only resolve must not care.
    let payload_dir = cluster.path().join("__cluster").join("resources");
    if payload_dir.exists() {
        fs::remove_dir_all(&payload_dir).unwrap();
    }

    let mut command = cli();
    command
        .args(["optimize", "--cluster"])
        .arg(cluster.path())
        .args(["--graph", "knowledge", "--json"]);
    let out = output_success(&mut command);

    let payload = parse_stdout_json(&out);
    assert!(
        payload["tables"].as_array().is_some(),
        "optimize should resolve via the ledger despite degraded catalog payloads"
    );
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
use std::fs;
|
||||
|
||||
use omnigraph::db::Omnigraph;
|
||||
use tempfile::tempdir;
|
||||
|
||||
mod support;
|
||||
|
|
@ -236,27 +237,28 @@ fn cluster_e2e_out_of_band_schema_drift_then_apply_converges_it() {
|
|||
let apply = cluster_json(temp.path(), "apply");
|
||||
assert_eq!(apply["converged"], true, "{apply}");
|
||||
|
||||
// Out-of-band: the live graph evolves, cluster.yaml stays put.
|
||||
fs::write(
|
||||
temp.path().join("people_v2.pg"),
|
||||
r#"
|
||||
// Out-of-band: the live graph evolves while cluster.yaml stays put. RFC-011
|
||||
// D10 makes the CLI `schema apply` refuse a cluster-managed graph, so this
|
||||
// simulates a true bypass — a direct engine apply against the storage root,
|
||||
// exactly the drift the control plane must still detect and converge.
|
||||
let people_v2 = r#"
|
||||
node Person {
|
||||
name: String @key
|
||||
age: I32?
|
||||
bio: String?
|
||||
}
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
output_success(
|
||||
cli()
|
||||
.arg("schema")
|
||||
.arg("apply")
|
||||
.arg(temp.path().join("graphs/knowledge.omni"))
|
||||
.arg("--schema")
|
||||
.arg(temp.path().join("people_v2.pg"))
|
||||
.arg("--json"),
|
||||
);
|
||||
"#;
|
||||
tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
let db = Omnigraph::open(
|
||||
temp.path()
|
||||
.join("graphs/knowledge.omni")
|
||||
.to_string_lossy()
|
||||
.as_ref(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
db.apply_schema(people_v2).await.unwrap();
|
||||
});
|
||||
|
||||
// Drift is visible...
|
||||
let refresh = cluster_json(temp.path(), "refresh");
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
use std::fs;
|
||||
|
||||
use assert_cmd::Command;
|
||||
use serde_json::Value;
|
||||
use tempfile::tempdir;
|
||||
|
||||
|
|
@ -142,6 +143,122 @@ fn embed_seed_preserves_non_entity_rows() {
|
|||
assert_eq!(embedded[2]["to"], "dec-alpha");
|
||||
}
|
||||
|
||||
/// Happy path for the `resolve_local_uri` swap (RFC-010 Slice 1): a positional
/// local path still resolves, and `optimize --json` emits a `tables` array.
#[test]
fn optimize_json_succeeds_on_local_graph() {
    let workdir = tempdir().unwrap();
    let graph = graph_path(workdir.path());
    init_graph(&graph);
    load_fixture(&graph);

    let mut command = cli();
    command.args(["optimize", "--json"]).arg(&graph);
    let output = output_success(&mut command);

    let payload: Value = serde_json::from_slice(&output.stdout).unwrap();
    assert!(payload["tables"].as_array().is_some());
}
|
||||
|
||||
/// RFC-010 Slice 1: `--server` is a data-plane addressing flag; on a
/// storage-plane verb the guard rejects it loudly (it used to be silently
/// ignored). All three parts of the guard message must appear on stderr.
#[test]
fn optimize_with_server_flag_errors_wrong_plane() {
    let mut command = cli();
    command.args(["optimize", "--server", "prod"]);
    let output = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&output.stderr);
    let names_the_verb = stderr.contains("`optimize` is a direct (storage-native) command");
    let names_the_flag = stderr.contains("--server addresses a served graph and does not apply");
    let names_the_remedy =
        stderr.contains("Pass a storage URI, or --cluster <dir> --graph <id>.");
    assert!(
        names_the_verb && names_the_flag && names_the_remedy,
        "wrong-capability guard message not found; got: {stderr}"
    );
}
|
||||
|
||||
/// The remediation tail is empty for served-addressing capabilities, so a
/// misplaced `--cluster` on a data verb must not leave "… does not apply. "
/// with a dangling space (error text is observable contract).
#[test]
fn wrong_address_guard_message_has_no_trailing_space() {
    let mut command = cli();
    command
        // NO_COLOR keeps the assertions off ANSI styling.
        .env("NO_COLOR", "1")
        .args(["query", "--cluster", "./brain", "-e", "query q { Person { id } }"]);
    let output = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(
        stderr.contains("and does not apply."),
        "expected the wrong-address message; got: {stderr}"
    );
    assert!(
        !stderr.contains("and does not apply. "),
        "trailing space after the message; got: {stderr}"
    );
}
|
||||
|
||||
/// RFC-011: `--graph` selects within a multi-graph scope (a server or
/// cluster). An explicit `--store <uri>` is already a single graph, so pairing
/// it with `--graph` is a loud error rather than a silently dropped flag. The
/// guard lets `--graph` reach a data verb; the scope resolver rejects it.
#[test]
fn graph_flag_on_a_positional_uri_errors() {
    let workdir = tempdir().unwrap();
    let graph = graph_path(workdir.path());
    init_graph(&graph);

    let mut command = cli();
    command
        .args(["query", "--store"])
        .arg(&graph)
        .args(["--graph", "knowledge", "-e", "query q { Person { id } }"]);
    let output = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(
        stderr.contains("already a single graph"),
        "expected --graph-on-explicit-store rejection; got: {stderr}"
    );
}
|
||||
|
||||
/// RFC-011 D3: by-name (catalog) invocation is served-only. The catalog is
/// server-owned, so a bare `--store` has nothing to resolve the name against;
/// the ad-hoc lane (`-e`/`--query`) is the local alternative.
#[test]
fn query_by_name_against_a_store_needs_a_server() {
    let workdir = tempdir().unwrap();
    let graph = graph_path(workdir.path());
    init_graph(&graph);

    let mut command = cli();
    command.args(["query", "find_people", "--store"]).arg(&graph);
    let output = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(
        stderr.contains("needs a server"),
        "expected a served-only by-name error; got: {stderr}"
    );
}
|
||||
|
||||
/// RFC-010 Slice 1: a maintenance verb pointed at a remote URI fails loudly
/// and declaratively (previously: whatever `Omnigraph::open` said about an
/// https URI).
#[test]
fn optimize_with_remote_target_errors_storage_plane() {
    let mut command = cli();
    command.args(["optimize", "https://graph.example.invalid"]);
    let output = output_failure(&mut command);

    let stderr = String::from_utf8_lossy(&output.stderr);
    let declares_direct_only = stderr.contains(
        "`optimize` is a direct (storage-native) command and needs direct storage access",
    );
    let mentions_remote = stderr.contains("remote server");
    assert!(
        declares_direct_only && mentions_remote,
        "direct remote-target message not found; got: {stderr}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn repair_json_reports_noop_on_clean_graph() {
|
||||
let temp = tempdir().unwrap();
|
||||
|
|
@ -412,10 +529,9 @@ query list_people() {
|
|||
|
||||
#[test]
|
||||
fn deprecated_read_and_change_subcommands_emit_warnings() {
|
||||
// Both subcommands require `--query`/`--query-string`/`--alias`, so
|
||||
// invoking them with no args will exit non-zero. That's fine --
|
||||
// we only care that the deprecation warning is printed before the
|
||||
// argument-required error.
|
||||
// Both subcommands require `--query`/`--query-string`, so invoking them
|
||||
// with no args will exit non-zero. That's fine -- we only care that the
|
||||
// deprecation warning is printed before the argument-required error.
|
||||
let output = cli().arg("read").output().unwrap();
|
||||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||||
assert!(
|
||||
|
|
@ -483,13 +599,15 @@ query list_people() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn query_lint_can_resolve_graph_and_query_from_config() {
|
||||
fn query_lint_can_resolve_graph_from_store_scope() {
|
||||
// RFC-011: lint resolves its graph target through `--store` (the direct
|
||||
// scope), not omnigraph.yaml's cli.graph; the .gq path is plain cwd-relative.
|
||||
let temp = tempdir().unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
init_graph(&graph);
|
||||
let query_path = temp.path().join("queries.gq");
|
||||
write_query_file(
|
||||
&temp.path().join("queries.gq"),
|
||||
&query_path,
|
||||
r#"
|
||||
query list_people() {
|
||||
match { $p: Person }
|
||||
|
|
@ -497,16 +615,15 @@ query list_people() {
|
|||
}
|
||||
"#,
|
||||
);
|
||||
write_config(&config_path, &local_yaml_config(&graph));
|
||||
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("query")
|
||||
.arg("lint")
|
||||
.arg("--query")
|
||||
.arg("queries.gq")
|
||||
.arg("--config")
|
||||
.arg(&config_path)
|
||||
.arg(&query_path)
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--json"),
|
||||
);
|
||||
let payload: Value = serde_json::from_slice(&output.stdout).unwrap();
|
||||
|
|
@ -542,8 +659,12 @@ query list_people() {
|
|||
.arg("http://127.0.0.1:8080"),
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
// RFC-010/011: the direct (storage-native) verbs share one declared message
|
||||
// (was: "query lint is only supported against local graph URIs …").
|
||||
assert!(
|
||||
stderr.contains("query lint is only supported against local graph URIs in this milestone")
|
||||
stderr.contains("`lint` is a direct (storage-native) command and needs direct storage access")
|
||||
&& stderr.contains("remote server"),
|
||||
"direct remote-target message not found; got: {stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -570,7 +691,9 @@ query list_people() {
|
|||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("query lint requires --schema <schema.pg> or a resolvable graph target")
|
||||
stderr.contains("lint requires --schema <schema.pg>")
|
||||
|| stderr.contains("no graph addressed"),
|
||||
"expected a schema-or-graph-target requirement; got: {stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -739,10 +862,10 @@ fn read_json_outputs_rows_for_named_query() {
|
|||
let output = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
.arg("get_person")
|
||||
.arg("--params")
|
||||
.arg(r#"{"name":"Alice"}"#)
|
||||
|
|
@ -756,6 +879,58 @@ fn read_json_outputs_rows_for_named_query() {
|
|||
assert_eq!(payload["rows"][0]["p.name"], "Alice");
|
||||
}
|
||||
|
||||
/// RFC-011 Slice A: a `--profile` that binds a store drives a read
/// identically to naming that store directly with `--store`.
///
/// The `--store` run is the baseline. An earlier shape of this test ran the
/// same `--store` invocation twice and compared the two results, which can
/// only ever compare a command with itself; that duplicate run is dropped.
#[test]
fn read_via_store_flag_and_profile_match_positional_uri() {
    let temp = tempdir().unwrap();
    let graph = graph_path(temp.path());
    init_graph(&graph);
    load_fixture(&graph);
    let queries = fixture("test.gq");

    // Appends the shared query arguments to an already-addressed command,
    // runs it expecting success, and parses stdout as JSON.
    let read_rows = |cmd: &mut Command| -> Value {
        let output = output_success(
            cmd.arg("--query")
                .arg(&queries)
                .arg("get_person")
                .arg("--params")
                .arg(r#"{"name":"Alice"}"#)
                .arg("--json"),
        );
        serde_json::from_slice(&output.stdout).unwrap()
    };

    // Baseline: --store names the graph directly.
    let baseline = read_rows(cli().arg("query").arg("--store").arg(&graph));
    assert_eq!(baseline["rows"][0]["p.name"], "Alice");

    // A profile binding that store, selected with --profile (no --store).
    // YAML nesting is significant: `store` must sit under `profiles.local`.
    let home = temp.path().join("op-home");
    std::fs::create_dir_all(&home).unwrap();
    std::fs::write(
        home.join("config.yaml"),
        format!(
            "profiles:\n  local:\n    store: '{}'\n",
            graph.to_string_lossy()
        ),
    )
    .unwrap();
    let via_profile = read_rows(
        cli()
            .env("OMNIGRAPH_HOME", &home)
            .arg("query")
            .arg("--profile")
            .arg("local"),
    );
    assert_eq!(via_profile["rows"], baseline["rows"]);
}
|
||||
|
||||
#[test]
|
||||
fn export_jsonl_outputs_source_rows_for_selected_branch_and_type() {
|
||||
let temp = tempdir().unwrap();
|
||||
|
|
@ -815,43 +990,38 @@ fn export_jsonl_outputs_source_rows_for_selected_branch_and_type() {
|
|||
);
|
||||
}
|
||||
|
||||
// RFC-011: `policy validate|test|explain` source the Cedar bundle from a
|
||||
// converged cluster's applied policies (`--cluster <dir>` + `--graph <id>`),
|
||||
// not omnigraph.yaml's policy.file.
|
||||
|
||||
#[test]
|
||||
fn policy_validate_accepts_valid_policy_file() {
|
||||
let temp = tempdir().unwrap();
|
||||
let (config, _) = write_policy_config_fixture(temp.path());
|
||||
fn policy_validate_accepts_cluster_bundle() {
|
||||
let cluster = converged_loaded_cluster("knowledge", Some(POLICY_YAML));
|
||||
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("policy")
|
||||
.arg("validate")
|
||||
.arg("--config")
|
||||
.arg(&config),
|
||||
.arg("--cluster")
|
||||
.arg(cluster.path())
|
||||
.arg("--graph")
|
||||
.arg("knowledge"),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
|
||||
assert!(stdout.contains("policy valid:"));
|
||||
assert!(stdout.contains("policy.yaml"));
|
||||
assert!(stdout.contains("[2 actors]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn policy_validate_fails_for_invalid_policy_file() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
let policy = temp.path().join("policy.yaml");
|
||||
fs::write(
|
||||
&config,
|
||||
r#"
|
||||
project:
|
||||
name: policy-test-graph
|
||||
policy:
|
||||
file: ./policy.yaml
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
&policy,
|
||||
r#"
|
||||
fn policy_validate_fails_for_invalid_cluster_bundle() {
|
||||
// The cluster does not validate a policy bundle's internal rules, so an
|
||||
// applied-but-malformed bundle reaches `policy validate`, which compiles it
|
||||
// and surfaces the error (here: a duplicate rule id).
|
||||
let cluster = converged_loaded_cluster(
|
||||
"knowledge",
|
||||
Some(
|
||||
r#"
|
||||
version: 1
|
||||
groups:
|
||||
team: [act-andrew]
|
||||
|
|
@ -867,26 +1037,42 @@ rules:
|
|||
actions: [export]
|
||||
branch_scope: any
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
),
|
||||
);
|
||||
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("policy")
|
||||
.arg("validate")
|
||||
.arg("--config")
|
||||
.arg(&config),
|
||||
.arg("--cluster")
|
||||
.arg(cluster.path())
|
||||
.arg("--graph")
|
||||
.arg("knowledge"),
|
||||
);
|
||||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||||
assert!(stderr.contains("duplicate policy rule id"));
|
||||
assert!(
|
||||
stderr.contains("duplicate policy rule id"),
|
||||
"expected a duplicate-rule error; got: {stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn policy_test_runs_declarative_cases() {
|
||||
let temp = tempdir().unwrap();
|
||||
let (config, _) = write_policy_config_fixture(temp.path());
|
||||
fn policy_test_runs_declarative_cases_against_cluster_bundle() {
|
||||
let cluster = converged_loaded_cluster("knowledge", Some(POLICY_YAML));
|
||||
let tests = cluster.path().join("policy.tests.yaml");
|
||||
fs::write(&tests, POLICY_TESTS_YAML).unwrap();
|
||||
|
||||
let output = output_success(cli().arg("policy").arg("test").arg("--config").arg(&config));
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("policy")
|
||||
.arg("test")
|
||||
.arg("--cluster")
|
||||
.arg(cluster.path())
|
||||
.arg("--graph")
|
||||
.arg("knowledge")
|
||||
.arg("--tests")
|
||||
.arg(&tests),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
|
||||
assert!(stdout.contains("policy tests passed: 2 cases"));
|
||||
|
|
@ -894,15 +1080,16 @@ fn policy_test_runs_declarative_cases() {
|
|||
|
||||
#[test]
|
||||
fn policy_explain_reports_decision_and_matched_rule() {
|
||||
let temp = tempdir().unwrap();
|
||||
let (config, _) = write_policy_config_fixture(temp.path());
|
||||
let cluster = converged_loaded_cluster("knowledge", Some(POLICY_YAML));
|
||||
|
||||
let allow = output_success(
|
||||
cli()
|
||||
.arg("policy")
|
||||
.arg("explain")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--cluster")
|
||||
.arg(cluster.path())
|
||||
.arg("--graph")
|
||||
.arg("knowledge")
|
||||
.arg("--actor")
|
||||
.arg("act-andrew")
|
||||
.arg("--action")
|
||||
|
|
@ -918,8 +1105,10 @@ fn policy_explain_reports_decision_and_matched_rule() {
|
|||
cli()
|
||||
.arg("policy")
|
||||
.arg("explain")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--cluster")
|
||||
.arg(cluster.path())
|
||||
.arg("--graph")
|
||||
.arg("knowledge")
|
||||
.arg("--actor")
|
||||
.arg("act-bruno")
|
||||
.arg("--action")
|
||||
|
|
@ -933,22 +1122,26 @@ fn policy_explain_reports_decision_and_matched_rule() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn read_can_resolve_uri_from_config() {
|
||||
fn read_resolves_uri_from_default_store_scope() {
|
||||
// RFC-011: a zero-flag read resolves its graph from `defaults.store` in the
|
||||
// operator config (the local-dev default scope) — no omnigraph.yaml.
|
||||
let temp = tempdir().unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
write_config(&config, &local_yaml_config(&graph));
|
||||
let home = tempdir().unwrap();
|
||||
std::fs::write(
|
||||
home.path().join("config.yaml"),
|
||||
format!("defaults:\n store: {}\n", graph.to_string_lossy()),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let output = output_success(
|
||||
cli()
|
||||
.env("OMNIGRAPH_HOME", home.path())
|
||||
.arg("read")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
.arg("get_person")
|
||||
.arg("--params")
|
||||
.arg(r#"{"name":"Alice"}"#)
|
||||
|
|
@ -968,10 +1161,10 @@ fn read_csv_format_outputs_header_and_row_values() {
|
|||
let output = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
.arg("get_person")
|
||||
.arg("--params")
|
||||
.arg(r#"{"name":"Alice"}"#)
|
||||
|
|
@ -1005,10 +1198,10 @@ fn read_uses_operator_default_output_format() {
|
|||
command
|
||||
.env("OMNIGRAPH_HOME", operator_home.path())
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
.arg("get_person")
|
||||
.arg("--params")
|
||||
.arg(r#"{"name":"Alice"}"#);
|
||||
|
|
@ -1040,10 +1233,10 @@ fn read_jsonl_format_outputs_metadata_header_first() {
|
|||
let output = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
.arg("get_person")
|
||||
.arg("--params")
|
||||
.arg(r#"{"name":"Alice"}"#)
|
||||
|
|
@ -1075,6 +1268,7 @@ query insert_person($name: String, $age: I32) {
|
|||
let output = output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
|
|
@ -1091,10 +1285,10 @@ query insert_person($name: String, $age: I32) {
|
|||
let verify = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
.arg("get_person")
|
||||
.arg("--params")
|
||||
.arg(r#"{"name":"Eve"}"#)
|
||||
|
|
@ -1106,13 +1300,13 @@ query insert_person($name: String, $age: I32) {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn change_can_resolve_uri_and_branch_from_config() {
|
||||
fn change_resolves_uri_and_default_branch_from_store_scope() {
|
||||
// RFC-011: a mutate resolves its graph from `--store` and defaults the
|
||||
// branch to main (no omnigraph.yaml cli.graph / cli.branch).
|
||||
let temp = tempdir().unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
write_config(&config, &local_yaml_config(&graph));
|
||||
let mutation_file = temp.path().join("config-mutations.gq");
|
||||
write_query_file(
|
||||
&mutation_file,
|
||||
|
|
@ -1126,8 +1320,8 @@ query insert_person($name: String, $age: I32) {
|
|||
let output = output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
.arg("--params")
|
||||
|
|
@ -1149,6 +1343,7 @@ fn read_requires_name_for_multi_query_files() {
|
|||
let output = output_failure(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq")),
|
||||
|
|
@ -1167,6 +1362,7 @@ fn read_supports_inline_query_string() {
|
|||
let output = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&repo)
|
||||
.arg("-e")
|
||||
.arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }")
|
||||
|
|
@ -1180,6 +1376,49 @@ fn read_supports_inline_query_string() {
|
|||
assert_eq!(payload["rows"][0]["p.name"], "Alice");
|
||||
}
|
||||
|
||||
#[test]
fn positional_http_uri_on_a_data_verb_is_rejected() {
    // RFC-011: an http(s):// URL handed to `--store` must be refused with a
    // pointer to `--server <url>` instead of dispatching to a remote server.
    let mut cmd = cli();
    cmd.args([
        "query",
        "--store",
        "http://127.0.0.1:1",
        "-e",
        "query q() { match { $p: Person { } } return { $p } }",
    ]);
    let output = output_failure(&mut cmd);

    let stderr = String::from_utf8_lossy(&output.stderr);
    let rejected = stderr.contains("must be addressed with `--server <url>`");
    assert!(rejected, "expected store-remote rejection; got: {stderr}");
}
|
||||
|
||||
#[test]
fn as_on_a_served_write_is_rejected() {
    // RFC-011: on a served write the actor comes from the bearer token, so
    // `--as` may not set identity. The rejection is expected while the remote
    // client is being built, i.e. before any HTTP call — which is why an
    // unreachable address is good enough here.
    let mut cmd = cli();
    cmd.args([
        "mutate",
        "--server",
        "http://127.0.0.1:1",
        "--as",
        "act-nope",
        "-e",
        "query add($name: String) { insert Person { name: $name } }",
        "--params",
        r#"{"name":"X"}"#,
    ]);
    let output = output_failure(&mut cmd);

    let stderr = String::from_utf8_lossy(&output.stderr);
    let rejected = stderr.contains("`--as` is not allowed on a served write");
    assert!(rejected, "expected --as-served rejection; got: {stderr}");
}
|
||||
|
||||
#[test]
|
||||
fn change_supports_inline_query_string() {
|
||||
let temp = tempdir().unwrap();
|
||||
|
|
@ -1190,6 +1429,7 @@ fn change_supports_inline_query_string() {
|
|||
let output = output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg("--store")
|
||||
.arg(&repo)
|
||||
.arg("--query-string")
|
||||
.arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }")
|
||||
|
|
@ -1204,6 +1444,7 @@ fn change_supports_inline_query_string() {
|
|||
let verify = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&repo)
|
||||
.arg("-e")
|
||||
.arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name } }")
|
||||
|
|
@ -1225,6 +1466,7 @@ fn read_rejects_query_string_combined_with_query() {
|
|||
let output = output_failure(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--store")
|
||||
.arg(&repo)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
|
|
@ -1245,7 +1487,7 @@ fn read_rejects_empty_query_string() {
|
|||
init_graph(&repo);
|
||||
load_fixture(&repo);
|
||||
|
||||
let output = output_failure(cli().arg("read").arg(&repo).arg("-e").arg(""));
|
||||
let output = output_failure(cli().arg("read").arg("--store").arg(&repo).arg("-e").arg(""));
|
||||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||||
assert!(
|
||||
stderr.contains("must not be empty"),
|
||||
|
|
@ -1373,6 +1615,160 @@ fn branch_delete_rejects_main() {
|
|||
assert!(stderr.contains("cannot delete branch 'main'"));
|
||||
}
|
||||
|
||||
// ── RFC-011 Decision 9: write diagnostics + non-local destructive-confirm ──
|
||||
|
||||
#[test]
fn write_echoes_resolved_target_to_stderr() {
    // A write reports its resolved target and access path on stderr, leaving
    // the `--json` payload on stdout untouched. For a local load the access
    // path is rendered as "(direct, local)".
    let workdir = tempdir().unwrap();
    let graph = graph_path(workdir.path());
    init_graph(&graph);
    let data = fixture("test.jsonl");

    let mut cmd = cli();
    cmd.args(["load", "--mode", "append", "--data"])
        .arg(&data)
        .arg(&graph)
        .arg("--json");
    let output = output_success(&mut cmd);

    let stderr = String::from_utf8(output.stderr).unwrap();
    let names_target = stderr.contains("omnigraph load →");
    let names_access_path = stderr.contains("(direct, local)");
    assert!(
        names_target && names_access_path,
        "missing write-target echo; stderr: {stderr}"
    );

    // The echo went to stderr, so stdout must still be a single JSON document.
    let _: Value = serde_json::from_slice(&output.stdout).unwrap();
}
|
||||
|
||||
#[test]
fn quiet_suppresses_the_write_target_echo() {
    // Same local append-load as the echo test, but with the global `--quiet`
    // flag first: the "omnigraph load →" line must not appear on stderr.
    let workdir = tempdir().unwrap();
    let graph = graph_path(workdir.path());
    init_graph(&graph);
    let data = fixture("test.jsonl");

    let mut cmd = cli();
    cmd.args(["--quiet", "load", "--mode", "append", "--data"])
        .arg(&data)
        .arg(&graph);
    let output = output_success(&mut cmd);

    let stderr = String::from_utf8(output.stderr).unwrap();
    let echoed = stderr.contains("omnigraph load →");
    assert!(!echoed, "--quiet should suppress the echo; stderr: {stderr}");
}
|
||||
|
||||
#[test]
fn branch_delete_against_non_local_scope_refuses_without_yes() {
    // The bucket is fictitious on purpose: the confirm gate is expected to
    // fire before the graph is ever opened.
    let mut cmd = cli();
    cmd.args([
        "branch",
        "delete",
        "--store",
        "s3://fake-bucket/g.omni",
        "feature",
        "--json",
    ]);
    let output = output_failure(&mut cmd);

    let stderr = String::from_utf8(output.stderr).unwrap();
    let refused = stderr.contains("refusing destructive `branch delete`");
    let hints_yes = stderr.contains("--yes");
    assert!(
        refused && hints_yes,
        "expected a non-local destructive refusal; stderr: {stderr}"
    );
}
|
||||
|
||||
#[test]
fn branch_delete_against_non_local_scope_passes_gate_with_yes() {
    // `--yes` skips the confirm gate. The command is still expected to fail —
    // the fake bucket cannot be opened — so what is checked is that the
    // refusal message is NOT what stderr carries.
    let mut cmd = cli();
    cmd.args([
        "branch",
        "delete",
        "--store",
        "s3://fake-bucket/g.omni",
        "feature",
        "--yes",
        "--json",
    ]);
    let output = output_failure(&mut cmd);

    let stderr = String::from_utf8(output.stderr).unwrap();
    let refused = stderr.contains("refusing destructive");
    assert!(
        !refused,
        "--yes should bypass the confirm gate; stderr: {stderr}"
    );
}
|
||||
|
||||
#[test]
fn overwrite_load_against_non_local_scope_refuses_without_yes() {
    // An overwrite-mode load aimed at an s3:// store, without `--yes`, must be
    // refused as a destructive operation.
    let data = fixture("test.jsonl");

    let mut cmd = cli();
    cmd.args(["load", "--mode", "overwrite", "--data"])
        .arg(data)
        .args(["--store", "s3://fake-bucket/g.omni", "--json"]);
    let output = output_failure(&mut cmd);

    let stderr = String::from_utf8(output.stderr).unwrap();
    let refused = stderr.contains("refusing destructive `load --mode overwrite`");
    assert!(
        refused,
        "expected a non-local overwrite refusal; stderr: {stderr}"
    );
}
|
||||
|
||||
#[test]
fn cleanup_against_non_local_scope_refuses_without_yes() {
    // `--confirm` gets past the preview gate, but against a non-local store
    // cleanup must additionally demand `--yes`.
    let mut cmd = cli();
    cmd.args([
        "cleanup",
        "--store",
        "s3://fake-bucket/g.omni",
        "--keep",
        "5",
        "--confirm",
        "--json",
    ]);
    let output = output_failure(&mut cmd);

    let stderr = String::from_utf8(output.stderr).unwrap();
    let refused = stderr.contains("refusing destructive `cleanup`");
    assert!(
        refused,
        "expected a non-local cleanup refusal; stderr: {stderr}"
    );
}
|
||||
|
||||
#[test]
fn cleanup_against_local_scope_executes_with_confirm() {
    // Against a local graph `--confirm` is sufficient (no `--yes`): the
    // command runs, emits a JSON payload with a `tables` array, and echoes
    // its target on stderr.
    let workdir = tempdir().unwrap();
    let graph = graph_path(workdir.path());
    init_graph(&graph);
    load_fixture(&graph);

    let mut cmd = cli();
    cmd.args(["cleanup", "--keep", "1", "--confirm"])
        .arg(&graph)
        .arg("--json");
    let output = output_success(&mut cmd);

    let payload: Value = serde_json::from_slice(&output.stdout).unwrap();
    let has_tables = payload["tables"].as_array().is_some();
    assert!(has_tables, "{payload}");

    let stderr = String::from_utf8(output.stderr).unwrap();
    let echoed = stderr.contains("omnigraph cleanup →");
    assert!(echoed, "stderr: {stderr}");
}
|
||||
|
||||
#[test]
|
||||
fn branch_merge_defaults_target_to_main() {
|
||||
let temp = tempdir().unwrap();
|
||||
|
|
@ -1522,19 +1918,17 @@ fn snapshot_json_returns_manifest_version_and_tables() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn snapshot_can_resolve_uri_from_config() {
|
||||
fn snapshot_resolves_uri_from_store_scope() {
|
||||
let temp = tempdir().unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
write_config(&config, &local_yaml_config(&graph));
|
||||
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("snapshot")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--store")
|
||||
.arg(&graph)
|
||||
.arg("--json"),
|
||||
);
|
||||
let payload: Value = serde_json::from_slice(&output.stdout).unwrap();
|
||||
|
|
@ -1675,3 +2069,162 @@ fn cli_fails_for_invalid_merge_requests() {
|
|||
.contains("distinct source and target")
|
||||
);
|
||||
}
|
||||
|
||||
/// RFC-011 Decision 8: `profile list` / `profile show` inspect the operator
|
||||
/// config's profiles read-only. Hermetic via OMNIGRAPH_HOME.
|
||||
fn profile_home() -> tempfile::TempDir {
|
||||
let home = tempdir().unwrap();
|
||||
std::fs::write(
|
||||
home.path().join("config.yaml"),
|
||||
"operator:\n actor: act-andrew\n\
|
||||
defaults:\n output: json\n server: prod\n default_graph: knowledge\n\
|
||||
servers:\n prod:\n url: https://graph.example.com\n\
|
||||
clusters:\n brain:\n root: s3://acme/clusters/brain\n\
|
||||
profiles:\n\
|
||||
\x20 staging:\n server: prod\n default_graph: kb\n\
|
||||
\x20 brain-admin:\n cluster: brain\n\
|
||||
\x20 localdev:\n store: file:///data/dev.omni\n\
|
||||
\x20 broken:\n server: a\n store: b\n",
|
||||
)
|
||||
.unwrap();
|
||||
home
|
||||
}
|
||||
|
||||
#[test]
fn profile_list_names_each_profile_with_its_binding_and_marks_active() {
    // With $OMNIGRAPH_PROFILE=staging, the human-readable listing must flag
    // that profile as active and show every profile's binding.
    let home = profile_home();

    let mut cmd = cli();
    cmd.env("OMNIGRAPH_HOME", home.path())
        .env("OMNIGRAPH_PROFILE", "staging")
        .args(["profile", "list"]);
    let out = output_success(&mut cmd);
    let stdout = stdout_string(&out);

    for expected in [
        "staging (active)",
        "server: prod",
        "cluster: brain",
        "store: file:///data/dev.omni",
    ] {
        assert!(stdout.contains(expected), "{stdout}");
    }

    // A malformed (two-scope) profile is reported, not a hard failure.
    let lists_broken = stdout.contains("broken");
    let flags_invalid = stdout.contains("invalid:");
    assert!(lists_broken && flags_invalid, "{stdout}");
}
|
||||
|
||||
#[test]
fn profile_list_json_shape() {
    // `profile list --json` emits an array of profile objects; check the
    // fields of one valid entry (`brain-admin`) and of the invalid `broken`.
    let home = profile_home();

    let mut cmd = cli();
    cmd.env("OMNIGRAPH_HOME", home.path())
        .args(["profile", "list", "--json"]);
    let out = output_success(&mut cmd);

    let items: Value = serde_json::from_slice(&out.stdout).unwrap();
    let profiles = items.as_array().unwrap();

    // Valid cluster-bound profile; no profile is active in this invocation.
    let brain = profiles
        .iter()
        .find(|entry| entry["name"] == "brain-admin")
        .unwrap();
    assert_eq!(brain["binding"], "cluster: brain");
    assert_eq!(brain["scope_kind"], "cluster");
    assert_eq!(brain["target"], "brain");
    assert_eq!(brain["valid"], true);
    assert!(brain["error"].is_null());
    assert_eq!(brain["active"], false);

    // Two-scope profile: listed as invalid, with an error naming it.
    let broken = profiles
        .iter()
        .find(|entry| entry["name"] == "broken")
        .unwrap();
    assert_eq!(broken["scope_kind"], "invalid");
    assert_eq!(broken["valid"], false);
    assert!(broken["target"].is_null());
    let message = broken["error"].as_str().unwrap();
    assert!(message.contains("profile 'broken'"));
}
|
||||
|
||||
#[test]
fn profile_show_resolves_named_scope_endpoints() {
    let home = profile_home();

    // Cluster profile, text output: the endpoint is the named cluster's root.
    let mut show_cluster = cli();
    show_cluster
        .env("OMNIGRAPH_HOME", home.path())
        .args(["profile", "show", "brain-admin"]);
    let cluster = output_success(&mut show_cluster);
    let cs = stdout_string(&cluster);
    assert!(cs.contains("scope: cluster brain"), "{cs}");
    assert!(cs.contains("endpoint: s3://acme/clusters/brain"), "{cs}");

    // Store profile, JSON output: the endpoint is the store URI itself.
    let mut show_store = cli();
    show_store
        .env("OMNIGRAPH_HOME", home.path())
        .args(["profile", "show", "localdev", "--json"]);
    let store = output_success(&mut show_store);
    let detail: Value = serde_json::from_slice(&store.stdout).unwrap();
    assert_eq!(detail["scope_kind"], "store");
    assert_eq!(detail["endpoint"], "file:///data/dev.omni");
}
|
||||
|
||||
#[test]
fn profile_show_without_name_falls_back_to_flat_defaults() {
    // No profile name and no $OMNIGRAPH_PROFILE set by this test: the detail
    // shown is the config's flat `defaults` section, reported under the
    // placeholder name "(defaults)".
    let home = profile_home();

    let mut cmd = cli();
    cmd.env("OMNIGRAPH_HOME", home.path())
        .args(["profile", "show", "--json"]);
    let out = output_success(&mut cmd);

    let detail: Value = serde_json::from_slice(&out.stdout).unwrap();
    assert_eq!(detail["name"], "(defaults)");
    assert_eq!(detail["scope_kind"], "server");
    assert_eq!(detail["endpoint"], "https://graph.example.com");
    assert_eq!(detail["default_graph"], "knowledge");
}
|
||||
|
||||
#[test]
fn profile_show_without_name_uses_active_env_profile() {
    let home = profile_home();

    let mut cmd = cli();
    cmd.env("OMNIGRAPH_HOME", home.path())
        .env("OMNIGRAPH_PROFILE", "brain-admin")
        .args(["profile", "show", "--json"]);
    let out = output_success(&mut cmd);
    let detail: Value = serde_json::from_slice(&out.stdout).unwrap();

    // No name argument was given, yet $OMNIGRAPH_PROFILE picks brain-admin
    // rather than the flat defaults.
    assert_eq!(detail["name"], "brain-admin");
    assert_eq!(detail["scope_kind"], "cluster");
    assert_eq!(detail["endpoint"], "s3://acme/clusters/brain");
    // The output format is rendered as its canonical lowercase value name.
    assert_eq!(detail["output_format"], "json");
}
|
||||
|
||||
#[test]
fn profile_show_unknown_name_errors() {
    // Asking for a profile the config does not define must fail and name the
    // offending profile on stderr.
    let home = profile_home();

    let mut cmd = cli();
    cmd.env("OMNIGRAPH_HOME", home.path())
        .args(["profile", "show", "nope"]);
    let out = output_failure(&mut cmd);

    let stderr = String::from_utf8_lossy(&out.stderr);
    let names_profile = stderr.contains("unknown profile 'nope'");
    assert!(names_profile, "{stderr}");
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
//! Moved verbatim from tests/cli.rs in the modularization.
|
||||
|
||||
|
||||
use serde_json::Value;
|
||||
use tempfile::tempdir;
|
||||
|
||||
mod support;
|
||||
|
|
@ -57,227 +56,172 @@ query list_people() {
|
|||
assert_eq!(stdout_string(&lint_output), stdout_string(&check_output));
|
||||
}
|
||||
|
||||
// Legacy `omnigraph.yaml` `aliases:` invoked via the `--alias` flag were
|
||||
// removed in RFC-011 D4 — operator aliases now live under `omnigraph alias
|
||||
// <name>` (the happy path is covered by system_local's operator-alias e2e).
|
||||
// The legacy file-alias path has no CLI entry point.
|
||||
|
||||
#[test]
|
||||
fn read_alias_from_yaml_config_runs_with_kv_output() {
|
||||
let temp = tempdir().unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
let query = temp.path().join("aliases.gq");
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
write_query_file(
|
||||
&query,
|
||||
&std::fs::read_to_string(fixture("test.gq")).unwrap(),
|
||||
fn alias_flag_is_removed_from_query() {
|
||||
// RFC-011 D4: `--alias` no longer exists on query/mutate; use `alias <name>`.
|
||||
let output = output_failure(cli().arg("query").arg("--alias").arg("who"));
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("unexpected argument") && stderr.contains("--alias"),
|
||||
"expected clap to reject --alias on query; got: {stderr}"
|
||||
);
|
||||
write_config(
|
||||
&config,
|
||||
&format!(
|
||||
"{}aliases:\n owner:\n command: read\n query: aliases.gq\n name: get_person\n args: [name]\n format: kv\n",
|
||||
local_yaml_config(&graph)
|
||||
),
|
||||
);
|
||||
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--alias")
|
||||
.arg("owner")
|
||||
.arg("Alice"),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
|
||||
assert!(stdout.contains("row 1"));
|
||||
assert!(stdout.contains("p.name: Alice"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn read_alias_uses_alias_target_without_cli_default_and_accepts_url_like_arg() {
|
||||
let temp = tempdir().unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
let query = temp.path().join("aliases.gq");
|
||||
let data = temp.path().join("url-like.jsonl");
|
||||
init_graph(&graph);
|
||||
write_jsonl(
|
||||
&data,
|
||||
r#"{"type":"Person","data":{"name":"https://example.com","age":30}}"#,
|
||||
);
|
||||
output_success(
|
||||
fn alias_unknown_name_errors_listing_defined() {
|
||||
// Hermetic: an unknown alias fails before any network, listing defined ones.
|
||||
let home = tempdir().unwrap();
|
||||
std::fs::write(
|
||||
home.path().join("config.yaml"),
|
||||
"servers:\n dev:\n url: https://x\naliases:\n who:\n server: dev\n query: find_person\n",
|
||||
)
|
||||
.unwrap();
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--mode")
|
||||
.arg("overwrite")
|
||||
.arg("--data")
|
||||
.arg(&data)
|
||||
.arg(&graph),
|
||||
.env("OMNIGRAPH_HOME", home.path())
|
||||
.arg("alias")
|
||||
.arg("nope"),
|
||||
);
|
||||
write_query_file(
|
||||
&query,
|
||||
&std::fs::read_to_string(fixture("test.gq")).unwrap(),
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("unknown alias 'nope'") && stderr.contains("who"),
|
||||
"expected an unknown-alias error listing defined aliases; got: {stderr}"
|
||||
);
|
||||
write_config(
|
||||
&config,
|
||||
&format!(
|
||||
"graphs:\n local:\n uri: '{}'\nquery:\n roots:\n - .\npolicy: {{}}\naliases:\n owner:\n command: read\n query: aliases.gq\n name: get_person\n args: [name]\n graph: local\n format: kv\n",
|
||||
graph.to_string_lossy()
|
||||
),
|
||||
);
|
||||
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--alias")
|
||||
.arg("owner")
|
||||
.arg("https://example.com"),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
|
||||
assert!(stdout.contains("row 1"));
|
||||
assert!(stdout.contains("p.name: https://example.com"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn change_alias_from_yaml_config_persists_changes() {
|
||||
let temp = tempdir().unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
let query = temp.path().join("mutations.gq");
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
write_query_file(
|
||||
&query,
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
insert Person { name: $name, age: $age }
|
||||
fn alias_rejects_global_scope_flags_that_the_binding_owns() {
|
||||
for (flag, value) in [
|
||||
("--server", "dev"),
|
||||
("--graph", "local"),
|
||||
("--store", "file:///tmp/graph.omni"),
|
||||
("--cluster", "."),
|
||||
("--profile", "prod"),
|
||||
("--as", "act-op"),
|
||||
] {
|
||||
let output = output_failure(cli().arg(flag).arg(value).arg("alias").arg("who"));
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("`alias` uses the server, graph, and stored query")
|
||||
&& stderr.contains(flag),
|
||||
"expected {flag} to be rejected by the alias binding guard; got: {stderr}"
|
||||
);
|
||||
}
|
||||
}
|
||||
"#,
|
||||
|
||||
#[test]
|
||||
fn queries_and_policy_wrong_server_scope_points_at_cluster_scope() {
|
||||
let output = output_failure(cli().arg("--server").arg("prod").arg("queries").arg("list"));
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("pass --cluster <dir|uri>") && !stderr.contains("pass --config <dir>"),
|
||||
"queries should point at --cluster, not --config; got: {stderr}"
|
||||
);
|
||||
write_config(
|
||||
&config,
|
||||
&format!(
|
||||
"{}aliases:\n add_person:\n command: change\n query: mutations.gq\n name: insert_person\n args: [name, age]\n",
|
||||
local_yaml_config(&graph)
|
||||
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("--server")
|
||||
.arg("prod")
|
||||
.arg("policy")
|
||||
.arg("validate"),
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("pass --cluster <dir|uri>") && !stderr.contains("pass --config <dir>"),
|
||||
"policy should point at --cluster, not --config; got: {stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
// RFC-011: `queries validate`/`list` source the registry + schemas from a
|
||||
// converged cluster's applied state (`--cluster <dir>`), not omnigraph.yaml.
|
||||
|
||||
/// Build a converged single-graph cluster (id `knowledge`) with one stored
|
||||
/// query. `query_block` is the YAML under the graph's `queries:` key.
|
||||
fn converged_cluster_with_query(query_file: &str, query_src: &str, query_block: &str) -> tempfile::TempDir {
|
||||
let temp = tempdir().unwrap();
|
||||
let dir = temp.path();
|
||||
std::fs::copy(fixture("test.pg"), dir.join("graph.pg")).unwrap();
|
||||
write_query_file(&dir.join(query_file), query_src);
|
||||
std::fs::write(
|
||||
dir.join("cluster.yaml"),
|
||||
format!(
|
||||
"version: 1\nmetadata:\n name: sys\nstate:\n backend: cluster\n lock: true\n\
|
||||
graphs:\n knowledge:\n schema: ./graph.pg\n queries:\n{query_block}"
|
||||
),
|
||||
);
|
||||
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg("--config")
|
||||
.arg(&config)
|
||||
.arg("--alias")
|
||||
.arg("add_person")
|
||||
.arg("Eve")
|
||||
.arg("29")
|
||||
.arg("--json"),
|
||||
);
|
||||
let payload: Value = serde_json::from_slice(&output.stdout).unwrap();
|
||||
assert_eq!(payload["affected_nodes"], 1);
|
||||
|
||||
let verify = output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
.arg("get_person")
|
||||
.arg("--params")
|
||||
.arg(r#"{"name":"Eve"}"#)
|
||||
.arg("--json"),
|
||||
);
|
||||
let verify_payload: Value = serde_json::from_slice(&verify.stdout).unwrap();
|
||||
assert_eq!(verify_payload["row_count"], 1);
|
||||
)
|
||||
.unwrap();
|
||||
output_success(cli().arg("cluster").arg("import").arg("--config").arg(dir));
|
||||
output_success(cli().arg("cluster").arg("apply").arg("--config").arg(dir));
|
||||
temp
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn queries_validate_exits_zero_on_clean_registry() {
|
||||
let graph = SystemGraph::loaded();
|
||||
graph.write_query(
|
||||
let cluster = converged_cluster_with_query(
|
||||
"find_person.gq",
|
||||
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||
);
|
||||
let config = graph.write_config(
|
||||
"omnigraph.yaml",
|
||||
&queries_test_config(
|
||||
&graph.path().to_string_lossy(),
|
||||
"find_person",
|
||||
"find_person.gq",
|
||||
),
|
||||
" find_person:\n file: ./find_person.gq\n",
|
||||
);
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("queries")
|
||||
.arg("validate")
|
||||
.arg("--config")
|
||||
.arg(&config),
|
||||
.arg("--cluster")
|
||||
.arg(cluster.path()),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
assert!(stdout.contains("OK"), "stdout:\n{stdout}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn queries_validate_exits_nonzero_on_type_broken_query() {
|
||||
let graph = SystemGraph::loaded();
|
||||
// `Widget` is not in the fixture schema.
|
||||
graph.write_query(
|
||||
"ghost.gq",
|
||||
fn cluster_import_rejects_a_type_broken_query() {
|
||||
// In the cluster model a stored query is type-checked at the cluster
|
||||
// boundary (import/apply), so a broken query can never reach the applied
|
||||
// state `queries validate` reads — the gate is upstream. `Widget` is not in
|
||||
// the fixture schema, so import must reject it, naming the query.
|
||||
let temp = tempdir().unwrap();
|
||||
let dir = temp.path();
|
||||
std::fs::copy(fixture("test.pg"), dir.join("graph.pg")).unwrap();
|
||||
write_query_file(
|
||||
&dir.join("ghost.gq"),
|
||||
"query ghost() { match { $w: Widget } return { $w.name } }",
|
||||
);
|
||||
let config = graph.write_config(
|
||||
"omnigraph.yaml",
|
||||
&queries_test_config(&graph.path().to_string_lossy(), "ghost", "ghost.gq"),
|
||||
std::fs::write(
|
||||
dir.join("cluster.yaml"),
|
||||
"version: 1\nmetadata:\n name: sys\nstate:\n backend: cluster\n lock: true\n\
|
||||
graphs:\n knowledge:\n schema: ./graph.pg\n queries:\n ghost:\n file: ./ghost.gq\n",
|
||||
)
|
||||
.unwrap();
|
||||
let output = output_failure(cli().arg("cluster").arg("import").arg("--config").arg(dir));
|
||||
let combined = format!(
|
||||
"{}{}",
|
||||
stdout_string(&output),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("queries")
|
||||
.arg("validate")
|
||||
.arg("--config")
|
||||
.arg(&config),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
assert!(
|
||||
stdout.contains("ghost"),
|
||||
"validation should name the broken query; stdout:\n{stdout}"
|
||||
combined.contains("ghost"),
|
||||
"cluster import must reject the broken query, naming it; got:\n{combined}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn queries_list_prints_registered_query() {
|
||||
let graph = SystemGraph::loaded();
|
||||
graph.write_query(
|
||||
let cluster = converged_cluster_with_query(
|
||||
"find_person.gq",
|
||||
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||
);
|
||||
// Exposed with an explicit tool name so the list shows the MCP suffix.
|
||||
let config = graph.write_config(
|
||||
"omnigraph.yaml",
|
||||
&format!(
|
||||
concat!(
|
||||
"graphs:\n",
|
||||
" local:\n",
|
||||
" uri: '{}'\n",
|
||||
" queries:\n",
|
||||
" find_person:\n",
|
||||
" file: ./find_person.gq\n",
|
||||
" mcp: {{ expose: true, tool_name: lookup_person }}\n",
|
||||
"cli:\n",
|
||||
" graph: local\n",
|
||||
"policy: {{}}\n",
|
||||
),
|
||||
graph.path().to_string_lossy().replace('\'', "''")
|
||||
),
|
||||
" find_person:\n file: ./find_person.gq\n",
|
||||
);
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("queries")
|
||||
.arg("list")
|
||||
.arg("--config")
|
||||
.arg(&config),
|
||||
.arg("--cluster")
|
||||
.arg(cluster.path()),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
assert!(stdout.contains("find_person"), "stdout:\n{stdout}");
|
||||
|
|
@ -285,251 +229,37 @@ fn queries_list_prints_registered_query() {
|
|||
stdout.contains("$name: String"),
|
||||
"list should show typed params; stdout:\n{stdout}"
|
||||
);
|
||||
assert!(
|
||||
stdout.contains("[mcp: lookup_person]"),
|
||||
"list should show the MCP tool name for exposed queries; stdout:\n{stdout}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn queries_list_requires_graph_selection_for_per_graph_only_registries() {
|
||||
let graph = SystemGraph::loaded();
|
||||
graph.write_query(
|
||||
"find_person.gq",
|
||||
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||
);
|
||||
let config = graph.write_config(
|
||||
"omnigraph.yaml",
|
||||
&format!(
|
||||
concat!(
|
||||
"graphs:\n",
|
||||
" local:\n",
|
||||
" uri: '{}'\n",
|
||||
" queries:\n",
|
||||
" find_person:\n",
|
||||
" file: ./find_person.gq\n",
|
||||
"policy: {{}}\n",
|
||||
),
|
||||
graph.path().to_string_lossy().replace('\'', "''")
|
||||
),
|
||||
);
|
||||
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("queries")
|
||||
.arg("list")
|
||||
.arg("--config")
|
||||
.arg(&config),
|
||||
);
|
||||
fn queries_validate_requires_a_cluster() {
|
||||
// RFC-011: with no --cluster (and no cluster profile), the command errors
|
||||
// loudly rather than reading any omnigraph.yaml.
|
||||
let output = output_failure(cli().arg("queries").arg("validate"));
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(
|
||||
stderr.contains("local") && stderr.contains("--target local"),
|
||||
"error must name the graph and give a concrete selection hint; stderr:\n{stderr}"
|
||||
stderr.contains("needs a cluster") || stderr.contains("--cluster"),
|
||||
"queries validate must require a cluster; stderr:\n{stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn queries_list_without_graph_selection_lists_top_level_registry() {
    // Config with only a top-level `queries:` block and no graph selection.
    const TOP_LEVEL_ONLY_CONFIG: &str = concat!(
        "queries:\n",
        " top_find:\n",
        " file: ./top_find.gq\n",
        "policy: {}\n",
    );
    const TOP_FIND_SRC: &str =
        "query top_find($name: String) { match { $p: Person { name: $name } } return { $p.age } }";

    let graph = SystemGraph::loaded();
    graph.write_query("top_find.gq", TOP_FIND_SRC);
    let config = graph.write_config("omnigraph.yaml", TOP_LEVEL_ONLY_CONFIG);

    // `queries list --config <file>` must succeed and name the query.
    let output = output_success(
        cli()
            .arg("queries")
            .arg("list")
            .arg("--config")
            .arg(&config),
    );
    let stdout = stdout_string(&output);
    let lists_query = stdout.contains("top_find");
    assert!(lists_query, "stdout:\n{stdout}");
}
|
||||
|
||||
#[test]
fn queries_list_unknown_target_errors() {
    // `queries list` never opens a graph URI, so it cannot lean on URI
    // resolution to catch an unknown graph the way other commands do. An
    // unknown `--target` must still fail and name the graph, rather than
    // silently falling back to the top-level registry and printing the wrong
    // (or an empty) catalog.
    const FIND_PERSON_SRC: &str =
        "query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }";

    let graph = SystemGraph::loaded();
    graph.write_query("find_person.gq", FIND_PERSON_SRC);

    let config_yaml = queries_test_config(
        &graph.path().to_string_lossy(),
        "find_person",
        "find_person.gq",
    );
    let config = graph.write_config("omnigraph.yaml", &config_yaml);

    let output = output_failure(
        cli()
            .arg("queries")
            .arg("list")
            .arg("--target")
            .arg("nonexistent")
            .arg("--config")
            .arg(&config),
    );
    let stderr = String::from_utf8_lossy(&output.stderr);
    let names_unknown_graph = stderr.contains("nonexistent");
    assert!(
        names_unknown_graph,
        "error must name the unknown graph; stderr:\n{stderr}"
    );
}
|
||||
|
||||
#[test]
fn queries_commands_reject_named_graph_with_populated_top_level_block() {
    // A named graph (selected here through `cli.graph`) reads its own
    // `graphs.<name>` block, so a populated top-level `queries:` block would
    // be silently ignored — a config the server REFUSES to boot. `queries
    // validate` and `queries list` must reject it as well (matching boot)
    // instead of processing the per-graph block and reporting a false green.
    const FIND_PERSON_SRC: &str =
        "query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }";

    let graph = SystemGraph::loaded();
    graph.write_query("find_person.gq", FIND_PERSON_SRC);

    // Single quotes are doubled so the path survives YAML single-quoting.
    let quoted_uri = graph.path().to_string_lossy().replace('\'', "''");
    let config_yaml = format!(
        concat!(
            "graphs:\n",
            " local:\n",
            " uri: '{}'\n",
            " queries:\n",
            " find_person:\n",
            " file: ./find_person.gq\n",
            "cli:\n",
            " graph: local\n",
            "queries:\n", // populated top-level block: the coherence violation
            " legacy:\n",
            " file: ./legacy.gq\n",
            "policy: {{}}\n",
        ),
        quoted_uri
    );
    let config = graph.write_config("omnigraph.yaml", &config_yaml);

    // Both subcommands resolve `local` from cli.graph (no positional URI), so
    // each must fail and mention the graph plus the ignored block — the same
    // way server boot does.
    for sub in ["validate", "list"] {
        let output = output_failure(cli().arg("queries").arg(sub).arg("--config").arg(&config));
        let stderr = String::from_utf8_lossy(&output.stderr);
        let names_graph = stderr.contains("local");
        let names_block = stderr.contains("queries");
        assert!(
            names_graph && names_block,
            "`queries {sub}` must reject a named graph with a populated top-level block; stderr:\n{stderr}"
        );
    }
}
|
||||
|
||||
#[test]
fn queries_validate_exits_nonzero_on_duplicate_tool_name() {
    // Two exposed queries that claim the same MCP tool name collide at load
    // time — `queries validate` must fail (offline, before the engine opens)
    // and report both query names together with the contested tool name.
    let graph = SystemGraph::loaded();
    let query_files = [
        ("a.gq", "query a() { match { $p: Person } return { $p.name } }"),
        ("b.gq", "query b() { match { $p: Person } return { $p.name } }"),
    ];
    for (file_name, query_src) in query_files {
        graph.write_query(file_name, query_src);
    }

    // Single quotes are doubled so the path survives YAML single-quoting.
    let quoted_uri = graph.path().to_string_lossy().replace('\'', "''");
    let config_yaml = format!(
        concat!(
            "graphs:\n",
            " local:\n",
            " uri: '{}'\n",
            " queries:\n",
            " a:\n",
            " file: ./a.gq\n",
            " mcp: {{ expose: true, tool_name: dup }}\n",
            " b:\n",
            " file: ./b.gq\n",
            " mcp: {{ expose: true, tool_name: dup }}\n",
            "cli:\n",
            " graph: local\n",
            "policy: {{}}\n",
        ),
        quoted_uri
    );
    let config = graph.write_config("omnigraph.yaml", &config_yaml);

    let output = output_failure(
        cli()
            .arg("queries")
            .arg("validate")
            .arg("--config")
            .arg(&config),
    );
    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_collision = ["dup", "'a'", "'b'"]
        .iter()
        .all(|needle| stderr.contains(*needle));
    assert!(
        reports_collision,
        "duplicate tool name should be reported naming both queries; stderr:\n{stderr}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn queries_validate_positional_uri_ignores_default_graph() {
|
||||
// A positional URI is anonymous → the schema AND the registry both come
|
||||
// from top-level, even when `cli.graph` names a graph whose per-graph
|
||||
// queries would fail. Pins that the URI and registry can't diverge.
|
||||
let graph = SystemGraph::loaded();
|
||||
graph.write_query(
|
||||
"clean.gq",
|
||||
"query clean($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||
);
|
||||
// `Widget` is not in the fixture schema — the default graph's per-graph
|
||||
// query would break validate if it were (wrongly) selected.
|
||||
graph.write_query(
|
||||
"broken.gq",
|
||||
"query broken() { match { $w: Widget } return { $w.name } }",
|
||||
);
|
||||
let config = graph.write_config(
|
||||
"omnigraph.yaml",
|
||||
concat!(
|
||||
"cli:\n graph: prod\n",
|
||||
"graphs:\n",
|
||||
" prod:\n",
|
||||
" uri: /nonexistent-prod.omni\n",
|
||||
" queries:\n",
|
||||
" broken:\n",
|
||||
" file: ./broken.gq\n",
|
||||
"queries:\n",
|
||||
" clean:\n",
|
||||
" file: ./clean.gq\n",
|
||||
"policy: {}\n",
|
||||
),
|
||||
);
|
||||
// Positional URI = the real loaded graph; selection is anonymous, so the
|
||||
// CLEAN top-level registry validates (not prod's broken one).
|
||||
fn queries_validate_graph_filter_selects_one_graph() {
|
||||
// A multi-graph cluster: validate scoped to `knowledge` type-checks only
|
||||
// that graph's registry, ignoring `engineering`'s.
|
||||
let temp = tempdir().unwrap();
|
||||
let dir = temp.path();
|
||||
write_multi_graph_cluster_fixture(dir);
|
||||
output_success(cli().arg("cluster").arg("import").arg("--config").arg(dir));
|
||||
output_success(cli().arg("cluster").arg("apply").arg("--config").arg(dir));
|
||||
let output = output_success(
|
||||
cli()
|
||||
.arg("queries")
|
||||
.arg("validate")
|
||||
.arg(graph.path())
|
||||
.arg("--config")
|
||||
.arg(&config),
|
||||
);
|
||||
let stdout = stdout_string(&output);
|
||||
assert!(
|
||||
stdout.contains("OK"),
|
||||
"positional URI must validate the top-level registry, not the cli.graph default; stdout:\n{stdout}"
|
||||
.arg("--cluster")
|
||||
.arg(dir)
|
||||
.arg("--graph")
|
||||
.arg("knowledge"),
|
||||
);
|
||||
assert!(stdout_string(&output).contains("OK"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,38 @@ fn version_command_prints_current_cli_version() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn help_groups_commands_by_capability() {
    // RFC-010 Slice 2 / RFC-011 Slice B: `--help` clusters commands (following
    // declaration order in the Command enum) and explains the capability each
    // one needs in an after_help legend. Only the legend phrase and the
    // cluster ordering are pinned, to avoid brittle full-text assertions on
    // clap's help body.
    let output = output_success(cli().arg("--help"));
    let stdout = stdout_string(&output);

    let has_legend = stdout.contains("COMMANDS BY CAPABILITY");
    assert!(
        has_legend,
        "capability legend (after_help) missing from --help:\n{stdout}"
    );

    // Byte offset of the first occurrence of `needle`; panics with the full
    // help text when it is absent. The Commands list precedes the legend, so
    // first occurrences sit in the list and must appear in order: an `any`
    // data verb, then a `direct` verb, then the `control` verb.
    let pos = |needle: &str| match stdout.find(needle) {
        Some(offset) => offset,
        None => panic!("'{needle}' not found in --help:\n{stdout}"),
    };

    let (query_at, optimize_at) = (pos("query"), pos("optimize"));
    assert!(
        query_at < optimize_at,
        "data (any) commands should be listed before direct commands"
    );

    let cluster_at = pos("cluster");
    assert!(
        optimize_at < cluster_at,
        "direct commands should be listed before the control command"
    );
}
|
||||
|
||||
#[test]
|
||||
fn init_creates_graph_successfully_on_missing_local_directory() {
|
||||
let temp = tempdir().unwrap();
|
||||
|
|
@ -72,6 +104,28 @@ fn schema_plan_json_reports_supported_additive_change() {
|
|||
assert_eq!(payload["steps"][0]["property_name"], "nickname");
|
||||
}
|
||||
|
||||
#[test]
fn schema_plan_with_server_flag_errors_wrong_plane() {
    // RFC-010 Slice 1: `schema plan` is storage-plane whereas `schema
    // show/apply` are data-plane. The guard must reject `--server` on `plan`
    // using the per-subcommand label, which proves command_plane/command_label
    // descend into the nested enum.
    let schema_file = fixture("test.pg");
    let output = output_failure(
        cli()
            .arg("schema")
            .arg("plan")
            .arg("--schema")
            .arg(schema_file)
            .arg("--server")
            .arg("prod"),
    );

    let stderr = String::from_utf8_lossy(&output.stderr);
    let has_label = stderr.contains("`schema plan` is a direct (storage-native) command");
    let has_hint = stderr.contains("Pass a storage URI.");
    assert!(
        has_label && has_hint,
        "schema plan wrong-capability message not found; got: {stderr}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn schema_plan_json_reports_unsupported_type_change() {
|
||||
let temp = tempdir().unwrap();
|
||||
|
|
@ -280,7 +334,13 @@ fn schema_apply_json_adds_index_for_existing_property() {
|
|||
let dataset = snapshot.open("node:Person").await.unwrap();
|
||||
dataset.load_indices().await.unwrap().len()
|
||||
});
|
||||
assert!(after_index_count > before_index_count);
|
||||
// iss-848: `schema apply` records the `@index` intent but defers the physical
|
||||
// index build (materialized later by ensure_indices/optimize; on this empty
|
||||
// table nothing builds anyway). So the physical index count is unchanged.
|
||||
assert_eq!(
|
||||
after_index_count, before_index_count,
|
||||
"schema apply records @index intent but defers the physical build (iss-848)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -486,163 +546,18 @@ fn graphs_subcommand_help_lists_list_only() {
|
|||
|
||||
#[test]
|
||||
fn graphs_list_against_local_uri_errors_with_remote_only_message() {
|
||||
// RFC-011: `graphs list` is served-only; a `--store` (local) address has no
|
||||
// enumeration endpoint, so it fails loudly pointing at a server / cluster.
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("graphs")
|
||||
.arg("list")
|
||||
.arg("--uri")
|
||||
.arg("--store")
|
||||
.arg("/tmp/local"),
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
|
||||
assert!(
|
||||
stderr.contains("remote multi-graph server URL"),
|
||||
"expected 'remote multi-graph server URL' rejection in stderr; got:\n{stderr}"
|
||||
stderr.contains("remote multi-graph server"),
|
||||
"expected a remote-server rejection in stderr; got:\n{stderr}"
|
||||
);
|
||||
}
|
||||
|
||||
/// RFC-008 stage 1: loading a legacy omnigraph.yaml prints the per-key
/// deprecation block (the migration map applied to THIS file); setting
/// OMNIGRAPH_SUPPRESS_YAML_DEPRECATION silences it.
#[test]
fn legacy_config_load_warns_per_key_and_suppression_silences() {
    let temp = tempdir().unwrap();
    let legacy_path = temp.path().join("omnigraph.yaml");
    fs::write(
        legacy_path,
        "cli:\n actor: act-x\ngraphs:\n g:\n uri: /tmp/never-opened\n",
    )
    .unwrap();

    // Without suppression: the deprecation block and the migrate hint appear.
    // `graphs list --json` loads the config and exits without touching the
    // graph URI.
    {
        let output = cli()
            .current_dir(temp.path())
            .arg("graphs")
            .arg("list")
            .arg("--json")
            .output()
            .unwrap();
        let stderr = String::from_utf8_lossy(&output.stderr);
        let has_banner = stderr.contains("deprecated (RFC-008)");
        let has_key_mapping = stderr.contains("`cli.actor` -> `operator.actor`");
        assert!(has_banner && has_key_mapping, "{stderr}");
        assert!(stderr.contains("config migrate"), "{stderr}");
    }

    // With the suppression variable set: the deprecation block is gone.
    {
        let output = cli()
            .current_dir(temp.path())
            .env("OMNIGRAPH_SUPPRESS_YAML_DEPRECATION", "1")
            .arg("graphs")
            .arg("list")
            .arg("--json")
            .output()
            .unwrap();
        let stderr = String::from_utf8_lossy(&output.stderr);
        let has_banner = stderr.contains("deprecated (RFC-008)");
        assert!(!has_banner, "{stderr}");
    }
}
|
||||
|
||||
/// RFC-008 stage 2: `config migrate` first proposes the split without
/// writing anything, applies it under `--write` (the operator merge never
/// clobbers existing keys; cluster.yaml is emitted), and a repeated
/// `--write` is idempotent.
#[test]
fn config_migrate_splits_legacy_config() {
    let temp = tempdir().unwrap();
    let cluster_path = temp.path().join("cluster.yaml");
    fs::write(
        temp.path().join("omnigraph.yaml"),
        "graphs:\n prod:\n uri: https://graph.example.com\n bearer_token_env: PROD_TOKEN\ncli:\n actor: act-me\n output_format: json\npolicy:\n file: ./top.policy.yaml\n",
    )
    .unwrap();

    // Pre-existing operator config whose actor must survive the merge.
    let operator_home = tempfile::tempdir().unwrap();
    let operator_config = operator_home.path().join("config.yaml");
    fs::write(&operator_config, "operator:\n actor: act-existing\n").unwrap();

    // Read-only proposal: names both halves, writes nothing.
    {
        let output = cli()
            .current_dir(temp.path())
            .env("OMNIGRAPH_HOME", operator_home.path())
            .env("OMNIGRAPH_SUPPRESS_YAML_DEPRECATION", "1")
            .arg("config")
            .arg("migrate")
            .output()
            .unwrap();
        assert!(output.status.success(), "{output:?}");
        let stdout = String::from_utf8_lossy(&output.stdout);
        assert!(stdout.contains("team half -> cluster.yaml"), "{stdout}");
        assert!(stdout.contains("operator.actor: act-me"), "{stdout}");
        assert!(stdout.contains("omnigraph login prod"), "{stdout}");
        assert!(!cluster_path.exists());
    }

    // --write: cluster.yaml lands; the existing operator actor is KEPT.
    {
        let output = cli()
            .current_dir(temp.path())
            .env("OMNIGRAPH_HOME", operator_home.path())
            .env("OMNIGRAPH_SUPPRESS_YAML_DEPRECATION", "1")
            .arg("config")
            .arg("migrate")
            .arg("--write")
            .output()
            .unwrap();
        assert!(output.status.success(), "{output:?}");

        let cluster = fs::read_to_string(&cluster_path).unwrap();
        let has_version = cluster.contains("version: 1");
        let has_prod_graph = cluster.contains(" prod:");
        assert!(has_version && has_prod_graph, "{cluster}");

        let operator_text = fs::read_to_string(&operator_config).unwrap();
        assert!(operator_text.contains("act-existing"), "{operator_text}");
        assert!(!operator_text.contains("act-me"), "existing keys win: {operator_text}");
        assert!(operator_text.contains("output: json"), "{operator_text}");
        assert!(
            operator_text.contains("url: https://graph.example.com"),
            "{operator_text}"
        );
    }

    // Second --write: cluster.yaml exists -> proposal file, no clobber.
    let output = cli()
        .current_dir(temp.path())
        .env("OMNIGRAPH_HOME", operator_home.path())
        .env("OMNIGRAPH_SUPPRESS_YAML_DEPRECATION", "1")
        .arg("config")
        .arg("migrate")
        .arg("--write")
        .output()
        .unwrap();
    assert!(output.status.success(), "{output:?}");
    let proposal_path = temp.path().join("cluster.yaml.proposed");
    assert!(proposal_path.exists());
}
|
||||
|
||||
/// RFC-008 stage 4: with OMNIGRAPH_NO_LEGACY_CONFIG set, a legacy file that
/// is present is refused (the error points at `config migrate`), while a
/// migrated setup with no such file behaves exactly as before.
#[test]
fn strict_mode_refuses_legacy_file_but_not_its_absence() {
    // Legacy file present: the command must fail and explain why.
    let temp = tempdir().unwrap();
    let legacy_path = temp.path().join("omnigraph.yaml");
    fs::write(legacy_path, "cli:\n actor: a\n").unwrap();
    {
        let output = cli()
            .current_dir(temp.path())
            .env("OMNIGRAPH_NO_LEGACY_CONFIG", "1")
            .arg("graphs")
            .arg("list")
            .arg("--json")
            .output()
            .unwrap();
        assert!(!output.status.success());
        let stderr = String::from_utf8_lossy(&output.stderr);
        let names_switch = stderr.contains("OMNIGRAPH_NO_LEGACY_CONFIG");
        let names_migration = stderr.contains("config migrate");
        assert!(names_switch && names_migration, "{stderr}");
    }

    // Migrated setup (no file): strict mode is a no-op — a config-loading
    // command that tolerates empty defaults succeeds.
    let clean = tempdir().unwrap();
    let output = cli()
        .current_dir(clean.path())
        .env("OMNIGRAPH_NO_LEGACY_CONFIG", "1")
        .arg("queries")
        .arg("list")
        .arg("--json")
        .output()
        .unwrap();
    assert!(output.status.success(), "{output:?}");
}
|
||||
|
|
|
|||
285
crates/omnigraph-cli/tests/parity_matrix.rs
Normal file
285
crates/omnigraph-cli/tests/parity_matrix.rs
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
//! RFC-009 Phase 1 — the embedded/remote parity referee.
|
||||
//!
|
||||
//! For every CLI verb with an `is_remote` fork, run the identical
|
||||
//! invocation against (a) the local graph directly and (b) a spawned
|
||||
//! server on a twin copy of the same graph, with the SAME actor on both
|
||||
//! arms (local `--as act-parity`; remote bearer token resolving to
|
||||
//! `act-parity`). Scrub the declared-volatile allowlist
|
||||
//! (`support::scrub_volatile` — ids, wall-clock, transport locations);
|
||||
//! everything else must match exactly.
|
||||
//!
|
||||
//! This test PINS behavior; it does not idealize it. Genuine divergences
|
||||
//! discovered here are recorded in `KNOWN_DIVERGENCES` below (and filed),
|
||||
//! never silently repaired — repairs are Phase 3's job, gated by this
|
||||
//! referee staying green through the refactor.
|
||||
|
||||
use tempfile::TempDir;
|
||||
|
||||
mod support;
|
||||
use support::*;
|
||||
|
||||
/// Divergences between the arms that exist today, pinned as expectations.
|
||||
/// Removing an entry requires the corresponding behavior change to be a
|
||||
/// deliberate, release-noted decision (RFC-009 Compatibility).
|
||||
const KNOWN_DIVERGENCES: &[&str] = &[
|
||||
// populated by the rows below as they are written
|
||||
];
|
||||
|
||||
/// One matched setup per row: twin graphs + the parity Cedar bundle on the
|
||||
/// served arm. The local (`--store`) arm carries no policy (RFC-011); the
|
||||
/// bundle is permissive for `act-parity`, so the arms still agree.
|
||||
struct Parity {
|
||||
_temp: TempDir,
|
||||
local: std::path::PathBuf,
|
||||
server: TestServer,
|
||||
}
|
||||
|
||||
fn parity() -> Parity {
|
||||
let (temp, local, remote) = twin_graphs();
|
||||
// RFC-011 cluster-only: the remote arm is served from a converged
|
||||
// cluster directory (one graph, id `parity`), seeded with the same
|
||||
// fixture data as the local twin.
|
||||
let cluster_dir = parity_configs(temp.path(), &local, &remote);
|
||||
let server = spawn_server_with_cluster_env(
|
||||
&cluster_dir,
|
||||
&[(
|
||||
"OMNIGRAPH_SERVER_BEARER_TOKENS_JSON",
|
||||
r#"{"act-parity":"parity-tok"}"#,
|
||||
)],
|
||||
);
|
||||
Parity {
|
||||
_temp: temp,
|
||||
local,
|
||||
server,
|
||||
}
|
||||
}
|
||||
|
||||
impl Parity {
|
||||
fn run(&self, args: &[&str]) -> (std::process::Output, std::process::Output) {
|
||||
run_both(&self.local, &self.server.base_url, args)
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_parity(verb: &str, local: &std::process::Output, remote: &std::process::Output) {
|
||||
assert_eq!(
|
||||
local.status.code(),
|
||||
remote.status.code(),
|
||||
"{verb}: exit codes diverge\nlocal: {local:?}\nremote: {remote:?}"
|
||||
);
|
||||
if local.status.success() {
|
||||
let local_json = scrubbed_json(local);
|
||||
let remote_json = scrubbed_json(remote);
|
||||
assert_eq!(
|
||||
local_json, remote_json,
|
||||
"{verb}: scrubbed JSON diverges (left=local, right=remote)"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn parity_query() {
    // Parity row: run the fixture's `get_person` query with a bound `name`.
    let harness = parity();
    let query_file = fixture("test.gq");
    let args = [
        "query",
        "--query",
        query_file.to_str().unwrap(),
        "get_person",
        "--params",
        r#"{"name":"Alice"}"#,
        "--json",
    ];
    let (local_out, remote_out) = harness.run(&args);
    assert_parity("query", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_schema_show() {
    // Parity row: `schema show --json` must agree across both arms.
    let harness = parity();
    let (local_out, remote_out) = harness.run(&["schema", "show", "--json"]);
    assert_parity("schema show", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_snapshot() {
    // Parity row: `snapshot --json` must agree across both arms.
    let harness = parity();
    let (local_out, remote_out) = harness.run(&["snapshot", "--json"]);
    assert_parity("snapshot", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_branch_list() {
    // Parity row: `branch list --json` must agree across both arms.
    let harness = parity();
    let (local_out, remote_out) = harness.run(&["branch", "list", "--json"]);
    assert_parity("branch list", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_commit_list() {
    // Parity row: `commit list --json` must agree across both arms.
    let harness = parity();
    let (local_out, remote_out) = harness.run(&["commit", "list", "--json"]);
    assert_parity("commit list", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_mutate() {
    // Parity row: an inline insert mutation with bound params.
    const INSERT_PERSON: &str =
        "query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }";

    let harness = parity();
    let args = [
        "mutate",
        "-e",
        INSERT_PERSON,
        "--params",
        r#"{"name":"Parity","age":7}"#,
        "--json",
    ];
    let (local_out, remote_out) = harness.run(&args);
    assert_parity("mutate", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_branch_create_delete() {
    let harness = parity();

    let create_args = ["branch", "create", "--from", "main", "parity-branch", "--json"];
    let (local_out, remote_out) = harness.run(&create_args);
    assert_parity("branch create", &local_out, &remote_out);

    // `branch delete` is destructive: the served (remote) arm is non-local and
    // requires consent (RFC-011 Decision 9), so the row passes `--yes` to test
    // the operation itself, not the safety gate. The local arm ignores `--yes`.
    let delete_args = ["branch", "delete", "parity-branch", "--yes", "--json"];
    let (local_out, remote_out) = harness.run(&delete_args);
    assert_parity("branch delete", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_branch_merge() {
    let harness = parity();

    // Setup: both arms need a `feature` branch before it can be merged.
    let create_args = ["branch", "create", "--from", "main", "feature", "--json"];
    let (local_out, remote_out) = harness.run(&create_args);
    assert_parity("branch create (merge setup)", &local_out, &remote_out);

    let merge_args = ["branch", "merge", "feature", "--into", "main", "--json"];
    let (local_out, remote_out) = harness.run(&merge_args);
    assert_parity("branch merge", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_load() {
    let harness = parity();

    // One-row JSONL input, written next to the local twin graph.
    let rows_path = harness.local.parent().unwrap().join("rows.jsonl");
    std::fs::write(
        &rows_path,
        "{\"type\":\"Person\",\"data\":{\"name\":\"Loaded\",\"age\":1}}\n",
    )
    .unwrap();

    let args = [
        "load",
        "--mode",
        "merge",
        "--data",
        rows_path.to_str().unwrap(),
        "--json",
    ];
    let (local_out, remote_out) = harness.run(&args);
    assert_parity("load", &local_out, &remote_out);
}
|
||||
|
||||
#[test]
fn parity_export() {
    /// Split an arm's stdout into its JSONL lines (panics on invalid UTF-8).
    fn jsonl_lines(out: &std::process::Output) -> Vec<&str> {
        std::str::from_utf8(&out.stdout).unwrap().lines().collect()
    }

    let harness = parity();
    let (l, r) = harness.run(&["export"]);

    // export emits a JSONL STREAM, not a single `--json` document, so the
    // scrubbed-single-doc `assert_parity` doesn't apply — compare line-wise.
    // The twin graphs are byte-copies of one loaded fixture, so rows carry
    // identical ids/versions and need no scrubbing; sort the lines so any
    // cross-arm row-ordering difference doesn't masquerade as a divergence.
    let (local_code, remote_code) = (l.status.code(), r.status.code());
    assert_eq!(
        local_code, remote_code,
        "export: exit codes diverge\nlocal {l:?}\nremote {r:?}"
    );
    assert!(l.status.success(), "export local arm failed: {l:?}");

    let mut local_lines = jsonl_lines(&l);
    let mut remote_lines = jsonl_lines(&r);
    let local_has_rows = !local_lines.is_empty();
    assert!(
        local_has_rows,
        "export produced no rows — the parity check would be vacuous"
    );

    local_lines.sort_unstable();
    remote_lines.sort_unstable();
    assert_eq!(
        local_lines, remote_lines,
        "export: JSONL streams diverge (left=local, right=remote)"
    );
}
|
||||
|
||||
// ---- error parity: exit codes must match for shared failure cases ----
|
||||
|
||||
#[test]
fn parity_errors_share_exit_codes() {
    // NOTE(review): despite the name, the checks below compare only
    // success/failure on each arm, not the numeric exit code — confirm
    // whether a stricter comparison is intended.

    /// Success flags of the two arms, as `(local, remote)`.
    fn outcomes(l: &std::process::Output, r: &std::process::Output) -> (bool, bool) {
        (l.status.success(), r.status.success())
    }

    let harness = parity();

    // unknown branch on merge
    let merge_args = ["branch", "merge", "no-such-branch", "--into", "main", "--json"];
    let (l, r) = harness.run(&merge_args);
    assert_eq!(
        outcomes(&l, &r),
        (false, false),
        "merge of unknown branch must fail on both arms\nlocal {l:?}\nremote {r:?}"
    );

    // unknown query name in the source
    let query = fixture("test.gq");
    let query_path = query.to_str().unwrap();
    let (l, r) = harness.run(&["query", "--query", query_path, "no_such_query", "--json"]);
    assert_eq!(
        outcomes(&l, &r),
        (false, false),
        "unknown query name must fail on both arms\nlocal {l:?}\nremote {r:?}"
    );

    // Discovery (parity HOLDS, behavior surprising): an inline query run
    // with a declared-but-unbound param does NOT error on either arm — it
    // returns every row (the filter drops), while the stored-query invoke
    // path hard-errors 'parameter not provided'. Pinned here as agreeing
    // behavior; the cross-path asymmetry is filed separately.
    let (l, r) = harness.run(&["query", "--query", query_path, "get_person", "--json"]);
    assert_eq!(
        outcomes(&l, &r),
        (true, true),
        "unbound-param inline query currently SUCCEEDS on both arms (matches-all)"
    );
}
|
||||
|
||||
// ---- documented exclusions (not bugs; the Phase 4 capability table) ----
|
||||
//
|
||||
// - `graphs list`: server-only today; becomes Both-capability when the
|
||||
// embedded arm enumerates the cluster catalog (RFC-009 open Q3, answered).
|
||||
// - `ingest`: deprecated alias of load; its remote arm rides the deprecated
|
||||
// /ingest route. The canonical `load` verb targets `/load` (RFC-009 Phase 5,
|
||||
// landed) — `parity_load` exercises it on the remote arm.
|
||||
// - `init`, `optimize`, `repair`, `cleanup`, `cluster *`: storage-plane by
|
||||
// design (must work with the server down); Phase 4 declares this.
|
||||
#[allow(dead_code)]
|
||||
const EXCLUSIONS_DOCUMENTED: () = ();
|
||||
|
||||
#[test]
fn known_divergences_ledger_is_current() {
    // Keeping the ledger under test makes every removal a deliberate act.
    // When it is empty and every matrix row passes, the two arms agree on
    // everything the matrix covers.
    let ledger_is_empty = KNOWN_DIVERGENCES.is_empty();
    assert!(
        ledger_is_empty,
        "divergences are pinned: {KNOWN_DIVERGENCES:?}"
    );
}
|
||||
|
|
@ -339,6 +339,63 @@ impl SystemGraph {
|
|||
}
|
||||
}
|
||||
|
||||
/// A converged cluster directory the server can boot from (`--cluster`),
|
||||
/// serving one graph seeded with the standard fixture. Holds the temp dir
|
||||
/// alive for the test's lifetime.
|
||||
pub struct ClusterFixture {
|
||||
_temp: TempDir,
|
||||
dir: PathBuf,
|
||||
}
|
||||
|
||||
impl ClusterFixture {
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.dir
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a converged cluster (RFC-011 cluster-only serving) with a single
|
||||
/// graph `graph_id`, seeded with the `test.jsonl` fixture so reads return
|
||||
/// data. When `policy_yaml` is `Some`, the bundle is bound to the graph
|
||||
/// scope. The server boots from the returned path via `--cluster`.
|
||||
pub fn converged_loaded_cluster(graph_id: &str, policy_yaml: Option<&str>) -> ClusterFixture {
|
||||
let temp = tempdir().unwrap();
|
||||
let dir = temp.path().to_path_buf();
|
||||
fs::copy(fixture("test.pg"), dir.join("graph.pg")).unwrap();
|
||||
|
||||
let policy_block = match policy_yaml {
|
||||
Some(source) => {
|
||||
fs::write(dir.join("graph.policy.yaml"), source).unwrap();
|
||||
format!(
|
||||
"policies:\n graph:\n file: ./graph.policy.yaml\n applies_to: [{graph_id}]\n"
|
||||
)
|
||||
}
|
||||
None => String::new(),
|
||||
};
|
||||
fs::write(
|
||||
dir.join("cluster.yaml"),
|
||||
format!(
|
||||
"version: 1\nmetadata:\n name: sys\nstate:\n backend: cluster\n lock: true\ngraphs:\n {graph_id}:\n schema: ./graph.pg\n{policy_block}"
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
output_success(cli().arg("cluster").arg("import").arg("--config").arg(&dir));
|
||||
output_success(cli().arg("cluster").arg("apply").arg("--config").arg(&dir));
|
||||
|
||||
let served_root = dir.join("graphs").join(format!("{graph_id}.omni"));
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(fixture("test.jsonl"))
|
||||
.arg("--mode")
|
||||
.arg("overwrite")
|
||||
.arg(&served_root),
|
||||
);
|
||||
|
||||
ClusterFixture { _temp: temp, dir }
|
||||
}
|
||||
|
||||
// ---- helpers moved from the monolithic tests/cli.rs ----
|
||||
#[allow(unused_imports)]
|
||||
use lance::Dataset;
|
||||
|
|
@ -688,3 +745,239 @@ pub fn queries_test_config(graph_uri: &str, entry: &str, gq_file: &str) -> Strin
|
|||
graph_uri.replace('\'', "''")
|
||||
)
|
||||
}
|
||||
|
||||
// ---- RFC-009 Phase 1: parity-matrix harness ----
|
||||
|
||||
/// Twin graphs for embedded-vs-remote comparison: the same loaded fixture
|
||||
/// copied to two roots, so write verbs can run once per arm on identical
|
||||
/// state. Returns (tempdir-guard, local_graph, remote_graph).
|
||||
pub fn twin_graphs() -> (TempDir, PathBuf, PathBuf) {
|
||||
let temp = tempdir().unwrap();
|
||||
let seed = temp.path().join("seed");
|
||||
fs::create_dir_all(&seed).unwrap();
|
||||
let graph = seed.join("server.omni");
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
let local = temp.path().join("local.omni");
|
||||
let remote = temp.path().join("remote.omni");
|
||||
copy_dir(&graph, &local);
|
||||
copy_dir(&graph, &remote);
|
||||
(temp, local, remote)
|
||||
}
|
||||
|
||||
/// Recursively copy the directory tree at `from` into `to`.
///
/// `to` (and any missing parents) is created first. Entries reported as
/// directories are copied recursively; every other entry is copied with
/// `fs::copy`.
///
/// # Panics
///
/// Panics on any I/O failure.
pub fn copy_dir(from: &Path, to: &Path) {
    fs::create_dir_all(to).unwrap();
    let listing = fs::read_dir(from).unwrap();
    for item in listing.map(Result::unwrap) {
        let source = item.path();
        let destination = to.join(item.file_name());
        let kind = item.file_type().unwrap();
        if !kind.is_dir() {
            fs::copy(&source, &destination).unwrap();
            continue;
        }
        copy_dir(&source, &destination);
    }
}
|
||||
|
||||
/// Scrub declared-volatile fields (RFC-009 Phase 1 allowlist) so the rest
|
||||
/// of the JSON must match exactly. Key-based, recursive; both arms get the
|
||||
/// same placeholders. Everything NOT listed here is contract.
|
||||
pub fn scrub_volatile(value: &mut serde_json::Value) {
|
||||
const VOLATILE_KEYS: &[&str] = &[
|
||||
// identity-bearing per-instance values
|
||||
"commit_id", "id", "parent_id", "merge_parent_id", "snapshot",
|
||||
// wall-clock
|
||||
"committed_at", "created_at", "timestamp",
|
||||
// transport / location
|
||||
"uri", "path",
|
||||
];
|
||||
match value {
|
||||
serde_json::Value::Object(map) => {
|
||||
for (key, val) in map.iter_mut() {
|
||||
if VOLATILE_KEYS.contains(&key.as_str()) && !val.is_null() {
|
||||
*val = serde_json::Value::String(format!("<volatile:{key}>"));
|
||||
} else {
|
||||
scrub_volatile(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
serde_json::Value::Array(items) => {
|
||||
for item in items {
|
||||
scrub_volatile(item);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
pub const PARITY_ACTOR: &str = "act-parity";
|
||||
pub const PARITY_TOKEN: &str = "parity-tok";
|
||||
|
||||
/// Identical Cedar bundle for BOTH arms — like-for-like enforcement is part
|
||||
/// of the parity contract (a bare local arm is permissive while a
|
||||
/// tokens-only server is default-deny; comparing those would measure
|
||||
/// configuration, not the fork).
|
||||
pub fn parity_policy_yaml() -> String {
|
||||
r#"version: 1
|
||||
groups:
|
||||
parity: ["act-parity"]
|
||||
protected_branches: []
|
||||
rules:
|
||||
- id: reads
|
||||
allow:
|
||||
actors: { group: parity }
|
||||
actions: [read, export, invoke_query]
|
||||
- id: read-scope
|
||||
allow:
|
||||
actors: { group: parity }
|
||||
actions: [read, export]
|
||||
branch_scope: any
|
||||
- id: writes
|
||||
allow:
|
||||
actors: { group: parity }
|
||||
actions: [change]
|
||||
branch_scope: any
|
||||
- id: branching
|
||||
allow:
|
||||
actors: { group: parity }
|
||||
actions: [schema_apply, branch_create, branch_delete, branch_merge]
|
||||
target_branch_scope: any
|
||||
"#
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// The graph id the parity cluster serves the remote arm under. The
|
||||
/// remote arm addresses it with `--graph PARITY_GRAPH_ID` (RFC-011: the
|
||||
/// server is cluster-only, so a graph selector is required).
|
||||
pub const PARITY_GRAPH_ID: &str = "parity";
|
||||
|
||||
/// Build the remote arm's configuration (RFC-011 cluster-only server).
|
||||
///
|
||||
/// The remote arm is served from a converged cluster directory whose single
|
||||
/// graph (id `parity`) carries the parity Cedar bundle (bound to the graph
|
||||
/// scope). The cluster's derived graph root (`<dir>/graphs/parity.omni`) is
|
||||
/// seeded with the SAME fixture data as the local twin so the two arms compare
|
||||
/// like-for-like. The local (`--store`) arm carries no Cedar policy (RFC-011),
|
||||
/// which is fine because the parity bundle is permissive for `act-parity`.
|
||||
///
|
||||
/// `local_graph` is overwritten with a byte-for-byte copy of the cluster's
|
||||
/// seeded served graph so identity-bearing values that are NOT scrubbed
|
||||
/// (e.g. `graph_commit_id`, edge `id`s in export) match across the arms —
|
||||
/// the served graph is the source of truth and the local twin mirrors it.
|
||||
///
|
||||
/// Returns the `cluster_dir`. The caller spawns the server with `--cluster`.
|
||||
pub fn parity_configs(root: &Path, local_graph: &Path, _remote_graph: &Path) -> PathBuf {
|
||||
let policy = root.join("parity.policy.yaml");
|
||||
fs::write(&policy, parity_policy_yaml()).unwrap();
|
||||
|
||||
// Remote arm: a cluster directory the server boots from. One graph
|
||||
// (`parity`), schema = the shared fixture, policy bound to the graph.
|
||||
let cluster_dir = root.join("parity-cluster");
|
||||
fs::create_dir_all(&cluster_dir).unwrap();
|
||||
fs::copy(fixture("test.pg"), cluster_dir.join("parity.pg")).unwrap();
|
||||
fs::copy(&policy, cluster_dir.join("parity.policy.yaml")).unwrap();
|
||||
fs::write(
|
||||
cluster_dir.join("cluster.yaml"),
|
||||
format!(
|
||||
r#"version: 1
|
||||
metadata:
|
||||
name: parity
|
||||
state:
|
||||
backend: cluster
|
||||
lock: true
|
||||
graphs:
|
||||
{PARITY_GRAPH_ID}:
|
||||
schema: ./parity.pg
|
||||
policies:
|
||||
parity:
|
||||
file: ./parity.policy.yaml
|
||||
applies_to: [{PARITY_GRAPH_ID}]
|
||||
"#
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Converge the cluster (creates the empty graph at the derived root),
|
||||
// then seed it with the same fixture data the local twin holds.
|
||||
output_success(
|
||||
cli()
|
||||
.arg("cluster")
|
||||
.arg("import")
|
||||
.arg("--config")
|
||||
.arg(&cluster_dir),
|
||||
);
|
||||
output_success(
|
||||
cli()
|
||||
.arg("cluster")
|
||||
.arg("apply")
|
||||
.arg("--config")
|
||||
.arg(&cluster_dir),
|
||||
);
|
||||
let served_root = cluster_dir
|
||||
.join("graphs")
|
||||
.join(format!("{PARITY_GRAPH_ID}.omni"));
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(fixture("test.jsonl"))
|
||||
.arg("--mode")
|
||||
.arg("overwrite")
|
||||
.arg(&served_root),
|
||||
);
|
||||
|
||||
// Mirror the seeded served graph into the local twin so both arms hold
|
||||
// identical ULIDs / commit ids (the served graph is authoritative).
|
||||
if local_graph.exists() {
|
||||
fs::remove_dir_all(local_graph).unwrap();
|
||||
}
|
||||
copy_dir(&served_root, local_graph);
|
||||
|
||||
cluster_dir
|
||||
}
|
||||
|
||||
/// Run one CLI invocation per arm with identical verb args: locally against
|
||||
/// `local_graph` (--as actor) and remotely against a server URL whose token
|
||||
/// resolves to the same actor. Returns raw Outputs for exit-code + JSON
|
||||
/// comparison by the caller.
|
||||
pub fn run_both(
|
||||
local_graph: &Path,
|
||||
server_url: &str,
|
||||
args: &[&str],
|
||||
) -> (std::process::Output, std::process::Output) {
|
||||
// Address both arms with GLOBAL flags (`--store` / `--server`) appended after
|
||||
// the verb + its args, so the address is placed correctly regardless of
|
||||
// subcommand nesting (a positional graph only works for top-level verbs;
|
||||
// `schema show <graph>` etc. need the global flag). Local = embedded store,
|
||||
// remote = served. RFC-011: a direct (`--store`) write carries no Cedar
|
||||
// policy — the parity policy is permissive for `act-parity` on the served
|
||||
// arm, so the two arms still agree.
|
||||
let mut local = cli();
|
||||
local
|
||||
.args(args)
|
||||
.arg("--store")
|
||||
.arg(local_graph)
|
||||
.arg("--as")
|
||||
.arg(PARITY_ACTOR);
|
||||
let local_out = local.output().unwrap();
|
||||
|
||||
let mut remote = cli();
|
||||
remote
|
||||
.env("OMNIGRAPH_BEARER_TOKEN", PARITY_TOKEN)
|
||||
.args(args)
|
||||
.arg("--server")
|
||||
.arg(server_url)
|
||||
// RFC-011: the parity server is cluster-only (multi-graph), so the
|
||||
// remote arm must name the graph it addresses.
|
||||
.arg("--graph")
|
||||
.arg(PARITY_GRAPH_ID);
|
||||
let remote_out = remote.output().unwrap();
|
||||
(local_out, remote_out)
|
||||
}
|
||||
|
||||
/// Parse, scrub, and pretty-print for diffable assertion messages.
|
||||
pub fn scrubbed_json(output: &std::process::Output) -> String {
|
||||
let mut value: serde_json::Value = serde_json::from_slice(&output.stdout)
|
||||
.unwrap_or_else(|e| panic!("non-JSON stdout ({e}): {output:?}"));
|
||||
scrub_volatile(&mut value);
|
||||
serde_json::to_string_pretty(&value).unwrap()
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -42,7 +42,12 @@ pub(crate) fn resolve_query_decls(
|
|||
return (
|
||||
map.iter()
|
||||
.map(|(name, config)| {
|
||||
(name.clone(), QueryConfig { file: config.file.clone() })
|
||||
(
|
||||
name.clone(),
|
||||
QueryConfig {
|
||||
file: config.file.clone(),
|
||||
},
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
BTreeMap::new(),
|
||||
|
|
@ -66,7 +71,10 @@ pub(crate) fn resolve_query_decls(
|
|||
diagnostics.push(Diagnostic::error(
|
||||
"query_dir_unreadable",
|
||||
format!("graphs.{graph_id}.queries"),
|
||||
format!("could not list query directory '{}': {err}", resolved.display()),
|
||||
format!(
|
||||
"could not list query directory '{}': {err}",
|
||||
resolved.display()
|
||||
),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
|
@ -76,7 +84,10 @@ pub(crate) fn resolve_query_decls(
|
|||
diagnostics.push(Diagnostic::warning(
|
||||
"query_dir_empty",
|
||||
format!("graphs.{graph_id}.queries"),
|
||||
format!("query directory '{}' contains no .gq files", resolved.display()),
|
||||
format!(
|
||||
"query directory '{}' contains no .gq files",
|
||||
resolved.display()
|
||||
),
|
||||
));
|
||||
}
|
||||
for path in entries {
|
||||
|
|
@ -132,7 +143,12 @@ pub(crate) fn resolve_query_decls(
|
|||
continue;
|
||||
}
|
||||
origin.insert(name.clone(), declared.clone());
|
||||
registry.insert(name, QueryConfig { file: declared.clone() });
|
||||
registry.insert(
|
||||
name,
|
||||
QueryConfig {
|
||||
file: declared.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
contents.insert(declared, source);
|
||||
}
|
||||
|
|
@ -269,8 +285,6 @@ pub(crate) fn validate_cluster_header(
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub(crate) fn state_resource_digests(state: &ClusterState) -> BTreeMap<String, String> {
|
||||
state
|
||||
.applied_revision
|
||||
|
|
@ -295,7 +309,6 @@ pub(crate) fn initial_import_state(desired: &DesiredCluster) -> ClusterState {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
pub(crate) async fn observe_declared_graphs(
|
||||
desired: &DesiredCluster,
|
||||
backend: &ClusterStore,
|
||||
|
|
@ -350,19 +363,28 @@ pub(crate) async fn observe_declared_graphs(
|
|||
StateResource {
|
||||
digest: observation.schema_digest.clone(),
|
||||
applies_to: None,
|
||||
embedding_provider: None,
|
||||
embedding_profile: None,
|
||||
},
|
||||
);
|
||||
let query_digests = state_query_digests_for_graph(state, &graph.id);
|
||||
let embedding_provider = state_graph_embedding_provider(state, &graph.id);
|
||||
let embedding_provider_digest =
|
||||
state_embedding_provider_digest(state, embedding_provider.as_deref());
|
||||
let graph_digest_value = graph_digest(
|
||||
&graph.id,
|
||||
Some(&observation.schema_digest),
|
||||
Some(&query_digests),
|
||||
embedding_provider.as_deref(),
|
||||
embedding_provider_digest.as_ref(),
|
||||
);
|
||||
state.applied_revision.resources.insert(
|
||||
graph_address.clone(),
|
||||
StateResource {
|
||||
digest: graph_digest_value,
|
||||
applies_to: None,
|
||||
embedding_provider,
|
||||
embedding_profile: None,
|
||||
},
|
||||
);
|
||||
state.observations.insert(
|
||||
|
|
@ -499,7 +521,6 @@ pub(crate) fn graph_observation_json(observation: GraphObservationJson<'_>) -> s
|
|||
})
|
||||
}
|
||||
|
||||
|
||||
pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
|
||||
let parsed = parse_cluster_config(config_dir);
|
||||
let config_dir = parsed.config_dir;
|
||||
|
|
@ -519,6 +540,35 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
|
|||
let mut dependencies = BTreeSet::new();
|
||||
let mut graph_query_digests: BTreeMap<String, BTreeMap<String, String>> = BTreeMap::new();
|
||||
let mut graph_schema_digests: BTreeMap<String, String> = BTreeMap::new();
|
||||
let mut graph_embedding_providers: BTreeMap<String, String> = BTreeMap::new();
|
||||
let mut embedding_provider_digests: BTreeMap<String, String> = BTreeMap::new();
|
||||
let mut embedding_providers: BTreeMap<String, EmbeddingProviderConfig> = BTreeMap::new();
|
||||
|
||||
for (provider_name, profile) in &raw.providers.embedding {
|
||||
validate_id(
|
||||
"embedding provider name",
|
||||
&format!("providers.embedding.{provider_name}"),
|
||||
provider_name,
|
||||
&mut diagnostics,
|
||||
);
|
||||
let address = embedding_provider_address(provider_name);
|
||||
profile.validate(
|
||||
format!("providers.embedding.{provider_name}"),
|
||||
&mut diagnostics,
|
||||
);
|
||||
let digest = embedding_provider_digest(profile);
|
||||
embedding_provider_digests.insert(address.clone(), digest.clone());
|
||||
embedding_providers.insert(address.clone(), profile.clone());
|
||||
resources.insert(
|
||||
address.clone(),
|
||||
ResourceSummary {
|
||||
address,
|
||||
kind: "embedding_provider".to_string(),
|
||||
digest,
|
||||
path: None,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
for (graph_id, graph) in &raw.graphs {
|
||||
validate_id(
|
||||
|
|
@ -533,6 +583,35 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
|
|||
from: schema_address.clone(),
|
||||
to: graph_address.clone(),
|
||||
});
|
||||
if let Some(provider_ref) = graph.embedding_provider.as_deref() {
|
||||
match normalize_embedding_provider_target(provider_ref) {
|
||||
EmbeddingProviderTarget::Provider(provider_name) => {
|
||||
let provider_address = embedding_provider_address(&provider_name);
|
||||
if raw.providers.embedding.contains_key(&provider_name) {
|
||||
dependencies.insert(Dependency {
|
||||
from: graph_address.clone(),
|
||||
to: provider_address.clone(),
|
||||
});
|
||||
graph_embedding_providers.insert(graph_id.clone(), provider_address);
|
||||
} else {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"dangling_embedding_provider_reference",
|
||||
format!("graphs.{graph_id}.embedding_provider"),
|
||||
format!(
|
||||
"graph references embedding provider `{provider_name}`, but no providers.embedding.{provider_name} profile is declared"
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
EmbeddingProviderTarget::WrongKind(kind) => diagnostics.push(Diagnostic::error(
|
||||
"wrong_kind_reference",
|
||||
format!("graphs.{graph_id}.embedding_provider"),
|
||||
format!(
|
||||
"embedding_provider expects a providers.embedding ref or bare provider name, got `{kind}`"
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
let schema_path = resolve_config_path(&config_dir, &graph.schema);
|
||||
let schema_source = match fs::read_to_string(&schema_path) {
|
||||
|
|
@ -646,10 +725,15 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
|
|||
}
|
||||
|
||||
for graph_id in raw.graphs.keys() {
|
||||
let embedding_provider = graph_embedding_providers.get(graph_id);
|
||||
let embedding_provider_digest =
|
||||
embedding_provider.and_then(|address| embedding_provider_digests.get(address));
|
||||
let digest = graph_digest(
|
||||
graph_id,
|
||||
graph_schema_digests.get(graph_id),
|
||||
graph_query_digests.get(graph_id),
|
||||
embedding_provider.map(String::as_str),
|
||||
embedding_provider_digest,
|
||||
);
|
||||
resources.insert(
|
||||
graph_address(graph_id),
|
||||
|
|
@ -754,6 +838,7 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
|
|||
.get(graph_id)
|
||||
.cloned()
|
||||
.unwrap_or_default(),
|
||||
embedding_provider: graph_embedding_providers.get(graph_id).cloned(),
|
||||
})
|
||||
.collect();
|
||||
let config_digest = desired_config_digest(&raw, &resource_digests);
|
||||
|
|
@ -769,6 +854,7 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
|
|||
resources: resource_list,
|
||||
dependencies,
|
||||
policy_bindings,
|
||||
embedding_providers,
|
||||
}),
|
||||
diagnostics,
|
||||
config_dir,
|
||||
|
|
@ -828,7 +914,6 @@ pub(crate) fn future_field_diagnostics(text: &str) -> Vec<Diagnostic> {
|
|||
let future_fields = [
|
||||
"apply",
|
||||
"env_file",
|
||||
"providers",
|
||||
"pipelines",
|
||||
"embeddings",
|
||||
"ui",
|
||||
|
|
@ -882,6 +967,21 @@ pub(crate) fn normalize_policy_target(value: &str) -> PolicyTarget {
|
|||
}
|
||||
}
|
||||
|
||||
/// Outcome of interpreting a graph's `embedding_provider` reference.
enum EmbeddingProviderTarget {
    /// The reference names an embedding provider; holds the bare name.
    Provider(String),
    /// The reference is dotted but is not an embedding-provider address;
    /// holds the reference unchanged.
    WrongKind(String),
}

/// Interpret `value` as an embedding-provider reference.
///
/// A value starting with `provider.embedding.` yields the remainder as the
/// provider name. Any other value containing a `.` is reported as the wrong
/// kind of reference. A value without a `.` is taken as a bare provider name.
fn normalize_embedding_provider_target(value: &str) -> EmbeddingProviderTarget {
    match value.strip_prefix("provider.embedding.") {
        Some(bare_name) => EmbeddingProviderTarget::Provider(bare_name.to_owned()),
        None if value.contains('.') => EmbeddingProviderTarget::WrongKind(value.to_owned()),
        None => EmbeddingProviderTarget::Provider(value.to_owned()),
    }
}
|
||||
|
||||
/// Resource address of the graph `graph_id`: the id prefixed with `graph.`.
pub(crate) fn graph_address(graph_id: &str) -> String {
    ["graph", graph_id].join(".")
}
|
||||
|
|
@ -898,6 +998,10 @@ pub(crate) fn policy_address(policy_name: &str) -> String {
|
|||
format!("policy.{policy_name}")
|
||||
}
|
||||
|
||||
/// Resource address of the embedding provider `provider_name`: the name
/// prefixed with `provider.embedding.`.
pub(crate) fn embedding_provider_address(provider_name: &str) -> String {
    let mut address = String::from("provider.embedding.");
    address.push_str(provider_name);
    address
}
|
||||
|
||||
pub(crate) fn resolve_config_path(config_dir: &Path, path: &Path) -> PathBuf {
|
||||
if path.is_absolute() {
|
||||
path.to_path_buf()
|
||||
|
|
|
|||
|
|
@ -152,7 +152,9 @@ pub(crate) fn approved_resources(
|
|||
let candidates: Vec<&ApprovalArtifact> = artifacts
|
||||
.iter()
|
||||
.map(|(_, artifact)| artifact)
|
||||
.filter(|artifact| artifact.consumed_at.is_none() && artifact.resource == change.resource)
|
||||
.filter(|artifact| {
|
||||
artifact.consumed_at.is_none() && artifact.resource == change.resource
|
||||
})
|
||||
.collect();
|
||||
if candidates.is_empty() {
|
||||
continue;
|
||||
|
|
@ -181,6 +183,7 @@ pub(crate) enum ResourceKind {
|
|||
Schema(String),
|
||||
Query { graph: String, name: String },
|
||||
Policy(String),
|
||||
EmbeddingProvider(String),
|
||||
Unknown,
|
||||
}
|
||||
|
||||
|
|
@ -199,6 +202,8 @@ pub(crate) fn resource_kind(address: &str) -> ResourceKind {
|
|||
}
|
||||
} else if let Some(name) = address.strip_prefix("policy.") {
|
||||
ResourceKind::Policy(name.to_string())
|
||||
} else if let Some(name) = address.strip_prefix("provider.embedding.") {
|
||||
ResourceKind::EmbeddingProvider(name.to_string())
|
||||
} else {
|
||||
ResourceKind::Unknown
|
||||
}
|
||||
|
|
@ -261,8 +266,7 @@ pub(crate) fn classify_changes(
|
|||
let (disposition, reason) = match resource_kind(&change.resource) {
|
||||
ResourceKind::Schema(graph) => match change.operation {
|
||||
PlanOperation::Create
|
||||
if graph_creates.contains(&graph)
|
||||
&& !pending_recovery.contains(&graph) =>
|
||||
if graph_creates.contains(&graph) && !pending_recovery.contains(&graph) =>
|
||||
{
|
||||
// Applied with the graph create — the init carries it.
|
||||
(ApplyDisposition::Applied, None)
|
||||
|
|
@ -325,10 +329,7 @@ pub(crate) fn classify_changes(
|
|||
if pending_recovery.contains(&graph) {
|
||||
(ApplyDisposition::Blocked, Some("cluster_recovery_pending"))
|
||||
} else if schema_pending.contains(&graph) {
|
||||
(
|
||||
ApplyDisposition::Blocked,
|
||||
Some("dependency_not_applied"),
|
||||
)
|
||||
(ApplyDisposition::Blocked, Some("dependency_not_applied"))
|
||||
} else {
|
||||
// A graph create in the same plan no longer blocks:
|
||||
// creates execute first in the same apply run.
|
||||
|
|
@ -353,9 +354,8 @@ pub(crate) fn classify_changes(
|
|||
}
|
||||
}
|
||||
},
|
||||
ResourceKind::Unknown => {
|
||||
(ApplyDisposition::Deferred, Some("apply_unsupported_kind"))
|
||||
}
|
||||
ResourceKind::EmbeddingProvider(_) => (ApplyDisposition::Applied, None),
|
||||
ResourceKind::Unknown => (ApplyDisposition::Deferred, Some("apply_unsupported_kind")),
|
||||
};
|
||||
change.disposition = Some(disposition);
|
||||
change.reason = reason.map(str::to_string);
|
||||
|
|
|
|||
|
|
@ -20,18 +20,35 @@ use ulid::Ulid;
|
|||
pub mod failpoints;
|
||||
|
||||
mod config;
|
||||
mod types;
|
||||
mod diff;
|
||||
mod serve;
|
||||
mod sweep;
|
||||
mod store;
|
||||
mod sweep;
|
||||
mod types;
|
||||
use config::{
|
||||
QueriesDecl, future_field_diagnostics, graph_address, initial_import_state, load_desired,
|
||||
normalize_policy_target, observe_declared_graphs, observe_live_graph, parse_cluster_config,
|
||||
policy_address, preview_schema_migration, query_address, resolve_config_path,
|
||||
resolve_query_decls, schema_address, state_resource_digests, validate_cluster_header,
|
||||
validate_id, validate_query_source,
|
||||
};
|
||||
use diff::{
|
||||
FailedGraphOrigin, ResourceKind, append_policy_binding_changes, approved_resources,
|
||||
classify_changes, compute_approvals, compute_blast_radius, demote_dependents_of_failed_graphs,
|
||||
diff_resources, resource_kind,
|
||||
};
|
||||
pub use serve::{
|
||||
ServingGraph, ServingPolicy, ServingQuery, ServingSnapshot, cluster_graph_ids,
|
||||
cluster_root_for_graph_uri, read_serving_snapshot, read_serving_snapshot_from_storage,
|
||||
resolve_graph_storage_uri,
|
||||
};
|
||||
use store::{ClusterStore, StateLockGuard, StateSnapshot};
|
||||
use sweep::{
|
||||
mark_approvals_consumed, record_approval_consumed, sweep_recovery_sidecars,
|
||||
tombstone_graph_subtree, warn_pending_recovery_sidecars,
|
||||
};
|
||||
pub use types::*;
|
||||
use types::*;
|
||||
pub use serve::{ServingGraph, ServingPolicy, ServingQuery, ServingSnapshot, read_serving_snapshot, read_serving_snapshot_from_storage};
|
||||
use config::{QueriesDecl, observe_declared_graphs, validate_cluster_header, future_field_diagnostics, initial_import_state, observe_live_graph, preview_schema_migration, state_resource_digests, graph_address, policy_address, query_address, schema_address, load_desired, normalize_policy_target, parse_cluster_config, resolve_config_path, resolve_query_decls, validate_id, validate_query_source};
|
||||
use diff::{FailedGraphOrigin, ResourceKind, append_policy_binding_changes, approved_resources, classify_changes, compute_approvals, compute_blast_radius, demote_dependents_of_failed_graphs, diff_resources, resource_kind};
|
||||
use sweep::{mark_approvals_consumed, record_approval_consumed, sweep_recovery_sidecars, tombstone_graph_subtree, warn_pending_recovery_sidecars};
|
||||
|
||||
pub const CLUSTER_CONFIG_FILE: &str = "cluster.yaml";
|
||||
pub const CLUSTER_GRAPHS_DIR: &str = "graphs";
|
||||
|
|
@ -44,10 +61,7 @@ pub const CLUSTER_APPROVALS_DIR: &str = "__cluster/approvals";
|
|||
|
||||
/// The store for a load outcome: the declared `storage:` root when present,
|
||||
/// the config directory itself otherwise. A bad root is a loud error.
|
||||
fn store_for(
|
||||
config_dir: &Path,
|
||||
storage_root: Option<&str>,
|
||||
) -> Result<ClusterStore, Diagnostic> {
|
||||
fn store_for(config_dir: &Path, storage_root: Option<&str>) -> Result<ClusterStore, Diagnostic> {
|
||||
match storage_root {
|
||||
Some(root) => ClusterStore::for_storage_root(root),
|
||||
None => Ok(ClusterStore::for_config_dir(config_dir)),
|
||||
|
|
@ -179,7 +193,12 @@ pub async fn plan_config_dir(config_dir: impl AsRef<Path>) -> PlanOutput {
|
|||
&desired.config_digest,
|
||||
&mut diagnostics,
|
||||
);
|
||||
classify_changes(&mut changes, &desired.dependencies, &BTreeSet::new(), &approved);
|
||||
classify_changes(
|
||||
&mut changes,
|
||||
&desired.dependencies,
|
||||
&BTreeSet::new(),
|
||||
&approved,
|
||||
);
|
||||
|
||||
// Embed real migration steps for schema updates so plan is a data-aware
|
||||
// preview; failures degrade to the digest diff with a warning.
|
||||
|
|
@ -282,9 +301,7 @@ pub async fn apply_config_dir_with_options(
|
|||
ok: !has_errors(&diagnostics),
|
||||
config_dir,
|
||||
actor: actor_for_output.clone(),
|
||||
desired_revision: DesiredRevision {
|
||||
config_digest,
|
||||
},
|
||||
desired_revision: DesiredRevision { config_digest },
|
||||
state_observations: observations,
|
||||
changes,
|
||||
applied_count: 0,
|
||||
|
|
@ -464,8 +481,7 @@ pub async fn apply_config_dir_with_options(
|
|||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::GraphCreate);
|
||||
continue;
|
||||
}
|
||||
let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id)
|
||||
else {
|
||||
let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) else {
|
||||
continue;
|
||||
};
|
||||
let graph_uri = backend.graph_root(graph_id);
|
||||
|
|
@ -604,8 +620,7 @@ pub async fn apply_config_dir_with_options(
|
|||
failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
|
||||
continue;
|
||||
}
|
||||
let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id)
|
||||
else {
|
||||
let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) else {
|
||||
continue;
|
||||
};
|
||||
let graph_uri = backend.graph_root(graph_id);
|
||||
|
|
@ -955,8 +970,10 @@ pub async fn apply_config_dir_with_options(
|
|||
.expect("create/update always carries an after digest"),
|
||||
// Policies record their applied bindings so the
|
||||
// ledger is serving-sufficient (RFC-005 §D3).
|
||||
applies_to: desired
|
||||
.policy_bindings
|
||||
applies_to: desired.policy_bindings.get(&change.resource).cloned(),
|
||||
embedding_provider: None,
|
||||
embedding_profile: desired
|
||||
.embedding_providers
|
||||
.get(&change.resource)
|
||||
.cloned(),
|
||||
},
|
||||
|
|
@ -964,7 +981,10 @@ pub async fn apply_config_dir_with_options(
|
|||
set_resource_status_applied(&mut new_state, &change.resource);
|
||||
}
|
||||
PlanOperation::Delete => {
|
||||
new_state.applied_revision.resources.remove(&change.resource);
|
||||
new_state
|
||||
.applied_revision
|
||||
.resources
|
||||
.remove(&change.resource);
|
||||
new_state.resource_statuses.remove(&change.resource);
|
||||
}
|
||||
},
|
||||
|
|
@ -1219,7 +1239,6 @@ pub async fn approve_config_dir(
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
pub async fn status_config_dir(config_dir: impl AsRef<Path>) -> StatusOutput {
|
||||
let parsed = parse_cluster_config(config_dir.as_ref());
|
||||
let mut diagnostics = parsed.diagnostics;
|
||||
|
|
@ -1238,7 +1257,9 @@ pub async fn status_config_dir(config_dir: impl AsRef<Path>) -> StatusOutput {
|
|||
}
|
||||
};
|
||||
let mut observations = backend.observations();
|
||||
backend.observe_lock(&mut observations, &mut diagnostics).await;
|
||||
backend
|
||||
.observe_lock(&mut observations, &mut diagnostics)
|
||||
.await;
|
||||
warn_pending_recovery_sidecars(&parsed.config_dir, &mut diagnostics);
|
||||
|
||||
let mut resource_digests = BTreeMap::new();
|
||||
|
|
@ -1254,9 +1275,7 @@ pub async fn status_config_dir(config_dir: impl AsRef<Path>) -> StatusOutput {
|
|||
// Read-only point-in-time catalog check: report the
|
||||
// findings as diagnostics; persisting Drifted statuses
|
||||
// is refresh's job. Status never writes state.
|
||||
for (address, finding) in
|
||||
verify_catalog_payloads(&backend, &state).await
|
||||
{
|
||||
for (address, finding) in verify_catalog_payloads(&backend, &state).await {
|
||||
diagnostics.push(payload_finding_diagnostic(&address, &finding));
|
||||
}
|
||||
resource_digests = state_resource_digests(&state);
|
||||
|
|
@ -1312,7 +1331,10 @@ pub async fn force_unlock_config_dir(
|
|||
if let Some(raw) = parsed.raw.as_ref() {
|
||||
let _settings = validate_cluster_header(raw, &mut diagnostics);
|
||||
if !has_errors(&diagnostics) {
|
||||
match backend.force_unlock(lock_id.as_ref(), &mut observations).await {
|
||||
match backend
|
||||
.force_unlock(lock_id.as_ref(), &mut observations)
|
||||
.await
|
||||
{
|
||||
Ok(()) => lock_removed = true,
|
||||
Err(diagnostic) => diagnostics.push(diagnostic),
|
||||
}
|
||||
|
|
@ -1380,7 +1402,10 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St
|
|||
|
||||
let operation_label = state_sync_operation_label(operation);
|
||||
let _lock_guard = if desired.state_lock {
|
||||
match backend.acquire_lock(operation_label, &mut observations).await {
|
||||
match backend
|
||||
.acquire_lock(operation_label, &mut observations)
|
||||
.await
|
||||
{
|
||||
Ok(guard) => Some(guard),
|
||||
Err(diagnostic) => {
|
||||
diagnostics.push(diagnostic);
|
||||
|
|
@ -1542,7 +1567,10 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St
|
|||
state.state_revision = state.state_revision.saturating_add(1);
|
||||
}
|
||||
|
||||
match backend.write_state(&state, expected_cas.as_deref(), &mut observations).await {
|
||||
match backend
|
||||
.write_state(&state, expected_cas.as_deref(), &mut observations)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
// Completed sweep sidecars are deleted only after their outcome
|
||||
// is durably recorded; on failure they stay and re-sweep.
|
||||
|
|
@ -1569,9 +1597,6 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum PayloadFinding {
|
||||
Missing,
|
||||
|
|
@ -1650,7 +1675,10 @@ async fn write_resource_payload(
|
|||
Diagnostic::error(
|
||||
"resource_payload_write_error",
|
||||
resource,
|
||||
format!("could not read resource source '{}': {err}", source.display()),
|
||||
format!(
|
||||
"could not read resource source '{}': {err}",
|
||||
source.display()
|
||||
),
|
||||
)
|
||||
})?;
|
||||
if sha256_hex(&bytes) != expected_digest {
|
||||
|
|
@ -1692,7 +1720,11 @@ async fn write_resource_payload(
|
|||
fn recompute_state_graph_digests(state: &mut ClusterState, desired: &DesiredCluster) {
|
||||
for graph in &desired.graphs {
|
||||
let graph_address = graph_address(&graph.id);
|
||||
if !state.applied_revision.resources.contains_key(&graph_address) {
|
||||
if !state
|
||||
.applied_revision
|
||||
.resources
|
||||
.contains_key(&graph_address)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let schema_digest = state
|
||||
|
|
@ -1701,11 +1733,26 @@ fn recompute_state_graph_digests(state: &mut ClusterState, desired: &DesiredClus
|
|||
.get(&schema_address(&graph.id))
|
||||
.map(|resource| resource.digest.clone());
|
||||
let query_digests = state_query_digests_for_graph(state, &graph.id);
|
||||
let digest = graph_digest(&graph.id, schema_digest.as_ref(), Some(&query_digests));
|
||||
state
|
||||
.applied_revision
|
||||
.resources
|
||||
.insert(graph_address, StateResource { digest, applies_to: None });
|
||||
let embedding_provider = graph.embedding_provider.as_deref();
|
||||
let embedding_provider_digest = embedding_provider
|
||||
.and_then(|address| state.applied_revision.resources.get(address))
|
||||
.map(|resource| resource.digest.clone());
|
||||
let digest = graph_digest(
|
||||
&graph.id,
|
||||
schema_digest.as_ref(),
|
||||
Some(&query_digests),
|
||||
embedding_provider,
|
||||
embedding_provider_digest.as_ref(),
|
||||
);
|
||||
state.applied_revision.resources.insert(
|
||||
graph_address,
|
||||
StateResource {
|
||||
digest,
|
||||
applies_to: None,
|
||||
embedding_provider: graph.embedding_provider.clone(),
|
||||
embedding_profile: None,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1773,7 +1820,6 @@ fn duplicate_key_diagnostics(text: &str) -> Vec<Diagnostic> {
|
|||
diagnostics
|
||||
}
|
||||
|
||||
|
||||
fn strip_comment(line: &str) -> String {
|
||||
let mut in_single_quote = false;
|
||||
let mut in_double_quote = false;
|
||||
|
|
@ -1796,7 +1842,6 @@ fn strip_comment(line: &str) -> String {
|
|||
line.to_string()
|
||||
}
|
||||
|
||||
|
||||
fn state_query_digests_for_graph(state: &ClusterState, graph_id: &str) -> BTreeMap<String, String> {
|
||||
let prefix = format!("query.{graph_id}.");
|
||||
state
|
||||
|
|
@ -1811,6 +1856,23 @@ fn state_query_digests_for_graph(state: &ClusterState, graph_id: &str) -> BTreeM
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn state_graph_embedding_provider(state: &ClusterState, graph_id: &str) -> Option<String> {
|
||||
state
|
||||
.applied_revision
|
||||
.resources
|
||||
.get(&graph_address(graph_id))
|
||||
.and_then(|resource| resource.embedding_provider.clone())
|
||||
}
|
||||
|
||||
fn state_embedding_provider_digest(
|
||||
state: &ClusterState,
|
||||
embedding_provider: Option<&str>,
|
||||
) -> Option<String> {
|
||||
embedding_provider
|
||||
.and_then(|address| state.applied_revision.resources.get(address))
|
||||
.map(|resource| resource.digest.clone())
|
||||
}
|
||||
|
||||
fn set_resource_status_applied(state: &mut ClusterState, address: &str) {
|
||||
state.resource_statuses.insert(
|
||||
address.to_string(),
|
||||
|
|
@ -1843,6 +1905,8 @@ fn graph_digest(
|
|||
graph_id: &str,
|
||||
schema_digest: Option<&String>,
|
||||
query_digests: Option<&BTreeMap<String, String>>,
|
||||
embedding_provider: Option<&str>,
|
||||
embedding_provider_digest: Option<&String>,
|
||||
) -> String {
|
||||
let mut input = format!(
|
||||
"graph\0{graph_id}\0schema\0{}\0",
|
||||
|
|
@ -1857,6 +1921,21 @@ fn graph_digest(
|
|||
input.push('\0');
|
||||
}
|
||||
}
|
||||
if let Some(provider) = embedding_provider {
|
||||
input.push_str("embedding_provider\0");
|
||||
input.push_str(provider);
|
||||
input.push('\0');
|
||||
input.push_str(embedding_provider_digest.map_or("", String::as_str));
|
||||
input.push('\0');
|
||||
}
|
||||
sha256_hex(input.as_bytes())
|
||||
}
|
||||
|
||||
fn embedding_provider_digest(profile: &EmbeddingProviderConfig) -> String {
|
||||
let mut input = String::from("embedding-provider\0");
|
||||
let config_semantics =
|
||||
serde_json::to_string(profile).expect("embedding provider config must serialize");
|
||||
input.push_str(&config_semantics);
|
||||
sha256_hex(input.as_bytes())
|
||||
}
|
||||
|
||||
|
|
@ -1930,7 +2009,6 @@ fn display_path(path: &Path) -> String {
|
|||
path.display().to_string()
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "tests.rs"]
|
||||
mod tests;
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use super::*;
|
|||
pub struct ServingGraph {
|
||||
pub graph_id: String,
|
||||
pub root: PathBuf,
|
||||
pub embedding: Option<EmbeddingProviderConfig>,
|
||||
}
|
||||
|
||||
/// One stored query: its graph binding, registry name, and verified source.
|
||||
|
|
@ -79,6 +80,112 @@ pub async fn read_serving_snapshot_from_storage(
|
|||
read_snapshot_with_store(backend).await
|
||||
}
|
||||
|
||||
/// Cluster root for a graph **storage URI** of the cluster layout
|
||||
/// (`<root>/graphs/<id>.omni`), if `<root>` is actually a cluster (holds
|
||||
/// `__cluster/state.json`); otherwise `None`. Used by the CLI to refuse
|
||||
/// `init` into a cluster-managed location — graphs there are created by
|
||||
/// `cluster apply`, not `init`.
|
||||
///
|
||||
/// Cheap by construction: a URI that does not match the `<root>/graphs/<id>.omni`
|
||||
/// shape returns `None` without any I/O, so ordinary `init` targets
|
||||
/// (`./kb.omni`, `s3://bucket/kb.omni`) never probe storage. Works for
|
||||
/// `file://` and `s3://` via the storage adapter.
|
||||
pub async fn cluster_root_for_graph_uri(graph_uri: &str) -> Option<String> {
|
||||
let root = cluster_root_of_graph_layout(graph_uri)?;
|
||||
let store = ClusterStore::for_storage_root(&root).ok()?;
|
||||
store
|
||||
.has_state()
|
||||
.await
|
||||
.then(|| store.display_root().to_string())
|
||||
}
|
||||
|
||||
/// Resolve a graph's **storage URI** (`<root>/graphs/<id>.omni`) from a cluster's
|
||||
/// applied state ledger — the lightweight path for storage-plane maintenance
|
||||
/// (`optimize`/`repair`/`cleanup`).
|
||||
///
|
||||
/// Unlike [`read_serving_snapshot`], this deliberately does NOT validate catalog
|
||||
/// payloads or recovery readiness: maintenance only needs the derivable graph
|
||||
/// root, and must not be blocked by an unrelated corrupt policy/query blob or a
|
||||
/// pending recovery sweep — a degraded cluster is exactly when an operator
|
||||
/// reaches for `repair`. It reads the state ledger, confirms the graph is in the
|
||||
/// applied revision, and returns `graph_root(id)`.
|
||||
///
|
||||
/// `cluster` is a config directory or a storage-root URI (`s3://…`, config-free),
|
||||
/// mirroring the server's `--cluster` dispatch.
|
||||
pub async fn resolve_graph_storage_uri(cluster: &str, graph_id: &str) -> Result<String, Diagnostic> {
|
||||
let backend = open_cluster_backend(cluster)?;
|
||||
let mut observations = backend.observations();
|
||||
let snapshot = backend.read_state(&mut observations).await?;
|
||||
let state = snapshot.state.ok_or_else(|| missing_state_diagnostic(cluster))?;
|
||||
let address = format!("graph.{graph_id}");
|
||||
if !state.applied_revision.resources.contains_key(&address) {
|
||||
let applied = applied_graph_ids(&state);
|
||||
return Err(Diagnostic::error(
|
||||
"graph_not_applied",
|
||||
address,
|
||||
format!(
|
||||
"graph `{graph_id}` is not applied in cluster `{cluster}` (applied graphs: [{}]); \
|
||||
declare it in cluster.yaml and run `cluster apply`, or check the id",
|
||||
applied.join(", ")
|
||||
),
|
||||
));
|
||||
}
|
||||
Ok(backend.graph_root(graph_id))
|
||||
}
|
||||
|
||||
/// List the graph ids applied in a cluster's served state (sorted). Reads the
|
||||
/// ledger only — no catalog validation — like `resolve_graph_storage_uri`, so
|
||||
/// it works on a degraded cluster. Used to enumerate candidates when no
|
||||
/// `--graph` is selected (RFC-011 Decision 7).
|
||||
pub async fn cluster_graph_ids(cluster: &str) -> Result<Vec<String>, Diagnostic> {
|
||||
let backend = open_cluster_backend(cluster)?;
|
||||
let mut observations = backend.observations();
|
||||
let snapshot = backend.read_state(&mut observations).await?;
|
||||
let state = snapshot.state.ok_or_else(|| missing_state_diagnostic(cluster))?;
|
||||
Ok(applied_graph_ids(&state))
|
||||
}
|
||||
|
||||
fn open_cluster_backend(cluster: &str) -> Result<ClusterStore, Diagnostic> {
|
||||
if cluster.contains("://") {
|
||||
ClusterStore::for_storage_root(cluster)
|
||||
} else {
|
||||
Ok(ClusterStore::for_config_dir(Path::new(cluster)))
|
||||
}
|
||||
}
|
||||
|
||||
fn missing_state_diagnostic(cluster: &str) -> Diagnostic {
|
||||
Diagnostic::error(
|
||||
"cluster_state_missing",
|
||||
CLUSTER_STATE_FILE,
|
||||
format!("cluster `{cluster}` has no applied state; run `cluster apply` first"),
|
||||
)
|
||||
}
|
||||
|
||||
fn applied_graph_ids(state: &crate::types::ClusterState) -> Vec<String> {
|
||||
let mut ids: Vec<String> = state
|
||||
.applied_revision
|
||||
.resources
|
||||
.keys()
|
||||
.filter_map(|a| a.strip_prefix("graph."))
|
||||
.map(str::to_string)
|
||||
.collect();
|
||||
ids.sort();
|
||||
ids
|
||||
}
|
||||
|
||||
/// Split `<root>/graphs/<id>.omni` → `<root>`, gating on the exact cluster
/// graph-layout shape (a single `<id>` segment, no nested path). `None` for
/// anything else — this function is pure string handling and does no I/O.
///
/// Trailing `/` characters on the URI are ignored. A root is rejected when it is
/// empty **or consists only of a URI scheme** (`s3://graphs/kb.omni` would
/// otherwise split into the bogus root `s3:/`, and `file:///graphs/kb.omni`
/// into `file://`); this mirrors the plain-path case `/graphs/kb.omni`, whose
/// empty root is already refused.
fn cluster_root_of_graph_layout(graph_uri: &str) -> Option<String> {
    let trimmed = graph_uri.trim_end_matches('/');
    let rest = trimmed.strip_suffix(".omni")?;
    let (root, id) = rest.rsplit_once("/graphs/")?;
    // After the split a scheme-only prefix ends in `:/` (the separator consumed
    // the second slash of `://`) or `://` (for `scheme:///graphs/...`).
    let root_is_bare_scheme = root.ends_with(":/") || root.ends_with("://");
    if root.is_empty() || root_is_bare_scheme || id.is_empty() || id.contains('/') {
        return None;
    }
    Some(root.to_string())
}
|
||||
|
||||
async fn read_snapshot_with_store(
|
||||
backend: ClusterStore,
|
||||
) -> Result<ServingSnapshot, Vec<Diagnostic>> {
|
||||
|
|
@ -119,15 +226,73 @@ async fn read_snapshot_with_store(
|
|||
return Err(diagnostics);
|
||||
};
|
||||
|
||||
let mut embedding_profiles: BTreeMap<String, EmbeddingProviderConfig> = BTreeMap::new();
|
||||
for (address, entry) in &state.applied_revision.resources {
|
||||
if !matches!(resource_kind(address), ResourceKind::EmbeddingProvider(_)) {
|
||||
continue;
|
||||
}
|
||||
let Some(profile) = entry.embedding_profile.clone() else {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"embedding_provider_profile_missing",
|
||||
address.clone(),
|
||||
"no applied embedding provider profile recorded; re-run `cluster apply` to backfill",
|
||||
));
|
||||
continue;
|
||||
};
|
||||
let actual_digest = embedding_provider_digest(&profile);
|
||||
if actual_digest != entry.digest {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"embedding_provider_digest_mismatch",
|
||||
address.clone(),
|
||||
format!(
|
||||
"applied embedding provider profile does not match its recorded digest (actual sha256:{actual_digest}); run `cluster refresh` then `cluster apply`, and restart"
|
||||
),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
embedding_profiles.insert(address.clone(), profile);
|
||||
}
|
||||
|
||||
let mut graphs = Vec::new();
|
||||
let mut queries = Vec::new();
|
||||
let mut policies = Vec::new();
|
||||
for (address, entry) in &state.applied_revision.resources {
|
||||
match resource_kind(address) {
|
||||
ResourceKind::Graph(graph_id) => {
|
||||
let embedding = match entry.embedding_provider.as_deref() {
|
||||
Some(provider_address) => match resource_kind(provider_address) {
|
||||
ResourceKind::EmbeddingProvider(_) => {
|
||||
match embedding_profiles.get(provider_address) {
|
||||
Some(profile) => Some(profile.clone()),
|
||||
None => {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"embedding_provider_missing",
|
||||
address.clone(),
|
||||
format!(
|
||||
"graph references `{provider_address}`, but no applied embedding provider profile is available; re-run `cluster apply`"
|
||||
),
|
||||
));
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"wrong_kind_reference",
|
||||
address.clone(),
|
||||
format!(
|
||||
"graph embedding_provider expects `provider.embedding.<name>`, got `{provider_address}`"
|
||||
),
|
||||
));
|
||||
None
|
||||
}
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
graphs.push(ServingGraph {
|
||||
root: PathBuf::from(backend.graph_root(&graph_id)),
|
||||
graph_id,
|
||||
embedding,
|
||||
});
|
||||
}
|
||||
ResourceKind::Schema(_) => {}
|
||||
|
|
@ -135,7 +300,10 @@ async fn read_snapshot_with_store(
|
|||
let ResourceKind::Query { graph, name } = &kind else {
|
||||
unreachable!()
|
||||
};
|
||||
match backend.read_verified_payload(&kind, &entry.digest, address).await {
|
||||
match backend
|
||||
.read_verified_payload(&kind, &entry.digest, address)
|
||||
.await
|
||||
{
|
||||
Ok(source) => queries.push(ServingQuery {
|
||||
graph_id: graph.clone(),
|
||||
name: name.clone(),
|
||||
|
|
@ -156,7 +324,10 @@ async fn read_snapshot_with_store(
|
|||
));
|
||||
continue;
|
||||
};
|
||||
match backend.read_verified_payload(&kind, &entry.digest, address).await {
|
||||
match backend
|
||||
.read_verified_payload(&kind, &entry.digest, address)
|
||||
.await
|
||||
{
|
||||
Ok(source) => policies.push(ServingPolicy {
|
||||
name: name.clone(),
|
||||
source,
|
||||
|
|
@ -165,6 +336,7 @@ async fn read_snapshot_with_store(
|
|||
Err(diagnostic) => diagnostics.push(diagnostic),
|
||||
}
|
||||
}
|
||||
ResourceKind::EmbeddingProvider(_) => {}
|
||||
ResourceKind::Unknown => {}
|
||||
}
|
||||
}
|
||||
|
|
@ -186,3 +358,49 @@ async fn read_snapshot_with_store(
|
|||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The shape gate is pure string matching: only `<root>/graphs/<id>.omni`
    /// yields a root, every other shape is `None`.
    #[test]
    fn graph_layout_gating_does_no_io_for_non_cluster_shapes() {
        let cases: [(&str, Option<&str>); 6] = [
            ("/data/cluster/graphs/kb.omni", Some("/data/cluster")),
            ("s3://bucket/prefix/graphs/kb.omni", Some("s3://bucket/prefix")),
            ("./kb.omni", None),
            ("s3://bucket/kb.omni", None),
            // nested id under graphs/ is not the cluster layout
            ("/c/graphs/a/b.omni", None),
            // not a .omni graph
            ("/c/graphs/kb", None),
        ];
        for (uri, expected) in cases {
            assert_eq!(
                cluster_root_of_graph_layout(uri).as_deref(),
                expected,
                "uri: {uri}"
            );
        }
    }

    /// A cluster-shaped URI is reported as cluster-managed only once the state
    /// ledger file exists under the root.
    #[tokio::test]
    async fn cluster_root_detected_only_when_state_ledger_present() {
        let temp = tempfile::tempdir().unwrap();
        let root = temp.path();
        std::fs::create_dir_all(root.join("graphs")).unwrap();
        let root_text = root.to_string_lossy();
        let graph_uri = format!("{root_text}/graphs/kb.omni");

        // No __cluster/state.json yet → not a cluster.
        assert_eq!(cluster_root_for_graph_uri(&graph_uri).await, None);

        // Lay down the state ledger → now it's a cluster-managed location.
        std::fs::create_dir_all(root.join("__cluster")).unwrap();
        std::fs::write(root.join(CLUSTER_STATE_FILE), "{}").unwrap();
        let detected = cluster_root_for_graph_uri(&graph_uri).await;
        assert!(detected.is_some(), "expected cluster root to be detected");

        // A non-cluster-shaped target never probes and is always None.
        let plain_uri = format!("{root_text}/plain.omni");
        assert_eq!(cluster_root_for_graph_uri(&plain_uri).await, None);
    }
}
|
||||
|
|
|
|||
|
|
@ -154,6 +154,21 @@ impl ClusterStore {
|
|||
}
|
||||
}
|
||||
|
||||
/// Display-form storage root (plain local path for `file://`, URI for S3).
|
||||
pub(crate) fn display_root(&self) -> &str {
|
||||
&self.display_root
|
||||
}
|
||||
|
||||
/// Whether this root holds the cluster state ledger (`__cluster/state.json`)
|
||||
/// — i.e. is an actual cluster, not just any directory. Probed via the
|
||||
/// adapter (`file://` or `s3://`), failures read as "not a cluster".
|
||||
pub(crate) async fn has_state(&self) -> bool {
|
||||
self.adapter
|
||||
.exists(&self.uri(CLUSTER_STATE_FILE))
|
||||
.await
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// `read_text_versioned`, returning None for a missing object (probed
|
||||
/// via `exists` — the engine error type doesn't discriminate NotFound).
|
||||
async fn read_versioned_opt(&self, uri: &str) -> Result<Option<(String, String)>, String> {
|
||||
|
|
|
|||
|
|
@ -19,13 +19,29 @@ pub(crate) async fn sweep_recovery_sidecars(
|
|||
for (path, sidecar) in backend.list_recovery_sidecars(diagnostics).await {
|
||||
match sidecar.kind {
|
||||
RecoverySidecarKind::GraphCreate => {
|
||||
sweep_graph_create_sidecar(backend, path, sidecar, state, diagnostics, &mut outcome).await;
|
||||
sweep_graph_create_sidecar(
|
||||
backend,
|
||||
path,
|
||||
sidecar,
|
||||
state,
|
||||
diagnostics,
|
||||
&mut outcome,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
RecoverySidecarKind::SchemaApply => {
|
||||
sweep_schema_apply_sidecar(path, sidecar, state, diagnostics, &mut outcome).await;
|
||||
}
|
||||
RecoverySidecarKind::GraphDelete => {
|
||||
sweep_graph_delete_sidecar(backend, path, sidecar, state, diagnostics, &mut outcome).await;
|
||||
sweep_graph_delete_sidecar(
|
||||
backend,
|
||||
path,
|
||||
sidecar,
|
||||
state,
|
||||
diagnostics,
|
||||
&mut outcome,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -71,15 +87,30 @@ pub(crate) async fn sweep_graph_create_sidecar(
|
|||
StateResource {
|
||||
digest: live_digest.clone(),
|
||||
applies_to: None,
|
||||
embedding_provider: None,
|
||||
embedding_profile: None,
|
||||
},
|
||||
);
|
||||
let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id);
|
||||
let composite =
|
||||
graph_digest(&sidecar.graph_id, Some(&live_digest), Some(&query_digests));
|
||||
state
|
||||
.applied_revision
|
||||
.resources
|
||||
.insert(graph_address.clone(), StateResource { digest: composite, applies_to: None });
|
||||
let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id);
|
||||
let embedding_provider_digest =
|
||||
state_embedding_provider_digest(state, embedding_provider.as_deref());
|
||||
let composite = graph_digest(
|
||||
&sidecar.graph_id,
|
||||
Some(&live_digest),
|
||||
Some(&query_digests),
|
||||
embedding_provider.as_deref(),
|
||||
embedding_provider_digest.as_ref(),
|
||||
);
|
||||
state.applied_revision.resources.insert(
|
||||
graph_address.clone(),
|
||||
StateResource {
|
||||
digest: composite,
|
||||
applies_to: None,
|
||||
embedding_provider,
|
||||
embedding_profile: None,
|
||||
},
|
||||
);
|
||||
set_resource_status_applied(state, &graph_address);
|
||||
set_resource_status_applied(state, &schema_addr);
|
||||
state.recovery_records.insert(
|
||||
|
|
@ -200,14 +231,30 @@ pub(crate) async fn sweep_schema_apply_sidecar(
|
|||
StateResource {
|
||||
digest: live_digest.clone(),
|
||||
applies_to: None,
|
||||
embedding_provider: None,
|
||||
embedding_profile: None,
|
||||
},
|
||||
);
|
||||
let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id);
|
||||
let composite = graph_digest(&sidecar.graph_id, Some(&live_digest), Some(&query_digests));
|
||||
state
|
||||
.applied_revision
|
||||
.resources
|
||||
.insert(graph_address.clone(), StateResource { digest: composite, applies_to: None });
|
||||
let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id);
|
||||
let embedding_provider_digest =
|
||||
state_embedding_provider_digest(state, embedding_provider.as_deref());
|
||||
let composite = graph_digest(
|
||||
&sidecar.graph_id,
|
||||
Some(&live_digest),
|
||||
Some(&query_digests),
|
||||
embedding_provider.as_deref(),
|
||||
embedding_provider_digest.as_ref(),
|
||||
);
|
||||
state.applied_revision.resources.insert(
|
||||
graph_address.clone(),
|
||||
StateResource {
|
||||
digest: composite,
|
||||
applies_to: None,
|
||||
embedding_provider,
|
||||
embedding_profile: None,
|
||||
},
|
||||
);
|
||||
set_resource_status_applied(state, &graph_address);
|
||||
set_resource_status_applied(state, &schema_addr);
|
||||
state.recovery_records.insert(
|
||||
|
|
@ -274,7 +321,11 @@ pub(crate) async fn sweep_graph_delete_sidecar(
|
|||
return;
|
||||
}
|
||||
|
||||
if !state.applied_revision.resources.contains_key(&graph_address) {
|
||||
if !state
|
||||
.applied_revision
|
||||
.resources
|
||||
.contains_key(&graph_address)
|
||||
{
|
||||
// Row 7: already tombstoned (or never recorded); crash fell between
|
||||
// the state CAS and sidecar delete.
|
||||
outcome.completed_sidecars.push(path);
|
||||
|
|
@ -283,7 +334,12 @@ pub(crate) async fn sweep_graph_delete_sidecar(
|
|||
|
||||
// Row 7b: the root is gone, the ledger is stale — roll forward the
|
||||
// tombstone, consume the approval the sidecar carries, audit.
|
||||
tombstone_graph_subtree(state, &sidecar.graph_id, sidecar.approval_id.as_deref(), sidecar.actor.as_deref());
|
||||
tombstone_graph_subtree(
|
||||
state,
|
||||
&sidecar.graph_id,
|
||||
sidecar.approval_id.as_deref(),
|
||||
sidecar.actor.as_deref(),
|
||||
);
|
||||
state.recovery_records.insert(
|
||||
sidecar.operation_id.clone(),
|
||||
json!({
|
||||
|
|
@ -342,7 +398,11 @@ pub(crate) fn tombstone_graph_subtree(
|
|||
/// Record approval consumption in the state ledger. The artifact FILE is
|
||||
/// rewritten with consumed_at only after the state write lands, so a failed
|
||||
/// CAS leaves the approval valid for the retry.
|
||||
pub(crate) fn record_approval_consumed(state: &mut ClusterState, approval_id: &str, operation_id: &str) {
|
||||
pub(crate) fn record_approval_consumed(
|
||||
state: &mut ClusterState,
|
||||
approval_id: &str,
|
||||
operation_id: &str,
|
||||
) {
|
||||
state.approval_records.insert(
|
||||
approval_id.to_string(),
|
||||
json!({
|
||||
|
|
|
|||
|
|
@ -56,6 +56,39 @@ policies:
|
|||
dir
|
||||
}
|
||||
|
||||
fn write_mock_embedding_cluster(config_dir: &Path, model: &str) {
|
||||
fs::write(
|
||||
config_dir.join(CLUSTER_CONFIG_FILE),
|
||||
format!(
|
||||
r#"
|
||||
version: 1
|
||||
metadata:
|
||||
name: test
|
||||
state:
|
||||
backend: cluster
|
||||
lock: true
|
||||
providers:
|
||||
embedding:
|
||||
default:
|
||||
kind: mock
|
||||
model: {model}
|
||||
graphs:
|
||||
knowledge:
|
||||
schema: ./people.pg
|
||||
embedding_provider: default
|
||||
queries:
|
||||
find_person:
|
||||
file: ./people.gq
|
||||
policies:
|
||||
base:
|
||||
file: ./base.policy.yaml
|
||||
applies_to: [knowledge]
|
||||
"#
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn init_derived_graph(root: &Path) {
|
||||
let graph_dir = root.join(CLUSTER_GRAPHS_DIR);
|
||||
fs::create_dir_all(&graph_dir).unwrap();
|
||||
|
|
@ -194,6 +227,95 @@ policies:
|
|||
assert!(codes.contains("dangling_graph_reference"));
|
||||
}
|
||||
|
||||
/// Validation accepts an embedding provider resource plus a graph that
/// references it: the provider is listed as a path-less resource, the graph
/// depends on it, and the graph digest folds in the provider address and digest.
#[test]
fn embedding_provider_config_accepts_provider_resources_and_graph_refs() {
    let dir = fixture();
    write_mock_embedding_cluster(dir.path(), "recorded-x");

    let out = validate_config_dir(dir.path());
    assert!(out.ok, "{:?}", out.diagnostics);

    let provider_digest = out
        .resource_digests
        .get("provider.embedding.default")
        .expect("provider resource digest");

    // The provider is a config-only resource: present, typed, and without a file path.
    assert!(out.resources.iter().any(|resource| {
        resource.address == "provider.embedding.default"
            && resource.kind == "embedding_provider"
            && resource.path.is_none()
    }));

    // The graph must declare a dependency edge onto its provider.
    assert!(
        out.dependencies
            .iter()
            .any(|dep| dep.from == "graph.knowledge" && dep.to == "provider.embedding.default"),
        "{:?}",
        out.dependencies
    );

    let schema_digest = out.resource_digests.get("schema.knowledge").unwrap();
    let query_digest = out
        .resource_digests
        .get("query.knowledge.find_person")
        .unwrap();
    let query_digests: BTreeMap<String, String> =
        [("find_person".to_string(), query_digest.clone())]
            .into_iter()
            .collect();
    let expected_graph_digest = graph_digest(
        "knowledge",
        Some(schema_digest),
        Some(&query_digests),
        Some("provider.embedding.default"),
        Some(provider_digest),
    );
    assert_eq!(
        out.resource_digests["graph.knowledge"],
        expected_graph_digest
    );
}
|
||||
|
||||
#[test]
|
||||
fn embedding_provider_config_rejects_bad_refs_and_inline_secrets() {
|
||||
let dir = fixture();
|
||||
fs::write(
|
||||
dir.path().join(CLUSTER_CONFIG_FILE),
|
||||
r#"
|
||||
version: 1
|
||||
providers:
|
||||
embedding:
|
||||
default:
|
||||
kind: openai-compatible
|
||||
api_key: sk-inline
|
||||
graphs:
|
||||
knowledge:
|
||||
schema: ./people.pg
|
||||
embedding_provider: provider.policy.default
|
||||
missing_provider:
|
||||
schema: ./people.pg
|
||||
embedding_provider: absent
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let out = validate_config_dir(dir.path());
|
||||
assert!(!out.ok);
|
||||
let codes: BTreeSet<_> = out.diagnostics.iter().map(|d| d.code.as_str()).collect();
|
||||
assert!(
|
||||
codes.contains("embedding_api_key_inline"),
|
||||
"{:?}",
|
||||
out.diagnostics
|
||||
);
|
||||
assert!(
|
||||
codes.contains("wrong_kind_reference"),
|
||||
"{:?}",
|
||||
out.diagnostics
|
||||
);
|
||||
assert!(
|
||||
codes.contains("dangling_embedding_provider_reference"),
|
||||
"{:?}",
|
||||
out.diagnostics
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_key_mismatch_fails() {
|
||||
let dir = fixture();
|
||||
|
|
@ -1012,8 +1134,13 @@ graphs:
|
|||
let out = validate_config_dir(config_dir);
|
||||
assert!(out.ok, "{:?}", out.diagnostics);
|
||||
let schema_digest = out.resource_digests.get("schema.knowledge").unwrap().clone();
|
||||
let graph_composite =
|
||||
graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
|
||||
let graph_composite = graph_digest(
|
||||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(&BTreeMap::new()),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
write_state_resources(
|
||||
config_dir,
|
||||
&[
|
||||
|
|
@ -1122,6 +1249,8 @@ graphs:
|
|||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert_eq!(resources["graph.knowledge"]["digest"], expected_composite);
|
||||
assert_eq!(
|
||||
|
|
@ -1136,6 +1265,117 @@ graphs:
|
|||
assert!(!dir.path().join(CLUSTER_LOCK_FILE).exists());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn apply_records_embedding_provider_profile_and_graph_binding() {
|
||||
let dir = fixture();
|
||||
write_mock_embedding_cluster(dir.path(), "recorded-x");
|
||||
write_applyable_state(dir.path());
|
||||
let desired = validate_config_dir(dir.path());
|
||||
let query_digest = desired
|
||||
.resource_digests
|
||||
.get("query.knowledge.find_person")
|
||||
.unwrap()
|
||||
.clone();
|
||||
let schema_digest = desired
|
||||
.resource_digests
|
||||
.get("schema.knowledge")
|
||||
.unwrap()
|
||||
.clone();
|
||||
let provider_digest = desired
|
||||
.resource_digests
|
||||
.get("provider.embedding.default")
|
||||
.unwrap()
|
||||
.clone();
|
||||
|
||||
let out = apply_config_dir(dir.path()).await;
|
||||
assert!(out.ok, "{:?}", out.diagnostics);
|
||||
assert!(out.converged, "{out:?}");
|
||||
|
||||
let state = read_state_json(dir.path());
|
||||
let resources = &state["applied_revision"]["resources"];
|
||||
let provider = resources["provider.embedding.default"]
|
||||
.as_object()
|
||||
.expect("provider resource");
|
||||
assert_eq!(provider["digest"], provider_digest);
|
||||
assert_eq!(provider["embedding_profile"]["kind"], "mock");
|
||||
assert_eq!(provider["embedding_profile"]["model"], "recorded-x");
|
||||
assert!(provider["embedding_profile"].get("api_key").is_none());
|
||||
assert_eq!(
|
||||
resources["graph.knowledge"]["embedding_provider"],
|
||||
"provider.embedding.default"
|
||||
);
|
||||
let expected_graph_digest = graph_digest(
|
||||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(
|
||||
&[("find_person".to_string(), query_digest)]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
Some("provider.embedding.default"),
|
||||
Some(&provider_digest),
|
||||
);
|
||||
assert_eq!(resources["graph.knowledge"]["digest"], expected_graph_digest);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn embedding_provider_changes_update_provider_and_graph_plan() {
|
||||
let dir = fixture();
|
||||
write_mock_embedding_cluster(dir.path(), "recorded-x");
|
||||
write_applyable_state(dir.path());
|
||||
let first = apply_config_dir(dir.path()).await;
|
||||
assert!(first.ok && first.converged, "{first:?}");
|
||||
|
||||
write_mock_embedding_cluster(dir.path(), "recorded-y");
|
||||
let plan = plan_config_dir(dir.path()).await;
|
||||
assert!(plan.ok, "{:?}", plan.diagnostics);
|
||||
let by_resource: BTreeMap<&str, &PlanChange> = plan
|
||||
.changes
|
||||
.iter()
|
||||
.map(|change| (change.resource.as_str(), change))
|
||||
.collect();
|
||||
assert_eq!(
|
||||
by_resource["provider.embedding.default"].operation,
|
||||
PlanOperation::Update
|
||||
);
|
||||
assert_eq!(
|
||||
by_resource["provider.embedding.default"].disposition,
|
||||
Some(ApplyDisposition::Applied)
|
||||
);
|
||||
assert_eq!(
|
||||
by_resource["graph.knowledge"].operation,
|
||||
PlanOperation::Update
|
||||
);
|
||||
assert_eq!(
|
||||
by_resource["graph.knowledge"].disposition,
|
||||
Some(ApplyDisposition::Derived)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn embedding_binding_survives_refresh() {
|
||||
let dir = fixture();
|
||||
init_derived_graph(dir.path()).await;
|
||||
write_mock_embedding_cluster(dir.path(), "recorded-x");
|
||||
write_applyable_state(dir.path());
|
||||
let apply = apply_config_dir(dir.path()).await;
|
||||
assert!(apply.ok && apply.converged, "{apply:?}");
|
||||
|
||||
let refresh = refresh_config_dir(dir.path()).await;
|
||||
assert!(refresh.ok, "{:?}", refresh.diagnostics);
|
||||
|
||||
let state = read_state_json(dir.path());
|
||||
let resources = &state["applied_revision"]["resources"];
|
||||
assert_eq!(
|
||||
resources["graph.knowledge"]["embedding_provider"],
|
||||
"provider.embedding.default"
|
||||
);
|
||||
assert_eq!(
|
||||
resources["provider.embedding.default"]["embedding_profile"]["model"],
|
||||
"recorded-x"
|
||||
);
|
||||
}
|
||||
|
||||
fn desired_revision_digest(out: &ApplyOutput) -> String {
|
||||
out.desired_revision.config_digest.clone().unwrap()
|
||||
}
|
||||
|
|
@ -1150,8 +1390,13 @@ graphs:
|
|||
.unwrap()
|
||||
.clone();
|
||||
let old_digest = "0".repeat(64);
|
||||
let graph_composite =
|
||||
graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
|
||||
let graph_composite = graph_digest(
|
||||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(&BTreeMap::new()),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
write_state_resources(
|
||||
dir.path(),
|
||||
&[
|
||||
|
|
@ -1190,8 +1435,13 @@ graphs:
|
|||
.clone();
|
||||
let stale_query_digest = "1".repeat(64);
|
||||
let stale_policy_digest = "2".repeat(64);
|
||||
let graph_composite =
|
||||
graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
|
||||
let graph_composite = graph_digest(
|
||||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(&BTreeMap::new()),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
write_state_resources(
|
||||
dir.path(),
|
||||
&[
|
||||
|
|
@ -1234,6 +1484,8 @@ graphs:
|
|||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(&[("find_person".to_string(), query_digest)].into_iter().collect()),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert_eq!(resources["graph.knowledge"]["digest"], expected_composite);
|
||||
}
|
||||
|
|
@ -1494,8 +1746,13 @@ graphs:
|
|||
.get("schema.knowledge")
|
||||
.unwrap()
|
||||
.clone();
|
||||
let graph_composite =
|
||||
graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
|
||||
let graph_composite = graph_digest(
|
||||
"knowledge",
|
||||
Some(&schema_digest),
|
||||
Some(&BTreeMap::new()),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
write_state_resources(
|
||||
dir.path(),
|
||||
&[
|
||||
|
|
@ -2864,6 +3121,54 @@ policies:
|
|||
assert!(snapshot.policies[0].source.contains("rules:"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn serving_snapshot_uses_applied_embedding_provider_profile() {
|
||||
let dir = fixture();
|
||||
init_derived_graph(dir.path()).await;
|
||||
write_mock_embedding_cluster(dir.path(), "recorded-x");
|
||||
write_applyable_state(dir.path());
|
||||
let converge = apply_config_dir(dir.path()).await;
|
||||
assert!(converge.converged, "{converge:?}");
|
||||
|
||||
let snapshot = read_serving_snapshot(dir.path()).await.unwrap();
|
||||
let profile = snapshot.graphs[0].embedding.as_ref().unwrap();
|
||||
assert_eq!(profile.kind.as_deref(), Some("mock"));
|
||||
assert_eq!(profile.model.as_deref(), Some("recorded-x"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn serving_snapshot_refuses_missing_embedding_provider_metadata() {
|
||||
let dir = fixture();
|
||||
init_derived_graph(dir.path()).await;
|
||||
write_mock_embedding_cluster(dir.path(), "recorded-x");
|
||||
write_applyable_state(dir.path());
|
||||
let converge = apply_config_dir(dir.path()).await;
|
||||
assert!(converge.converged, "{converge:?}");
|
||||
|
||||
let mut state = read_state_json(dir.path());
|
||||
state["applied_revision"]["resources"]["provider.embedding.default"]
|
||||
.as_object_mut()
|
||||
.unwrap()
|
||||
.remove("embedding_profile");
|
||||
fs::write(
|
||||
dir.path().join(CLUSTER_STATE_FILE),
|
||||
serde_json::to_string_pretty(&state).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let err = read_serving_snapshot(dir.path()).await.unwrap_err();
|
||||
assert!(
|
||||
err.iter()
|
||||
.any(|diagnostic| diagnostic.code == "embedding_provider_profile_missing"),
|
||||
"{err:?}"
|
||||
);
|
||||
assert!(
|
||||
err.iter()
|
||||
.any(|diagnostic| diagnostic.code == "embedding_provider_missing"),
|
||||
"{err:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn serving_snapshot_refuses_missing_state() {
|
||||
let dir = fixture();
|
||||
|
|
|
|||
|
|
@ -325,6 +325,7 @@ pub(crate) struct DesiredCluster {
|
|||
/// The declared `storage:` root, if any (None ⇒ the config dir itself).
|
||||
pub(crate) storage_root: Option<String>,
|
||||
pub(crate) state_lock: bool,
|
||||
pub(crate) embedding_providers: BTreeMap<String, EmbeddingProviderConfig>,
|
||||
pub(crate) graphs: Vec<DesiredGraph>,
|
||||
pub(crate) resource_digests: BTreeMap<String, String>,
|
||||
pub(crate) resources: Vec<ResourceSummary>,
|
||||
|
|
@ -337,6 +338,7 @@ pub(crate) struct DesiredCluster {
|
|||
pub(crate) struct DesiredGraph {
|
||||
pub(crate) id: String,
|
||||
pub(crate) schema_digest: String,
|
||||
pub(crate) embedding_provider: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -376,6 +378,8 @@ pub(crate) struct RawClusterConfig {
|
|||
#[serde(default)]
|
||||
pub(crate) state: StateConfig,
|
||||
#[serde(default)]
|
||||
pub(crate) providers: ProvidersConfig,
|
||||
#[serde(default)]
|
||||
pub(crate) graphs: BTreeMap<String, GraphConfig>,
|
||||
#[serde(default)]
|
||||
pub(crate) policies: BTreeMap<String, PolicyConfig>,
|
||||
|
|
@ -394,12 +398,123 @@ pub(crate) struct StateConfig {
|
|||
pub(crate) lock: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub(crate) struct ProvidersConfig {
|
||||
#[serde(default)]
|
||||
pub(crate) embedding: BTreeMap<String, EmbeddingProviderConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub(crate) struct GraphConfig {
|
||||
pub(crate) schema: PathBuf,
|
||||
#[serde(default)]
|
||||
pub(crate) queries: QueriesDecl,
|
||||
/// Optional reference to a top-level `providers.embedding.<name>` profile.
|
||||
#[serde(default)]
|
||||
pub(crate) embedding_provider: Option<String>,
|
||||
}
|
||||
|
||||
/// A named cluster embedding provider profile (RFC-012 Phase 5). `kind`/`base_url`/
|
||||
/// `model` default exactly as the engine's `EmbeddingConfig::from_env` does.
|
||||
/// `api_key`, when required, must be a `${NAME}` env reference resolved at
|
||||
/// serving boot, never an inline secret.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct EmbeddingProviderConfig {
|
||||
#[serde(default, alias = "provider", skip_serializing_if = "Option::is_none")]
|
||||
pub kind: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub base_url: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub model: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
|
||||
impl EmbeddingProviderConfig {
|
||||
pub(crate) fn validate(&self, path: String, diagnostics: &mut Vec<Diagnostic>) {
|
||||
if let Err(error) = omnigraph::embedding::EmbeddingConfig::from_parts(
|
||||
self.kind.as_deref(),
|
||||
self.base_url.clone(),
|
||||
self.model.clone(),
|
||||
"validation-placeholder".to_string(),
|
||||
) {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"invalid_embedding_provider",
|
||||
path.clone(),
|
||||
error.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if self.kind.as_deref() == Some("mock") {
|
||||
if let Some(api_key) = self.api_key.as_deref() {
|
||||
if secret_ref_name(api_key).is_err() {
|
||||
diagnostics.push(Diagnostic::error(
|
||||
"embedding_api_key_inline",
|
||||
format!("{path}.api_key"),
|
||||
"embedding api_key must be a ${NAME} env reference, not an inline secret",
|
||||
));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
match self.api_key.as_deref() {
|
||||
Some(api_key) if secret_ref_name(api_key).is_err() => diagnostics.push(
|
||||
Diagnostic::error(
|
||||
"embedding_api_key_inline",
|
||||
format!("{path}.api_key"),
|
||||
"embedding api_key must be a ${NAME} env reference, not an inline secret",
|
||||
),
|
||||
),
|
||||
Some(_) => {}
|
||||
None => diagnostics.push(Diagnostic::error(
|
||||
"embedding_api_key_required",
|
||||
format!("{path}.api_key"),
|
||||
"non-mock embedding providers must set api_key to a ${NAME} env reference",
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve into an engine `EmbeddingConfig`, reading the `${NAME}` api-key
|
||||
/// reference from process env. Mock profiles do not read env and may omit
|
||||
/// `api_key`; real providers error if the reference is missing or unset.
|
||||
pub fn resolve(&self) -> Result<omnigraph::embedding::EmbeddingConfig, String> {
|
||||
let api_key = if self.kind.as_deref() == Some("mock") {
|
||||
String::new()
|
||||
} else {
|
||||
resolve_secret_ref(self.api_key.as_deref().ok_or_else(|| {
|
||||
"embedding api_key is required for non-mock providers".to_string()
|
||||
})?)?
|
||||
};
|
||||
omnigraph::embedding::EmbeddingConfig::from_parts(
|
||||
self.kind.as_deref(),
|
||||
self.base_url.clone(),
|
||||
self.model.clone(),
|
||||
api_key,
|
||||
)
|
||||
.map_err(|e| e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts `NAME` from a `${NAME}` secret reference.
///
/// Surrounding whitespace is ignored both around the whole reference and
/// around the name inside the braces, so `" ${ NAME } "` yields `"NAME"`.
///
/// # Errors
/// Returns a message when `value` is not wrapped in `${…}` or the name is
/// blank. The rejected value is deliberately NOT echoed: the most likely
/// reason for a rejection is an inline secret, which must not end up in
/// diagnostics or logs.
fn secret_ref_name(value: &str) -> Result<&str, String> {
    value
        .trim()
        .strip_prefix("${")
        .and_then(|rest| rest.strip_suffix('}'))
        // Return the trimmed name so the later env lookup uses exactly what was validated.
        .map(str::trim)
        .filter(|name| !name.is_empty())
        .ok_or_else(|| {
            "embedding api_key must be a ${NAME} env reference, not an inline value".to_string()
        })
}
|
||||
|
||||
/// Resolve a `${NAME}` secret reference from process env. Rejects an inline value
|
||||
/// (anything not wrapped in `${…}`) so secrets never sit in the cluster config.
|
||||
fn resolve_secret_ref(value: &str) -> Result<String, String> {
|
||||
let name = secret_ref_name(value)?;
|
||||
std::env::var(name).map_err(|_| format!("embedding api_key env var '{name}' is not set"))
|
||||
}
|
||||
|
||||
/// How a graph declares its stored queries. Terraform-style: the `.gq`
|
||||
|
|
@ -457,6 +572,16 @@ pub(crate) struct StateResource {
|
|||
/// non-policy resources.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) applies_to: Option<Vec<String>>,
|
||||
/// Graph resources only: the applied `provider.embedding.<name>` binding.
|
||||
/// The provider profile itself is stored on the provider resource so
|
||||
/// serving can boot without re-reading mutable desired config.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) embedding_provider: Option<String>,
|
||||
/// Embedding provider resources only: the applied profile with unresolved
|
||||
/// `${ENV}` references. The server resolves the referenced env var exactly
|
||||
/// once at boot and injects the resulting engine config into the graph.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) embedding_profile: Option<EmbeddingProviderConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
|
@ -518,3 +643,74 @@ pub(crate) struct SweepOutcome {
|
|||
/// files are rewritten with consumed_at only after the state write lands.
|
||||
pub(crate) consumed_approvals: Vec<String>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod embedding_provider_config_tests {
    use super::EmbeddingProviderConfig;

    /// Sets an env var for the lifetime of the guard and removes it on drop,
    /// so a failing assertion cannot leak the variable into other tests.
    struct ScopedEnvVar {
        name: &'static str,
    }

    impl ScopedEnvVar {
        fn set(name: &'static str, value: &str) -> Self {
            // SAFETY: each test uses a variable name unique to that test, so no
            // other test reads or writes the same variable concurrently.
            unsafe { std::env::set_var(name, value) };
            Self { name }
        }
    }

    impl Drop for ScopedEnvVar {
        fn drop(&mut self) {
            // SAFETY: same uniqueness argument as in `set`.
            unsafe { std::env::remove_var(self.name) };
        }
    }

    /// Builds a profile with no `base_url` from borrowed parts.
    fn profile(
        kind: Option<&str>,
        model: Option<&str>,
        api_key: Option<&str>,
    ) -> EmbeddingProviderConfig {
        EmbeddingProviderConfig {
            kind: kind.map(str::to_string),
            base_url: None,
            model: model.map(str::to_string),
            api_key: api_key.map(str::to_string),
        }
    }

    #[test]
    fn resolves_secret_from_env_and_applies_defaults() {
        let _env = ScopedEnvVar::set("OG_TEST_EMBED_KEY_A", "secret-x");
        let config = profile(
            Some("openai-compatible"),
            Some("m"),
            Some("${OG_TEST_EMBED_KEY_A}"),
        )
        .resolve()
        .unwrap();
        assert_eq!(config.api_key, "secret-x");
        assert_eq!(config.model, "m");
    }

    #[test]
    fn rejects_inline_api_key() {
        let err = profile(None, None, Some("sk-inline")).resolve().unwrap_err();
        assert!(err.contains("${NAME}"), "got: {err}");
    }

    #[test]
    fn errors_on_unset_secret() {
        let err = profile(None, None, Some("${OG_TEST_DEFINITELY_UNSET_VAR}"))
            .resolve()
            .unwrap_err();
        assert!(err.contains("not set"), "got: {err}");
    }

    #[test]
    fn rejects_unknown_provider() {
        let _env = ScopedEnvVar::set("OG_TEST_EMBED_KEY_B", "x");
        let err = profile(Some("cohere"), None, Some("${OG_TEST_EMBED_KEY_B}"))
            .resolve()
            .unwrap_err();
        assert!(err.contains("unknown embedding provider"), "got: {err}");
    }

    #[test]
    fn mock_does_not_require_secret_env() {
        let config = profile(Some("mock"), Some("cluster-mock"), None)
            .resolve()
            .unwrap();
        assert_eq!(config.model, "cluster-mock");
    }
}
|
||||
|
|
|
|||
|
|
@ -20,10 +20,5 @@ pest_derive = { workspace = true }
|
|||
thiserror = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
ahash = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -26,6 +26,15 @@ pub struct InterfaceType {
|
|||
pub properties: HashMap<String, PropType>,
|
||||
}
|
||||
|
||||
/// What an `@embed` annotation binds a vector property to.
///
/// `source` is the text property named positionally in `@embed("source")`;
/// `model` is the optional `model="…"` keyword argument. The recorded model is
/// what the query-time same-space check validates against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbedSource {
    /// Name of the source text property.
    pub source: String,
    /// Embedding model recorded on the annotation, if one was given.
    pub model: Option<String>,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodeType {
|
||||
pub name: String,
|
||||
|
|
@ -42,8 +51,8 @@ pub struct NodeType {
|
|||
pub range_constraints: Vec<RangeConstraint>,
|
||||
/// Regex check constraints
|
||||
pub check_constraints: Vec<CheckConstraint>,
|
||||
/// Maps @embed target property -> source text property
|
||||
pub embed_sources: HashMap<String, String>,
|
||||
/// Maps @embed target property -> its source text property + recorded model.
|
||||
pub embed_sources: HashMap<String, EmbedSource>,
|
||||
pub blob_properties: HashSet<String>,
|
||||
pub arrow_schema: SchemaRef,
|
||||
}
|
||||
|
|
@ -156,14 +165,18 @@ pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
|
|||
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
|
||||
blob_properties.insert(prop.name.clone());
|
||||
}
|
||||
// Extract @embed from property annotations (stays as annotation)
|
||||
if let Some(source_prop) = prop
|
||||
.annotations
|
||||
.iter()
|
||||
.find(|ann| ann.name == "embed")
|
||||
.and_then(|ann| ann.value.clone())
|
||||
{
|
||||
embed_sources.insert(prop.name.clone(), source_prop);
|
||||
// Extract @embed: the source text property (positional) and the
|
||||
// optional recorded model (the `model` kwarg).
|
||||
if let Some(ann) = prop.annotations.iter().find(|ann| ann.name == "embed") {
|
||||
if let Some(source) = ann.value.clone() {
|
||||
embed_sources.insert(
|
||||
prop.name.clone(),
|
||||
EmbedSource {
|
||||
source,
|
||||
model: ann.kwargs.get("model").cloned(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1137,6 +1137,7 @@ node Person @description("new") {
|
|||
annotations: vec![Annotation {
|
||||
name: "description".to_string(),
|
||||
value: Some("new".to_string()),
|
||||
kwargs: Default::default(),
|
||||
}],
|
||||
}));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,33 @@ fn test_build_catalog() {
|
|||
assert!(catalog.node_types.contains_key("Company"));
|
||||
}
|
||||
|
||||
/// `build_catalog` records the positional `@embed` source for every annotated
/// property and the `model` kwarg only where one was written.
#[test]
fn test_embed_source_records_model_kwarg() {
    let schema = parse_schema(
        r#"
node Doc {
title: String
embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large")
plain: Vector(3) @embed("title")
}
"#,
    )
    .unwrap();
    let catalog = build_catalog(&schema).unwrap();
    let doc = catalog.node_types.get("Doc").unwrap();

    let cases = [
        ("embedding", Some("openai/text-embedding-3-large")),
        ("plain", None),
    ];
    for (property, expected_model) in cases {
        let embed = doc.embed_sources.get(property).unwrap();
        assert_eq!(embed.source, "title");
        assert_eq!(embed.model.as_deref(), expected_model);
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_edge_lookup() {
|
||||
let schema = parse_schema(test_schema()).unwrap();
|
||||
|
|
|
|||
|
|
@ -1,379 +0,0 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use reqwest::Client;
|
||||
use serde::Deserialize;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::error::{NanoError, Result};
|
||||
|
||||
const DEFAULT_EMBED_MODEL: &str = "text-embedding-3-small";
|
||||
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 30_000;
|
||||
const DEFAULT_RETRY_ATTEMPTS: usize = 4;
|
||||
const DEFAULT_RETRY_BACKOFF_MS: u64 = 200;
|
||||
|
||||
#[derive(Clone)]
|
||||
enum EmbeddingTransport {
|
||||
Mock,
|
||||
OpenAi {
|
||||
api_key: String,
|
||||
base_url: String,
|
||||
http: Client,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct EmbeddingClient {
|
||||
model: String,
|
||||
retry_attempts: usize,
|
||||
retry_backoff_ms: u64,
|
||||
transport: EmbeddingTransport,
|
||||
}
|
||||
|
||||
struct EmbedCallError {
|
||||
message: String,
|
||||
retryable: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiEmbeddingResponse {
|
||||
data: Vec<OpenAiEmbeddingDatum>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiEmbeddingDatum {
|
||||
index: usize,
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiErrorEnvelope {
|
||||
error: OpenAiErrorBody,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiErrorBody {
|
||||
message: String,
|
||||
}
|
||||
|
||||
impl EmbeddingClient {
|
||||
pub(crate) fn from_env() -> Result<Self> {
|
||||
let model = std::env::var("NANOGRAPH_EMBED_MODEL")
|
||||
.ok()
|
||||
.map(|v| v.trim().to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.unwrap_or_else(|| DEFAULT_EMBED_MODEL.to_string());
|
||||
let retry_attempts =
|
||||
parse_env_usize("NANOGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS);
|
||||
let retry_backoff_ms =
|
||||
parse_env_u64("NANOGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS);
|
||||
|
||||
if env_flag("NANOGRAPH_EMBEDDINGS_MOCK") {
|
||||
return Ok(Self {
|
||||
model,
|
||||
retry_attempts,
|
||||
retry_backoff_ms,
|
||||
transport: EmbeddingTransport::Mock,
|
||||
});
|
||||
}
|
||||
|
||||
let api_key = std::env::var("OPENAI_API_KEY")
|
||||
.ok()
|
||||
.map(|v| v.trim().to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.ok_or_else(|| {
|
||||
NanoError::Execution(
|
||||
"OPENAI_API_KEY is required when an embedding call is needed".to_string(),
|
||||
)
|
||||
})?;
|
||||
let base_url = std::env::var("OPENAI_BASE_URL")
|
||||
.ok()
|
||||
.map(|v| v.trim_end_matches('/').to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.unwrap_or_else(|| DEFAULT_OPENAI_BASE_URL.to_string());
|
||||
let timeout_ms = parse_env_u64("NANOGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
|
||||
let http = Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
NanoError::Execution(format!("failed to initialize HTTP client: {}", e))
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
model,
|
||||
retry_attempts,
|
||||
retry_backoff_ms,
|
||||
transport: EmbeddingTransport::OpenAi {
|
||||
api_key,
|
||||
base_url,
|
||||
http,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn mock_for_tests() -> Self {
|
||||
Self {
|
||||
model: DEFAULT_EMBED_MODEL.to_string(),
|
||||
retry_attempts: DEFAULT_RETRY_ATTEMPTS,
|
||||
retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS,
|
||||
transport: EmbeddingTransport::Mock,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn model(&self) -> &str {
|
||||
&self.model
|
||||
}
|
||||
|
||||
pub(crate) async fn embed_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
|
||||
let mut vectors = self.embed_texts(&[input.to_string()], expected_dim).await?;
|
||||
vectors.pop().ok_or_else(|| {
|
||||
NanoError::Execution("embedding provider returned no vector".to_string())
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn embed_texts(
|
||||
&self,
|
||||
inputs: &[String],
|
||||
expected_dim: usize,
|
||||
) -> Result<Vec<Vec<f32>>> {
|
||||
if expected_dim == 0 {
|
||||
return Err(NanoError::Execution(
|
||||
"embedding dimension must be greater than zero".to_string(),
|
||||
));
|
||||
}
|
||||
if inputs.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
match &self.transport {
|
||||
EmbeddingTransport::Mock => Ok(inputs
|
||||
.iter()
|
||||
.map(|input| mock_embedding(input, expected_dim))
|
||||
.collect()),
|
||||
EmbeddingTransport::OpenAi { .. } => {
|
||||
self.embed_texts_openai_with_retry(inputs, expected_dim)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn embed_texts_openai_with_retry(
|
||||
&self,
|
||||
inputs: &[String],
|
||||
expected_dim: usize,
|
||||
) -> Result<Vec<Vec<f32>>> {
|
||||
let max_attempt = self.retry_attempts.max(1);
|
||||
let mut attempt = 0usize;
|
||||
loop {
|
||||
attempt += 1;
|
||||
match self.embed_texts_openai_once(inputs, expected_dim).await {
|
||||
Ok(vectors) => return Ok(vectors),
|
||||
Err(err) => {
|
||||
if !err.retryable || attempt >= max_attempt {
|
||||
return Err(NanoError::Execution(err.message));
|
||||
}
|
||||
let shift = (attempt - 1).min(10) as u32;
|
||||
let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift);
|
||||
sleep(Duration::from_millis(delay)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn embed_texts_openai_once(
|
||||
&self,
|
||||
inputs: &[String],
|
||||
expected_dim: usize,
|
||||
) -> std::result::Result<Vec<Vec<f32>>, EmbedCallError> {
|
||||
let (api_key, base_url, http) = match &self.transport {
|
||||
EmbeddingTransport::OpenAi {
|
||||
api_key,
|
||||
base_url,
|
||||
http,
|
||||
} => (api_key, base_url, http),
|
||||
EmbeddingTransport::Mock => unreachable!("mock transport should not call OpenAI"),
|
||||
};
|
||||
|
||||
let request = serde_json::json!({
|
||||
"model": self.model,
|
||||
"input": inputs,
|
||||
"dimensions": expected_dim,
|
||||
});
|
||||
let url = format!("{}/embeddings", base_url);
|
||||
let response = http
|
||||
.post(&url)
|
||||
.bearer_auth(api_key)
|
||||
.json(&request)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
let response = match response {
|
||||
Ok(resp) => resp,
|
||||
Err(err) => {
|
||||
let retryable = err.is_timeout() || err.is_connect() || err.is_request();
|
||||
return Err(EmbedCallError {
|
||||
message: format!("embedding request failed: {}", err),
|
||||
retryable,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let status = response.status();
|
||||
let body = match response.text().await {
|
||||
Ok(body) => body,
|
||||
Err(err) => {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding response read failed (status {}): {}",
|
||||
status, err
|
||||
),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
if !status.is_success() {
|
||||
let message = parse_openai_error_message(&body).unwrap_or_else(|| body.clone());
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding request failed with status {}: {}",
|
||||
status, message
|
||||
),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
|
||||
let mut parsed: OpenAiEmbeddingResponse =
|
||||
serde_json::from_str(&body).map_err(|err| EmbedCallError {
|
||||
message: format!("embedding response decode failed: {}", err),
|
||||
retryable: false,
|
||||
})?;
|
||||
|
||||
if parsed.data.len() != inputs.len() {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding response size mismatch: expected {}, got {}",
|
||||
inputs.len(),
|
||||
parsed.data.len()
|
||||
),
|
||||
retryable: false,
|
||||
});
|
||||
}
|
||||
|
||||
parsed.data.sort_by_key(|item| item.index);
|
||||
let mut vectors = Vec::with_capacity(parsed.data.len());
|
||||
for (idx, item) in parsed.data.into_iter().enumerate() {
|
||||
if item.index != idx {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding response index mismatch at position {}: got {}",
|
||||
idx, item.index
|
||||
),
|
||||
retryable: false,
|
||||
});
|
||||
}
|
||||
if item.embedding.len() != expected_dim {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding dimension mismatch: expected {}, got {}",
|
||||
expected_dim,
|
||||
item.embedding.len()
|
||||
),
|
||||
retryable: false,
|
||||
});
|
||||
}
|
||||
vectors.push(item.embedding);
|
||||
}
|
||||
Ok(vectors)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_openai_error_message(body: &str) -> Option<String> {
|
||||
serde_json::from_str::<OpenAiErrorEnvelope>(body)
|
||||
.ok()
|
||||
.map(|e| e.error.message)
|
||||
.filter(|msg| !msg.trim().is_empty())
|
||||
}
|
||||
|
||||
/// Reads env var `name` as a strictly positive `usize`.
///
/// Surrounding whitespace is ignored. Returns `default` when the variable is
/// unset, not valid Unicode, not a number, or zero.
fn parse_env_usize(name: &str, default: usize) -> usize {
    std::env::var(name)
        .ok()
        // Trim first: `parse` rejects surrounding whitespace, which would
        // silently discard an otherwise valid setting such as "4\n".
        .and_then(|v| v.trim().parse::<usize>().ok())
        .filter(|v| *v > 0)
        .unwrap_or(default)
}
|
||||
|
||||
/// Reads env var `name` as a strictly positive `u64`.
///
/// Surrounding whitespace is ignored. Returns `default` when the variable is
/// unset, not valid Unicode, not a number, or zero.
fn parse_env_u64(name: &str, default: u64) -> u64 {
    std::env::var(name)
        .ok()
        // Trim first: `parse` rejects surrounding whitespace, which would
        // silently discard an otherwise valid setting such as "200\n".
        .and_then(|v| v.trim().parse::<u64>().ok())
        .filter(|v| *v > 0)
        .unwrap_or(default)
}
|
||||
|
||||
/// Reports whether env var `name` holds a truthy value.
///
/// Truthy means `1`, `true`, `yes` or `on`, compared case-insensitively after
/// trimming. An unset or unreadable variable is `false`.
fn env_flag(name: &str) -> bool {
    let Ok(raw) = std::env::var(name) else {
        return false;
    };
    let normalized = raw.trim().to_ascii_lowercase();
    matches!(normalized.as_str(), "1" | "true" | "yes" | "on")
}
|
||||
|
||||
fn mock_embedding(input: &str, dim: usize) -> Vec<f32> {
|
||||
let mut seed = fnv1a64(input.as_bytes());
|
||||
let mut out = Vec::with_capacity(dim);
|
||||
for _ in 0..dim {
|
||||
seed = xorshift64(seed);
|
||||
let ratio = (seed as f64 / u64::MAX as f64) as f32;
|
||||
out.push((ratio * 2.0) - 1.0);
|
||||
}
|
||||
|
||||
let norm = out
|
||||
.iter()
|
||||
.map(|v| (*v as f64) * (*v as f64))
|
||||
.sum::<f64>()
|
||||
.sqrt() as f32;
|
||||
if norm > f32::EPSILON {
|
||||
for value in &mut out {
|
||||
*value /= norm;
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// 64-bit FNV-1a hash of `bytes`: XOR each byte into the state, then multiply
/// by the FNV prime with wrapping arithmetic.
fn fnv1a64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 14695981039346656037u64;
    const PRIME: u64 = 1099511628211u64;
    bytes
        .iter()
        .fold(OFFSET_BASIS, |state, &byte| (state ^ byte as u64).wrapping_mul(PRIME))
}
|
||||
|
||||
/// One step of a 64-bit xorshift generator with shifts 13 (left), 7 (right)
/// and 17 (left). Zero maps to zero.
fn xorshift64(x: u64) -> u64 {
    let after_left = x ^ (x << 13);
    let after_right = after_left ^ (after_left >> 7);
    after_right ^ (after_right << 17)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The mock transport returns the same vector for the same text, a
    /// different one for different text, and honours the requested dimension.
    #[tokio::test]
    async fn mock_embeddings_are_deterministic() {
        const DIM: usize = 8;
        let client = EmbeddingClient::mock_for_tests();

        let first = client.embed_text("alpha", DIM).await.unwrap();
        let repeat = client.embed_text("alpha", DIM).await.unwrap();
        let other = client.embed_text("beta", DIM).await.unwrap();

        assert_eq!(first, repeat);
        assert_ne!(first, other);
        assert_eq!(first.len(), 8);
    }
}
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
pub mod catalog;
|
||||
pub mod embedding;
|
||||
pub mod error;
|
||||
pub mod ir;
|
||||
pub mod json_output;
|
||||
|
|
|
|||
|
|
@ -261,13 +261,13 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
|
|||
continue;
|
||||
}
|
||||
|
||||
if let Some(source_prop) = node_type.embed_sources.get(prop_name) {
|
||||
if assigned_props.contains(source_prop.as_str()) {
|
||||
if let Some(embed) = node_type.embed_sources.get(prop_name) {
|
||||
if assigned_props.contains(embed.source.as_str()) {
|
||||
continue;
|
||||
}
|
||||
return Err(NanoError::Type(format!(
|
||||
"T12: insert for `{}` must provide non-nullable property `{}` or @embed source `{}`",
|
||||
insert.type_name, prop_name, source_prop
|
||||
insert.type_name, prop_name, embed.source
|
||||
)));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::types::PropType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
|
|
@ -50,6 +52,11 @@ pub struct PropDecl {
|
|||
pub struct Annotation {
|
||||
pub name: String,
|
||||
pub value: Option<String>,
|
||||
/// Keyword arguments, e.g. `model="…"` on `@embed("source", model="…")`.
|
||||
/// Empty is skipped in serialization so existing schemas' IR JSON (and
|
||||
/// hash) stay byte-identical; `BTreeMap` keeps the order deterministic.
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub kwargs: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
/// A typed constraint declared in a node or edge body.
|
||||
|
|
|
|||
|
|
@ -556,12 +556,32 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
|
|||
fn parse_annotation(pair: pest::iterators::Pair<Rule>) -> Result<Annotation> {
|
||||
let mut inner = pair.into_inner();
|
||||
let name = inner.next().unwrap().as_str().to_string();
|
||||
let value = inner
|
||||
.next()
|
||||
.map(|p| decode_string_literal(p.as_str()))
|
||||
.transpose()?;
|
||||
let mut value = None;
|
||||
let mut kwargs = std::collections::BTreeMap::new();
|
||||
if let Some(args) = inner.next() {
|
||||
// `annotation_args`: one positional arg followed by zero or more
|
||||
// `key = literal` kwargs (e.g. `@embed("source", model="…")`).
|
||||
for arg in args.into_inner() {
|
||||
match arg.as_rule() {
|
||||
Rule::annotation_arg => {
|
||||
value = Some(decode_string_literal(arg.as_str())?);
|
||||
}
|
||||
Rule::annotation_kwarg => {
|
||||
let mut kw = arg.into_inner();
|
||||
let key = kw.next().unwrap().as_str().to_string();
|
||||
let raw = kw.next().unwrap().as_str();
|
||||
kwargs.insert(key, decode_string_literal(raw)?);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Annotation { name, value })
|
||||
Ok(Annotation {
|
||||
name,
|
||||
value,
|
||||
kwargs,
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_string_annotation(
|
||||
|
|
@ -823,6 +843,17 @@ fn validate_property_annotations(
|
|||
type_name, source_prop
|
||||
)));
|
||||
}
|
||||
|
||||
// `model` is the only supported kwarg; reject the rest loudly so
|
||||
// a typo can't be silently ignored (it would never validate).
|
||||
for key in ann.kwargs.keys() {
|
||||
if key != "model" {
|
||||
return Err(NanoError::Parse(format!(
|
||||
"@embed on {}.{} has unknown argument '{}=' (only 'model' is supported)",
|
||||
type_name, prop.name, key
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -508,6 +508,66 @@ embedding: Vector(3) @embed(title)
|
|||
}
|
||||
}
|
||||
|
||||
/// `@embed("title", model="…")` parses into the positional value plus a
/// `model` entry in `kwargs`.
#[test]
fn test_parse_embed_annotation_with_model_kwarg() {
    let input = r#"
node Doc {
title: String
embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large")
}
"#;
    let schema = parse_schema(input).unwrap();

    // The annotation under test sits on the second property (`embedding`).
    let ann = match &schema.declarations[0] {
        SchemaDecl::Node(n) => &n.properties[1].annotations[0],
        _ => panic!("expected Node"),
    };

    assert_eq!(ann.name, "embed");
    assert_eq!(ann.value.as_deref(), Some("title"));
    let model = ann.kwargs.get("model").map(String::as_str);
    assert_eq!(model, Some("openai/text-embedding-3-large"));
}
|
||||
|
||||
/// A plain `@embed("title")` yields empty `kwargs`, and the serialized
/// annotation does not mention the field at all.
#[test]
fn test_parse_embed_annotation_without_model_has_empty_kwargs() {
    let input = r#"
node Doc {
title: String
embedding: Vector(3) @embed("title")
}
"#;
    let schema = parse_schema(input).unwrap();

    let ann = match &schema.declarations[0] {
        SchemaDecl::Node(n) => &n.properties[1].annotations[0],
        _ => panic!("expected Node"),
    };

    assert!(ann.kwargs.is_empty());

    // Empty kwargs must NOT serialize, so existing schemas' IR JSON (and
    // thus the schema hash) stay byte-identical after this field is added.
    let json = serde_json::to_string(ann).unwrap();
    assert!(!json.contains("kwargs"), "unexpected kwargs in {json}");
}
|
||||
|
||||
/// Any `@embed` keyword other than `model` must fail schema parsing with the
/// "only 'model' is supported" message.
#[test]
fn test_parse_embed_annotation_rejects_unknown_kwarg() {
    let input = r#"
node Doc {
title: String
embedding: Vector(3) @embed("title", provider="openai")
}
"#;
    let message = parse_schema(input).unwrap_err().to_string();
    assert!(
        message.contains("only 'model' is supported"),
        "got: {message}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_edge_no_body() {
|
||||
let input = "edge WorksAt: Person -> Company\n";
|
||||
|
|
|
|||
|
|
@ -42,8 +42,10 @@ enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ }
|
|||
base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }
|
||||
|
||||
// Annotation rule excludes constraint keywords followed by "(" — those are body_constraints
annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_args ~ ")")? }
// Argument list: exactly one positional argument, then zero or more
// comma-separated `key = literal` keyword arguments.
annotation_args = { annotation_arg ~ ("," ~ annotation_kwarg)* }
annotation_arg = { literal | ident }
annotation_kwarg = { ident ~ "=" ~ literal }
|
||||
|
||||
literal = { string_lit | float_lit | integer | bool_lit }
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]
|
|||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.7.0" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.7.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.7.0" }
|
||||
omnigraph-api-types = { path = "../omnigraph-api-types", version = "0.7.0" }
|
||||
omnigraph-cluster = { path = "../omnigraph-cluster", version = "0.7.0" }
|
||||
axum = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -1,14 +1,15 @@
|
|||
//! Server-level concurrent HTTP benchmark for MR-686 (PR 0 baseline).
|
||||
//!
|
||||
//! Drives concurrent `/change` requests against an in-process Omnigraph HTTP
//! server. Originally written to measure the global `Arc<RwLock<Omnigraph>>`
//! lock penalty as an MR-686 baseline; that lock has since been removed
//! (engine write APIs are `&self`, the server holds a lockless
//! `Arc<Omnigraph>`), so this now measures the concurrent write path itself
//! (per-`(table, branch)` queue contention + Lance I/O).
//!
//! Driving the HTTP server is still the right level: an engine-level bench on
//! a single handle measures Lance contention, not the server's request-path
//! concurrency.
|
||||
//!
|
||||
//! Usage:
|
||||
//! ```sh
|
||||
|
|
|
|||
|
|
@ -1,452 +1,14 @@
|
|||
use omnigraph::db::{GraphCommit, MergeOutcome, ReadTarget, SchemaApplyResult, Snapshot};
|
||||
use omnigraph::error::{MergeConflict, MergeConflictKind};
|
||||
use omnigraph::loader::{LoadMode, LoadResult};
|
||||
//! HTTP wire DTOs. The types and their engine-result -> DTO mappings live
|
||||
//! in the shared `omnigraph-api-types` crate (RFC-009 Phase 2) so the CLI
|
||||
//! and server share one definition; re-exported here so every
|
||||
//! `omnigraph_server::api::*` path (handlers, the OpenApi schema list,
|
||||
//! CLI imports) keeps resolving unchanged. Only `query_catalog_entry`
|
||||
//! stays — it maps the server's runtime `StoredQuery` (not a wire type)
|
||||
//! into the shared `QueryCatalogEntry` DTO.
|
||||
|
||||
pub use omnigraph_api_types::*;
|
||||
|
||||
use crate::queries::StoredQuery;
|
||||
use omnigraph_compiler::SchemaMigrationStep;
|
||||
use omnigraph_compiler::query::ast::Param;
|
||||
use omnigraph_compiler::result::QueryResult;
|
||||
use omnigraph_compiler::types::{PropType, ScalarType};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use utoipa::{IntoParams, ToSchema};
|
||||
|
||||
/// Shadow enum for documenting [`LoadMode`] in the OpenAPI schema.
|
||||
#[derive(ToSchema)]
|
||||
#[schema(as = LoadMode)]
|
||||
#[allow(dead_code)]
|
||||
enum LoadModeSchema {
|
||||
/// Overwrite existing data.
|
||||
#[schema(rename = "overwrite")]
|
||||
Overwrite,
|
||||
/// Append to existing data.
|
||||
#[schema(rename = "append")]
|
||||
Append,
|
||||
/// Merge by id key (upsert).
|
||||
#[schema(rename = "merge")]
|
||||
Merge,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SnapshotTableOutput {
|
||||
pub table_key: String,
|
||||
pub table_path: String,
|
||||
pub table_version: u64,
|
||||
pub table_branch: Option<String>,
|
||||
pub row_count: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SnapshotOutput {
|
||||
pub branch: String,
|
||||
pub manifest_version: u64,
|
||||
pub tables: Vec<SnapshotTableOutput>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchCreateRequest {
|
||||
/// Parent branch to fork from. Defaults to `main`.
|
||||
pub from: Option<String>,
|
||||
/// Name of the new branch. Must not already exist.
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchCreateOutput {
|
||||
pub uri: String,
|
||||
pub from: String,
|
||||
pub name: String,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchListOutput {
|
||||
pub branches: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchDeleteOutput {
|
||||
pub uri: String,
|
||||
pub name: String,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchMergeRequest {
|
||||
/// Source branch whose commits will be merged.
|
||||
pub source: String,
|
||||
/// Target branch that will receive the merge. Defaults to `main`.
|
||||
pub target: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum BranchMergeOutcome {
|
||||
AlreadyUpToDate,
|
||||
FastForward,
|
||||
Merged,
|
||||
}
|
||||
|
||||
impl From<MergeOutcome> for BranchMergeOutcome {
|
||||
fn from(value: MergeOutcome) -> Self {
|
||||
match value {
|
||||
MergeOutcome::AlreadyUpToDate => Self::AlreadyUpToDate,
|
||||
MergeOutcome::FastForward => Self::FastForward,
|
||||
MergeOutcome::Merged => Self::Merged,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BranchMergeOutcome {
|
||||
pub fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::AlreadyUpToDate => "already_up_to_date",
|
||||
Self::FastForward => "fast_forward",
|
||||
Self::Merged => "merged",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct BranchMergeOutput {
|
||||
pub source: String,
|
||||
pub target: String,
|
||||
pub outcome: BranchMergeOutcome,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum MergeConflictKindOutput {
|
||||
DivergentInsert,
|
||||
DivergentUpdate,
|
||||
DeleteVsUpdate,
|
||||
OrphanEdge,
|
||||
UniqueViolation,
|
||||
CardinalityViolation,
|
||||
ValueConstraintViolation,
|
||||
}
|
||||
|
||||
impl MergeConflictKindOutput {
|
||||
pub fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::DivergentInsert => "divergent_insert",
|
||||
Self::DivergentUpdate => "divergent_update",
|
||||
Self::DeleteVsUpdate => "delete_vs_update",
|
||||
Self::OrphanEdge => "orphan_edge",
|
||||
Self::UniqueViolation => "unique_violation",
|
||||
Self::CardinalityViolation => "cardinality_violation",
|
||||
Self::ValueConstraintViolation => "value_constraint_violation",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<MergeConflictKind> for MergeConflictKindOutput {
|
||||
fn from(value: MergeConflictKind) -> Self {
|
||||
match value {
|
||||
MergeConflictKind::DivergentInsert => Self::DivergentInsert,
|
||||
MergeConflictKind::DivergentUpdate => Self::DivergentUpdate,
|
||||
MergeConflictKind::DeleteVsUpdate => Self::DeleteVsUpdate,
|
||||
MergeConflictKind::OrphanEdge => Self::OrphanEdge,
|
||||
MergeConflictKind::UniqueViolation => Self::UniqueViolation,
|
||||
MergeConflictKind::CardinalityViolation => Self::CardinalityViolation,
|
||||
MergeConflictKind::ValueConstraintViolation => Self::ValueConstraintViolation,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct MergeConflictOutput {
|
||||
pub table_key: String,
|
||||
pub row_id: Option<String>,
|
||||
pub kind: MergeConflictKindOutput,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
impl From<&MergeConflict> for MergeConflictOutput {
|
||||
fn from(value: &MergeConflict) -> Self {
|
||||
Self {
|
||||
table_key: value.table_key.clone(),
|
||||
row_id: value.row_id.clone(),
|
||||
kind: value.kind.into(),
|
||||
message: value.message.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ReadTargetOutput {
|
||||
pub branch: Option<String>,
|
||||
pub snapshot: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ReadOutput {
|
||||
pub query_name: String,
|
||||
pub target: ReadTargetOutput,
|
||||
pub row_count: usize,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub columns: Vec<String>,
|
||||
pub rows: Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ChangeOutput {
|
||||
pub branch: String,
|
||||
pub query_name: String,
|
||||
pub affected_nodes: usize,
|
||||
pub affected_edges: usize,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct IngestTableOutput {
|
||||
pub table_key: String,
|
||||
pub rows_loaded: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct IngestOutput {
|
||||
pub uri: String,
|
||||
pub branch: String,
|
||||
/// Base branch a fork was requested from (the request's `from`), echoed
|
||||
/// even when the branch already existed. `null` when `from` was absent.
|
||||
pub base_branch: Option<String>,
|
||||
pub branch_created: bool,
|
||||
#[schema(value_type = LoadModeSchema)]
|
||||
pub mode: LoadMode,
|
||||
pub tables: Vec<IngestTableOutput>,
|
||||
pub actor_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct CommitOutput {
|
||||
pub graph_commit_id: String,
|
||||
pub manifest_branch: Option<String>,
|
||||
pub manifest_version: u64,
|
||||
pub parent_commit_id: Option<String>,
|
||||
pub merged_parent_commit_id: Option<String>,
|
||||
pub actor_id: Option<String>,
|
||||
/// Commit creation time as Unix epoch microseconds.
|
||||
#[schema(example = 1714000000000000i64)]
|
||||
pub created_at: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct CommitListOutput {
|
||||
pub commits: Vec<CommitOutput>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ReadRequest {
|
||||
/// GQ query source. May declare one or more named queries; pick one with
|
||||
/// `query_name` if there is more than one.
|
||||
#[schema(
|
||||
example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}"
|
||||
)]
|
||||
pub query_source: String,
|
||||
/// Name of the query to run when `query_source` declares multiple. Optional
|
||||
/// when only one query is declared.
|
||||
pub query_name: Option<String>,
|
||||
/// JSON object whose keys match the query's declared parameters.
|
||||
pub params: Option<Value>,
|
||||
/// Branch to read from. Mutually exclusive with `snapshot`. Defaults to `main`.
|
||||
pub branch: Option<String>,
|
||||
/// Snapshot id to read from. Mutually exclusive with `branch`.
|
||||
pub snapshot: Option<String>,
|
||||
}
|
||||
|
||||
/// Inline read-query request for `POST /query`.
|
||||
///
|
||||
/// Friendlier-named alternative to [`ReadRequest`] for ad-hoc reads and
|
||||
/// AI-agent integration. Mutations are rejected with 400 — use `POST
|
||||
/// /mutate` (or its deprecated alias `POST /change`) for write queries.
|
||||
/// Field names are deliberately short (`query`, `name`) to match the GQ
|
||||
/// keyword and the CLI `-e` flag.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct QueryRequest {
|
||||
/// GQ read-query source. May declare one or more named queries; pick one
|
||||
/// with `name` when more than one is declared. Mutations
|
||||
/// (`insert`/`update`/`delete`) get 400 — use `POST /mutate` (or its
|
||||
/// deprecated alias `POST /change`) instead.
|
||||
#[schema(example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}")]
|
||||
pub query: String,
|
||||
/// Name of the query to run when `query` declares multiple. Optional when
|
||||
/// only one query is declared.
|
||||
pub name: Option<String>,
|
||||
/// JSON object whose keys match the query's declared parameters.
|
||||
pub params: Option<Value>,
|
||||
/// Branch to read from. Mutually exclusive with `snapshot`. Defaults to `main`.
|
||||
pub branch: Option<String>,
|
||||
/// Snapshot id to read from. Mutually exclusive with `branch`.
|
||||
pub snapshot: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ChangeRequest {
|
||||
/// GQ mutation source containing `insert`, `update`, or `delete` statements.
|
||||
/// May declare multiple named mutations; pick one with `name`.
|
||||
///
|
||||
/// Accepts the legacy field name `query_source` as a deserialization alias.
|
||||
#[schema(
|
||||
example = "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}"
|
||||
)]
|
||||
#[serde(alias = "query_source")]
|
||||
pub query: String,
|
||||
/// Name of the mutation to run when `query` declares multiple.
|
||||
///
|
||||
/// Accepts the legacy field name `query_name` as a deserialization alias.
|
||||
#[serde(default, alias = "query_name")]
|
||||
pub name: Option<String>,
|
||||
/// JSON object whose keys match the mutation's declared parameters.
|
||||
#[serde(default)]
|
||||
pub params: Option<Value>,
|
||||
/// Target branch. Defaults to `main`.
|
||||
#[serde(default)]
|
||||
pub branch: Option<String>,
|
||||
}
|
||||
|
||||
/// Body for `POST /queries/{name}` — invokes the server-side stored query
|
||||
/// named in the path. The query source and name come from the registry,
|
||||
/// never the body; only the runtime inputs are supplied here.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
pub struct InvokeStoredQueryRequest {
|
||||
/// JSON object whose keys match the stored query's declared parameters.
|
||||
#[serde(default)]
|
||||
pub params: Option<Value>,
|
||||
/// Branch to run against. Defaults to `main`; for a stored mutation the
|
||||
/// write targets this branch.
|
||||
#[serde(default)]
|
||||
pub branch: Option<String>,
|
||||
/// Snapshot id to read from (read queries only — rejected for a stored
|
||||
/// mutation). Mutually exclusive with `branch`.
|
||||
#[serde(default)]
|
||||
pub snapshot: Option<String>,
|
||||
}
|
||||
|
||||
/// Response for `POST /queries/{name}`: the read envelope for a stored
|
||||
/// read, or the mutation envelope for a stored mutation. Serialized
|
||||
/// **untagged**, so the wire shape is exactly [`ReadOutput`] or
|
||||
/// [`ChangeOutput`] — classification follows the stored query, not a
|
||||
/// wrapper field.
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
#[serde(untagged)]
|
||||
pub enum InvokeStoredQueryResponse {
|
||||
Read(ReadOutput),
|
||||
Change(ChangeOutput),
|
||||
}
|
||||
|
||||
/// The kind of a stored-query parameter, decomposed so a client (e.g. an
|
||||
/// MCP server) can build a typed input schema with a closed `match` and
|
||||
/// never re-parse omnigraph's type spelling. `bigint`/`date`/`datetime`/
|
||||
/// `blob` are carried as JSON strings on the wire: a 64-bit integer past
|
||||
/// 2^53 loses precision as a JSON number, and Date/DateTime are ISO
|
||||
/// strings, Blob a blob-URI string.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ParamKind {
|
||||
String,
|
||||
Bool,
|
||||
Int,
|
||||
#[serde(rename = "bigint")]
|
||||
BigInt,
|
||||
Float,
|
||||
Date,
|
||||
#[serde(rename = "datetime")]
|
||||
DateTime,
|
||||
Blob,
|
||||
Vector,
|
||||
List,
|
||||
}
|
||||
|
||||
/// One declared parameter of a stored query, projected for the catalog.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ParamDescriptor {
|
||||
pub name: String,
|
||||
pub kind: ParamKind,
|
||||
/// Element kind when `kind == list` (always a scalar — the grammar
|
||||
/// forbids lists of vectors or nested lists).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub item_kind: Option<ParamKind>,
|
||||
/// Dimension when `kind == vector`.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub vector_dim: Option<u32>,
|
||||
/// `false` → the caller must supply it; `true` → optional.
|
||||
pub nullable: bool,
|
||||
}
|
||||
|
||||
/// One entry in the stored-query catalog (`GET /queries`).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct QueryCatalogEntry {
|
||||
/// Registry key / invoke path segment (`POST /queries/{name}`).
|
||||
pub name: String,
|
||||
/// MCP tool id (the `tool_name` override, else `name`).
|
||||
pub tool_name: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub description: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub instruction: Option<String>,
|
||||
/// `true` for a stored mutation → an MCP read-only hint of `false`.
|
||||
pub mutation: bool,
|
||||
pub params: Vec<ParamDescriptor>,
|
||||
}
|
||||
|
||||
/// Response for `GET /queries`: the `mcp.expose` subset of a graph's
|
||||
/// stored-query registry, each with typed parameters.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct QueriesCatalogOutput {
|
||||
pub queries: Vec<QueryCatalogEntry>,
|
||||
}
|
||||
|
||||
/// Total map from a resolved scalar to its catalog kind. Exhaustive on
|
||||
/// purpose: a new `ScalarType` is a compile error here until catalogued.
|
||||
fn scalar_kind(scalar: ScalarType) -> ParamKind {
|
||||
match scalar {
|
||||
ScalarType::String => ParamKind::String,
|
||||
ScalarType::Bool => ParamKind::Bool,
|
||||
ScalarType::I32 | ScalarType::U32 => ParamKind::Int,
|
||||
ScalarType::I64 | ScalarType::U64 => ParamKind::BigInt,
|
||||
ScalarType::F32 | ScalarType::F64 => ParamKind::Float,
|
||||
ScalarType::Date => ParamKind::Date,
|
||||
ScalarType::DateTime => ParamKind::DateTime,
|
||||
ScalarType::Blob => ParamKind::Blob,
|
||||
ScalarType::Vector(_) => ParamKind::Vector,
|
||||
}
|
||||
}
|
||||
|
||||
fn param_descriptor(param: &Param) -> ParamDescriptor {
|
||||
match PropType::from_param_type_name(¶m.type_name, param.nullable) {
|
||||
Some(pt) if pt.list => ParamDescriptor {
|
||||
name: param.name.clone(),
|
||||
kind: ParamKind::List,
|
||||
item_kind: Some(scalar_kind(pt.scalar)),
|
||||
vector_dim: None,
|
||||
nullable: param.nullable,
|
||||
},
|
||||
Some(pt) => {
|
||||
let (kind, vector_dim) = match pt.scalar {
|
||||
ScalarType::Vector(dim) => (ParamKind::Vector, Some(dim)),
|
||||
other => (scalar_kind(other), None),
|
||||
};
|
||||
ParamDescriptor {
|
||||
name: param.name.clone(),
|
||||
kind,
|
||||
item_kind: None,
|
||||
vector_dim,
|
||||
nullable: param.nullable,
|
||||
}
|
||||
}
|
||||
// Unreachable for a parsed query (every declared param type is
|
||||
// grammatical); fall back to an opaque string so the field is still
|
||||
// usable rather than dropped.
|
||||
None => ParamDescriptor {
|
||||
name: param.name.clone(),
|
||||
kind: ParamKind::String,
|
||||
item_kind: None,
|
||||
vector_dim: None,
|
||||
nullable: param.nullable,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Project a loaded stored query into its catalog entry (typed params,
|
||||
/// MCP tool name, read/mutate flag, description/instruction).
|
||||
|
|
@ -460,246 +22,3 @@ pub fn query_catalog_entry(query: &StoredQuery) -> QueryCatalogEntry {
|
|||
params: query.decl.params.iter().map(param_descriptor).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SchemaApplyRequest {
|
||||
/// Project schema in `.pg` source form. The diff against the current
|
||||
/// schema produces the migration steps that will be applied.
|
||||
#[schema(
|
||||
example = "node Person {\n name: String @key\n age: I32?\n}\n\nedge Knows: Person -> Person"
|
||||
)]
|
||||
pub schema_source: String,
|
||||
/// When true, promote every `DropMode::Soft` step in the plan to
|
||||
/// `DropMode::Hard`, making the prior column data unreachable
|
||||
/// after the apply. Matches the CLI's `--allow-data-loss` flag.
|
||||
/// Defaults to `false` (drops remain reversible via time travel).
|
||||
#[serde(default)]
|
||||
pub allow_data_loss: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SchemaApplyOutput {
|
||||
pub uri: String,
|
||||
pub supported: bool,
|
||||
pub applied: bool,
|
||||
pub step_count: usize,
|
||||
pub manifest_version: u64,
|
||||
#[schema(value_type = Vec<Value>)]
|
||||
pub steps: Vec<SchemaMigrationStep>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct SchemaOutput {
|
||||
pub schema_source: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct IngestRequest {
|
||||
/// Target branch. Defaults to `main`. Without `from`, the branch must
|
||||
/// already exist — a missing branch is a 404, never an implicit fork.
|
||||
pub branch: Option<String>,
|
||||
/// Parent branch used to create `branch` if it does not exist. Branch
|
||||
/// creation is opt-in by presence of this field; omit it to require an
|
||||
/// existing branch.
|
||||
pub from: Option<String>,
|
||||
/// How existing rows are handled. Defaults to `merge`.
|
||||
#[schema(value_type = Option<LoadModeSchema>)]
|
||||
pub mode: Option<LoadMode>,
|
||||
/// NDJSON payload: one record per line, each shaped
|
||||
/// `{"type": "<TypeName>", "data": {...}}`.
|
||||
#[schema(
|
||||
example = "{\"type\": \"Person\", \"data\": {\"name\": \"Alice\", \"age\": 30}}\n{\"type\": \"Person\", \"data\": {\"name\": \"Bob\", \"age\": 25}}"
|
||||
)]
|
||||
pub data: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ExportRequest {
|
||||
/// Branch to export. Defaults to `main`.
|
||||
pub branch: Option<String>,
|
||||
/// Restrict the export to these node/edge type names. Empty exports all types.
|
||||
#[serde(default)]
|
||||
pub type_names: Vec<String>,
|
||||
/// Restrict the export to these table keys. Empty exports all tables.
|
||||
#[serde(default)]
|
||||
pub table_keys: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, IntoParams)]
|
||||
pub struct SnapshotQuery {
|
||||
pub branch: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, IntoParams)]
|
||||
pub struct CommitListQuery {
|
||||
pub branch: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct HealthOutput {
|
||||
pub status: String,
|
||||
pub version: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_version: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ErrorCode {
|
||||
Unauthorized,
|
||||
Forbidden,
|
||||
BadRequest,
|
||||
NotFound,
|
||||
/// 405 Method Not Allowed — the route exists but the active server
|
||||
/// mode doesn't serve this method (e.g. `GET /graphs` in single-graph
|
||||
/// mode). Distinct from 404 so clients can tell "wrong context" from
|
||||
/// "no such resource."
|
||||
MethodNotAllowed,
|
||||
Conflict,
|
||||
/// 429 Too Many Requests — per-actor admission cap exceeded.
|
||||
/// Clients should respect the `Retry-After` header.
|
||||
TooManyRequests,
|
||||
Internal,
|
||||
}
|
||||
|
||||
/// Structured details for a publisher-level OCC failure. Surfaces alongside
|
||||
/// HTTP 409 when a write was rejected because the caller's pre-write view of
|
||||
/// one table's manifest version was stale relative to the current head. The
|
||||
/// expected/actual fields tell the client which table to refresh.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ManifestConflictOutput {
|
||||
pub table_key: String,
|
||||
pub expected: u64,
|
||||
pub actual: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ErrorOutput {
|
||||
pub error: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub code: Option<ErrorCode>,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub merge_conflicts: Vec<MergeConflictOutput>,
|
||||
/// Set when the conflict is a publisher CAS rejection
|
||||
/// (`ManifestConflictDetails::ExpectedVersionMismatch`). The caller's
|
||||
/// pre-write view of `table_key` was at version `expected` but the
|
||||
/// manifest is now at `actual`. Refresh and retry.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub manifest_conflict: Option<ManifestConflictOutput>,
|
||||
}
|
||||
|
||||
pub fn snapshot_payload(branch: &str, snapshot: &Snapshot) -> SnapshotOutput {
|
||||
let mut entries: Vec<_> = snapshot.entries().cloned().collect();
|
||||
entries.sort_by(|a, b| a.table_key.cmp(&b.table_key));
|
||||
let tables = entries
|
||||
.iter()
|
||||
.map(|entry| SnapshotTableOutput {
|
||||
table_key: entry.table_key.clone(),
|
||||
table_path: entry.table_path.clone(),
|
||||
table_version: entry.table_version,
|
||||
table_branch: entry.table_branch.clone(),
|
||||
row_count: entry.row_count,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
SnapshotOutput {
|
||||
branch: branch.to_string(),
|
||||
manifest_version: snapshot.version(),
|
||||
tables,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn schema_apply_output(uri: &str, result: SchemaApplyResult) -> SchemaApplyOutput {
|
||||
SchemaApplyOutput {
|
||||
uri: uri.to_string(),
|
||||
supported: result.supported,
|
||||
applied: result.applied,
|
||||
step_count: result.steps.len(),
|
||||
manifest_version: result.manifest_version,
|
||||
steps: result.steps,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn commit_output(commit: &GraphCommit) -> CommitOutput {
|
||||
CommitOutput {
|
||||
graph_commit_id: commit.graph_commit_id.clone(),
|
||||
manifest_branch: commit.manifest_branch.clone(),
|
||||
manifest_version: commit.manifest_version,
|
||||
parent_commit_id: commit.parent_commit_id.clone(),
|
||||
merged_parent_commit_id: commit.merged_parent_commit_id.clone(),
|
||||
actor_id: commit.actor_id.clone(),
|
||||
created_at: commit.created_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_output(query_name: String, target: &ReadTarget, result: QueryResult) -> ReadOutput {
|
||||
let columns = result
|
||||
.schema()
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|field| field.name().clone())
|
||||
.collect();
|
||||
ReadOutput {
|
||||
query_name,
|
||||
target: read_target_output(target),
|
||||
row_count: result.num_rows(),
|
||||
columns,
|
||||
rows: result.to_rust_json(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ingest_output(
|
||||
uri: &str,
|
||||
result: &LoadResult,
|
||||
mode: LoadMode,
|
||||
actor_id: Option<String>,
|
||||
) -> IngestOutput {
|
||||
IngestOutput {
|
||||
uri: uri.to_string(),
|
||||
branch: result.branch.clone(),
|
||||
base_branch: result.base_branch.clone(),
|
||||
branch_created: result.branch_created,
|
||||
mode,
|
||||
tables: result
|
||||
.to_ingest_tables()
|
||||
.into_iter()
|
||||
.map(|table| IngestTableOutput {
|
||||
table_key: table.table_key,
|
||||
rows_loaded: table.rows_loaded,
|
||||
})
|
||||
.collect(),
|
||||
actor_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_target_output(target: &ReadTarget) -> ReadTargetOutput {
|
||||
match target {
|
||||
ReadTarget::Branch(branch) => ReadTargetOutput {
|
||||
branch: Some(branch.clone()),
|
||||
snapshot: None,
|
||||
},
|
||||
ReadTarget::Snapshot(snapshot) => ReadTargetOutput {
|
||||
branch: None,
|
||||
snapshot: Some(snapshot.as_str().to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ─── MR-668 — management endpoint shapes ──────────────────────────────────
|
||||
|
||||
// NOTE(review): this type derives `ToSchema`; utoipa is documented to lift
// `///` text into the generated schema description, so the doc comment below
// is presumably part of the published OpenAPI document — confirm, and
// regenerate the committed `openapi.json` if it is ever reworded.
/// One entry in the response from `GET /graphs`. Cluster operators
|
||||
/// consume this list to discover which graphs the server is currently
|
||||
/// serving. The shape is intentionally minimal — `graph_id` and `uri`
|
||||
/// are the only fields a routing client needs.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct GraphInfo {
|
||||
// Identifier of the graph; presumably the `{graph_id}` segment of the
// `/graphs/{graph_id}/...` routes — verify against the router.
pub graph_id: String,
|
||||
// Location of the graph as a URI string.
pub uri: String,
|
||||
}
|
||||
|
||||
// NOTE(review): derives `ToSchema`, so the `///` text below is presumably
// emitted into the generated OpenAPI schema — confirm before rewording it.
/// Response from `GET /graphs`. Lists every graph registered with the
|
||||
/// server in alphabetical order by `graph_id` (sorted server-side so
|
||||
/// clients get deterministic output across requests).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct GraphListResponse {
|
||||
// One `GraphInfo` per registered graph. The ordering promised above is
// applied by the code that builds this list, not by this type.
pub graphs: Vec<GraphInfo>,
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -51,25 +51,15 @@ pub(crate) async fn server_graphs_list(
|
|||
State(state): State<AppState>,
|
||||
actor: Option<Extension<ResolvedActor>>,
|
||||
) -> std::result::Result<Json<GraphListResponse>, ApiError> {
|
||||
// 405 in single mode — there's no registry to enumerate, and the
|
||||
// legacy URL surface didn't expose this endpoint.
|
||||
let registry = match state.routing() {
|
||||
GraphRouting::Single { .. } => {
|
||||
return Err(ApiError::method_not_allowed(
|
||||
"GET /graphs is only available in multi-graph mode",
|
||||
));
|
||||
}
|
||||
GraphRouting::Multi { registry, .. } => registry,
|
||||
};
|
||||
let registry = &state.routing().registry;
|
||||
|
||||
// Server-level Cedar gate. `state.server_policy` is loaded from
|
||||
// `server.policy.file` in `omnigraph.yaml` at startup. When no
|
||||
// server policy is configured, `authorize_request_server` falls
|
||||
// through to the MR-723 default-deny semantics (every non-Read
|
||||
// action denied for an authenticated actor). `GraphList` is not
|
||||
// `Read`, so without a server policy the request gets 403 — which
|
||||
// is the right default (don't leak the registry until the operator
|
||||
// explicitly authorizes it).
|
||||
// Server-level Cedar gate. `state.server_policy` is loaded from the
|
||||
// cluster-scoped policy bundle at startup. When no server policy is
|
||||
// configured, `authorize_request_server` falls through to the MR-723
|
||||
// default-deny semantics (every non-Read action denied for an
|
||||
// authenticated actor). `GraphList` is not `Read`, so without a server
|
||||
// policy the request gets 403 — which is the right default (don't leak
|
||||
// the registry until the operator explicitly authorizes it).
|
||||
authorize_request(
|
||||
actor.as_ref().map(|Extension(actor)| actor),
|
||||
state.server_policy.as_deref(),
|
||||
|
|
@ -93,17 +83,15 @@ pub(crate) async fn server_graphs_list(
|
|||
}
|
||||
|
||||
pub(crate) async fn server_openapi(State(state): State<AppState>) -> Json<utoipa::openapi::OpenApi> {
|
||||
let mut doc = ApiDoc::openapi();
|
||||
// `served_openapi` is the single nesting source — the protected
|
||||
// routes always live under `/graphs/{graph_id}/...` (public/management
|
||||
// paths `/healthz`, `/graphs` stay flat). Building from it here means
|
||||
// the runtime spec and the committed `openapi.json` share one nesting
|
||||
// pass and can't drift.
|
||||
let mut doc = crate::served_openapi();
|
||||
if !state.requires_bearer_auth() {
|
||||
strip_security(&mut doc);
|
||||
}
|
||||
// MR-668: in multi mode, the protected routes live under
|
||||
// `/graphs/{graph_id}/...`. Rewrite the doc so the spec matches
|
||||
// the routes the router actually serves. Public paths (`/healthz`)
|
||||
// stay flat in both modes.
|
||||
if matches!(state.routing(), GraphRouting::Multi { .. }) {
|
||||
nest_paths_under_cluster_prefix(&mut doc);
|
||||
}
|
||||
Json(doc)
|
||||
}
|
||||
|
||||
|
|
@ -248,16 +236,11 @@ pub(crate) async fn require_bearer_auth(
|
|||
Ok(next.run(request).await)
|
||||
}
|
||||
|
||||
/// Routing middleware (MR-668). Resolves the active graph for the
|
||||
/// request and injects `Arc<GraphHandle>` as an extension so handlers can
|
||||
/// extract it via `Extension<Arc<GraphHandle>>`.
|
||||
/// Routing middleware (RFC-011 cluster-only). Resolves the active graph
|
||||
/// for the request and injects `Arc<GraphHandle>` as an extension so
|
||||
/// handlers can extract it via `Extension<Arc<GraphHandle>>`.
|
||||
///
|
||||
/// **Single mode**: the routing field holds the single handle directly.
|
||||
/// Routes are flat; every request resolves to that handle, regardless
|
||||
/// of the URI path. No registry walk, no sentinel key, no
|
||||
/// programmer-error guard.
|
||||
///
|
||||
/// **Multi mode**: routes are nested under `/graphs/{graph_id}/...`. The
|
||||
/// Routes are always nested under `/graphs/{graph_id}/...`. The
|
||||
/// middleware extracts `{graph_id}` from the URI path and looks it up in
|
||||
/// the registry. Returns 404 if the graph is not registered.
|
||||
///
|
||||
|
|
@ -268,39 +251,33 @@ pub(crate) async fn resolve_graph_handle(
|
|||
mut request: Request,
|
||||
next: Next,
|
||||
) -> std::result::Result<Response, ApiError> {
|
||||
let handle = match &state.routing {
|
||||
GraphRouting::Single { handle } => Arc::clone(handle),
|
||||
GraphRouting::Multi { registry, .. } => {
|
||||
// `Router::nest("/graphs/{graph_id}", inner)` rewrites
|
||||
// `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
|
||||
// The pre-rewrite URI is preserved in the `OriginalUri`
|
||||
// request extension by axum's router; we read from there to
|
||||
// extract `{graph_id}`. Fall back to the current URI only if
|
||||
// the extension is missing, which shouldn't happen for
|
||||
// nested routes but is safe defensive code.
|
||||
let original_path: String = request
|
||||
.extensions()
|
||||
.get::<OriginalUri>()
|
||||
.map(|OriginalUri(uri)| uri.path().to_string())
|
||||
.unwrap_or_else(|| request.uri().path().to_string());
|
||||
let graph_id_str = original_path
|
||||
.strip_prefix("/graphs/")
|
||||
.and_then(|rest| rest.split('/').next())
|
||||
.filter(|s| !s.is_empty())
|
||||
.ok_or_else(|| {
|
||||
ApiError::bad_request(
|
||||
"cluster route missing /graphs/{graph_id} prefix".to_string(),
|
||||
)
|
||||
})?;
|
||||
let graph_id = GraphId::try_from(graph_id_str.to_string())
|
||||
.map_err(|err| ApiError::bad_request(err.to_string()))?;
|
||||
let key = GraphKey::cluster(graph_id.clone());
|
||||
match registry.get(&key) {
|
||||
RegistryLookup::Ready(handle) => handle,
|
||||
RegistryLookup::Gone => {
|
||||
return Err(ApiError::not_found(format!("graph '{graph_id}' not found")));
|
||||
}
|
||||
}
|
||||
let registry = &state.routing.registry;
|
||||
// `Router::nest("/graphs/{graph_id}", inner)` rewrites
|
||||
// `request.uri().path()` to the inner suffix (e.g. `/snapshot`).
|
||||
// The pre-rewrite URI is preserved in the `OriginalUri`
|
||||
// request extension by axum's router; we read from there to
|
||||
// extract `{graph_id}`. Fall back to the current URI only if
|
||||
// the extension is missing, which shouldn't happen for
|
||||
// nested routes but is safe defensive code.
|
||||
let original_path: String = request
|
||||
.extensions()
|
||||
.get::<OriginalUri>()
|
||||
.map(|OriginalUri(uri)| uri.path().to_string())
|
||||
.unwrap_or_else(|| request.uri().path().to_string());
|
||||
let graph_id_str = original_path
|
||||
.strip_prefix("/graphs/")
|
||||
.and_then(|rest| rest.split('/').next())
|
||||
.filter(|s| !s.is_empty())
|
||||
.ok_or_else(|| {
|
||||
ApiError::bad_request("cluster route missing /graphs/{graph_id} prefix".to_string())
|
||||
})?;
|
||||
let graph_id = GraphId::try_from(graph_id_str.to_string())
|
||||
.map_err(|err| ApiError::bad_request(err.to_string()))?;
|
||||
let key = GraphKey::cluster(graph_id.clone());
|
||||
let handle = match registry.get(&key) {
|
||||
RegistryLookup::Ready(handle) => handle,
|
||||
RegistryLookup::Gone => {
|
||||
return Err(ApiError::not_found(format!("graph '{graph_id}' not found")));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -382,22 +359,25 @@ pub(crate) fn authorize(
|
|||
// runtime state means the docstring contract on
|
||||
// `server_graphs_list` ("don't leak the registry until the
|
||||
// operator explicitly authorizes it") holds uniformly; the
|
||||
// operator's only path to enabling it is configuring an
|
||||
// explicit `server.policy.file` in omnigraph.yaml.
|
||||
// operator's only path to enabling it is configuring a
|
||||
// cluster-scoped policy bundle, applying the cluster, and
|
||||
// restarting the server.
|
||||
if request.action.resource_kind() == PolicyResourceKind::Server {
|
||||
return Ok(Authz::Denied(
|
||||
"server-scoped actions require an explicit `server.policy.file` \
|
||||
configured in omnigraph.yaml — the management surface is closed \
|
||||
by default in every runtime state, including --unauthenticated, \
|
||||
so that server topology is never exposed without operator opt-in."
|
||||
"server-scoped actions require an explicit cluster policy bundle \
|
||||
applied with `omnigraph cluster apply` and served after restart — \
|
||||
the management surface is closed by default in every runtime state, \
|
||||
including --unauthenticated, so that server topology is never exposed \
|
||||
without operator opt-in."
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
if actor.is_some() && request.action != PolicyAction::Read {
|
||||
return Ok(Authz::Denied(
|
||||
"server runs in default-deny mode (bearer tokens configured but no \
|
||||
policy file). Only `read` actions are permitted; configure \
|
||||
`policy.file` in omnigraph.yaml to enable other actions."
|
||||
applied policy bundle). Only `read` actions are permitted; configure \
|
||||
a graph or cluster policy bundle in the cluster config, run \
|
||||
`omnigraph cluster apply`, and restart the server to enable other actions."
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
|
|
@ -510,7 +490,7 @@ pub(crate) fn deprecation_headers(successor_link: &'static str) -> [(HeaderName,
|
|||
operation_id = "read",
|
||||
request_body = ReadRequest,
|
||||
responses(
|
||||
(status = 200, description = "Query results (response includes `Deprecation: true` + `Link: </query>; rel=\"successor-version\"`)", body = ReadOutput),
|
||||
(status = 200, description = "Query results (response includes `Deprecation: true` + `Link: <query>; rel=\"successor-version\"`)", body = ReadOutput),
|
||||
(status = 400, description = "Bad request", body = ErrorOutput),
|
||||
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||
(status = 403, description = "Forbidden", body = ErrorOutput),
|
||||
|
|
@ -524,7 +504,7 @@ pub(crate) fn deprecation_headers(successor_link: &'static str) -> [(HeaderName,
|
|||
/// route is kept indefinitely for byte-stable back-compat. New integrations
|
||||
/// should target `POST /query`, which has clean field names (`query` /
|
||||
/// `name`) and a 400-on-mutation guard. Responses from this route include
|
||||
/// `Deprecation: true` and `Link: </query>; rel="successor-version"`
|
||||
/// `Deprecation: true` and `Link: <query>; rel="successor-version"`
|
||||
/// headers per RFC 9745 / RFC 8288 so SDKs and proxies can surface the
|
||||
/// signal.
|
||||
pub(crate) async fn server_read(
|
||||
|
|
@ -544,7 +524,7 @@ pub(crate) async fn server_read(
|
|||
)
|
||||
.await?;
|
||||
Ok((
|
||||
deprecation_headers("</query>; rel=\"successor-version\""),
|
||||
deprecation_headers("<query>; rel=\"successor-version\""),
|
||||
Json(api::read_output(selected_name, &target, result)),
|
||||
))
|
||||
}
|
||||
|
|
@ -793,7 +773,7 @@ pub(crate) async fn run_query(
|
|||
operation_id = "change",
|
||||
request_body = ChangeRequest,
|
||||
responses(
|
||||
(status = 200, description = "Mutation results (response includes `Deprecation: true` + `Link: </mutate>; rel=\"successor-version\"`)", body = ChangeOutput),
|
||||
(status = 200, description = "Mutation results (response includes `Deprecation: true` + `Link: <mutate>; rel=\"successor-version\"`)", body = ChangeOutput),
|
||||
(status = 400, description = "Bad request", body = ErrorOutput),
|
||||
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||
(status = 403, description = "Forbidden", body = ErrorOutput),
|
||||
|
|
@ -809,7 +789,7 @@ pub(crate) async fn run_query(
|
|||
/// kept indefinitely for back-compat. New integrations should target
|
||||
/// `POST /mutate`, which has identical semantics and a name that pairs
|
||||
/// cleanly with `POST /query`. Responses from this route include
|
||||
/// `Deprecation: true` and `Link: </mutate>; rel="successor-version"`
|
||||
/// `Deprecation: true` and `Link: <mutate>; rel="successor-version"`
|
||||
/// headers per RFC 9745 / RFC 8288 so SDKs and proxies can surface the
|
||||
/// signal.
|
||||
pub(crate) async fn server_change(
|
||||
|
|
@ -830,7 +810,7 @@ pub(crate) async fn server_change(
|
|||
)
|
||||
.await?;
|
||||
Ok((
|
||||
deprecation_headers("</mutate>; rel=\"successor-version\""),
|
||||
deprecation_headers("<mutate>; rel=\"successor-version\""),
|
||||
Json(output),
|
||||
))
|
||||
}
|
||||
|
|
@ -980,6 +960,22 @@ pub(crate) async fn server_invoke_query(
|
|||
let query_name = stored.name.clone();
|
||||
let is_mutation = stored.is_mutation();
|
||||
|
||||
// RFC-011 D3: the CLI verb asserts the stored query's kind. `query <name>`
|
||||
// sends `expect_mutation: false`, `mutate <name>` sends `true`; a mismatch
|
||||
// is rejected here so the wrong verb errors instead of silently running.
|
||||
if let Some(expected) = req.expect_mutation {
|
||||
if expected != is_mutation {
|
||||
let (actual, verb) = if is_mutation {
|
||||
("mutation", "mutate")
|
||||
} else {
|
||||
("read", "query")
|
||||
};
|
||||
return Err(ApiError::bad_request(format!(
|
||||
"'{query_name}' is a {actual} — use omnigraph {verb} {query_name}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
graph = %handle.uri,
|
||||
actor = ?actor_ref.map(|a| a.actor_id.as_ref()),
|
||||
|
|
@ -1117,12 +1113,16 @@ pub(crate) async fn server_schema_get(
|
|||
(status = 400, description = "Bad request", body = ErrorOutput),
|
||||
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||
(status = 403, description = "Forbidden", body = ErrorOutput),
|
||||
(status = 409, description = "Schema apply is disabled for cluster-backed serving; use `omnigraph cluster apply` and restart", body = ErrorOutput),
|
||||
(status = 429, description = "Per-actor admission cap exceeded; honor `Retry-After` header", body = ErrorOutput),
|
||||
),
|
||||
security(("bearer_token" = [])),
|
||||
)]
|
||||
/// Apply a schema migration.
|
||||
///
|
||||
/// Cluster-backed servers reject this route with `409 Conflict`; operators
|
||||
/// must apply schema changes through `omnigraph cluster apply` and restart.
|
||||
///
|
||||
/// Diffs `schema_source` against the current schema and applies the resulting
|
||||
/// migration steps (add/drop type, add/drop column, etc.). **Destructive**:
|
||||
/// some steps drop data. Returns the list of steps applied; if `applied` is
|
||||
|
|
@ -1149,6 +1149,17 @@ pub(crate) async fn server_schema_apply(
|
|||
target_branch: Some("main".to_string()),
|
||||
},
|
||||
)?;
|
||||
// Disable HTTP schema apply on cluster-backed serving AFTER the Cedar gate,
|
||||
// so an unauthorized actor gets a 403 (not a 409 that would disclose the
|
||||
// server is cluster-backed): 401 → 403 → 409, never leak topology before
|
||||
// authorization. An authorized actor gets the actionable 409 signpost.
|
||||
if state.routing().config_path.is_some() {
|
||||
return Err(ApiError::conflict(
|
||||
"server-side schema apply is disabled for cluster-backed serving; \
|
||||
update the cluster config, run `omnigraph cluster apply`, and restart \
|
||||
the server.",
|
||||
));
|
||||
}
|
||||
let est_bytes = request.schema_source.len() as u64;
|
||||
let _admission = state
|
||||
.workload
|
||||
|
|
@ -1180,49 +1191,44 @@ pub(crate) async fn server_schema_apply(
|
|||
.await
|
||||
.map_err(ApiError::from_omni)?
|
||||
};
|
||||
// Prompt index convergence (iss-848): schema apply records `@index` intent
|
||||
// but defers the physical build. On a long-lived server, materialize it
|
||||
// promptly rather than waiting for the next `optimize` cron — spawned
|
||||
// detached so it never blocks or fails the apply response. Best-effort: a
|
||||
// failure is logged and the index still converges on the next optimize.
|
||||
// The CLI is one-shot, so it has no equivalent; its convergence path is the
|
||||
// operator's optimize cadence.
|
||||
if result.applied {
|
||||
let engine = Arc::clone(&handle.engine);
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = engine.ensure_indices().await {
|
||||
tracing::warn!(
|
||||
target: "omnigraph::server",
|
||||
error = %err,
|
||||
"post-apply ensure_indices failed; indexes will converge on the next optimize",
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
Ok(Json(schema_apply_output(handle.uri.as_str(), result)))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/ingest",
|
||||
tag = "mutations",
|
||||
operation_id = "ingest",
|
||||
request_body = IngestRequest,
|
||||
responses(
|
||||
(status = 200, description = "Ingest results", body = IngestOutput),
|
||||
(status = 400, description = "Bad request", body = ErrorOutput),
|
||||
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||
(status = 403, description = "Forbidden", body = ErrorOutput),
|
||||
(status = 429, description = "Per-actor admission cap exceeded; honor `Retry-After` header", body = ErrorOutput),
|
||||
),
|
||||
security(("bearer_token" = [])),
|
||||
)]
|
||||
/// Bulk-load NDJSON data into a branch.
|
||||
///
|
||||
/// `data` is NDJSON with one record per line. `mode` controls behavior on
|
||||
/// existing rows: `merge` upserts by id (default), `append` blindly inserts,
|
||||
/// `overwrite` replaces table contents. Branch creation is opt-in by
|
||||
/// presence of `from`: with `from` set, a missing `branch` is created from
|
||||
/// it; without `from`, `branch` must already exist — a missing branch is a
|
||||
/// 404, never an implicit fork. **Destructive** when `mode` is `overwrite`
|
||||
/// or when the load produces conflicting writes.
|
||||
pub(crate) async fn server_ingest(
|
||||
State(state): State<AppState>,
|
||||
Extension(handle): Extension<Arc<GraphHandle>>,
|
||||
actor: Option<Extension<ResolvedActor>>,
|
||||
Json(request): Json<IngestRequest>,
|
||||
) -> std::result::Result<Json<IngestOutput>, ApiError> {
|
||||
/// Shared body for `POST /load` (canonical) and `POST /ingest` (deprecated):
|
||||
/// branch-exists / fork-if-`from` check, Cedar authorization, admission, the
|
||||
/// bulk `load_as`, and the `IngestOutput` mapping.
|
||||
async fn run_ingest(
|
||||
state: AppState,
|
||||
handle: Arc<GraphHandle>,
|
||||
actor: Option<&ResolvedActor>,
|
||||
request: IngestRequest,
|
||||
) -> std::result::Result<IngestOutput, ApiError> {
|
||||
let branch = request.branch.unwrap_or_else(|| "main".to_string());
|
||||
let from = request.from;
|
||||
let mode = request.mode.unwrap_or(omnigraph::loader::LoadMode::Merge);
|
||||
let actor_arc = actor
|
||||
.as_ref()
|
||||
.map(|Extension(actor)| Arc::clone(&actor.actor_id))
|
||||
.map(|actor| Arc::clone(&actor.actor_id))
|
||||
.unwrap_or_else(|| Arc::<str>::from("anonymous"));
|
||||
let actor_id = actor
|
||||
.as_ref()
|
||||
.map(|Extension(actor)| actor.actor_id.as_ref());
|
||||
let actor_id = actor.map(|actor| actor.actor_id.as_ref());
|
||||
|
||||
let branch_exists = {
|
||||
let db = &handle.engine;
|
||||
|
|
@ -1244,7 +1250,7 @@ pub(crate) async fn server_ingest(
|
|||
)));
|
||||
}
|
||||
Some(from) => authorize_request(
|
||||
actor.as_ref().map(|Extension(actor)| actor),
|
||||
actor,
|
||||
handle.policy.as_deref(),
|
||||
PolicyRequest {
|
||||
action: PolicyAction::BranchCreate,
|
||||
|
|
@ -1255,7 +1261,7 @@ pub(crate) async fn server_ingest(
|
|||
}
|
||||
}
|
||||
authorize_request(
|
||||
actor.as_ref().map(|Extension(actor)| actor),
|
||||
actor,
|
||||
handle.policy.as_deref(),
|
||||
PolicyRequest {
|
||||
action: PolicyAction::Change,
|
||||
|
|
@ -1276,12 +1282,98 @@ pub(crate) async fn server_ingest(
|
|||
.map_err(ApiError::from_omni)?
|
||||
};
|
||||
|
||||
Ok(Json(ingest_output(
|
||||
Ok(ingest_output(
|
||||
handle.uri.as_str(),
|
||||
&result,
|
||||
mode,
|
||||
actor_id.map(str::to_string),
|
||||
)))
|
||||
))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/load",
|
||||
tag = "mutations",
|
||||
operation_id = "load",
|
||||
request_body = IngestRequest,
|
||||
responses(
|
||||
(status = 200, description = "Load results", body = IngestOutput),
|
||||
(status = 400, description = "Bad request", body = ErrorOutput),
|
||||
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||
(status = 403, description = "Forbidden", body = ErrorOutput),
|
||||
(status = 429, description = "Per-actor admission cap exceeded; honor `Retry-After` header", body = ErrorOutput),
|
||||
),
|
||||
security(("bearer_token" = [])),
|
||||
)]
|
||||
/// Bulk-load NDJSON data into a branch (canonical load endpoint).
|
||||
///
|
||||
/// `data` is NDJSON with one record per line. `mode` controls behavior on
|
||||
/// existing rows: `merge` upserts by id (default), `append` blindly inserts,
|
||||
/// `overwrite` replaces table contents. Branch creation is opt-in by
|
||||
/// presence of `from`: with `from` set, a missing `branch` is created from
|
||||
/// it; without `from`, `branch` must already exist — a missing branch is a
|
||||
/// 404, never an implicit fork. **Destructive** when `mode` is `overwrite`
|
||||
/// or when the load produces conflicting writes.
|
||||
///
|
||||
/// The legacy `POST /ingest` route has identical semantics and is kept as a
|
||||
/// deprecated alias.
|
||||
pub(crate) async fn server_load(
|
||||
State(state): State<AppState>,
|
||||
Extension(handle): Extension<Arc<GraphHandle>>,
|
||||
actor: Option<Extension<ResolvedActor>>,
|
||||
Json(request): Json<IngestRequest>,
|
||||
) -> std::result::Result<Json<IngestOutput>, ApiError> {
|
||||
Ok(Json(
|
||||
run_ingest(
|
||||
state,
|
||||
handle,
|
||||
actor.as_ref().map(|Extension(actor)| actor),
|
||||
request,
|
||||
)
|
||||
.await?,
|
||||
))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/ingest",
|
||||
tag = "mutations",
|
||||
operation_id = "ingest",
|
||||
request_body = IngestRequest,
|
||||
responses(
|
||||
(status = 200, description = "Load results (response includes `Deprecation: true` + `Link: <load>; rel=\"successor-version\"`)", body = IngestOutput),
|
||||
(status = 400, description = "Bad request", body = ErrorOutput),
|
||||
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||
(status = 403, description = "Forbidden", body = ErrorOutput),
|
||||
(status = 429, description = "Per-actor admission cap exceeded; honor `Retry-After` header", body = ErrorOutput),
|
||||
),
|
||||
security(("bearer_token" = [])),
|
||||
)]
|
||||
#[deprecated(note = "use POST /load instead; /ingest is kept indefinitely for back-compat")]
|
||||
/// **Deprecated** — use [`POST /load`](#tag/mutations/operation/load) instead.
|
||||
///
|
||||
/// Bulk-load NDJSON data into a branch. Behavior is unchanged; the route is
|
||||
/// kept indefinitely for back-compat. New integrations should target
|
||||
/// `POST /load`, which has identical semantics. Responses from this route
|
||||
/// include `Deprecation: true` and `Link: <load>; rel="successor-version"`
|
||||
/// headers per RFC 9745 / RFC 8288 so SDKs and proxies can surface the signal.
|
||||
pub(crate) async fn server_ingest(
|
||||
State(state): State<AppState>,
|
||||
Extension(handle): Extension<Arc<GraphHandle>>,
|
||||
actor: Option<Extension<ResolvedActor>>,
|
||||
Json(request): Json<IngestRequest>,
|
||||
) -> std::result::Result<([(HeaderName, HeaderValue); 2], Json<IngestOutput>), ApiError> {
|
||||
let output = run_ingest(
|
||||
state,
|
||||
handle,
|
||||
actor.as_ref().map(|Extension(actor)| actor),
|
||||
request,
|
||||
)
|
||||
.await?;
|
||||
Ok((
|
||||
deprecation_headers("<load>; rel=\"successor-version\""),
|
||||
Json(output),
|
||||
))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
|
|
@ -1663,4 +1755,3 @@ pub(crate) fn query_params_from_json(
|
|||
json_params_to_param_map(params_json, query_params, JsonParamMode::Standard)
|
||||
.map_err(|err| color_eyre::eyre::eyre!(err.to_string()))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
pub mod api;
|
||||
mod handlers;
|
||||
mod settings;
|
||||
pub use settings::{load_server_settings, classify_server_runtime_state, server_config_is_multi, ServerRuntimeState};
|
||||
pub use settings::{load_server_settings, classify_server_runtime_state, ServerRuntimeState};
|
||||
use settings::*;
|
||||
use handlers::*;
|
||||
pub mod auth;
|
||||
pub mod config;
|
||||
pub mod graph_id;
|
||||
pub mod identity;
|
||||
pub mod policy;
|
||||
|
|
@ -46,11 +45,6 @@ use axum::response::{IntoResponse, Response};
|
|||
use axum::routing::{delete, get, post};
|
||||
use axum::{Json, Router};
|
||||
use color_eyre::eyre::{Result, WrapErr, bail, eyre};
|
||||
pub use config::{
|
||||
AliasCommand, AliasConfig, CliDefaults, DEFAULT_CONFIG_FILE, OmnigraphConfig, PolicySettings,
|
||||
ProjectConfig, QueryDefaults, ReadOutputFormat, ServerDefaults, TableCellLayout, TargetConfig,
|
||||
graph_resource_id_for_selection, load_config,
|
||||
};
|
||||
use futures::stream;
|
||||
use omnigraph::db::{Omnigraph, ReadTarget};
|
||||
use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
|
||||
|
|
@ -107,7 +101,10 @@ fn hash_bearer_token(token: &str) -> BearerTokenHash {
|
|||
handlers::server_invoke_query,
|
||||
handlers::server_schema_apply,
|
||||
handlers::server_schema_get,
|
||||
handlers::server_ingest,
|
||||
handlers::server_load,
|
||||
// deprecated; the #[deprecated] attribute on the handler surfaces as
|
||||
// `deprecated: true` on the OpenAPI operation.
|
||||
#[allow(deprecated)] handlers::server_ingest,
|
||||
handlers::server_branch_list,
|
||||
handlers::server_branch_create,
|
||||
handlers::server_branch_delete,
|
||||
|
|
@ -119,6 +116,20 @@ fn hash_bearer_token(token: &str) -> BearerTokenHash {
|
|||
)]
|
||||
pub struct ApiDoc;
|
||||
|
||||
/// The canonical served OpenAPI shape (RFC-011 cluster-only): the static
|
||||
/// `ApiDoc` with every protected path nested under `/graphs/{graph_id}/…`
|
||||
/// and `cluster_`-prefixed operation ids. `/healthz` and `/graphs` stay
|
||||
/// flat. This is the single source of nesting — both the runtime
|
||||
/// `server_openapi` handler and the committed `openapi.json` derive from
|
||||
/// it, so the published spec can never describe routes the server does
|
||||
/// not serve. The handler additionally strips security in open mode; the
|
||||
/// committed spec retains it.
|
||||
pub fn served_openapi() -> utoipa::openapi::OpenApi {
|
||||
let mut doc = ApiDoc::openapi();
|
||||
handlers::nest_paths_under_cluster_prefix(&mut doc);
|
||||
doc
|
||||
}
|
||||
|
||||
struct SecurityAddon;
|
||||
|
||||
impl utoipa::Modify for SecurityAddon {
|
||||
|
|
@ -140,11 +151,10 @@ const SERVER_SOURCE_VERSION: Option<&str> = option_env!("OMNIGRAPH_SOURCE_VERSIO
|
|||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ServerConfig {
|
||||
/// Server topology + the graphs to open at startup. Single-mode
|
||||
/// invocations (`omnigraph-server <URI>` or `--target <name>`)
|
||||
/// produce `ServerConfigMode::Single`; multi-mode invocations
|
||||
/// (`--config omnigraph.yaml` with a non-empty `graphs:` map and
|
||||
/// no single-mode selector) produce `ServerConfigMode::Multi`.
|
||||
/// Server topology + the graphs to open at startup. RFC-011
|
||||
/// cluster-only: the server always boots from a cluster
|
||||
/// (`--cluster <dir | s3://…>`) and serves N graphs under cluster
|
||||
/// routes.
|
||||
pub mode: ServerConfigMode,
|
||||
pub bind: String,
|
||||
/// Operator opt-in for fully-unauthenticated dev mode (MR-723).
|
||||
|
|
@ -158,49 +168,33 @@ pub struct ServerConfig {
|
|||
pub allow_unauthenticated: bool,
|
||||
}
|
||||
|
||||
/// What `load_server_settings` produces after applying the four-rule
|
||||
/// mode inference matrix (MR-668 decision 2).
|
||||
/// What `load_server_settings` produces. RFC-011 cluster-only: the
|
||||
/// server always boots from a cluster's applied revision into a
|
||||
/// multi-graph deployment (N ≥ 1 graphs).
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ServerConfigMode {
|
||||
/// Legacy invocation — one graph at the given URI. Either:
|
||||
/// * `omnigraph-server <URI>` (CLI positional), or
|
||||
/// * `omnigraph-server --target <name> --config omnigraph.yaml`, or
|
||||
/// * `omnigraph-server --config omnigraph.yaml` with `server.graph`
|
||||
/// set to a named target.
|
||||
Single {
|
||||
uri: String,
|
||||
/// Cedar graph resource id for the single graph. A named selection
|
||||
/// uses the graph name; an anonymous URI uses the normalized URI to
|
||||
/// preserve legacy single-graph policy identity.
|
||||
graph_id: String,
|
||||
/// Top-level `policy.file` (single-graph Cedar policy).
|
||||
policy_file: Option<PathBuf>,
|
||||
/// Top-level stored-query registry, loaded and identity-checked
|
||||
/// at settings-build time; type-checked against the schema when
|
||||
/// the engine opens.
|
||||
queries: QueryRegistry,
|
||||
},
|
||||
/// Multi-graph invocation — `--config omnigraph.yaml` with a
|
||||
/// non-empty `graphs:` map and no single-mode selector.
|
||||
/// Cluster boot — `--cluster <dir | s3://…>` resolves the applied
|
||||
/// revision into per-graph startup configs plus an optional
|
||||
/// server-level policy.
|
||||
Multi {
|
||||
/// Per-graph startup configs, sorted by graph id (BTreeMap
|
||||
/// iteration order). The parallel-open loop iterates this.
|
||||
graphs: Vec<GraphStartupConfig>,
|
||||
/// Path to the config file the server was started from. Kept on
|
||||
/// the mode so future runtime mutation (deferred — see release
|
||||
/// notes) can locate the source of truth without re-parsing CLI
|
||||
/// args.
|
||||
/// The cluster boot source (config directory or storage root).
|
||||
/// Kept on the mode so future runtime mutation (deferred — see
|
||||
/// release notes) can locate the source of truth without
|
||||
/// re-parsing CLI args.
|
||||
config_path: PathBuf,
|
||||
/// `server.policy.file` (server-level Cedar policy for the
|
||||
/// management endpoints). Wired into `GET /graphs` authorization.
|
||||
/// Server-level Cedar policy for the management endpoints
|
||||
/// (`GET /graphs`). Wired into `GET /graphs` authorization.
|
||||
server_policy: Option<PolicySource>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Where a Cedar policy bundle comes from at startup. File-based for
|
||||
/// omnigraph.yaml deployments; inline (digest-verified catalog content)
|
||||
/// for cluster-mode boots, where the catalog may live on object storage
|
||||
/// and the server must not re-read mutable state after the snapshot.
|
||||
/// Where a Cedar policy bundle comes from at startup. Cluster-local files are
|
||||
/// used during config application; inline digest-verified catalog content is
|
||||
/// used for serving, where the catalog may live on object storage and the
|
||||
/// server must not re-read mutable state after the snapshot.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum PolicySource {
|
||||
File(PathBuf),
|
||||
|
|
@ -215,42 +209,34 @@ pub struct GraphStartupConfig {
|
|||
pub graph_id: String,
|
||||
pub uri: String,
|
||||
pub policy: Option<PolicySource>,
|
||||
/// Pre-resolved embedding config from an applied cluster provider profile.
|
||||
/// Legacy config paths leave this unset and continue to use env resolution.
|
||||
pub embedding: Option<omnigraph::embedding::EmbeddingConfig>,
|
||||
/// Per-graph stored-query registry, loaded and identity-checked at
|
||||
/// settings-build time; type-checked against the schema when this
|
||||
/// graph's engine opens.
|
||||
pub queries: QueryRegistry,
|
||||
}
|
||||
|
||||
/// Runtime routing for the server. Single mode = legacy
|
||||
/// `omnigraph-server <URI>` invocation, one graph, flat HTTP routes.
|
||||
/// Multi mode = `--config omnigraph.yaml` with a non-empty `graphs:`
|
||||
/// map, N graphs, cluster routes (`/graphs/{graph_id}/...`). Mode is
|
||||
/// determined at startup by `load_server_settings`.
|
||||
/// Runtime routing for the server (RFC-011 cluster-only). Every
|
||||
/// deployment serves cluster routes (`/graphs/{graph_id}/...`) backed by
|
||||
/// a registry of N graphs (N ≥ 1). The single-graph convenience
|
||||
/// constructors build a one-graph registry keyed by `default`; the
|
||||
/// cluster boot path builds an N-graph registry. There is no longer a
|
||||
/// flat-route mode.
|
||||
///
|
||||
/// In single mode the handle lives here directly — there is no
|
||||
/// registry, no sentinel key, no walk-and-assert. In multi mode the
|
||||
/// registry carries N handles and the middleware dispatches on the
|
||||
/// URL's `{graph_id}` segment.
|
||||
/// `config_path` is the boot source (the cluster directory or storage
|
||||
/// root); preserved here so future runtime mutation (deferred) can find
|
||||
/// the source of truth without re-parsing CLI args. The server treats
|
||||
/// the source as operator-owned and never writes it.
|
||||
///
|
||||
/// Both modes share the same handler bodies — the routing middleware
|
||||
/// All handler bodies are mode-agnostic — the routing middleware
|
||||
/// (`resolve_graph_handle`) injects `Arc<GraphHandle>` as a request
|
||||
/// extension so handlers never see the routing discriminator.
|
||||
/// extension by looking up the `{graph_id}` URL segment in the registry.
|
||||
#[derive(Clone)]
|
||||
pub enum GraphRouting {
|
||||
/// Single-graph deployment: one handle, flat routes (`/snapshot`,
|
||||
/// `/read`, …). The `handle.uri` field carries the URI the engine
|
||||
/// was opened from. Backward compatible with v0.6.0 deployments.
|
||||
Single { handle: Arc<GraphHandle> },
|
||||
/// Multi-graph deployment: many handles, cluster routes
|
||||
/// (`/graphs/{graph_id}/...`). `config_path` is the `omnigraph.yaml`
|
||||
/// the server reads at startup; preserved here so future runtime
|
||||
/// mutation (deferred) can find the source of truth without
|
||||
/// re-parsing CLI args. The server treats the file as
|
||||
/// operator-owned and never writes it.
|
||||
Multi {
|
||||
registry: Arc<GraphRegistry>,
|
||||
config_path: Option<PathBuf>,
|
||||
},
|
||||
pub struct GraphRouting {
|
||||
/// Registry of opened graph handles. The routing middleware resolves the
/// `{graph_id}` URL segment against it; a single-graph deployment holds
/// exactly one entry keyed by `default`.
pub registry: Arc<GraphRegistry>,
|
||||
/// Boot source the server was started from (cluster directory or storage
/// root). `None` when the state is built by the single-graph convenience
/// constructor, which has no cluster source to record.
pub config_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
|
@ -266,12 +252,10 @@ pub struct AppState {
|
|||
/// see MR-668 decision Q6.
|
||||
workload: Arc<workload::WorkloadController>,
|
||||
bearer_tokens: Arc<[(BearerTokenHash, Arc<str>)]>,
|
||||
/// Server-level Cedar policy. Used by management endpoints (`POST
|
||||
/// /graphs`, `GET /graphs`) which act on the registry resource,
|
||||
/// not on a per-graph resource. Loaded from `server.policy.file`
|
||||
/// in `omnigraph.yaml`. `None` outside multi mode and when no
|
||||
/// server policy is configured. Per-graph policies live on each
|
||||
/// `GraphHandle.policy`.
|
||||
/// Server-level Cedar policy. Used by management endpoints (`GET
|
||||
/// /graphs`) which act on the registry resource, not on a per-graph
|
||||
/// resource. Loaded from the cluster-scoped policy binding when
|
||||
/// configured. Per-graph policies live on each `GraphHandle.policy`.
|
||||
server_policy: Option<Arc<PolicyEngine>>,
|
||||
}
|
||||
|
||||
|
|
@ -496,11 +480,13 @@ impl AppState {
|
|||
))
|
||||
}
|
||||
|
||||
/// Single-mode shared construction: wraps the bare engine + per-graph
|
||||
/// policy in a `GraphHandle` carried directly by `GraphRouting::Single`.
|
||||
/// Per-graph policy enforcement on the engine (MR-722) is re-applied
|
||||
/// via `Omnigraph::with_policy` so HTTP and engine layers can never
|
||||
/// diverge.
|
||||
/// Single-graph convenience construction (RFC-011 cluster-only):
|
||||
/// wraps the bare engine + per-graph policy in a `GraphHandle` keyed
|
||||
/// by `default`, then builds a one-graph registry so the deployment
|
||||
/// serves the same `/graphs/{graph_id}/...` cluster routes as any
|
||||
/// other. Per-graph policy enforcement on the engine (MR-722) is
|
||||
/// re-applied via `Omnigraph::with_policy` so HTTP and engine layers
|
||||
/// can never diverge.
|
||||
fn build_single_mode(
|
||||
uri: String,
|
||||
db: Omnigraph,
|
||||
|
|
@ -519,18 +505,13 @@ impl AppState {
|
|||
} else {
|
||||
db
|
||||
};
|
||||
// `GraphHandle.key` is required by the struct, but in single
|
||||
// mode it is never a registry key (there's no registry) and
|
||||
// never compared against user input (routes are flat, no
|
||||
// `{graph_id}` parameter). The label appears only in tracing
|
||||
// output from `resolve_graph_handle`. The literal below is a
|
||||
// log label, not a routing key — when the future cluster
|
||||
// catalog ships, single mode may carry the catalog-assigned
|
||||
// id here instead.
|
||||
// The convenience constructors address the single graph by the
|
||||
// reserved id `default` — both the registry key and the URL
|
||||
// segment (`/graphs/default/...`).
|
||||
let uri = normalize_root_uri(&uri).unwrap_or(uri);
|
||||
let key = GraphKey::cluster(
|
||||
GraphId::try_from("default").expect("'default' is a valid GraphId log label"),
|
||||
);
|
||||
let graph_id =
|
||||
GraphId::try_from("default").expect("'default' is a valid GraphId");
|
||||
let key = GraphKey::cluster(graph_id);
|
||||
let handle = Arc::new(GraphHandle {
|
||||
key,
|
||||
uri,
|
||||
|
|
@ -538,8 +519,15 @@ impl AppState {
|
|||
policy: policy_engine,
|
||||
queries,
|
||||
});
|
||||
let registry = Arc::new(
|
||||
GraphRegistry::from_handles(vec![handle])
|
||||
.expect("a single handle never collides on graph id"),
|
||||
);
|
||||
Self {
|
||||
routing: GraphRouting::Single { handle },
|
||||
routing: GraphRouting {
|
||||
registry,
|
||||
config_path: None,
|
||||
},
|
||||
workload,
|
||||
bearer_tokens,
|
||||
server_policy: None,
|
||||
|
|
@ -547,12 +535,11 @@ impl AppState {
|
|||
}
|
||||
|
||||
/// Multi-mode constructor — used by the startup loop. Operators
|
||||
/// reach this by invoking `omnigraph-server --config omnigraph.yaml`
|
||||
/// with a non-empty `graphs:` map.
|
||||
/// reach this by invoking `omnigraph-server --cluster <dir|s3://...>`.
|
||||
///
|
||||
/// Caller supplies the already-opened `GraphHandle`s and (optionally)
|
||||
/// the path to the source config file. `server_policy` is loaded
|
||||
/// from `server.policy.file` if configured.
|
||||
/// the path to the source cluster. `server_policy` is loaded from the
|
||||
/// cluster-scoped policy binding if configured.
|
||||
pub fn new_multi(
|
||||
handles: Vec<Arc<GraphHandle>>,
|
||||
bearer_tokens: Vec<(String, String)>,
|
||||
|
|
@ -563,7 +550,7 @@ impl AppState {
|
|||
let bearer_tokens = hash_bearer_tokens(bearer_tokens);
|
||||
let registry = Arc::new(GraphRegistry::from_handles(handles)?);
|
||||
Ok(Self {
|
||||
routing: GraphRouting::Multi {
|
||||
routing: GraphRouting {
|
||||
registry,
|
||||
config_path,
|
||||
},
|
||||
|
|
@ -575,9 +562,7 @@ impl AppState {
|
|||
|
||||
/// Runtime routing accessor. Handlers don't typically inspect this —
|
||||
/// they extract `Arc<GraphHandle>` via the routing middleware — but
|
||||
/// `build_app` matches on it to decide flat vs nested route
|
||||
/// mounting, and a handful of management endpoints (`GET /graphs`,
|
||||
/// the OpenAPI cluster rewrite) match on the discriminant.
|
||||
/// `server_graphs_list` reads the registry through it.
|
||||
pub fn routing(&self) -> &GraphRouting {
|
||||
&self.routing
|
||||
}
|
||||
|
|
@ -591,13 +576,9 @@ impl AppState {
|
|||
}
|
||||
// Any per-graph policy also requires auth — otherwise the
|
||||
// policy gate would receive unauthenticated requests. Reading
|
||||
// from `routing` is O(1) in both arms: single mode is a direct
|
||||
// `handle.policy.is_some()` check, multi mode reads the
|
||||
// cached `any_per_graph_policy` flag on the registry snapshot.
|
||||
match &self.routing {
|
||||
GraphRouting::Single { handle } => handle.policy.is_some(),
|
||||
GraphRouting::Multi { registry, .. } => registry.snapshot_ref().any_per_graph_policy,
|
||||
}
|
||||
// the cached `any_per_graph_policy` flag off the registry
|
||||
// snapshot is O(1).
|
||||
self.routing.registry.snapshot_ref().any_per_graph_policy
|
||||
}
|
||||
|
||||
fn authenticate_bearer_token(&self, provided_token: &str) -> Option<ResolvedActor> {
|
||||
|
|
@ -892,18 +873,6 @@ fn validate_and_attach(
|
|||
})
|
||||
}
|
||||
|
||||
/// Format every load error (parse / identity failure) into a multi-line
|
||||
/// boot-abort message.
|
||||
fn format_registry_load_errors(label: &str, errors: &[queries::LoadError]) -> String {
|
||||
let joined = errors
|
||||
.iter()
|
||||
.map(|e| e.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n ");
|
||||
format!("graph '{label}': stored-query registry failed to load:\n {joined}")
|
||||
}
|
||||
|
||||
|
||||
pub fn build_app(state: AppState) -> Router {
|
||||
// The per-graph protected routes, mounted identically for every graph.
|
||||
// Two middleware layers wrap them (outer first, inner last):
|
||||
|
|
@ -934,9 +903,20 @@ pub fn build_app(state: AppState) -> Router {
|
|||
.route("/queries/{name}", post(server_invoke_query))
|
||||
.route("/schema", get(server_schema_get))
|
||||
.route("/schema/apply", post(server_schema_apply))
|
||||
.route(
|
||||
"/load",
|
||||
post(server_load).layer(DefaultBodyLimit::max(INGEST_REQUEST_BODY_LIMIT_BYTES)),
|
||||
)
|
||||
// /ingest is the deprecated alias of /load; its handler carries
|
||||
// #[deprecated] (OpenAPI operation flagged) and emits RFC 9745
|
||||
// Deprecation + RFC 8288 Link headers. Suppress the call-site warning.
|
||||
.route(
|
||||
"/ingest",
|
||||
post(server_ingest).layer(DefaultBodyLimit::max(INGEST_REQUEST_BODY_LIMIT_BYTES)),
|
||||
post({
|
||||
#[allow(deprecated)]
|
||||
server_ingest
|
||||
})
|
||||
.layer(DefaultBodyLimit::max(INGEST_REQUEST_BODY_LIMIT_BYTES)),
|
||||
)
|
||||
.route(
|
||||
"/branches",
|
||||
|
|
@ -958,13 +938,9 @@ pub fn build_app(state: AppState) -> Router {
|
|||
// Management endpoints (`GET /graphs`) live alongside the per-graph
|
||||
// router. They go through bearer auth but NOT through
|
||||
// `resolve_graph_handle` — they operate on the registry directly.
|
||||
// The endpoint is mounted in both modes; in single mode the handler
|
||||
// returns 405 so clients see "resource exists, wrong context"
|
||||
// rather than 404 "no such resource."
|
||||
//
|
||||
// Runtime add/remove (`POST /graphs`, `DELETE /graphs/{id}`) is not
|
||||
// exposed in v0.6.0 — operators add graphs by editing
|
||||
// `omnigraph.yaml` and restarting.
|
||||
// exposed — operators run `cluster apply` and restart.
|
||||
let management = Router::new()
|
||||
.route("/graphs", get(server_graphs_list))
|
||||
.route_layer(middleware::from_fn_with_state(
|
||||
|
|
@ -972,15 +948,11 @@ pub fn build_app(state: AppState) -> Router {
|
|||
require_bearer_auth,
|
||||
));
|
||||
|
||||
// Mount the protected routes differently per mode:
|
||||
// * Single → flat routes (legacy: `/snapshot`, `/read`, etc.)
|
||||
// * Multi → nested under `/graphs/{graph_id}/...`
|
||||
let protected: Router<AppState> = match state.routing() {
|
||||
GraphRouting::Single { .. } => per_graph_protected.merge(management),
|
||||
GraphRouting::Multi { .. } => Router::new()
|
||||
.nest("/graphs/{graph_id}", per_graph_protected)
|
||||
.merge(management),
|
||||
};
|
||||
// RFC-011 cluster-only: per-graph routes always nest under
|
||||
// `/graphs/{graph_id}/...`; there are no flat single-graph routes.
|
||||
let protected: Router<AppState> = Router::new()
|
||||
.nest("/graphs/{graph_id}", per_graph_protected)
|
||||
.merge(management);
|
||||
|
||||
Router::new()
|
||||
.route("/healthz", get(server_health))
|
||||
|
|
@ -1001,7 +973,6 @@ pub async fn serve(config: ServerConfig) -> Result<()> {
|
|||
// policy OR any per-graph policy file. Mirrors the
|
||||
// `requires_bearer_auth` semantics on AppState.
|
||||
let has_policy_configured = match &config.mode {
|
||||
ServerConfigMode::Single { policy_file, .. } => policy_file.is_some(),
|
||||
ServerConfigMode::Multi {
|
||||
graphs,
|
||||
server_policy,
|
||||
|
|
@ -1022,36 +993,14 @@ pub async fn serve(config: ServerConfig) -> Result<()> {
|
|||
ServerRuntimeState::DefaultDeny => warn!(
|
||||
"bearer tokens are configured but no policy file is set — running in \
|
||||
default-deny mode (only `read` actions are permitted for authenticated \
|
||||
actors). Configure `policy.file` in omnigraph.yaml to enable Cedar rules."
|
||||
actors). Configure a graph or cluster policy bundle in the cluster config, \
|
||||
run `omnigraph cluster apply`, and restart to enable Cedar rules."
|
||||
),
|
||||
ServerRuntimeState::PolicyEnabled => {}
|
||||
}
|
||||
|
||||
let bind = config.bind.clone();
|
||||
let state = match config.mode {
|
||||
ServerConfigMode::Single {
|
||||
uri,
|
||||
graph_id,
|
||||
policy_file,
|
||||
queries,
|
||||
} => {
|
||||
let uri_for_log = uri.clone();
|
||||
info!(
|
||||
uri = %uri_for_log,
|
||||
graph_id = %graph_id,
|
||||
bind = %bind,
|
||||
mode = "single",
|
||||
"serving omnigraph"
|
||||
);
|
||||
AppState::open_single_with_queries_for_graph_id(
|
||||
uri,
|
||||
tokens,
|
||||
policy_file.as_ref(),
|
||||
queries,
|
||||
Some(graph_id),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
ServerConfigMode::Multi {
|
||||
graphs,
|
||||
config_path,
|
||||
|
|
@ -1059,7 +1008,7 @@ pub async fn serve(config: ServerConfig) -> Result<()> {
|
|||
} => {
|
||||
info!(
|
||||
bind = %bind,
|
||||
mode = "multi",
|
||||
mode = "cluster",
|
||||
graph_count = graphs.len(),
|
||||
config = %config_path.display(),
|
||||
"serving omnigraph"
|
||||
|
|
@ -1142,6 +1091,11 @@ async fn open_single_graph(cfg: GraphStartupConfig) -> Result<Arc<GraphHandle>>
|
|||
let db = Omnigraph::open(&uri)
|
||||
.await
|
||||
.map_err(|err| color_eyre::eyre::eyre!("open graph '{}' at {}: {err}", graph_id, uri))?;
|
||||
let db = if let Some(embedding) = cfg.embedding {
|
||||
db.with_embedding_config(Arc::new(embedding))
|
||||
} else {
|
||||
db
|
||||
};
|
||||
|
||||
// Validate this graph's stored queries against the live schema and
|
||||
// resolve them to an attachable handle (refuse boot on breakage).
|
||||
|
|
@ -1175,5 +1129,3 @@ async fn shutdown_signal() {
|
|||
}
|
||||
info!("shutdown signal received");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -8,16 +8,10 @@ use omnigraph_server::{ServerConfig, init_tracing, load_server_settings, serve};
|
|||
#[command(name = "omnigraph-server")]
|
||||
#[command(about = "HTTP server for the Omnigraph graph database")]
|
||||
struct Cli {
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
#[arg(long)]
|
||||
config: Option<PathBuf>,
|
||||
/// Boot from a cluster: either a config directory (storage resolved
|
||||
/// through cluster.yaml) or a storage-root URI directly
|
||||
/// (s3://bucket/prefix — config-free serving from the bucket).
|
||||
/// Exclusive: cannot combine with <URI>, --target, or --config.
|
||||
/// The server's only boot source (RFC-011 cluster-only).
|
||||
#[arg(long)]
|
||||
cluster: Option<PathBuf>,
|
||||
#[arg(long)]
|
||||
|
|
@ -36,14 +30,7 @@ async fn main() -> Result<()> {
|
|||
init_tracing();
|
||||
|
||||
let cli = Cli::parse();
|
||||
let settings: ServerConfig = load_server_settings(
|
||||
cli.config.as_ref(),
|
||||
cli.cluster.as_ref(),
|
||||
cli.uri,
|
||||
cli.target,
|
||||
cli.bind,
|
||||
cli.unauthenticated,
|
||||
)
|
||||
.await?;
|
||||
let settings: ServerConfig =
|
||||
load_server_settings(cli.cluster.as_ref(), cli.bind, cli.unauthenticated).await?;
|
||||
serve(settings).await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@
|
|||
//! Renaming either is a breaking change to callers, by design.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::sync::Arc;
|
||||
|
||||
use omnigraph_compiler::catalog::Catalog;
|
||||
|
|
@ -22,8 +21,6 @@ use omnigraph_compiler::query::parser::parse_query;
|
|||
use omnigraph_compiler::query::typecheck::typecheck_query_decl;
|
||||
use omnigraph_compiler::types::{PropType, ScalarType};
|
||||
|
||||
use crate::config::{OmnigraphConfig, QueryEntry};
|
||||
|
||||
/// One loaded stored query. `source` is the full `.gq` file text — the
|
||||
/// invocation handler hands it to `run_query` / `run_mutate` verbatim,
|
||||
/// which reuse the same parse/IR/exec path as the inline routes (no
|
||||
|
|
@ -68,8 +65,9 @@ pub struct QueryRegistry {
|
|||
by_name: BTreeMap<String, StoredQuery>,
|
||||
}
|
||||
|
||||
/// In-memory registry entry before file I/O. Used by [`QueryRegistry::load`]
|
||||
/// (after reading each `.gq` from disk) and directly by tests.
|
||||
/// In-memory registry spec: a query's name + already-read `.gq` source. The
|
||||
/// input to [`QueryRegistry::from_specs`] — built by the server's cluster boot
|
||||
/// and by the CLI's `queries` tooling from a cluster serving snapshot.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RegistrySpec {
|
||||
pub name: String,
|
||||
|
|
@ -169,47 +167,6 @@ impl QueryRegistry {
|
|||
}
|
||||
}
|
||||
|
||||
/// Read each registry entry's `.gq` file from disk and build the
|
||||
/// registry. `entries` is either the top-level `queries` map (single
|
||||
/// mode) or a graph's `queries` map (multi mode); `config` resolves
|
||||
/// each entry's relative `file:` path against `base_dir`.
|
||||
pub fn load(
|
||||
config: &OmnigraphConfig,
|
||||
entries: &BTreeMap<String, QueryEntry>,
|
||||
) -> Result<Self, Vec<LoadError>> {
|
||||
let mut specs = Vec::with_capacity(entries.len());
|
||||
let mut errors = Vec::new();
|
||||
for (name, entry) in entries {
|
||||
let path = config.resolve_query_file(&entry.file);
|
||||
match fs::read_to_string(&path) {
|
||||
Ok(source) => specs.push(RegistrySpec {
|
||||
name: name.clone(),
|
||||
source,
|
||||
expose: entry.mcp.expose,
|
||||
tool_name: entry.mcp.tool_name.clone(),
|
||||
}),
|
||||
Err(err) => errors.push(LoadError {
|
||||
query: Some(name.clone()),
|
||||
message: format!("cannot read '{}': {err}", path.display()),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
// Parse/identity/uniqueness-check the readable specs even when some
|
||||
// files failed to read, so every broken entry (I/O, parse, identity,
|
||||
// tool-name collision) surfaces in one pass rather than one per
|
||||
// restart. I/O errors come first (in `entries` key order), then the
|
||||
// spec errors. A non-empty `errors` always fails the load.
|
||||
match Self::from_specs(specs) {
|
||||
Ok(registry) if errors.is_empty() => Ok(registry),
|
||||
Ok(_) => Err(errors),
|
||||
Err(spec_errors) => {
|
||||
errors.extend(spec_errors);
|
||||
Err(errors)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up a stored query by its registry name.
///
/// Returns `None` when the name-keyed map holds no entry for exactly
/// `name`.
pub fn lookup(&self, name: &str) -> Option<&StoredQuery> {
|
||||
self.by_name.get(name)
|
||||
}
|
||||
|
|
@ -653,36 +610,4 @@ embedding: Vector(4)
|
|||
assert!(entry2.params.is_empty(), "no declared params → empty list");
|
||||
}
|
||||
|
||||
// --- load() error collection (file I/O + parse in one pass) ---
|
||||
|
||||
// Regression test for `QueryRegistry::load` error aggregation: with one
// valid query file, one missing file and one unparsable file configured,
// a single `load` call must report both the read failure and the parse
// failure, and must not report the valid entry.
// NOTE(review): the YAML literal below appears whitespace-collapsed in this
// view (`good:` and `file:` sit at the same indent) — confirm the nesting
// against the real source before reusing it.
#[test]
|
||||
fn load_collects_io_and_parse_errors_in_one_pass() {
|
||||
use crate::config::load_config;
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
std::fs::write(
|
||||
temp.path().join("good.gq"),
|
||||
"query good() { match { $u: User } return { $u.name } }",
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::write(temp.path().join("broken.gq"), "query broken( {{ not valid").unwrap();
|
||||
// `missing.gq` is deliberately not written (an I/O failure).
|
||||
std::fs::write(
|
||||
temp.path().join("omnigraph.yaml"),
|
||||
"queries:\n good:\n file: ./good.gq\n \
|
||||
missing:\n file: ./missing.gq\n broken:\n file: ./broken.gq\n",
|
||||
)
|
||||
.unwrap();
|
||||
let config = load_config(Some(&temp.path().join("omnigraph.yaml"))).unwrap();
|
||||
|
||||
let errors = QueryRegistry::load(&config, config.query_entries()).unwrap_err();
|
||||
// Flatten the error list into one string so each expectation is a
// simple substring check.
let joined = errors.iter().map(|e| e.to_string()).collect::<Vec<_>>().join("\n");
|
||||
// Both the missing file AND the parse error surface in one pass —
|
||||
// the I/O failure must not mask the parse failure.
|
||||
assert!(joined.contains("missing"), "I/O error must surface: {joined}");
|
||||
assert!(
|
||||
joined.contains("broken") && joined.contains("parse error"),
|
||||
"the parse error in a readable file must surface in the same pass: {joined}"
|
||||
);
|
||||
assert!(!joined.contains("'good'"), "the valid entry is not an error: {joined}");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,13 @@
|
|||
//! Server settings: omnigraph.yaml/CLI/env resolution, mode inference
|
||||
//! (single vs multi vs cluster), bearer-token sources, and runtime-state
|
||||
//! classification (moved verbatim from lib.rs in the modularization).
|
||||
//! Server settings: cluster/CLI/env resolution, bearer-token sources, and
|
||||
//! runtime-state classification (moved verbatim from lib.rs in the
|
||||
//! modularization).
|
||||
|
||||
use super::*;
|
||||
|
||||
/// Build serving settings from a cluster directory's applied revision
|
||||
/// (RFC-005 §D2): graphs at derived roots, stored queries from verified
|
||||
/// catalog blob content, policy bundles from blob paths with their applied
|
||||
/// bindings. Always multi-graph routing. The unauthenticated/env handling
|
||||
/// matches the omnigraph.yaml path.
|
||||
/// bindings. Always multi-graph routing.
|
||||
pub(crate) async fn load_cluster_settings(
|
||||
cluster_dir: &PathBuf,
|
||||
cli_bind: Option<String>,
|
||||
|
|
@ -99,6 +98,15 @@ pub(crate) async fn load_cluster_settings(
|
|||
graph_id: graph.graph_id.clone(),
|
||||
uri: graph.root.to_string_lossy().to_string(),
|
||||
policy: graph_policies.get(&graph.graph_id).cloned(),
|
||||
embedding: graph
|
||||
.embedding
|
||||
.as_ref()
|
||||
.map(|profile| {
|
||||
profile.resolve().map_err(|err| {
|
||||
eyre!("embedding provider for graph '{}': {err}", graph.graph_id)
|
||||
})
|
||||
})
|
||||
.transpose()?,
|
||||
queries: registry,
|
||||
});
|
||||
}
|
||||
|
|
@ -122,162 +130,24 @@ pub(crate) async fn load_cluster_settings(
|
|||
})
|
||||
}
|
||||
|
||||
/// RFC-011 cluster-only boot: the server serves exclusively from a
|
||||
/// cluster's applied revision (`--cluster <dir | s3://…>`). The legacy
|
||||
/// omnigraph.yaml / `--target` / positional-URI single-graph boot paths
|
||||
/// were removed — a deployment serves from exactly one source.
|
||||
pub async fn load_server_settings(
|
||||
config_path: Option<&PathBuf>,
|
||||
cli_cluster: Option<&PathBuf>,
|
||||
cli_uri: Option<String>,
|
||||
cli_target: Option<String>,
|
||||
cli_bind: Option<String>,
|
||||
cli_allow_unauthenticated: bool,
|
||||
) -> Result<ServerConfig> {
|
||||
// Rule 0 (RFC-005): --cluster is an exclusive boot source. It is checked
|
||||
// before anything reads omnigraph.yaml — in cluster mode that file is
|
||||
// never opened, not even the implicit current-directory search.
|
||||
if let Some(cluster_dir) = cli_cluster {
|
||||
if cli_uri.is_some() || cli_target.is_some() || config_path.is_some() {
|
||||
bail!(
|
||||
"--cluster is an exclusive boot source; it cannot combine with a graph URI, --target, or --config (axiom 15: a deployment serves from one source)"
|
||||
);
|
||||
}
|
||||
return load_cluster_settings(cluster_dir, cli_bind, cli_allow_unauthenticated).await;
|
||||
}
|
||||
let config = load_config(config_path)?;
|
||||
let bind = cli_bind.unwrap_or_else(|| config.server_bind().to_string());
|
||||
// Either `--unauthenticated` or `OMNIGRAPH_UNAUTHENTICATED=1` flips
|
||||
// this. Treat any non-empty, non-"0"/"false" string as truthy —
|
||||
// standard 12-factor "any value is true" reading of the env var.
|
||||
let env_unauth = std::env::var("OMNIGRAPH_UNAUTHENTICATED")
|
||||
.ok()
|
||||
.map(|v| {
|
||||
let trimmed = v.trim();
|
||||
!trimmed.is_empty() && trimmed != "0" && !trimmed.eq_ignore_ascii_case("false")
|
||||
})
|
||||
.unwrap_or(false);
|
||||
let allow_unauthenticated = cli_allow_unauthenticated || env_unauth;
|
||||
|
||||
// MR-668 decision 2 — four-rule mode inference matrix.
|
||||
//
|
||||
// 1. CLI `<URI>` positional → Single (URI = the value)
|
||||
// 2. CLI `--target <name>` → Single (URI = graphs.<name>.uri)
|
||||
// 3. `server.graph` in config → Single (URI = graphs.<server.graph>.uri)
|
||||
// 4. `--config` + non-empty `graphs:` + no single-mode selector
|
||||
// → Multi (every entry in `graphs:`)
|
||||
// 5. otherwise → error with migration hint
|
||||
//
|
||||
// Rules 1-3 are mutually compatible (CLI URI wins over `--target`
|
||||
// wins over `server.graph`), reusing the existing
|
||||
// `resolve_target_uri` precedence.
|
||||
let has_cli_uri = cli_uri.is_some();
|
||||
let has_cli_target = cli_target.is_some();
|
||||
let has_server_graph = config.server_graph_name().is_some();
|
||||
let has_graphs_map = !config.graphs.is_empty();
|
||||
let has_explicit_config = config_path.is_some();
|
||||
|
||||
let mode = if has_cli_uri || has_cli_target || has_server_graph {
|
||||
// Rules 1, 2, or 3 → Single mode.
|
||||
let raw_uri = config.resolve_target_uri(
|
||||
cli_uri,
|
||||
cli_target.as_deref(),
|
||||
config.server_graph_name(),
|
||||
)?;
|
||||
let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| {
|
||||
format!("normalize single-graph URI '{raw_uri}' from server settings")
|
||||
})?;
|
||||
// Config follows graph IDENTITY, not mode: a bare URI is anonymous
|
||||
// (top-level config); a graph chosen by name uses its per-graph
|
||||
// `graphs.<name>.{policy,queries}`. `resolve_target_uri` already
|
||||
// errored on an unknown name, so a `Some(name)` here is a known graph.
|
||||
let selected: Option<&str> = if has_cli_uri {
|
||||
None
|
||||
} else {
|
||||
cli_target.as_deref().or_else(|| config.server_graph_name())
|
||||
};
|
||||
// A named selection must not leave a populated top-level block
|
||||
// silently unused — refuse boot and point at the per-graph block. The
|
||||
// same rule the CLI selection gate enforces, shared via one helper so
|
||||
// the boot check and `omnigraph queries validate`/`list` can't drift.
|
||||
config.ensure_top_level_blocks_honored(selected)?;
|
||||
// Load + identity-check now (no engine needed); the schema
|
||||
// type-check happens when the engine opens.
|
||||
let policy_file = config.resolve_policy_file_for(selected);
|
||||
let queries = QueryRegistry::load(&config, config.query_entries_for(selected))
|
||||
.map_err(|errs| color_eyre::eyre::eyre!(format_registry_load_errors(&uri, &errs)))?;
|
||||
let graph_id = graph_resource_id_for_selection(selected, &uri);
|
||||
ServerConfigMode::Single {
|
||||
uri,
|
||||
graph_id,
|
||||
policy_file,
|
||||
queries,
|
||||
}
|
||||
} else if has_explicit_config && has_graphs_map {
|
||||
// Multi mode: every graph uses its per-graph block; top-level
|
||||
// policy/queries are never honored, so a populated one is an error.
|
||||
let unhonored = config.populated_top_level_blocks();
|
||||
if !unhonored.is_empty() {
|
||||
bail!(
|
||||
"multi-graph mode: top-level {} {} not honored — each graph uses its own \
|
||||
`graphs.<graph_id>.…` block. Move per-graph rules there (and any \
|
||||
`graph_list` policy to `server.policy.file`).",
|
||||
unhonored.join(" and "),
|
||||
if unhonored.len() == 1 { "is" } else { "are" },
|
||||
);
|
||||
}
|
||||
// Rule 4 → Multi mode. Build a startup config per graph.
|
||||
let mut graphs = Vec::with_capacity(config.graphs.len());
|
||||
for (name, target) in &config.graphs {
|
||||
// Validate the graph id can construct a `GraphId` newtype.
|
||||
// Doing this here (not at registry insert) so a malformed
|
||||
// omnigraph.yaml fails at startup with a clear error.
|
||||
GraphId::try_from(name.clone()).map_err(|err| {
|
||||
color_eyre::eyre::eyre!("invalid graph id '{name}' in omnigraph.yaml: {err}")
|
||||
})?;
|
||||
let raw_uri = config.resolve_uri_value(&target.uri);
|
||||
let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| {
|
||||
format!("normalize URI '{raw_uri}' for graph '{name}' in omnigraph.yaml")
|
||||
})?;
|
||||
// Per-graph `queries:`, selected through the shared
|
||||
// `query_entries_for` so server and CLI resolve identically.
|
||||
// Load + identity-check now; the schema type-check happens
|
||||
// when this graph's engine opens.
|
||||
let queries = QueryRegistry::load(&config, config.query_entries_for(Some(name.as_str())))
|
||||
.map_err(|errs| color_eyre::eyre::eyre!(format_registry_load_errors(name, &errs)))?;
|
||||
graphs.push(GraphStartupConfig {
|
||||
graph_id: name.clone(),
|
||||
uri,
|
||||
policy: config.resolve_target_policy_file(name).map(PolicySource::File),
|
||||
queries,
|
||||
});
|
||||
}
|
||||
let config_path = config_path
|
||||
.cloned()
|
||||
.expect("has_explicit_config implies config_path is Some");
|
||||
let server_policy = config.resolve_server_policy_file().map(PolicySource::File);
|
||||
ServerConfigMode::Multi {
|
||||
graphs,
|
||||
config_path,
|
||||
server_policy,
|
||||
}
|
||||
} else {
|
||||
// Rule 5 → error with migration hint.
|
||||
let Some(cluster_dir) = cli_cluster else {
|
||||
bail!(
|
||||
"no graph to serve: pass a URI (`omnigraph-server <URI>`), select a target \
|
||||
(`--target <name> --config omnigraph.yaml`), set `server.graph: <name>` in \
|
||||
omnigraph.yaml, or for multi-graph mode add a `graphs:` map to the config \
|
||||
file referenced by `--config`."
|
||||
"omnigraph-server boots from a cluster: pass --cluster <dir|s3://…> \
|
||||
(the cluster's applied revision is the deployment artifact). The legacy \
|
||||
single-graph boot (positional <URI>, --target, --config omnigraph.yaml) \
|
||||
was removed in RFC-011."
|
||||
);
|
||||
};
|
||||
|
||||
Ok(ServerConfig {
|
||||
mode,
|
||||
bind,
|
||||
allow_unauthenticated,
|
||||
})
|
||||
}
|
||||
|
||||
/// Whether the loaded config will run the server in multi-graph mode.
|
||||
/// Useful for the test that constructs `ServerConfig` directly.
|
||||
pub fn server_config_is_multi(config: &ServerConfig) -> bool {
|
||||
matches!(config.mode, ServerConfigMode::Multi { .. })
|
||||
load_cluster_settings(cluster_dir, cli_bind, cli_allow_unauthenticated).await
|
||||
}
|
||||
|
||||
/// MR-723 server runtime state, classified from the three-state matrix
|
||||
|
|
@ -327,14 +197,15 @@ pub fn classify_server_runtime_state(
|
|||
"server has no bearer tokens and no policy file configured. This is a fully \
|
||||
open server — pass `--unauthenticated` (or set OMNIGRAPH_UNAUTHENTICATED=1) \
|
||||
if you actually want that, otherwise configure bearer tokens (see \
|
||||
docs/user/server.md) and/or `policy.file` in omnigraph.yaml."
|
||||
docs/user/operations/server.md) and a graph or cluster policy bundle in \
|
||||
the cluster config, then run `omnigraph cluster apply` and restart."
|
||||
),
|
||||
(false, false, true) => Ok(ServerRuntimeState::Open),
|
||||
(true, false, _) => Ok(ServerRuntimeState::DefaultDeny),
|
||||
(false, true, _) => bail!(
|
||||
"policy file is configured but no bearer tokens — every request would 401 \
|
||||
because no token can ever match. Configure at least one bearer token (see \
|
||||
docs/user/server.md), or remove the policy file. To deny all unauthenticated \
|
||||
docs/user/operations/server.md), or remove the policy file. To deny all unauthenticated \
|
||||
traffic deliberately, configure tokens plus a deny-all Cedar rule — that \
|
||||
produces meaningful 403s with policy-decision logging instead of silent 401s."
|
||||
),
|
||||
|
|
@ -417,8 +288,8 @@ pub(crate) fn server_bearer_tokens_from_env() -> Result<Vec<(String, String)>> {
|
|||
mod tests {
|
||||
use super::{
|
||||
GraphStartupConfig, ServerConfig, ServerConfigMode, ServerRuntimeState,
|
||||
classify_server_runtime_state, hash_bearer_token, load_server_settings,
|
||||
normalize_bearer_token, parse_bearer_tokens_json, serve, server_bearer_tokens_from_env,
|
||||
classify_server_runtime_state, hash_bearer_token, normalize_bearer_token,
|
||||
parse_bearer_tokens_json, serve, server_bearer_tokens_from_env,
|
||||
};
|
||||
use serial_test::serial;
|
||||
use std::env;
|
||||
|
|
@ -577,108 +448,15 @@ mod tests {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn server_settings_load_from_yaml_config() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config,
|
||||
r#"
|
||||
graphs:
|
||||
local:
|
||||
uri: /tmp/demo.omni
|
||||
server:
|
||||
graph: local
|
||||
bind: 0.0.0.0:9090
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let settings = load_server_settings(Some(&config), None, None, None, None, false).await.unwrap();
|
||||
match &settings.mode {
|
||||
ServerConfigMode::Single { uri, graph_id, .. } => {
|
||||
assert_eq!(uri, "/tmp/demo.omni");
|
||||
assert_eq!(graph_id, "local");
|
||||
}
|
||||
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
||||
}
|
||||
assert_eq!(settings.bind, "0.0.0.0:9090");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn server_settings_cli_flags_override_yaml_config() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config,
|
||||
r#"
|
||||
graphs:
|
||||
local:
|
||||
uri: /tmp/demo.omni
|
||||
server:
|
||||
graph: local
|
||||
bind: 127.0.0.1:8080
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let settings = load_server_settings(
|
||||
Some(&config),
|
||||
None,
|
||||
Some("/tmp/override.omni".to_string()),
|
||||
None,
|
||||
Some("0.0.0.0:9999".to_string()),
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
match &settings.mode {
|
||||
ServerConfigMode::Single { uri, graph_id, .. } => {
|
||||
assert_eq!(uri, "/tmp/override.omni");
|
||||
assert_eq!(graph_id, "/tmp/override.omni");
|
||||
}
|
||||
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
||||
}
|
||||
assert_eq!(settings.bind, "0.0.0.0:9999");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn server_settings_can_resolve_named_target() {
|
||||
let temp = tempdir().unwrap();
|
||||
let config = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config,
|
||||
r#"
|
||||
graphs:
|
||||
local:
|
||||
uri: ./demo.omni
|
||||
dev:
|
||||
uri: http://127.0.0.1:8080
|
||||
server:
|
||||
graph: local
|
||||
bind: 127.0.0.1:8080
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let settings =
|
||||
load_server_settings(Some(&config), None, None, Some("dev".to_string()), None, false)
|
||||
.await
|
||||
.unwrap();
|
||||
match &settings.mode {
|
||||
ServerConfigMode::Single { uri, graph_id, .. } => {
|
||||
assert_eq!(uri, "http://127.0.0.1:8080");
|
||||
assert_eq!(graph_id, "dev");
|
||||
}
|
||||
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn server_settings_require_uri_from_cli_or_config() {
|
||||
let error = load_server_settings(None, None, None, None, None, false).await.unwrap_err();
|
||||
async fn server_settings_require_cluster_boot_source() {
|
||||
// RFC-011 cluster-only: with no --cluster the server refuses to
|
||||
// start and names the cluster-required remedy.
|
||||
let error = super::load_server_settings(None, None, false)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(
|
||||
error.to_string().contains("no graph to serve"),
|
||||
"expected mode-inference error, got: {error}",
|
||||
error.to_string().contains("boots from a cluster"),
|
||||
"expected cluster-required error, got: {error}",
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -748,6 +526,7 @@ server:
|
|||
.to_string_lossy()
|
||||
.into_owned(),
|
||||
policy: None,
|
||||
embedding: None,
|
||||
queries: crate::queries::QueryRegistry::default(),
|
||||
}],
|
||||
config_path: temp.path().join("omnigraph.yaml"),
|
||||
|
|
@ -788,17 +567,22 @@ server:
|
|||
]);
|
||||
let temp = tempdir().unwrap();
|
||||
// Graph path doesn't need to exist — classifier fires before
|
||||
// `AppState::open_with_bearer_tokens_and_policy`.
|
||||
// any engine open.
|
||||
let config = ServerConfig {
|
||||
mode: ServerConfigMode::Single {
|
||||
uri: temp
|
||||
.path()
|
||||
.join("graph.omni")
|
||||
.to_string_lossy()
|
||||
.into_owned(),
|
||||
graph_id: "default".to_string(),
|
||||
policy_file: None,
|
||||
queries: crate::queries::QueryRegistry::default(),
|
||||
mode: ServerConfigMode::Multi {
|
||||
graphs: vec![GraphStartupConfig {
|
||||
graph_id: "default".to_string(),
|
||||
uri: temp
|
||||
.path()
|
||||
.join("graph.omni")
|
||||
.to_string_lossy()
|
||||
.into_owned(),
|
||||
policy: None,
|
||||
embedding: None,
|
||||
queries: crate::queries::QueryRegistry::default(),
|
||||
}],
|
||||
config_path: temp.path().join("cluster"),
|
||||
server_policy: None,
|
||||
},
|
||||
bind: "127.0.0.1:0".to_string(),
|
||||
allow_unauthenticated: false,
|
||||
|
|
@ -813,75 +597,6 @@ server:
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn unauthenticated_env_var_classification() {
|
||||
// MR-723 PR A: closes the gap where the env-var read path inside
|
||||
// `load_server_settings` was structurally implemented but not
|
||||
// exercised by any test. Three properties to pin, all in one
|
||||
// sequential test because `cargo test` runs the mod test suite
|
||||
// in parallel and `OMNIGRAPH_UNAUTHENTICATED` is process-global
|
||||
// — interleaving with another test that sets the same env var
|
||||
// (concurrent classifier tests, even the bearer-token suite
|
||||
// sharing `EnvGuard`) corrupts the read. Sequential within one
|
||||
// test fn is the simplest race-free shape.
|
||||
let temp = tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
graphs:
|
||||
local:
|
||||
uri: /tmp/demo-unauth.omni
|
||||
server:
|
||||
graph: local
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Truthy values flip Open mode on, even with CLI flag off.
|
||||
for value in ["1", "true", "yes", "TRUE", "anything"] {
|
||||
let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", Some(value))]);
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, false).await
|
||||
.expect("settings load should succeed");
|
||||
assert!(
|
||||
settings.allow_unauthenticated,
|
||||
"OMNIGRAPH_UNAUTHENTICATED={value:?} should enable Open mode",
|
||||
);
|
||||
}
|
||||
|
||||
// Falsy values keep refusal behavior, even with CLI flag off.
|
||||
for value in ["0", "false", "FALSE", ""] {
|
||||
let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", Some(value))]);
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, false).await
|
||||
.expect("settings load should succeed");
|
||||
assert!(
|
||||
!settings.allow_unauthenticated,
|
||||
"OMNIGRAPH_UNAUTHENTICATED={value:?} should NOT enable Open mode",
|
||||
);
|
||||
}
|
||||
|
||||
// Unset env var: also false.
|
||||
let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", None)]);
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, false).await
|
||||
.expect("settings load should succeed");
|
||||
assert!(
|
||||
!settings.allow_unauthenticated,
|
||||
"OMNIGRAPH_UNAUTHENTICATED unset should NOT enable Open mode",
|
||||
);
|
||||
drop(_guard);
|
||||
|
||||
// CLI flag wins even when env is falsy — `serve()` honors the
|
||||
// OR of both inputs.
|
||||
let _guard = EnvGuard::set(&[("OMNIGRAPH_UNAUTHENTICATED", Some("0"))]);
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, true).await
|
||||
.expect("settings load should succeed");
|
||||
assert!(
|
||||
settings.allow_unauthenticated,
|
||||
"--unauthenticated CLI flag should win even when env is falsy",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_policy_enabled_requires_tokens() {
|
||||
// State 3: tokens + policy → PolicyEnabled, regardless of the
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ async fn protected_routes_require_bearer_token() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -85,7 +85,7 @@ async fn protected_routes_accept_valid_bearer_token_while_healthz_stays_open() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer demo-token")
|
||||
.body(Body::empty())
|
||||
|
|
@ -108,7 +108,7 @@ async fn protected_routes_accept_any_configured_team_bearer_token() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-two")
|
||||
.body(Body::empty())
|
||||
|
|
@ -158,7 +158,7 @@ rules:
|
|||
let (ok_status, _) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-a")
|
||||
.body(Body::empty())
|
||||
|
|
@ -172,7 +172,7 @@ rules:
|
|||
let (denied_status, denied_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-b")
|
||||
.body(Body::empty())
|
||||
|
|
@ -190,7 +190,7 @@ rules:
|
|||
let (bad_status, _) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer wrong-token")
|
||||
.body(Body::empty())
|
||||
|
|
@ -245,7 +245,7 @@ rules:
|
|||
let (spoof_up_status, spoof_up_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-b")
|
||||
.header("x-actor-id", "act-a")
|
||||
|
|
@ -270,7 +270,7 @@ rules:
|
|||
let (spoof_down_status, _) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-a")
|
||||
.header("x-actor-id", "act-b")
|
||||
|
|
@ -290,7 +290,7 @@ rules:
|
|||
let (empty_spoof_status, _) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-b")
|
||||
.header("x-actor-id", "")
|
||||
|
|
@ -316,7 +316,7 @@ async fn policy_allows_read_but_distinguishes_401_from_403() {
|
|||
let (missing_status, missing_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -332,7 +332,7 @@ async fn policy_allows_read_but_distinguishes_401_from_403() {
|
|||
let (snapshot_status, snapshot_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.body(Body::empty())
|
||||
|
|
@ -350,7 +350,7 @@ async fn policy_allows_read_but_distinguishes_401_from_403() {
|
|||
let (forbidden_status, forbidden_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/export")
|
||||
.uri(g("/export"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -369,7 +369,7 @@ async fn policy_allows_read_but_distinguishes_401_from_403() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/export")
|
||||
.uri(g("/export"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -410,7 +410,7 @@ async fn policy_uses_resolved_branch_for_snapshot_reads() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -458,7 +458,7 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch()
|
|||
let (main_status, main_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -482,7 +482,7 @@ async fn policy_blocks_change_on_protected_main_but_allows_unprotected_branch()
|
|||
let (feature_status, feature_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -533,7 +533,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() {
|
|||
let (deny_status, deny_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/merge")
|
||||
.uri(g("/branches/merge"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -551,7 +551,7 @@ async fn policy_blocks_non_admin_merge_to_main_and_allows_admin() {
|
|||
let (allow_status, allow_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/merge")
|
||||
.uri(g("/branches/merge"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -578,7 +578,7 @@ async fn authenticated_change_stamps_actor_on_commits() {
|
|||
let (change_status, change_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer token-one")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -592,7 +592,7 @@ async fn authenticated_change_stamps_actor_on_commits() {
|
|||
let (commits_status, commits_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/commits?branch=main")
|
||||
.uri(g("/commits?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-one")
|
||||
.body(Body::empty())
|
||||
|
|
@ -623,7 +623,7 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() {
|
|||
let (create_status, _) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer token-one")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -642,7 +642,7 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() {
|
|||
let (change_status, _) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer token-one")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -659,7 +659,7 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() {
|
|||
let (merge_status, merge_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/merge")
|
||||
.uri(g("/branches/merge"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer token-two")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -673,7 +673,7 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() {
|
|||
let (commit_status, commit_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/commits?branch=main")
|
||||
.uri(g("/commits?branch=main"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer token-two")
|
||||
.body(Body::empty())
|
||||
|
|
@ -691,7 +691,6 @@ async fn authenticated_branch_merge_stamps_merge_actor_on_head_commit() {
|
|||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single() {
|
||||
use omnigraph_server::GraphRouting;
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
|
|
@ -717,9 +716,14 @@ async fn engine_layer_policy_fires_via_direct_arc_omnigraph_from_new_single() {
|
|||
// embedded consumer holding `Arc<Omnigraph>` would. If `new_single`
|
||||
// failed to apply `with_policy` to the engine, this `mutate_as`
|
||||
// would succeed — the HTTP-layer is bypassed entirely.
|
||||
let handle = match state.routing() {
|
||||
GraphRouting::Single { handle } => Arc::clone(handle),
|
||||
GraphRouting::Multi { .. } => panic!("expected single-mode routing"),
|
||||
// RFC-011 cluster-only: the single-graph convenience constructor
|
||||
// registers the graph under the reserved id `default`.
|
||||
let key = omnigraph_server::GraphKey::cluster(
|
||||
omnigraph_server::GraphId::try_from("default").unwrap(),
|
||||
);
|
||||
let handle = match state.routing().registry.get(&key) {
|
||||
omnigraph_server::RegistryLookup::Ready(handle) => handle,
|
||||
omnigraph_server::RegistryLookup::Gone => panic!("default graph must be registered"),
|
||||
};
|
||||
let engine = Arc::clone(&handle.engine);
|
||||
|
||||
|
|
@ -758,7 +762,7 @@ async fn oversized_request_body_returns_payload_too_large() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(oversized))
|
||||
|
|
@ -781,7 +785,7 @@ async fn default_deny_mode_allows_read_for_authenticated_actor() {
|
|||
let (status, _body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot")
|
||||
.uri(g("/snapshot"))
|
||||
.method(Method::GET)
|
||||
.header(AUTHORIZATION, "Bearer demo-token")
|
||||
.body(Body::empty())
|
||||
|
|
@ -808,7 +812,7 @@ async fn default_deny_mode_rejects_change_with_forbidden() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header(AUTHORIZATION, "Bearer demo-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -840,7 +844,7 @@ async fn default_deny_mode_rejects_schema_apply_with_forbidden() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.method(Method::POST)
|
||||
.header(AUTHORIZATION, "Bearer demo-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
|
|||
|
|
@ -18,10 +18,7 @@ use support::*;
|
|||
mod multi_graph_startup {
|
||||
use super::*;
|
||||
use omnigraph::storage::normalize_root_uri;
|
||||
use omnigraph_server::{
|
||||
GraphHandle, GraphId, GraphKey, GraphRegistry, InsertError, ServerConfig, ServerConfigMode,
|
||||
load_server_settings,
|
||||
};
|
||||
use omnigraph_server::{GraphHandle, GraphId, GraphKey, GraphRegistry, InsertError};
|
||||
use std::sync::Arc;
|
||||
|
||||
async fn build_multi_mode_app(graph_ids: &[&str]) -> (Vec<tempfile::TempDir>, Router) {
|
||||
|
|
@ -280,10 +277,11 @@ mod multi_graph_startup {
|
|||
);
|
||||
}
|
||||
|
||||
/// Flat routes 404 in multi mode — the router only mounts under
|
||||
/// `/graphs/{graph_id}/...` so `/snapshot` doesn't resolve.
|
||||
/// RFC-011 cluster-only: flat per-graph routes never resolve — the
|
||||
/// router only mounts under `/graphs/{graph_id}/...` so a root
|
||||
/// `/snapshot` returns 404.
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn flat_routes_404_in_multi_mode() {
|
||||
async fn flat_routes_404_at_root() {
|
||||
let (_dirs, app) = build_multi_mode_app(&["alpha"]).await;
|
||||
let resp = app
|
||||
.oneshot(
|
||||
|
|
@ -298,28 +296,6 @@ mod multi_graph_startup {
|
|||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
/// `GraphId` validation runs at startup — a reserved name in
|
||||
/// `omnigraph.yaml` produces a clear error rather than getting
|
||||
/// rejected per-request.
|
||||
#[tokio::test]
|
||||
async fn load_server_settings_rejects_reserved_graph_id() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
graphs:
|
||||
policies:
|
||||
uri: /tmp/g1.omni
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let err = load_server_settings(Some(&config_path), None, None, None, None, false).await.unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains("invalid graph id 'policies'"),
|
||||
"expected reserved-name rejection, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn registry_rejects_duplicate_normalized_graph_uris() {
|
||||
|
|
@ -375,372 +351,6 @@ graphs:
|
|||
assert_eq!(listed[0].uri, graph_uri);
|
||||
}
|
||||
|
||||
// ── Four-rule mode inference matrix ───────────────────────────────
|
||||
|
||||
/// Rule 1: CLI positional URI → Single.
|
||||
#[tokio::test]
|
||||
async fn mode_inference_cli_uri_is_single() {
|
||||
let settings = load_server_settings(
|
||||
None,
|
||||
None,
|
||||
Some("/tmp/cli.omni".to_string()),
|
||||
None,
|
||||
None,
|
||||
true, // allow unauth so we get past the runtime-state check
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/cli.omni"),
|
||||
ServerConfigMode::Multi { .. } => panic!("expected Single (rule 1), got Multi"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Rule 2: --target picks one graph from `graphs:` map → Single.
|
||||
#[tokio::test]
|
||||
async fn mode_inference_cli_target_is_single() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
graphs:
|
||||
alpha:
|
||||
uri: /tmp/alpha.omni
|
||||
beta:
|
||||
uri: /tmp/beta.omni
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let settings =
|
||||
load_server_settings(Some(&config_path), None, None, Some("alpha".into()), None, true)
|
||||
.await
|
||||
.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/alpha.omni"),
|
||||
ServerConfigMode::Multi { .. } => panic!("expected Single (rule 2), got Multi"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Rule 3: `server.graph` set → Single (target picked from config).
|
||||
#[tokio::test]
|
||||
async fn mode_inference_server_graph_is_single() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
graphs:
|
||||
alpha:
|
||||
uri: /tmp/alpha.omni
|
||||
beta:
|
||||
uri: /tmp/beta.omni
|
||||
server:
|
||||
graph: beta
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/beta.omni"),
|
||||
ServerConfigMode::Multi { .. } => panic!("expected Single (rule 3), got Multi"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Rule 4: `--config` + non-empty `graphs:` + no single-mode selector → Multi.
|
||||
#[tokio::test]
|
||||
async fn mode_inference_config_plus_graphs_is_multi() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
graphs:
|
||||
alpha:
|
||||
uri: /tmp/alpha.omni
|
||||
beta:
|
||||
uri: /tmp/beta.omni
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Multi { graphs, .. } => {
|
||||
let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect();
|
||||
// BTreeMap iteration order is alphabetical.
|
||||
assert_eq!(ids, vec!["alpha", "beta"]);
|
||||
}
|
||||
ServerConfigMode::Single { .. } => panic!("expected Multi (rule 4), got Single"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn mode_inference_multi_rejects_top_level_policy_file() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
policy:
|
||||
file: ./policy.yaml
|
||||
graphs:
|
||||
alpha:
|
||||
uri: /tmp/alpha.omni
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let err = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("top-level") && msg.contains("policy.file") && msg.contains("not honored"),
|
||||
"expected top-level-not-honored guidance, got: {msg}"
|
||||
);
|
||||
assert!(
|
||||
msg.contains("graphs.<graph_id>"),
|
||||
"expected per-graph migration guidance, got: {msg}"
|
||||
);
|
||||
assert!(
|
||||
msg.contains("server.policy.file"),
|
||||
"expected server policy migration guidance, got: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn mode_inference_multi_rejects_top_level_queries() {
|
||||
// Symmetric to the policy guard: a top-level `queries:` block in
|
||||
// multi-graph mode is not honored (each graph uses its own), so it
|
||||
// is a loud error rather than a silent no-op.
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
"queries:\n q:\n file: ./q.gq\ngraphs:\n alpha:\n uri: /tmp/alpha.omni\n",
|
||||
)
|
||||
.unwrap();
|
||||
let err = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("queries") && msg.contains("not honored"),
|
||||
"top-level queries must be rejected in multi-graph mode: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn single_mode_named_graph_rejects_top_level_blocks() {
|
||||
// Serving a graph by name (`--target`/`server.graph`) uses its
|
||||
// per-graph block; a populated top-level block would be silently
|
||||
// shadowed, so boot refuses and names the per-graph location.
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
"policy:\n file: ./top.yaml\ngraphs:\n prod:\n uri: /tmp/prod.omni\n",
|
||||
)
|
||||
.unwrap();
|
||||
let err =
|
||||
load_server_settings(Some(&config_path), None, None, Some("prod".to_string()), None, true)
|
||||
.await
|
||||
.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("prod") && msg.contains("policy.file") && msg.contains("graphs.prod"),
|
||||
"named single-mode + top-level policy must refuse, naming the graph: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn single_mode_named_graph_uses_per_graph_policy_and_queries() {
|
||||
// The identity rule: `--target prod` attaches `graphs.prod`'s own
|
||||
// policy + queries, not the top-level ones (which are absent here).
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
fs::write(
|
||||
temp.path().join("prod.gq"),
|
||||
"query pq() { match { $u: User } return { $u.name } }",
|
||||
)
|
||||
.unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
"graphs:\n prod:\n uri: /tmp/prod.omni\n policy:\n file: ./prod-policy.yaml\n \
|
||||
queries:\n pq:\n file: ./prod.gq\n",
|
||||
)
|
||||
.unwrap();
|
||||
let settings =
|
||||
load_server_settings(Some(&config_path), None, None, Some("prod".to_string()), None, true)
|
||||
.await
|
||||
.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Single {
|
||||
graph_id,
|
||||
policy_file,
|
||||
queries,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(graph_id, "prod", "named single-mode keeps graph identity");
|
||||
assert!(
|
||||
policy_file
|
||||
.as_ref()
|
||||
.is_some_and(|p| p.ends_with("prod-policy.yaml")),
|
||||
"per-graph policy attached: {policy_file:?}"
|
||||
);
|
||||
assert!(queries.lookup("pq").is_some(), "per-graph query attached");
|
||||
}
|
||||
other => panic!("expected Single mode, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn mode_inference_normalizes_multi_graph_uris() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let graph = temp.path().join("alpha.omni");
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
format!(
|
||||
r#"
|
||||
graphs:
|
||||
alpha:
|
||||
uri: file://{}/
|
||||
"#,
|
||||
graph.display()
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Multi { graphs, .. } => {
|
||||
assert_eq!(graphs[0].uri, graph.to_string_lossy());
|
||||
}
|
||||
ServerConfigMode::Single { .. } => panic!("expected Multi"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Rule 5: nothing → error with migration hint.
|
||||
#[tokio::test]
|
||||
async fn mode_inference_no_inputs_errors_with_migration_hint() {
|
||||
let err = load_server_settings(None, None, None, None, None, true).await.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("no graph to serve"),
|
||||
"expected migration-hint error, got: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Rule 4 sub-case: `--config` with empty `graphs:` map and no
|
||||
/// single-mode selector → rule 5 fires (no graph to serve).
|
||||
#[tokio::test]
|
||||
async fn mode_inference_empty_graphs_map_errors() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(&config_path, "server:\n bind: 127.0.0.1:8080\n").unwrap();
|
||||
let err = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap_err();
|
||||
assert!(err.to_string().contains("no graph to serve"));
|
||||
}
|
||||
|
||||
/// `--config` + `<URI>` together: URI wins → Single (the CLI URI
|
||||
/// takes precedence over the config's graphs map).
|
||||
#[tokio::test]
|
||||
async fn mode_inference_cli_uri_overrides_graphs_map() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
graphs:
|
||||
alpha:
|
||||
uri: /tmp/alpha.omni
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let settings = load_server_settings(
|
||||
Some(&config_path),
|
||||
None,
|
||||
Some("/tmp/cli-override.omni".to_string()),
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Single { uri, .. } => {
|
||||
assert_eq!(
|
||||
uri, "/tmp/cli-override.omni",
|
||||
"CLI URI must win over graphs: map"
|
||||
);
|
||||
}
|
||||
ServerConfigMode::Multi { .. } => {
|
||||
panic!("expected Single (CLI URI wins), got Multi")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-graph `policy.file` is resolved relative to the config base_dir.
|
||||
#[tokio::test]
|
||||
async fn per_graph_policy_file_is_resolved_relative_to_base_dir() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
graphs:
|
||||
alpha:
|
||||
uri: /tmp/alpha.omni
|
||||
policy:
|
||||
file: ./policies/alpha.yaml
|
||||
beta:
|
||||
uri: /tmp/beta.omni
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap();
|
||||
let graphs = match settings.mode {
|
||||
ServerConfigMode::Multi { graphs, .. } => graphs,
|
||||
_ => panic!("expected Multi"),
|
||||
};
|
||||
// graphs is BTreeMap-iter order (alphabetical).
|
||||
let alpha = &graphs[0];
|
||||
let beta = &graphs[1];
|
||||
assert_eq!(alpha.graph_id, "alpha");
|
||||
let omnigraph_server::PolicySource::File(alpha_policy) =
|
||||
alpha.policy.as_ref().unwrap()
|
||||
else {
|
||||
panic!("yaml-configured policy must stay file-based");
|
||||
};
|
||||
assert_eq!(alpha_policy, &temp.path().join("policies/alpha.yaml"));
|
||||
assert_eq!(beta.graph_id, "beta");
|
||||
assert!(beta.policy.is_none());
|
||||
}
|
||||
|
||||
/// `server.policy.file` resolves alongside the graphs map.
|
||||
#[tokio::test]
|
||||
async fn server_policy_file_is_resolved_relative_to_base_dir() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let config_path = temp.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
server:
|
||||
policy:
|
||||
file: ./server-policy.yaml
|
||||
graphs:
|
||||
alpha:
|
||||
uri: /tmp/alpha.omni
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let settings = load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap();
|
||||
match settings.mode {
|
||||
ServerConfigMode::Multi { server_policy, .. } => {
|
||||
let omnigraph_server::PolicySource::File(path) = server_policy.unwrap() else {
|
||||
panic!("yaml-configured server policy must stay file-based");
|
||||
};
|
||||
assert_eq!(path, temp.path().join("server-policy.yaml"));
|
||||
}
|
||||
_ => panic!("expected Multi"),
|
||||
}
|
||||
}
|
||||
|
||||
/// `GET /graphs` must NOT leak the registry in Open mode without
|
||||
/// an explicit server policy. Operators who pass `--unauthenticated`
|
||||
/// opted into trusting the network for graph DATA, not for leaking
|
||||
|
|
@ -786,28 +396,6 @@ graphs:
|
|||
);
|
||||
}
|
||||
|
||||
/// `GET /graphs` returns 405 in single mode (resource exists in the
|
||||
/// API surface, just not operational without a `graphs:` map).
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn get_graphs_returns_405_in_single_mode() {
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let state = AppState::open(graph.to_string_lossy().to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
let app = build_app(state);
|
||||
let resp = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/graphs")
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::METHOD_NOT_ALLOWED);
|
||||
}
|
||||
|
||||
/// `GET /graphs` requires bearer auth when tokens are configured.
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
|
|
@ -971,52 +559,4 @@ rules:
|
|||
);
|
||||
}
|
||||
|
||||
/// Loads an `omnigraph.yaml` with two graphs and verifies multi-mode
|
||||
/// inference plus graph entry resolution. Cluster-route dispatch is
|
||||
/// covered by the route tests above.
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn server_settings_load_multi_graph_config_entries() {
|
||||
let cfg_dir = tempfile::tempdir().unwrap();
|
||||
// Real graph storage dirs (the URIs in the config must point to
|
||||
// a graph init-able location).
|
||||
let alpha_dir = cfg_dir.path().join("alpha.omni");
|
||||
let beta_dir = cfg_dir.path().join("beta.omni");
|
||||
let schema = fs::read_to_string(fixture("test.pg")).unwrap();
|
||||
Omnigraph::init(alpha_dir.to_str().unwrap(), &schema)
|
||||
.await
|
||||
.unwrap();
|
||||
Omnigraph::init(beta_dir.to_str().unwrap(), &schema)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let config_path = cfg_dir.path().join("omnigraph.yaml");
|
||||
fs::write(
|
||||
&config_path,
|
||||
format!(
|
||||
r#"
|
||||
graphs:
|
||||
alpha:
|
||||
uri: {alpha}
|
||||
beta:
|
||||
uri: {beta}
|
||||
"#,
|
||||
alpha = alpha_dir.display(),
|
||||
beta = beta_dir.display(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let settings: ServerConfig =
|
||||
load_server_settings(Some(&config_path), None, None, None, None, true).await.unwrap();
|
||||
assert!(matches!(settings.mode, ServerConfigMode::Multi { .. }));
|
||||
|
||||
match settings.mode {
|
||||
ServerConfigMode::Multi { graphs, .. } => {
|
||||
assert_eq!(graphs.len(), 2);
|
||||
let ids: Vec<&str> = graphs.iter().map(|g| g.graph_id.as_str()).collect();
|
||||
assert_eq!(ids, vec!["alpha", "beta"]);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ async fn export_route_returns_jsonl_for_branch_snapshot() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/export")
|
||||
.uri(g("/export"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", format!("Bearer {}", token))
|
||||
|
|
@ -99,7 +99,7 @@ async fn snapshot_route_returns_manifest_dataset_version() {
|
|||
let (snapshot_status, snapshot_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -131,7 +131,7 @@ async fn ingest_creates_branch_returns_metadata_and_stamps_actor() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer token-one")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -195,7 +195,7 @@ async fn ingest_existing_branch_skips_branch_create_policy_check() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -223,7 +223,7 @@ async fn ingest_without_from_returns_404_for_missing_branch_and_creates_nothing(
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&ingest).unwrap()))
|
||||
|
|
@ -264,7 +264,7 @@ async fn ingest_without_from_loads_into_existing_branch() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&ingest).unwrap()))
|
||||
|
|
@ -294,7 +294,7 @@ async fn ingest_denies_missing_branch_without_branch_create_permission() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -327,7 +327,7 @@ async fn ingest_denies_when_actor_lacks_change_permission() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -357,7 +357,7 @@ async fn ingest_rejects_payloads_over_32_mib() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&oversize).unwrap()))
|
||||
|
|
@ -419,7 +419,7 @@ async fn branch_merge_conflict_response_includes_structured_conflicts() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/merge")
|
||||
.uri(g("/branches/merge"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&merge).unwrap()))
|
||||
|
|
@ -451,7 +451,7 @@ async fn repeated_read_after_change_sees_updated_state_from_same_app() {
|
|||
let (change_status, change_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&change).unwrap()))
|
||||
|
|
@ -471,7 +471,7 @@ async fn repeated_read_after_change_sees_updated_state_from_same_app() {
|
|||
let (read_status, read_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&read).unwrap()))
|
||||
|
|
@ -497,7 +497,7 @@ async fn query_endpoint_runs_inline_read() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/query")
|
||||
.uri(g("/query"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&query).unwrap()))
|
||||
|
|
@ -524,7 +524,7 @@ async fn query_endpoint_rejects_mutation_with_400() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/query")
|
||||
.uri(g("/query"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&query).unwrap()))
|
||||
|
|
@ -555,7 +555,7 @@ async fn mutate_endpoint_runs_inline_mutation() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/mutate")
|
||||
.uri(g("/mutate"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&request).unwrap()))
|
||||
|
|
@ -580,7 +580,7 @@ async fn mutate_endpoint_runs_inline_mutation() {
|
|||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn change_endpoint_emits_deprecation_headers() {
|
||||
// `/change` is kept indefinitely for back-compat but flagged at runtime
|
||||
// per RFC 9745 (`Deprecation: true`) + RFC 8288 (`Link: </mutate>;
|
||||
// per RFC 9745 (`Deprecation: true`) + RFC 8288 (`Link: <mutate>;
|
||||
// rel="successor-version"`). The OpenAPI side is covered by
|
||||
// `openapi_change_is_deprecated` in tests/openapi.rs.
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
|
|
@ -595,7 +595,7 @@ async fn change_endpoint_emits_deprecation_headers() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&request).unwrap()))
|
||||
|
|
@ -615,11 +615,88 @@ async fn change_endpoint_emits_deprecation_headers() {
|
|||
);
|
||||
assert_eq!(
|
||||
response.headers().get("link").and_then(|v| v.to_str().ok()),
|
||||
Some("</mutate>; rel=\"successor-version\""),
|
||||
Some("<mutate>; rel=\"successor-version\""),
|
||||
"POST /change must point at /mutate via `Link` rel=successor-version (RFC 8288)"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn load_endpoint_loads_into_existing_branch() {
|
||||
// Canonical bulk-load endpoint (RFC-009 Phase 5). Same wire shape as
|
||||
// /ingest, no deprecation signal.
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = IngestRequest {
|
||||
branch: Some("main".to_string()),
|
||||
from: None,
|
||||
mode: Some(LoadMode::Merge),
|
||||
data: r#"{"type":"Person","data":{"name":"Loaded","age":7}}"#.to_string(),
|
||||
};
|
||||
let response = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(g("/load"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&request).unwrap()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(response.status(), StatusCode::OK);
|
||||
assert!(
|
||||
response.headers().get("deprecation").is_none(),
|
||||
"POST /load must not advertise itself as deprecated"
|
||||
);
|
||||
let body_bytes = to_bytes(response.into_body(), usize::MAX).await.unwrap();
|
||||
let body: Value = serde_json::from_slice(&body_bytes).unwrap();
|
||||
assert_eq!(body["branch"], "main");
|
||||
assert_eq!(body["tables"][0]["table_key"], "node:Person");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn ingest_endpoint_emits_deprecation_headers() {
|
||||
// `/ingest` is the deprecated alias of `/load` (RFC-009 Phase 5): flagged
|
||||
// at runtime per RFC 9745 (`Deprecation: true`) + RFC 8288 (`Link: <load>;
|
||||
// rel="successor-version"`). The OpenAPI side is covered by
|
||||
// `openapi_ingest_is_deprecated` in tests/openapi.rs.
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = IngestRequest {
|
||||
branch: Some("main".to_string()),
|
||||
from: None,
|
||||
mode: Some(LoadMode::Merge),
|
||||
data: r#"{"type":"Person","data":{"name":"Legacyer","age":33}}"#.to_string(),
|
||||
};
|
||||
let response = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&request).unwrap()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(response.status(), StatusCode::OK);
|
||||
assert_eq!(
|
||||
response
|
||||
.headers()
|
||||
.get("deprecation")
|
||||
.and_then(|v| v.to_str().ok()),
|
||||
Some("true"),
|
||||
"POST /ingest must advertise `Deprecation: true` (RFC 9745)"
|
||||
);
|
||||
assert_eq!(
|
||||
response.headers().get("link").and_then(|v| v.to_str().ok()),
|
||||
Some("<load>; rel=\"successor-version\""),
|
||||
"POST /ingest must point at /load via `Link` rel=successor-version (RFC 8288)"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn read_endpoint_emits_deprecation_headers() {
|
||||
// `/read` is kept indefinitely for byte-stable back-compat but flagged
|
||||
|
|
@ -637,7 +714,7 @@ async fn read_endpoint_emits_deprecation_headers() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&request).unwrap()))
|
||||
|
|
@ -657,7 +734,7 @@ async fn read_endpoint_emits_deprecation_headers() {
|
|||
);
|
||||
assert_eq!(
|
||||
response.headers().get("link").and_then(|v| v.to_str().ok()),
|
||||
Some("</query>; rel=\"successor-version\""),
|
||||
Some("<query>; rel=\"successor-version\""),
|
||||
"POST /read must point at /query via `Link` rel=successor-version (RFC 8288)"
|
||||
);
|
||||
}
|
||||
|
|
@ -680,7 +757,7 @@ async fn query_endpoint_does_not_emit_deprecation_headers() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/query")
|
||||
.uri(g("/query"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&request).unwrap()))
|
||||
|
|
@ -712,7 +789,7 @@ async fn change_endpoint_accepts_legacy_field_names() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&legacy_body).unwrap()))
|
||||
|
|
@ -731,7 +808,7 @@ async fn change_endpoint_accepts_legacy_field_names() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&canonical_body).unwrap()))
|
||||
|
|
@ -749,7 +826,7 @@ async fn remote_branch_list_create_merge_flow_works() {
|
|||
let (list_status, list_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -765,7 +842,7 @@ async fn remote_branch_list_create_merge_flow_works() {
|
|||
let (create_status, create_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&create).unwrap()))
|
||||
|
|
@ -779,7 +856,7 @@ async fn remote_branch_list_create_merge_flow_works() {
|
|||
let (list_status, list_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -797,7 +874,7 @@ async fn remote_branch_list_create_merge_flow_works() {
|
|||
let (change_status, change_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&change).unwrap()))
|
||||
|
|
@ -818,7 +895,7 @@ async fn remote_branch_list_create_merge_flow_works() {
|
|||
let (read_status, read_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&read_main_before).unwrap()))
|
||||
|
|
@ -835,7 +912,7 @@ async fn remote_branch_list_create_merge_flow_works() {
|
|||
let (merge_status, merge_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/merge")
|
||||
.uri(g("/branches/merge"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&merge).unwrap()))
|
||||
|
|
@ -857,7 +934,7 @@ async fn remote_branch_list_create_merge_flow_works() {
|
|||
let (read_status, read_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&read_main_after).unwrap()))
|
||||
|
|
@ -880,7 +957,7 @@ async fn remote_branch_delete_flow_works() {
|
|||
let (create_status, _) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&create).unwrap()))
|
||||
|
|
@ -892,7 +969,7 @@ async fn remote_branch_delete_flow_works() {
|
|||
let (delete_status, delete_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/feature")
|
||||
.uri(g("/branches/feature"))
|
||||
.method(Method::DELETE)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -904,7 +981,7 @@ async fn remote_branch_delete_flow_works() {
|
|||
let (list_status, list_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -932,7 +1009,7 @@ async fn branch_delete_denies_without_policy_permission() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/feature")
|
||||
.uri(g("/branches/feature"))
|
||||
.method(Method::DELETE)
|
||||
.header("authorization", "Bearer token-team")
|
||||
.body(Body::empty())
|
||||
|
|
@ -1004,7 +1081,7 @@ query vector_search_string($q: String) {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&read).unwrap()))
|
||||
|
|
@ -1057,7 +1134,7 @@ async fn change_conflict_returns_manifest_conflict_409() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(
|
||||
|
|
@ -1129,7 +1206,7 @@ async fn change_concurrent_inserts_same_key_serialize_without_409() {
|
|||
})
|
||||
.unwrap();
|
||||
let req = Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -1161,7 +1238,7 @@ async fn change_concurrent_inserts_same_key_serialize_without_409() {
|
|||
let (snapshot_status, snapshot_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -1242,7 +1319,7 @@ async fn change_concurrent_updates_same_key_serialize_via_publisher_cas() {
|
|||
})
|
||||
.unwrap();
|
||||
let req = Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -1351,7 +1428,7 @@ query insert_c($name: String) {
|
|||
})
|
||||
.unwrap();
|
||||
let req = Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -1368,7 +1445,7 @@ query insert_c($name: String) {
|
|||
})
|
||||
.unwrap();
|
||||
let req = Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -1397,7 +1474,7 @@ query insert_c($name: String) {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -1505,7 +1582,7 @@ async fn ingest_per_actor_admission_cap_returns_429() {
|
|||
})
|
||||
.unwrap();
|
||||
let req = Request::builder()
|
||||
.uri("/ingest")
|
||||
.uri(g("/ingest"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer flooder-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
|
|||
|
|
@ -5,9 +5,12 @@ use std::fs;
|
|||
|
||||
use axum::body::{Body, to_bytes};
|
||||
use axum::http::{Method, Request, StatusCode};
|
||||
use omnigraph_server::api::ErrorOutput;
|
||||
use omnigraph::db::Omnigraph;
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
use omnigraph_server::api::{ErrorOutput, ReadRequest};
|
||||
use omnigraph_server::{AppState, build_app};
|
||||
use serde_json::Value;
|
||||
use serial_test::serial;
|
||||
use tower::ServiceExt;
|
||||
|
||||
|
||||
|
|
@ -245,7 +248,7 @@ async fn concurrent_branch_ops_morphological_matrix() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -366,7 +369,7 @@ async fn concurrent_branch_ops_morphological_matrix() {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -457,6 +460,180 @@ async fn cluster_boot_serves_applied_state() {
|
|||
assert_eq!(status, StatusCode::OK, "{body}");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
#[serial]
|
||||
async fn cluster_boot_injects_embedding_provider_config() {
|
||||
const EMBED_SCHEMA: &str = r#"
|
||||
node Doc {
|
||||
slug: String @key
|
||||
title: String @index
|
||||
embedding: Vector(4) @embed("title", model="cluster-mock") @index
|
||||
}
|
||||
"#;
|
||||
const EMBED_QUERY: &str = r#"
|
||||
query vector_search_string($q: String) {
|
||||
match { $d: Doc }
|
||||
return { $d.slug, $d.title }
|
||||
order { nearest($d.embedding, $q) }
|
||||
limit 3
|
||||
}
|
||||
"#;
|
||||
|
||||
let alpha = mock_embedding("alpha", 4);
|
||||
let beta = mock_embedding("beta", 4);
|
||||
let gamma = mock_embedding("gamma", 4);
|
||||
let data = format!(
|
||||
concat!(
|
||||
r#"{{"type":"Doc","data":{{"slug":"alpha-doc","title":"alpha guide","embedding":[{}]}}}}"#,
|
||||
"\n",
|
||||
r#"{{"type":"Doc","data":{{"slug":"beta-doc","title":"beta guide","embedding":[{}]}}}}"#,
|
||||
"\n",
|
||||
r#"{{"type":"Doc","data":{{"slug":"gamma-doc","title":"gamma handbook","embedding":[{}]}}}}"#
|
||||
),
|
||||
format_vector(&alpha),
|
||||
format_vector(&beta),
|
||||
format_vector(&gamma),
|
||||
);
|
||||
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
fs::write(temp.path().join("docs.pg"), EMBED_SCHEMA).unwrap();
|
||||
fs::write(temp.path().join("search.gq"), EMBED_QUERY).unwrap();
|
||||
fs::write(
|
||||
temp.path().join("cluster.yaml"),
|
||||
r#"
|
||||
version: 1
|
||||
providers:
|
||||
embedding:
|
||||
default:
|
||||
kind: mock
|
||||
model: cluster-mock
|
||||
graphs:
|
||||
knowledge:
|
||||
schema: ./docs.pg
|
||||
embedding_provider: default
|
||||
queries:
|
||||
vector_search_string:
|
||||
file: ./search.gq
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let import = omnigraph_cluster::import_config_dir(temp.path()).await;
|
||||
assert!(import.ok, "{:?}", import.diagnostics);
|
||||
let apply = omnigraph_cluster::apply_config_dir(temp.path()).await;
|
||||
assert!(apply.ok && apply.converged, "{:?}", apply.diagnostics);
|
||||
|
||||
let graph_uri = temp
|
||||
.path()
|
||||
.join("graphs/knowledge.omni")
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
let mut db = Omnigraph::open(&graph_uri).await.unwrap();
|
||||
load_jsonl(&mut db, &data, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let _guard = EnvGuard::set(&[
|
||||
("OMNIGRAPH_EMBEDDINGS_MOCK", None),
|
||||
("OMNIGRAPH_EMBED_PROVIDER", None),
|
||||
("OMNIGRAPH_EMBED_BASE_URL", None),
|
||||
("OMNIGRAPH_EMBED_MODEL", None),
|
||||
("OPENROUTER_API_KEY", None),
|
||||
("OPENAI_API_KEY", None),
|
||||
("GEMINI_API_KEY", None),
|
||||
]);
|
||||
let settings = cluster_settings(temp.path()).await.unwrap();
|
||||
let omnigraph_server::ServerConfigMode::Multi {
|
||||
graphs,
|
||||
config_path,
|
||||
server_policy,
|
||||
} = settings.mode
|
||||
else {
|
||||
panic!("cluster boot must select multi-graph routing");
|
||||
};
|
||||
let state = omnigraph_server::open_multi_graph_state(
|
||||
graphs,
|
||||
Vec::new(),
|
||||
server_policy.as_ref(),
|
||||
config_path,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let app = build_app(state);
|
||||
|
||||
let read = ReadRequest {
|
||||
query_source: EMBED_QUERY.to_string(),
|
||||
query_name: Some("vector_search_string".to_string()),
|
||||
params: Some(serde_json::json!({ "q": "alpha" })),
|
||||
branch: Some("main".to_string()),
|
||||
snapshot: None,
|
||||
};
|
||||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/graphs/knowledge/read")
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&read).unwrap()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(status, StatusCode::OK, "{body}");
|
||||
assert_eq!(body["row_count"], 3);
|
||||
assert_eq!(body["rows"][0]["d.slug"], "alpha-doc");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
#[serial]
|
||||
async fn cluster_boot_refuses_missing_embedding_secret_env() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
fs::write(
|
||||
temp.path().join("people.pg"),
|
||||
"\nnode Person {\n name: String @key\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
temp.path().join("people.gq"),
|
||||
"\nquery find_person($name: String) {\n match { $p: Person { name: $name } }\n return { $p.name }\n}\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::write(
|
||||
temp.path().join("cluster.yaml"),
|
||||
r#"
|
||||
version: 1
|
||||
providers:
|
||||
embedding:
|
||||
default:
|
||||
kind: openai-compatible
|
||||
api_key: ${OG_TEST_MISSING_EMBED_KEY}
|
||||
graphs:
|
||||
knowledge:
|
||||
schema: ./people.pg
|
||||
embedding_provider: default
|
||||
queries:
|
||||
find_person:
|
||||
file: ./people.gq
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let import = omnigraph_cluster::import_config_dir(temp.path()).await;
|
||||
assert!(import.ok, "{:?}", import.diagnostics);
|
||||
let apply = omnigraph_cluster::apply_config_dir(temp.path()).await;
|
||||
assert!(apply.ok && apply.converged, "{:?}", apply.diagnostics);
|
||||
|
||||
let _guard = EnvGuard::set(&[
|
||||
("OG_TEST_MISSING_EMBED_KEY", None),
|
||||
("OMNIGRAPH_EMBEDDINGS_MOCK", None),
|
||||
]);
|
||||
let err = cluster_settings(temp.path()).await.unwrap_err();
|
||||
let message = err.to_string();
|
||||
assert!(
|
||||
message.contains("embedding provider for graph 'knowledge'"),
|
||||
"{message}"
|
||||
);
|
||||
assert!(message.contains("OG_TEST_MISSING_EMBED_KEY"), "{message}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn cluster_boot_wires_policy_bindings_into_cedar_slots() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
|
|
@ -540,31 +717,15 @@ graphs:
|
|||
|
||||
#[tokio::test]
|
||||
async fn cluster_boot_refusals() {
|
||||
// Mutual exclusion with --config / URI.
|
||||
// RFC-011 cluster-only: with no --cluster, boot refuses with the
|
||||
// cluster-required remedy.
|
||||
let err = omnigraph_server::load_server_settings(None, None, true)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("boots from a cluster"), "{err}");
|
||||
|
||||
let temp = converged_cluster_dir("").await;
|
||||
let dir = temp.path().to_path_buf();
|
||||
let err = omnigraph_server::load_server_settings(
|
||||
Some(&dir.join("omnigraph.yaml")),
|
||||
Some(&dir),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("exclusive boot source"), "{err}");
|
||||
let err = omnigraph_server::load_server_settings(
|
||||
None,
|
||||
Some(&dir),
|
||||
Some("file:///tmp/x.omni".to_string()),
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("exclusive boot source"), "{err}");
|
||||
|
||||
// Tampered catalog blob refuses boot with the remedy.
|
||||
let blob_dir = dir.join("__cluster/resources/query/knowledge/find_person");
|
||||
|
|
|
|||
|
|
@ -8,10 +8,9 @@ use axum::body::{Body, to_bytes};
|
|||
use axum::http::{Method, Request, StatusCode};
|
||||
use omnigraph::db::Omnigraph;
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
use omnigraph_server::{ApiDoc, AppState, build_app};
|
||||
use omnigraph_server::{AppState, build_app, served_openapi};
|
||||
use serde_json::Value;
|
||||
use tower::ServiceExt;
|
||||
use utoipa::OpenApi;
|
||||
|
||||
fn fixture(name: &str) -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
|
|
@ -71,7 +70,10 @@ async fn json_response(app: &Router, request: Request<Body>) -> (StatusCode, Val
|
|||
}
|
||||
|
||||
fn openapi_doc() -> utoipa::openapi::OpenApi {
|
||||
ApiDoc::openapi()
|
||||
// RFC-011 cluster-only: the canonical committed spec is the SERVED
|
||||
// shape — protected routes nested under `/graphs/{graph_id}/…`,
|
||||
// `/healthz` and `/graphs` flat. This matches what the server serves.
|
||||
served_openapi()
|
||||
}
|
||||
|
||||
fn openapi_json() -> Value {
|
||||
|
|
@ -159,25 +161,28 @@ fn openapi_info_contains_version() {
|
|||
// Path coverage tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// The canonical served spec keeps `/healthz` and `/graphs` flat; every
|
||||
// protected route nests under `/graphs/{graph_id}/…`.
|
||||
const EXPECTED_PATHS: &[&str] = &[
|
||||
"/healthz",
|
||||
"/graphs",
|
||||
"/snapshot",
|
||||
"/read",
|
||||
"/query",
|
||||
"/export",
|
||||
"/change",
|
||||
"/mutate",
|
||||
"/queries",
|
||||
"/queries/{name}",
|
||||
"/schema",
|
||||
"/schema/apply",
|
||||
"/ingest",
|
||||
"/branches",
|
||||
"/branches/{branch}",
|
||||
"/branches/merge",
|
||||
"/commits",
|
||||
"/commits/{commit_id}",
|
||||
"/graphs/{graph_id}/snapshot",
|
||||
"/graphs/{graph_id}/read",
|
||||
"/graphs/{graph_id}/query",
|
||||
"/graphs/{graph_id}/export",
|
||||
"/graphs/{graph_id}/change",
|
||||
"/graphs/{graph_id}/mutate",
|
||||
"/graphs/{graph_id}/queries",
|
||||
"/graphs/{graph_id}/queries/{name}",
|
||||
"/graphs/{graph_id}/schema",
|
||||
"/graphs/{graph_id}/schema/apply",
|
||||
"/graphs/{graph_id}/load",
|
||||
"/graphs/{graph_id}/ingest",
|
||||
"/graphs/{graph_id}/branches",
|
||||
"/graphs/{graph_id}/branches/{branch}",
|
||||
"/graphs/{graph_id}/branches/merge",
|
||||
"/graphs/{graph_id}/commits",
|
||||
"/graphs/{graph_id}/commits/{commit_id}",
|
||||
];
|
||||
|
||||
#[test]
|
||||
|
|
@ -221,25 +226,25 @@ fn openapi_healthz_is_get() {
|
|||
#[test]
|
||||
fn openapi_read_is_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/read"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/read"]["post"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_export_is_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/export"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/export"]["post"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_change_is_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/change"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/change"]["post"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_mutate_is_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/mutate"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/mutate"]["post"].is_object());
|
||||
}
|
||||
|
||||
// Deprecation flagging — `/read` and `/change` are kept indefinitely for
|
||||
|
|
@ -252,7 +257,7 @@ fn openapi_mutate_is_post() {
|
|||
fn openapi_read_is_deprecated() {
|
||||
let doc = openapi_json();
|
||||
assert_eq!(
|
||||
doc["paths"]["/read"]["post"]["deprecated"],
|
||||
doc["paths"]["/graphs/{graph_id}/read"]["post"]["deprecated"],
|
||||
serde_json::Value::Bool(true),
|
||||
"/read must be flagged deprecated in OpenAPI; use /query instead"
|
||||
);
|
||||
|
|
@ -262,7 +267,7 @@ fn openapi_read_is_deprecated() {
|
|||
fn openapi_change_is_deprecated() {
|
||||
let doc = openapi_json();
|
||||
assert_eq!(
|
||||
doc["paths"]["/change"]["post"]["deprecated"],
|
||||
doc["paths"]["/graphs/{graph_id}/change"]["post"]["deprecated"],
|
||||
serde_json::Value::Bool(true),
|
||||
"/change must be flagged deprecated in OpenAPI; use /mutate instead"
|
||||
);
|
||||
|
|
@ -271,7 +276,7 @@ fn openapi_change_is_deprecated() {
|
|||
#[test]
|
||||
fn openapi_query_is_not_deprecated() {
|
||||
let doc = openapi_json();
|
||||
let deprecated = doc["paths"]["/query"]["post"]
|
||||
let deprecated = doc["paths"]["/graphs/{graph_id}/query"]["post"]
|
||||
.get("deprecated")
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
.unwrap_or(false);
|
||||
|
|
@ -284,7 +289,7 @@ fn openapi_query_is_not_deprecated() {
|
|||
#[test]
|
||||
fn openapi_mutate_is_not_deprecated() {
|
||||
let doc = openapi_json();
|
||||
let deprecated = doc["paths"]["/mutate"]["post"]
|
||||
let deprecated = doc["paths"]["/graphs/{graph_id}/mutate"]["post"]
|
||||
.get("deprecated")
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
.unwrap_or(false);
|
||||
|
|
@ -297,38 +302,64 @@ fn openapi_mutate_is_not_deprecated() {
|
|||
#[test]
|
||||
fn openapi_ingest_is_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/ingest"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/ingest"]["post"].is_object());
|
||||
}
|
||||
|
||||
// The spec must document a POST operation on `/graphs/{graph_id}/load`, and
// that operation must not be flagged deprecated. A missing or non-boolean
// `deprecated` key is treated as "not deprecated" (`unwrap_or(false)`).
#[test]
|
||||
fn openapi_load_is_not_deprecated() {
|
||||
// RFC-009 Phase 5: /load is the canonical bulk-load endpoint.
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/load"]["post"].is_object());
|
||||
let deprecated = doc["paths"]["/graphs/{graph_id}/load"]["post"]
|
||||
.get("deprecated")
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
.unwrap_or(false);
|
||||
assert!(
|
||||
!deprecated,
|
||||
"/load is the canonical load endpoint and must not be deprecated"
|
||||
);
|
||||
}
|
||||
|
||||
// The POST operation on `/graphs/{graph_id}/ingest` must carry an explicit
// `deprecated: true`. Indexing a missing key on a `serde_json::Value` yields
// `Null`, so an absent flag fails the equality check rather than panicking.
#[test]
|
||||
fn openapi_ingest_is_deprecated() {
|
||||
// RFC-009 Phase 5: /ingest is now the deprecated alias of /load.
|
||||
let doc = openapi_json();
|
||||
assert_eq!(
|
||||
doc["paths"]["/graphs/{graph_id}/ingest"]["post"]["deprecated"],
|
||||
serde_json::Value::Bool(true),
|
||||
"/ingest must be flagged deprecated now that /load is canonical"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_branches_supports_get_and_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/branches"]["get"].is_object());
|
||||
assert!(doc["paths"]["/branches"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/branches"]["get"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/branches"]["post"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_branch_delete_is_delete() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/branches/{branch}"]["delete"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/branches/{branch}"]["delete"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_branch_merge_is_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/branches/merge"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/branches/merge"]["post"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_commits_is_get() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/commits"]["get"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/commits"]["get"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openapi_commit_show_is_get() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/commits/{commit_id}"]["get"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/commits/{commit_id}"]["get"].is_object());
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -483,13 +514,13 @@ fn query_request_query_is_required() {
|
|||
#[test]
|
||||
fn openapi_query_is_post() {
|
||||
let doc = openapi_json();
|
||||
assert!(doc["paths"]["/query"]["post"].is_object());
|
||||
assert!(doc["paths"]["/graphs/{graph_id}/query"]["post"].is_object());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_endpoint_documents_mutation_400() {
|
||||
let doc = openapi_json();
|
||||
let four_hundred = &doc["paths"]["/query"]["post"]["responses"]["400"];
|
||||
let four_hundred = &doc["paths"]["/graphs/{graph_id}/query"]["post"]["responses"]["400"];
|
||||
let description = four_hundred["description"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
description.contains("mutations") || description.contains("POST /mutate"),
|
||||
|
|
@ -700,20 +731,21 @@ fn openapi_defines_bearer_token_security_scheme() {
|
|||
fn protected_endpoints_reference_bearer_token_security() {
|
||||
let doc = openapi_json();
|
||||
let protected_paths = [
|
||||
("/read", "post"),
|
||||
("/change", "post"),
|
||||
("/schema/apply", "post"),
|
||||
("/queries", "get"),
|
||||
("/queries/{name}", "post"),
|
||||
("/ingest", "post"),
|
||||
("/export", "post"),
|
||||
("/snapshot", "get"),
|
||||
("/branches", "get"),
|
||||
("/branches", "post"),
|
||||
("/branches/{branch}", "delete"),
|
||||
("/branches/merge", "post"),
|
||||
("/commits", "get"),
|
||||
("/commits/{commit_id}", "get"),
|
||||
("/graphs/{graph_id}/read", "post"),
|
||||
("/graphs/{graph_id}/change", "post"),
|
||||
("/graphs/{graph_id}/schema/apply", "post"),
|
||||
("/graphs/{graph_id}/queries", "get"),
|
||||
("/graphs/{graph_id}/queries/{name}", "post"),
|
||||
("/graphs/{graph_id}/load", "post"),
|
||||
("/graphs/{graph_id}/ingest", "post"),
|
||||
("/graphs/{graph_id}/export", "post"),
|
||||
("/graphs/{graph_id}/snapshot", "get"),
|
||||
("/graphs/{graph_id}/branches", "get"),
|
||||
("/graphs/{graph_id}/branches", "post"),
|
||||
("/graphs/{graph_id}/branches/{branch}", "delete"),
|
||||
("/graphs/{graph_id}/branches/merge", "post"),
|
||||
("/graphs/{graph_id}/commits", "get"),
|
||||
("/graphs/{graph_id}/commits/{commit_id}", "get"),
|
||||
];
|
||||
|
||||
for (path, method) in protected_paths {
|
||||
|
|
@ -745,7 +777,7 @@ fn healthz_does_not_require_security() {
|
|||
#[test]
|
||||
fn branch_delete_has_branch_path_parameter() {
|
||||
let doc = openapi_json();
|
||||
let params = doc["paths"]["/branches/{branch}"]["delete"]["parameters"]
|
||||
let params = doc["paths"]["/graphs/{graph_id}/branches/{branch}"]["delete"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params
|
||||
|
|
@ -760,7 +792,7 @@ fn branch_delete_has_branch_path_parameter() {
|
|||
#[test]
|
||||
fn commit_show_has_commit_id_path_parameter() {
|
||||
let doc = openapi_json();
|
||||
let params = doc["paths"]["/commits/{commit_id}"]["get"]["parameters"]
|
||||
let params = doc["paths"]["/graphs/{graph_id}/commits/{commit_id}"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_commit_id = params
|
||||
|
|
@ -775,7 +807,7 @@ fn commit_show_has_commit_id_path_parameter() {
|
|||
#[test]
|
||||
fn snapshot_has_branch_query_parameter() {
|
||||
let doc = openapi_json();
|
||||
let params = doc["paths"]["/snapshot"]["get"]["parameters"]
|
||||
let params = doc["paths"]["/graphs/{graph_id}/snapshot"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params
|
||||
|
|
@ -790,7 +822,7 @@ fn snapshot_has_branch_query_parameter() {
|
|||
#[test]
|
||||
fn commits_has_branch_query_parameter() {
|
||||
let doc = openapi_json();
|
||||
let params = doc["paths"]["/commits"]["get"]["parameters"]
|
||||
let params = doc["paths"]["/graphs/{graph_id}/commits"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params
|
||||
|
|
@ -830,7 +862,7 @@ fn openapi_operations_have_tags() {
|
|||
#[test]
|
||||
fn read_endpoint_200_references_read_output_schema() {
|
||||
let doc = openapi_json();
|
||||
let content = &doc["paths"]["/read"]["post"]["responses"]["200"]["content"];
|
||||
let content = &doc["paths"]["/graphs/{graph_id}/read"]["post"]["responses"]["200"]["content"];
|
||||
let schema = &content["application/json"]["schema"];
|
||||
let ref_path = schema["$ref"].as_str().unwrap();
|
||||
assert!(
|
||||
|
|
@ -842,7 +874,7 @@ fn read_endpoint_200_references_read_output_schema() {
|
|||
#[test]
|
||||
fn change_endpoint_200_references_change_output_schema() {
|
||||
let doc = openapi_json();
|
||||
let content = &doc["paths"]["/change"]["post"]["responses"]["200"]["content"];
|
||||
let content = &doc["paths"]["/graphs/{graph_id}/change"]["post"]["responses"]["200"]["content"];
|
||||
let schema = &content["application/json"]["schema"];
|
||||
let ref_path = schema["$ref"].as_str().unwrap();
|
||||
assert!(
|
||||
|
|
@ -867,11 +899,11 @@ fn healthz_200_references_health_output_schema() {
|
|||
fn error_responses_reference_error_output_schema() {
|
||||
let doc = openapi_json();
|
||||
let paths_with_errors = [
|
||||
("/read", "post", "400"),
|
||||
("/read", "post", "401"),
|
||||
("/change", "post", "400"),
|
||||
("/change", "post", "409"),
|
||||
("/branches", "post", "409"),
|
||||
("/graphs/{graph_id}/read", "post", "400"),
|
||||
("/graphs/{graph_id}/read", "post", "401"),
|
||||
("/graphs/{graph_id}/change", "post", "400"),
|
||||
("/graphs/{graph_id}/change", "post", "409"),
|
||||
("/graphs/{graph_id}/branches", "post", "409"),
|
||||
];
|
||||
|
||||
for (path, method, status) in paths_with_errors {
|
||||
|
|
@ -893,13 +925,13 @@ fn error_responses_reference_error_output_schema() {
|
|||
fn post_endpoints_have_request_body() {
|
||||
let doc = openapi_json();
|
||||
let post_paths = [
|
||||
("/read", "ReadRequest"),
|
||||
("/change", "ChangeRequest"),
|
||||
("/schema/apply", "SchemaApplyRequest"),
|
||||
("/ingest", "IngestRequest"),
|
||||
("/export", "ExportRequest"),
|
||||
("/branches", "BranchCreateRequest"),
|
||||
("/branches/merge", "BranchMergeRequest"),
|
||||
("/graphs/{graph_id}/read", "ReadRequest"),
|
||||
("/graphs/{graph_id}/change", "ChangeRequest"),
|
||||
("/graphs/{graph_id}/schema/apply", "SchemaApplyRequest"),
|
||||
("/graphs/{graph_id}/ingest", "IngestRequest"),
|
||||
("/graphs/{graph_id}/export", "ExportRequest"),
|
||||
("/graphs/{graph_id}/branches", "BranchCreateRequest"),
|
||||
("/graphs/{graph_id}/branches/merge", "BranchMergeRequest"),
|
||||
];
|
||||
|
||||
for (path, expected_schema) in post_paths {
|
||||
|
|
@ -920,7 +952,7 @@ fn post_endpoints_have_request_body() {
|
|||
#[test]
|
||||
fn invoke_stored_query_request_body_is_optional() {
|
||||
let doc = openapi_json();
|
||||
let request_body = &doc["paths"]["/queries/{name}"]["post"]["requestBody"];
|
||||
let request_body = &doc["paths"]["/graphs/{graph_id}/queries/{name}"]["post"]["requestBody"];
|
||||
assert!(
|
||||
request_body.is_object(),
|
||||
"POST /queries/{{name}} should document its optional request body"
|
||||
|
|
@ -1023,12 +1055,14 @@ async fn auth_mode_spec_has_security_on_protected_operations() {
|
|||
.body(Body::empty())
|
||||
.unwrap();
|
||||
let (_, json) = json_response(&app, request).await;
|
||||
// RFC-011 cluster-only: the served spec always nests protected
|
||||
// routes under `/graphs/{graph_id}/...`.
|
||||
let protected_paths = [
|
||||
("/read", "post"),
|
||||
("/change", "post"),
|
||||
("/snapshot", "get"),
|
||||
("/branches", "get"),
|
||||
("/commits", "get"),
|
||||
("/graphs/{graph_id}/read", "post"),
|
||||
("/graphs/{graph_id}/change", "post"),
|
||||
("/graphs/{graph_id}/snapshot", "get"),
|
||||
("/graphs/{graph_id}/branches", "get"),
|
||||
("/graphs/{graph_id}/commits", "get"),
|
||||
];
|
||||
for (path, method) in protected_paths {
|
||||
let security = &json["paths"][path][method]["security"];
|
||||
|
|
@ -1045,22 +1079,6 @@ async fn auth_mode_spec_has_security_on_protected_operations() {
|
|||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_spec_matches_static_generation() {
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
.body(Body::empty())
|
||||
.unwrap();
|
||||
let (_, served) = json_response(&app, request).await;
|
||||
let static_doc = openapi_json();
|
||||
assert_eq!(
|
||||
served, static_doc,
|
||||
"auth-mode served spec must match static generation"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_healthz_still_has_no_security() {
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
|
|
@ -1366,8 +1384,9 @@ async fn multi_mode_operation_ids_are_unique() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn single_mode_openapi_unchanged_by_cluster_filter() {
|
||||
// Regression: single mode still emits the legacy flat surface.
|
||||
async fn served_spec_always_nests_under_cluster_prefix() {
|
||||
// RFC-011 cluster-only: even a one-graph convenience app serves the
|
||||
// nested cluster surface and never the flat protected routes.
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
|
|
@ -1377,16 +1396,37 @@ async fn single_mode_openapi_unchanged_by_cluster_filter() {
|
|||
let (_, json) = json_response(&app, request).await;
|
||||
let paths = json["paths"].as_object().unwrap();
|
||||
let path_keys: HashSet<&str> = paths.keys().map(|k| k.as_str()).collect();
|
||||
for expected in EXPECTED_PATHS {
|
||||
assert!(
|
||||
path_keys.contains(expected),
|
||||
"single mode must still emit flat path: {expected}"
|
||||
);
|
||||
}
|
||||
for cluster in EXPECTED_CLUSTER_PATHS {
|
||||
assert!(
|
||||
!path_keys.contains(cluster),
|
||||
"single mode must NOT emit cluster path: {cluster}"
|
||||
path_keys.contains(cluster),
|
||||
"served spec must emit cluster path: {cluster}. Found: {path_keys:?}"
|
||||
);
|
||||
}
|
||||
// The flat protected routes must NOT appear — only the nested
|
||||
// cluster surface plus the always-flat `/healthz` and `/graphs`.
|
||||
let flat_protected = [
|
||||
"/snapshot",
|
||||
"/read",
|
||||
"/query",
|
||||
"/export",
|
||||
"/change",
|
||||
"/mutate",
|
||||
"/queries",
|
||||
"/queries/{name}",
|
||||
"/schema",
|
||||
"/schema/apply",
|
||||
"/load",
|
||||
"/ingest",
|
||||
"/branches",
|
||||
"/branches/{branch}",
|
||||
"/branches/merge",
|
||||
"/commits",
|
||||
"/commits/{commit_id}",
|
||||
];
|
||||
for flat in flat_protected {
|
||||
assert!(
|
||||
!path_keys.contains(flat),
|
||||
"served spec must NOT emit flat protected path: {flat}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ async fn server_opens_s3_graph_directly_and_serves_snapshot_and_read() {
|
|||
let (snapshot_status, snapshot_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot")
|
||||
.uri(g("/snapshot"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer s3-token")
|
||||
.body(Body::empty())
|
||||
|
|
@ -63,7 +63,7 @@ async fn server_opens_s3_graph_directly_and_serves_snapshot_and_read() {
|
|||
let (read_status, read_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("authorization", "Bearer s3-token")
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -134,11 +134,8 @@ async fn server_boots_cluster_from_bare_storage_uri_and_serves_query() {
|
|||
}
|
||||
|
||||
let settings = omnigraph_server::load_server_settings(
|
||||
None,
|
||||
Some(&std::path::PathBuf::from(&root)),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
true,
|
||||
)
|
||||
.await
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
//! Moved verbatim from tests/server.rs in the modularization.
|
||||
|
||||
use std::fs;
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::http::{Method, Request, StatusCode};
|
||||
|
|
@ -11,7 +12,9 @@ use omnigraph::loader::LoadMode;
|
|||
use omnigraph_server::api::{
|
||||
ChangeRequest, ErrorOutput, ReadRequest, SchemaApplyRequest, SchemaOutput,
|
||||
};
|
||||
use omnigraph_server::{AppState, build_app};
|
||||
use omnigraph_server::{
|
||||
AppState, GraphHandle, GraphId, GraphKey, PolicyEngine, build_app, workload,
|
||||
};
|
||||
use serde_json::json;
|
||||
|
||||
|
||||
|
|
@ -30,7 +33,7 @@ async fn schema_apply_route_updates_graph_for_authorized_admin() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -54,6 +57,111 @@ async fn schema_apply_route_updates_graph_for_authorized_admin() {
|
|||
);
|
||||
}
|
||||
|
||||
// A server built with a cluster config path must refuse POST `/schema/apply`:
// the response is 409 CONFLICT with an error mentioning "cluster apply", and
// the graph on disk is left without the `nickname` property the request tried
// to add. The request carries no `authorization` header, and the handle has
// neither a policy nor stored queries.
#[tokio::test]
|
||||
async fn schema_apply_route_refuses_cluster_backed_server_mode() {
|
||||
let temp = init_graph_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await;
|
||||
let graph = graph_path(temp.path());
|
||||
let graph_uri = graph.to_string_lossy().to_string();
|
||||
let engine = Omnigraph::open(&graph_uri).await.unwrap();
|
||||
let handle = Arc::new(GraphHandle {
|
||||
key: GraphKey::cluster(GraphId::try_from("default").unwrap()),
|
||||
uri: graph_uri.clone(),
|
||||
engine: Arc::new(engine),
|
||||
policy: None,
|
||||
queries: None,
|
||||
});
|
||||
// NOTE(review): the trailing `Some(.../cluster.yaml)` argument is presumably
// what marks the server as cluster-backed; this test never writes that file —
// confirm against `AppState::new_multi`.
let state = AppState::new_multi(
|
||||
vec![handle],
|
||||
Vec::new(),
|
||||
None,
|
||||
workload::WorkloadController::from_env(),
|
||||
Some(temp.path().join("cluster.yaml")),
|
||||
)
|
||||
.unwrap();
|
||||
let app = build_app(state);
|
||||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(
|
||||
serde_json::to_vec(&SchemaApplyRequest {
|
||||
schema_source: additive_schema_with_nickname(),
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
let (status, payload) = json_response(&app, request).await;
|
||||
|
||||
assert_eq!(status, StatusCode::CONFLICT, "body: {payload}");
|
||||
assert!(
|
||||
payload["error"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.contains("cluster apply"),
|
||||
"body: {payload}"
|
||||
);
|
||||
// Reopen the graph from its URI to check the refusal had no side effect on
// the stored schema.
let reopened = Omnigraph::open(&graph_uri).await.unwrap();
|
||||
assert!(
|
||||
!reopened.catalog().node_types["Person"]
|
||||
.properties
|
||||
.contains_key("nickname"),
|
||||
"cluster-backed schema apply must not mutate the graph"
|
||||
);
|
||||
}
|
||||
|
||||
// Ordering check for a cluster-backed server with a policy attached: an
// authenticated actor posting to `/schema/apply` must receive 403 FORBIDDEN,
// not the 409 that an otherwise identical policy-free setup returns.
#[tokio::test]
|
||||
async fn schema_apply_route_cluster_backed_denies_unauthorized_actor_before_409() {
|
||||
// The cluster-backed 409 is reported AFTER the Cedar gate, so an actor
|
||||
// without `schema_apply` permission gets a 403 — never a 409 that would
|
||||
// disclose the server is cluster-backed (401 → 403 → 409, no topology leak
|
||||
// before authorization). POLICY_YAML grants read/export but not schema_apply,
|
||||
// so act-ragnor is denied.
|
||||
let temp = init_graph_with_schema(&fs::read_to_string(fixture("test.pg")).unwrap()).await;
|
||||
let graph = graph_path(temp.path());
|
||||
let graph_uri = graph.to_string_lossy().to_string();
|
||||
let engine = Omnigraph::open(&graph_uri).await.unwrap();
|
||||
let policy = PolicyEngine::load_graph_from_source(POLICY_YAML, "default").unwrap();
|
||||
let handle = Arc::new(GraphHandle {
|
||||
key: GraphKey::cluster(GraphId::try_from("default").unwrap()),
|
||||
uri: graph_uri,
|
||||
engine: Arc::new(engine),
|
||||
policy: Some(Arc::new(policy)),
|
||||
queries: None,
|
||||
});
|
||||
// NOTE(review): the second argument is presumably a list of (actor id, bearer
// token) pairs, which would make "Bearer admin-token" below authenticate as
// `act-ragnor` — confirm against `AppState::new_multi`.
let state = AppState::new_multi(
|
||||
vec![handle],
|
||||
vec![("act-ragnor".to_string(), "admin-token".to_string())],
|
||||
None,
|
||||
workload::WorkloadController::from_env(),
|
||||
Some(temp.path().join("cluster.yaml")),
|
||||
)
|
||||
.unwrap();
|
||||
let app = build_app(state);
|
||||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
serde_json::to_vec(&SchemaApplyRequest {
|
||||
schema_source: additive_schema_with_nickname(),
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap(),
|
||||
))
|
||||
.unwrap();
|
||||
let (status, payload) = json_response(&app, request).await;
|
||||
|
||||
assert_eq!(
|
||||
status,
|
||||
StatusCode::FORBIDDEN,
|
||||
"an unauthorized actor must get 403 before the cluster-backed 409: {payload}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn schema_apply_route_rejects_stored_query_breakage_before_publish() {
|
||||
let (temp, app) = app_with_stored_queries(
|
||||
|
|
@ -65,7 +173,7 @@ async fn schema_apply_route_rejects_stored_query_breakage_before_publish() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -115,7 +223,7 @@ async fn schema_apply_route_noop_keeps_valid_stored_query_registry() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -142,7 +250,7 @@ async fn schema_apply_route_requires_schema_apply_policy_permission() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -173,7 +281,7 @@ async fn schema_apply_route_requires_bearer_token_when_policy_enabled() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(
|
||||
serde_json::to_vec(&SchemaApplyRequest {
|
||||
|
|
@ -203,7 +311,7 @@ async fn schema_apply_route_can_rename_type() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -239,7 +347,7 @@ async fn schema_apply_route_can_rename_property() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -279,7 +387,7 @@ async fn schema_apply_route_can_add_index() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -294,6 +402,11 @@ async fn schema_apply_route_can_add_index() {
|
|||
|
||||
assert_eq!(status, StatusCode::OK);
|
||||
assert_eq!(payload["applied"], true);
|
||||
// iss-848: the /schema/apply route accepts the index-add and applies it as a
|
||||
// metadata change — it records the `@index` intent in the catalog/IR but does
|
||||
// NOT build the physical index inline (the build is deferred to
|
||||
// ensure_indices/optimize; on this empty table nothing would build anyway).
|
||||
// So the physical index count is unchanged by the apply.
|
||||
let reopened = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
let snapshot = reopened
|
||||
.snapshot_of(ReadTarget::branch("main"))
|
||||
|
|
@ -301,7 +414,10 @@ async fn schema_apply_route_can_add_index() {
|
|||
.unwrap();
|
||||
let dataset = snapshot.open("node:Person").await.unwrap();
|
||||
let after_index_count = dataset.load_indices().await.unwrap().len();
|
||||
assert!(after_index_count > before_index_count);
|
||||
assert_eq!(
|
||||
after_index_count, before_index_count,
|
||||
"schema apply records @index intent but defers the physical build (iss-848)"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -315,7 +431,7 @@ async fn schema_apply_route_rejects_unsupported_plan() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -356,7 +472,7 @@ async fn schema_apply_route_rejects_when_non_main_branch_exists() {
|
|||
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -385,7 +501,7 @@ async fn schema_drift_returns_conflict_for_snapshot_read_and_change() {
|
|||
let (snapshot_status, snapshot_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/snapshot?branch=main")
|
||||
.uri(g("/snapshot?branch=main"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -413,7 +529,7 @@ async fn schema_drift_returns_conflict_for_snapshot_read_and_change() {
|
|||
let (read_status, read_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&read).unwrap()))
|
||||
|
|
@ -441,7 +557,7 @@ async fn schema_drift_returns_conflict_for_snapshot_read_and_change() {
|
|||
let (change_status, change_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&change).unwrap()))
|
||||
|
|
@ -467,7 +583,7 @@ async fn schema_route_returns_current_source() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/schema")
|
||||
.uri(g("/schema"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -486,7 +602,7 @@ async fn schema_route_requires_bearer_token_when_auth_configured() {
|
|||
let (missing_status, missing_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/schema")
|
||||
.uri(g("/schema"))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -502,7 +618,7 @@ async fn schema_route_requires_bearer_token_when_auth_configured() {
|
|||
let (ok_status, ok_body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/schema")
|
||||
.uri(g("/schema"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer demo-token")
|
||||
.body(Body::empty())
|
||||
|
|
@ -533,7 +649,7 @@ async fn schema_route_denied_when_actor_lacks_read_permission() {
|
|||
let (status, body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/schema")
|
||||
.uri(g("/schema"))
|
||||
.method(Method::GET)
|
||||
.header("authorization", "Bearer team-token")
|
||||
.body(Body::empty())
|
||||
|
|
@ -574,7 +690,7 @@ async fn schema_apply_route_soft_drops_property_via_http() {
|
|||
&app,
|
||||
Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -631,7 +747,7 @@ async fn schema_apply_route_soft_drops_node_type_via_http() {
|
|||
&app,
|
||||
Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -683,7 +799,7 @@ async fn schema_apply_route_hard_drops_property_with_allow_data_loss() {
|
|||
&app,
|
||||
Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -738,7 +854,7 @@ async fn schema_apply_route_keeps_drops_soft_without_flag() {
|
|||
&app,
|
||||
Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -770,29 +886,27 @@ async fn schema_apply_route_additive_property_preserves_existing_rows() {
|
|||
// AddProperty wasn't pinned with a row-count check anywhere.
|
||||
// Load N rows, apply schema adding nullable property, verify
|
||||
// every row is still readable and the new column is null.
|
||||
let (temp, app) = app_for_graph_with_auth_tokens_and_policy(
|
||||
&fs::read_to_string(fixture("test.pg")).unwrap(),
|
||||
let (temp, app) = app_for_loaded_graph_with_auth_tokens_and_policy(
|
||||
&[("act-ragnor", "admin-token")],
|
||||
SCHEMA_APPLY_POLICY_YAML,
|
||||
)
|
||||
.await;
|
||||
let graph = graph_path(temp.path());
|
||||
|
||||
// Standard fixture data: 4 Persons + 1 Company. Load it.
|
||||
// Standard fixture data is loaded before the app is built, so the server
|
||||
// handle applies schema from the same manifest it is serving.
|
||||
let pre_count = {
|
||||
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
db.load(
|
||||
"main",
|
||||
&fs::read_to_string(fixture("test.jsonl")).unwrap(),
|
||||
LoadMode::Append,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let snap = db
|
||||
.snapshot_of(omnigraph::db::ReadTarget::branch("main"))
|
||||
.await
|
||||
.unwrap();
|
||||
snap.entry("node:Person").expect("Person").row_count
|
||||
snap.open("node:Person")
|
||||
.await
|
||||
.expect("Person")
|
||||
.count_rows(None)
|
||||
.await
|
||||
.unwrap()
|
||||
};
|
||||
assert!(pre_count > 0, "fixture should have loaded Person rows");
|
||||
|
||||
|
|
@ -800,7 +914,7 @@ async fn schema_apply_route_additive_property_preserves_existing_rows() {
|
|||
&app,
|
||||
Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri("/schema/apply")
|
||||
.uri(g("/schema/apply"))
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", "Bearer admin-token")
|
||||
.body(Body::from(
|
||||
|
|
@ -822,7 +936,13 @@ async fn schema_apply_route_additive_property_preserves_existing_rows() {
|
|||
.snapshot_of(omnigraph::db::ReadTarget::branch("main"))
|
||||
.await
|
||||
.unwrap();
|
||||
let post_count = snap.entry("node:Person").expect("Person").row_count;
|
||||
let post_count = snap
|
||||
.open("node:Person")
|
||||
.await
|
||||
.expect("Person")
|
||||
.count_rows(None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
post_count, pre_count,
|
||||
"AddProperty should preserve row count",
|
||||
|
|
|
|||
|
|
@ -82,6 +82,58 @@ async fn invoke_stored_read_returns_rows() {
|
|||
assert!(body["rows"].is_array(), "read envelope shape; body: {body}");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn invoke_with_mismatched_expected_kind_is_rejected() {
|
||||
// RFC-011 D3: the CLI verb asserts the stored query's kind via
|
||||
// `expect_mutation`. Invoking a read with `expect_mutation: true`
|
||||
// (i.e. `omnigraph mutate <a-read>`) is a 400 naming the right verb.
|
||||
let (_temp, app) = app_with_stored_queries(
|
||||
&[("find_person", FIND_PERSON_GQ, false)],
|
||||
&[("act-invoke", "t-invoke")],
|
||||
INVOKE_POLICY_YAML,
|
||||
)
|
||||
.await;
|
||||
let (status, body) = json_response(
|
||||
&app,
|
||||
invoke_request(
|
||||
"find_person",
|
||||
"t-invoke",
|
||||
json!({ "expect_mutation": true, "params": { "name": "Alice" } }),
|
||||
),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
|
||||
assert!(
|
||||
body["error"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.contains("'find_person' is a read — use omnigraph query find_person"),
|
||||
"expected a kind-mismatch error; body: {body}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn invoke_with_matching_expected_kind_runs() {
|
||||
// The matching assertion (`omnigraph query <a-read>`) passes through.
|
||||
let (_temp, app) = app_with_stored_queries(
|
||||
&[("find_person", FIND_PERSON_GQ, false)],
|
||||
&[("act-invoke", "t-invoke")],
|
||||
INVOKE_POLICY_YAML,
|
||||
)
|
||||
.await;
|
||||
let (status, body) = json_response(
|
||||
&app,
|
||||
invoke_request(
|
||||
"find_person",
|
||||
"t-invoke",
|
||||
json!({ "expect_mutation": false, "params": { "name": "Alice" } }),
|
||||
),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::OK, "matching kind should run; body: {body}");
|
||||
assert_eq!(body["query_name"], "find_person");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn invoke_stored_read_accepts_absent_or_empty_body() {
|
||||
let no_param_query = "query list_people() { match { $p: Person } return { $p.name } }";
|
||||
|
|
@ -272,7 +324,7 @@ async fn list_queries_returns_only_exposed_with_typed_params() {
|
|||
INVOKE_POLICY_YAML,
|
||||
)
|
||||
.await;
|
||||
let (status, body) = json_response(&app, get_request("/queries", "t-invoke")).await;
|
||||
let (status, body) = json_response(&app, get_request(&g("/queries"), "t-invoke")).await;
|
||||
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||
|
||||
let entries = body["queries"].as_array().unwrap();
|
||||
|
|
@ -303,7 +355,7 @@ async fn list_queries_is_read_gated_so_a_non_invoker_can_list() {
|
|||
INVOKE_POLICY_YAML,
|
||||
)
|
||||
.await;
|
||||
let (status, body) = json_response(&app, get_request("/queries", "t-noinvoke")).await;
|
||||
let (status, body) = json_response(&app, get_request(&g("/queries"), "t-noinvoke")).await;
|
||||
assert_eq!(status, StatusCode::OK, "read-gated catalog; body: {body}");
|
||||
let names: Vec<&str> = body["queries"]
|
||||
.as_array()
|
||||
|
|
@ -320,7 +372,7 @@ async fn list_queries_is_read_gated_so_a_non_invoker_can_list() {
|
|||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn list_queries_is_empty_when_no_registry() {
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;
|
||||
let (status, body) = json_response(&app, get_request("/queries", "demo-token")).await;
|
||||
let (status, body) = json_response(&app, get_request(&g("/queries"), "demo-token")).await;
|
||||
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||
assert!(
|
||||
body["queries"].as_array().unwrap().is_empty(),
|
||||
|
|
|
|||
|
|
@ -248,9 +248,17 @@ rules:
|
|||
pub const FIND_PERSON_GQ: &str =
|
||||
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }";
|
||||
|
||||
/// Rewrites a flat per-graph route into its cluster-scoped form.
///
/// RFC-011 cluster-only: the single-graph convenience apps built by the
/// `app_for_loaded_graph*` helpers serve their graph under the reserved id
/// `default`, so a test passes the flat path (e.g. `/snapshot`) and gets back
/// `/graphs/default/snapshot`.
///
/// `path` is appended verbatim: it must bring its own leading `/`, and any
/// query string it carries is kept as-is.
pub fn g(path: &str) -> String {
    const DEFAULT_GRAPH_PREFIX: &str = "/graphs/default";

    let mut uri = String::with_capacity(DEFAULT_GRAPH_PREFIX.len() + path.len());
    uri.push_str(DEFAULT_GRAPH_PREFIX);
    uri.push_str(path);
    uri
}
|
||||
|
||||
pub fn invoke_request(name: &str, token: &str, body: Value) -> Request<Body> {
|
||||
Request::builder()
|
||||
.uri(format!("/queries/{name}"))
|
||||
.uri(g(&format!("/queries/{name}")))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.header("authorization", format!("Bearer {token}"))
|
||||
|
|
@ -265,7 +273,7 @@ pub fn invoke_request_bytes(
|
|||
content_type: Option<&str>,
|
||||
) -> Request<Body> {
|
||||
let mut builder = Request::builder()
|
||||
.uri(format!("/queries/{name}"))
|
||||
.uri(g(&format!("/queries/{name}")))
|
||||
.method(Method::POST)
|
||||
.header("authorization", format!("Bearer {token}"));
|
||||
if let Some(content_type) = content_type {
|
||||
|
|
@ -656,7 +664,7 @@ pub mod matrix {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -686,7 +694,7 @@ pub mod matrix {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -728,7 +736,7 @@ pub mod matrix {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/snapshot?branch={}", branch))
|
||||
.uri(g(&format!("/snapshot?branch={}", branch)))
|
||||
.method(Method::GET)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -766,7 +774,7 @@ pub mod matrix {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/read")
|
||||
.uri(g("/read"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -833,7 +841,7 @@ pub mod matrix {
|
|||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -874,7 +882,7 @@ pub mod matrix {
|
|||
let response = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/branches/merge")
|
||||
.uri(g("/branches/merge"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -910,7 +918,7 @@ pub mod matrix {
|
|||
let response = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -943,7 +951,7 @@ pub mod matrix {
|
|||
let response = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/branches")
|
||||
.uri(g("/branches"))
|
||||
.method(Method::POST)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body))
|
||||
|
|
@ -970,7 +978,7 @@ pub mod matrix {
|
|||
let response = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/branches/{}", name))
|
||||
.uri(g(&format!("/branches/{}", name)))
|
||||
.method(Method::DELETE)
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
|
|
@ -1091,7 +1099,7 @@ pub async fn http_change_decision(
|
|||
let (status, _body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/change")
|
||||
.uri(g("/change"))
|
||||
.method(Method::POST)
|
||||
.header(AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -1141,7 +1149,7 @@ pub async fn http_merge_decision(
|
|||
let (status, _body) = json_response(
|
||||
&app,
|
||||
Request::builder()
|
||||
.uri("/branches/merge")
|
||||
.uri(g("/branches/merge"))
|
||||
.method(Method::POST)
|
||||
.header(AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("content-type", "application/json")
|
||||
|
|
@ -1191,5 +1199,5 @@ graphs:
|
|||
}
|
||||
|
||||
pub async fn cluster_settings(dir: &Path) -> color_eyre::eyre::Result<omnigraph_server::ServerConfig> {
|
||||
omnigraph_server::load_server_settings(None, Some(&dir.to_path_buf()), None, None, None, true).await
|
||||
omnigraph_server::load_server_settings(Some(&dir.to_path_buf()), None, true).await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -248,12 +248,12 @@ async fn diff_table_same_lineage(
|
|||
// Inserts + Updates: use _row_last_updated_at_version to find all rows
|
||||
// touched since Vf, then classify by checking whether the ID existed at Vf.
|
||||
//
|
||||
// Why not _row_created_at_version for inserts: Lance's merge_insert stamps
|
||||
// new rows with _row_created_at_version = dataset_creation_version (v1),
|
||||
// not the merge_insert commit version. This makes _row_created_at_version
|
||||
// unreliable for detecting inserts from merge_insert writes. Using
|
||||
// _row_last_updated_at_version catches all touched rows regardless of
|
||||
// write mode, and ID-set membership distinguishes inserts from updates.
|
||||
// We key on _row_last_updated_at_version because one scan over it catches
|
||||
// every row touched in the window — inserts and updates alike — regardless
|
||||
// of write mode, and ID-set membership at Vf then distinguishes inserts from
|
||||
// updates. (lance#6774 made merge_insert stamp new rows' _row_created_at_version
|
||||
// with the commit version, so created_at became reliable too; last_updated
|
||||
// stays the right key since it also covers updates.)
|
||||
if wants_inserts || wants_updates {
|
||||
let filter_sql = format!(
|
||||
"_row_last_updated_at_version > {} AND _row_last_updated_at_version <= {}",
|
||||
|
|
|
|||
|
|
@ -57,6 +57,8 @@ impl CommitGraph {
|
|||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
let dataset = Dataset::write(reader, &uri as &str, Some(params))
|
||||
|
|
@ -430,6 +432,8 @@ async fn create_commit_actor_dataset(root_uri: &str) -> Result<Dataset> {
|
|||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
match Dataset::write(reader, &uri as &str, Some(params)).await {
|
||||
|
|
|
|||
|
|
@ -34,10 +34,10 @@ pub(crate) use namespace::open_table_head_for_write;
|
|||
use namespace::{branch_manifest_namespace, staged_table_namespace};
|
||||
use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
|
||||
pub(crate) use recovery::{
|
||||
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
||||
SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
|
||||
heal_pending_sidecars_roll_forward, list_sidecars, new_sidecar, recover_manifest_drift,
|
||||
schema_apply_serial_queue_key, write_sidecar,
|
||||
RecoveryMode, RecoverySidecarHandle, SidecarKind, SidecarTablePin, SidecarTableRegistration,
|
||||
SidecarTombstone, delete_sidecar, has_schema_apply_sidecar, heal_pending_sidecars_roll_forward,
|
||||
list_sidecars, new_sidecar, recover_manifest_drift, schema_apply_serial_queue_key,
|
||||
write_sidecar,
|
||||
};
|
||||
pub use state::SubTableEntry;
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ pub(super) async fn init_manifest_graph(
|
|||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
let manifest_path = manifest_uri(root);
|
||||
|
|
@ -127,6 +129,8 @@ async fn create_empty_dataset(uri: &str, schema: &SchemaRef) -> Result<Dataset>
|
|||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
Dataset::write(reader, uri, Some(params))
|
||||
|
|
|
|||
|
|
@ -113,20 +113,47 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
|
|||
/// so the merge-insert conflict resolver enforces row-level CAS at commit
|
||||
/// time, then bump the stamp.
|
||||
///
|
||||
/// Both steps are idempotent under retry: re-applying the field annotation
|
||||
/// at its current value is a no-op-ish bump in Lance, and the stamp is a
|
||||
/// simple key-value write. A crash between the two leaves the field set
|
||||
/// without a stamp; the next open re-runs this fn and only the stamp lands.
|
||||
/// Idempotent under crash-retry by construction. Lance 7 makes the unenforced
|
||||
/// primary key **immutable once set**: any write that touches the reserved
|
||||
/// `lance-schema:unenforced-primary-key` field metadata after the PK is set
|
||||
/// errors ("cannot be changed once set", `lance::dataset::transaction`), even
|
||||
/// re-applying the same value. A crash between the field-set and the stamp
|
||||
/// bump leaves the field set without a stamp, so the next open re-enters here
|
||||
/// with the PK already present — we must therefore set it only when absent.
|
||||
/// (Fresh graphs bake the PK into `manifest_schema()` at init and never run
|
||||
/// this migration; only genuine pre-v0.4.0 graphs do.)
|
||||
async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
|
||||
dataset
|
||||
.update_field_metadata()
|
||||
.update(
|
||||
"object_id",
|
||||
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
// The guard is over the *specific* field, not just "any PK is set": skipping
|
||||
// when `object_id` is already the PK is the idempotent crash-recovery path,
|
||||
// but a manifest whose PK is some *other* field has the wrong CAS key — and
|
||||
// Lance 7 won't let us change it. Refuse loudly rather than silently leave
|
||||
// merge-insert conflict detection keyed on the wrong column.
|
||||
let pk_fields: Vec<&str> = dataset
|
||||
.schema()
|
||||
.unenforced_primary_key()
|
||||
.iter()
|
||||
.map(|field| field.name.as_str())
|
||||
.collect();
|
||||
match pk_fields.as_slice() {
|
||||
["object_id"] => {} // already migrated (or a crash re-entry) — idempotent no-op
|
||||
[] => {
|
||||
dataset
|
||||
.update_field_metadata()
|
||||
.update(
|
||||
"object_id",
|
||||
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
}
|
||||
other => {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"__manifest unenforced primary key is {other:?}, expected [\"object_id\"]; \
|
||||
refusing to migrate a manifest with an unexpected CAS key"
|
||||
)));
|
||||
}
|
||||
}
|
||||
set_stamp(dataset, 2).await
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,9 @@ use lance_namespace::models::{
|
|||
};
|
||||
use lance_namespace::{Error as LanceNamespaceError, LanceNamespace, NamespaceError};
|
||||
use lance_table::io::commit::ManifestNamingScheme;
|
||||
use object_store::{Error as ObjectStoreError, ObjectStore as _, PutMode, PutOptions, path::Path};
|
||||
use object_store::{
|
||||
Error as ObjectStoreError, ObjectStore as _, ObjectStoreExt, PutMode, PutOptions, path::Path,
|
||||
};
|
||||
|
||||
use crate::error::{OmniError, Result};
|
||||
|
||||
|
|
|
|||
|
|
@ -381,6 +381,12 @@ impl GraphNamespacePublisher {
|
|||
// the publisher loop above, where each attempt re-runs the pre-check.
|
||||
merge_builder.conflict_retries(0);
|
||||
merge_builder.use_index(false);
|
||||
// Skip Lance's auto-cleanup hook: `__manifest` versions are the snapshot
|
||||
// / time-travel authority and must never be GC'd by Lance's per-commit
|
||||
// hook. A `__manifest` created before the v7 bump (6.0.1 defaulted
|
||||
// auto_cleanup ON) still carries the stored config, so this skip is
|
||||
// load-bearing on upgraded graphs, not just defensive.
|
||||
merge_builder.skip_auto_cleanup(true);
|
||||
let (new_dataset, _stats) = merge_builder
|
||||
.try_build()
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
|
|
|
|||
|
|
@ -793,10 +793,10 @@ pub(crate) fn schema_apply_serial_queue_key() -> crate::db::write_queue::TableQu
|
|||
/// same table append extra Lance restore commits which `omnigraph
|
||||
/// cleanup` reclaims.
|
||||
///
|
||||
/// Concurrency: today recovery runs synchronously in `Omnigraph::open`
|
||||
/// *before* the engine is wrapped in the server's `Arc<RwLock<Omnigraph>>`.
|
||||
/// No request handlers can race, so this sweep does NOT acquire write
|
||||
/// queues. In-process callers (refresh, write entry points) must use
|
||||
/// Concurrency: the open-time sweep runs synchronously in `Omnigraph::open`
|
||||
/// before the engine handle is published to any caller, so no request
|
||||
/// handler can race it and it does NOT acquire write queues. In-process
|
||||
/// callers (refresh, write entry points) must use
|
||||
/// [`heal_pending_sidecars_roll_forward`] instead, which serializes
|
||||
/// against live writers via per-(table_key, branch) queue acquisition.
|
||||
pub(crate) async fn recover_manifest_drift(
|
||||
|
|
|
|||
|
|
@ -336,40 +336,77 @@ async fn test_directory_namespace_direct_publish_cannot_replace_native_omnigraph
|
|||
.await
|
||||
.unwrap();
|
||||
|
||||
let versions = namespace
|
||||
.list_table_versions(ListTableVersionsRequest {
|
||||
id: Some(vec!["node:Person".to_string()]),
|
||||
descending: Some(true),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
versions.versions[0].version as u64,
|
||||
person_entry.table_version
|
||||
// Lance 7: the native `DirectoryNamespace` no longer recognizes omnigraph's
|
||||
// manifest-tracked tables, so list / describe / create_table_version all
|
||||
// return `TableNotFound`. The mechanism is *contingent on omnigraph's legacy
|
||||
// boolean PK key*, not an unconditional v7 property: v7's namespace eagerly
|
||||
// rewrites any `__manifest` whose `object_id` lacks the new
|
||||
// `lance-schema:unenforced-primary-key:position` key, omnigraph declares the
|
||||
// PK with the legacy boolean key, and v7 forbids changing a PK once set — so
|
||||
// `ensure_manifest_table_up_to_date` errors, the namespace silently falls
|
||||
// back to directory listing (disabled here), and `check_table_status` reports
|
||||
// the table absent. omnigraph keeps the boolean key deliberately: Lance
|
||||
// honors it permanently (it maps to PK position 0) and one uniform on-disk
|
||||
// format beats a new-vs-old split, since existing graphs can't be re-keyed to
|
||||
// the position key under that same immutability rule. The decoupling is
|
||||
// therefore an accepted, production-irrelevant tradeoff (omnigraph never uses
|
||||
// the native namespace — its publisher writes `__manifest` via merge_insert
|
||||
// and its reads go through its own `LanceNamespace` impls), and it only
|
||||
// strengthens this guard's thesis: native tooling cannot enumerate, inspect,
|
||||
// or publish over omnigraph's tables, let alone replace the write path.
|
||||
let assert_table_not_found = |what: &str, dbg: String| {
|
||||
assert!(
|
||||
dbg.contains("TableNotFound") && dbg.contains("node:Person"),
|
||||
"{what}: expected TableNotFound for node:Person, got: {dbg}"
|
||||
);
|
||||
};
|
||||
assert_table_not_found(
|
||||
"list_table_versions",
|
||||
format!(
|
||||
"{:?}",
|
||||
namespace
|
||||
.list_table_versions(ListTableVersionsRequest {
|
||||
id: Some(vec!["node:Person".to_string()]),
|
||||
descending: Some(true),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap_err()
|
||||
),
|
||||
);
|
||||
assert_table_not_found(
|
||||
"describe_table_version",
|
||||
format!(
|
||||
"{:?}",
|
||||
namespace
|
||||
.describe_table_version(DescribeTableVersionRequest {
|
||||
id: Some(vec!["node:Person".to_string()]),
|
||||
version: Some(person_version as i64),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap_err()
|
||||
),
|
||||
);
|
||||
assert_table_not_found(
|
||||
"create_table_version",
|
||||
format!(
|
||||
"{:?}",
|
||||
namespace
|
||||
.create_table_version(version_metadata.to_create_table_version_request(
|
||||
"node:Person",
|
||||
person_version,
|
||||
1,
|
||||
None,
|
||||
))
|
||||
.await
|
||||
.unwrap_err()
|
||||
),
|
||||
);
|
||||
|
||||
let err = namespace
|
||||
.describe_table_version(DescribeTableVersionRequest {
|
||||
id: Some(vec!["node:Person".to_string()]),
|
||||
version: Some(person_version as i64),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("not found"));
|
||||
|
||||
let err = namespace
|
||||
.create_table_version(version_metadata.to_create_table_version_request(
|
||||
"node:Person",
|
||||
person_version,
|
||||
1,
|
||||
None,
|
||||
))
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.to_string().contains("already exists"));
|
||||
|
||||
// omnigraph's manifest stays authoritative: refresh ignores the direct
|
||||
// `person_ds.append` above (it was never manifest-published), so the row
|
||||
// count stays 0 and the version is unchanged.
|
||||
mc.refresh().await.unwrap();
|
||||
assert_eq!(
|
||||
mc.snapshot().entry("node:Person").unwrap().table_version,
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@ pub use graph_coordinator::{GraphCoordinator, ReadTarget, ResolvedTarget, Snapsh
|
|||
pub use manifest::{Snapshot, SubTableEntry, SubTableUpdate};
|
||||
pub(crate) use omnigraph::ensure_public_branch_ref;
|
||||
pub use omnigraph::{
|
||||
CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, RepairAction,
|
||||
RepairClassification, RepairOptions, RepairStats, SchemaApplyOptions, SchemaApplyResult,
|
||||
SkipReason, TableCleanupStats, TableOptimizeStats, TableRepairStats,
|
||||
CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, PendingIndex,
|
||||
RepairAction, RepairClassification, RepairOptions, RepairStats, SchemaApplyOptions,
|
||||
SchemaApplyResult, SkipReason, TableCleanupStats, TableOptimizeStats, TableRepairStats,
|
||||
};
|
||||
|
||||
pub(crate) const SCHEMA_APPLY_LOCK_BRANCH: &str = "__schema_apply_lock__";
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ use lance::dataset::scanner::ColumnOrdering;
|
|||
use lance::datatypes::BlobKind;
|
||||
use omnigraph_compiler::catalog::{Catalog, EdgeType, NodeType};
|
||||
use omnigraph_compiler::schema::parser::parse_schema;
|
||||
use omnigraph_compiler::types::ScalarType;
|
||||
use omnigraph_compiler::types::{PropType, ScalarType};
|
||||
use omnigraph_compiler::{
|
||||
DropMode, SchemaIR, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind,
|
||||
build_catalog_from_ir, build_schema_ir, plan_schema_migration,
|
||||
|
|
@ -40,6 +40,7 @@ pub use repair::{
|
|||
RepairAction, RepairClassification, RepairOptions, RepairStats, TableRepairStats,
|
||||
};
|
||||
pub use schema_apply::SchemaApplyOptions;
|
||||
pub use table_ops::PendingIndex;
|
||||
|
||||
use super::commit_graph::GraphCommit;
|
||||
use super::manifest::{
|
||||
|
|
@ -113,10 +114,11 @@ pub struct Omnigraph {
|
|||
/// Read-heavy on schema introspection paths, written only by
|
||||
/// `apply_schema`. Same ArcSwap rationale as `catalog`.
|
||||
schema_source: Arc<ArcSwap<String>>,
|
||||
/// Per-`(table_key, branch)` writer queues. Reachable from engine
|
||||
/// internals (mutation finalize, schema_apply, branch_merge,
|
||||
/// ensure_indices, delete_where) and from future MR-870 recovery
|
||||
/// reconciler. PR 1b adds the field; callers acquire in commits 4+.
|
||||
/// Per-`(table_key, branch)` writer queues — the engine's
|
||||
/// write-serialization mechanism (the server holds the engine as a
|
||||
/// lockless `Arc<Omnigraph>`). Reachable from engine internals
|
||||
/// (mutation finalize, schema_apply, branch_merge, ensure_indices,
|
||||
/// delete_where, the fork path, recovery reconciler).
|
||||
write_queue: Arc<crate::db::write_queue::WriteQueueManager>,
|
||||
/// Process-wide mutex held across the swap → operate → restore window
|
||||
/// in `branch_merge_impl`. Two concurrent merges with distinct targets
|
||||
|
|
@ -156,6 +158,17 @@ pub struct Omnigraph {
|
|||
/// `apply_schema_as` consults this field (PR #2 proof-of-concept);
|
||||
/// PR #3 fans the `enforce()` call out to the remaining writers.
|
||||
policy: Option<Arc<dyn omnigraph_policy::PolicyChecker>>,
|
||||
/// Lazily-built, reused-across-queries embedding client. Built on the first
|
||||
/// `nearest($v, "string")` that needs server-side embedding (so a graph that
|
||||
/// never embeds needs no provider key), then shared by every later query —
|
||||
/// avoids the per-query `from_env()` rebuild and keeps the provider HTTP
|
||||
/// connection pool warm. `OnceCell` guarantees a single initialization.
|
||||
embedding: Arc<tokio::sync::OnceCell<crate::embedding::EmbeddingClient>>,
|
||||
/// Optional pre-resolved embedding config (RFC-012 Phase 5), injected from an
|
||||
/// applied cluster `providers.embedding` profile via [`Omnigraph::with_embedding_config`].
|
||||
/// When set, the embedding cell builds its client from this instead of
|
||||
/// `EmbeddingClient::from_env()`; `None` keeps the env fallback.
|
||||
embedding_config: Option<Arc<crate::embedding::EmbeddingConfig>>,
|
||||
}
|
||||
|
||||
/// Whether [`Omnigraph::open`] runs the open-time recovery sweep.
|
||||
|
|
@ -319,6 +332,8 @@ impl Omnigraph {
|
|||
write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()),
|
||||
merge_exclusive: Arc::new(tokio::sync::Mutex::new(())),
|
||||
policy: None,
|
||||
embedding: Arc::new(tokio::sync::OnceCell::new()),
|
||||
embedding_config: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -418,6 +433,8 @@ impl Omnigraph {
|
|||
write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()),
|
||||
merge_exclusive: Arc::new(tokio::sync::Mutex::new(())),
|
||||
policy: None,
|
||||
embedding: Arc::new(tokio::sync::OnceCell::new()),
|
||||
embedding_config: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -465,6 +482,29 @@ impl Omnigraph {
|
|||
self
|
||||
}
|
||||
|
||||
/// The lazily-initialized, reused-across-queries embedding client cell
|
||||
/// (see the `embedding` field doc). The query executor resolves the client
|
||||
/// through this on the first `nearest($v, "string")` that needs embedding.
|
||||
pub(crate) fn embedding_cell(
|
||||
&self,
|
||||
) -> &tokio::sync::OnceCell<crate::embedding::EmbeddingClient> {
|
||||
&self.embedding
|
||||
}
|
||||
|
||||
/// Install a pre-resolved embedding config (RFC-012 Phase 5). Builder-style,
|
||||
/// mirroring [`Omnigraph::with_policy`]: a graph served from a cluster
|
||||
/// embedding provider profile injects it here; an embedded/CLI caller that doesn't
|
||||
/// call this keeps the `EmbeddingClient::from_env()` fallback.
|
||||
pub fn with_embedding_config(mut self, config: Arc<crate::embedding::EmbeddingConfig>) -> Self {
|
||||
self.embedding_config = Some(config);
|
||||
self
|
||||
}
|
||||
|
||||
/// The injected embedding config, if any (see the `embedding_config` field).
|
||||
pub(crate) fn embedding_config_ref(&self) -> Option<&crate::embedding::EmbeddingConfig> {
|
||||
self.embedding_config.as_deref()
|
||||
}
|
||||
|
||||
/// Engine-layer policy enforcement gate (MR-722 chassis core).
|
||||
///
|
||||
/// * If no policy is installed → no-op (returns `Ok(())`).
|
||||
|
|
@ -1069,11 +1109,15 @@ impl Omnigraph {
|
|||
/// unbranched subtables keep inheriting `main`, while subtables inherited
|
||||
/// from an ancestor branch are first forked into the active branch before
|
||||
/// their index metadata is updated.
|
||||
pub async fn ensure_indices(&self) -> Result<()> {
|
||||
/// Returns the declared indexes that could not be materialized on this
|
||||
/// pass (today: vector columns with no trainable vectors yet). They are
|
||||
/// deferred, not errors; a later `ensure_indices`/`optimize` builds them
|
||||
/// once the column is trainable. Reads stay correct (brute-force) meanwhile.
|
||||
pub async fn ensure_indices(&self) -> Result<Vec<PendingIndex>> {
|
||||
table_ops::ensure_indices(self).await
|
||||
}
|
||||
|
||||
pub async fn ensure_indices_on(&self, branch: &str) -> Result<()> {
|
||||
pub async fn ensure_indices_on(&self, branch: &str) -> Result<Vec<PendingIndex>> {
|
||||
table_ops::ensure_indices_on(self, branch).await
|
||||
}
|
||||
|
||||
|
|
@ -1479,6 +1523,13 @@ impl Omnigraph {
|
|||
table_ops::open_for_mutation_on_branch(self, branch, table_key, op_kind).await
|
||||
}
|
||||
|
||||
/// Fork `table_key` onto `active_branch` from the given source state,
|
||||
/// self-healing a manifest-unreferenced leftover fork if one is in the
|
||||
/// way. Callers that reach this MUST already hold the per-`(table_key,
|
||||
/// active_branch)` write queue (so the reclaim cannot race an in-process
|
||||
/// fork) and must have confirmed via the live manifest that the table is
|
||||
/// not yet on `active_branch`. Both the first-write fork path
|
||||
/// (`open_owned_dataset_for_branch_write`) and `branch_merge` satisfy this.
|
||||
pub(crate) async fn fork_dataset_from_entry_state(
|
||||
&self,
|
||||
table_key: &str,
|
||||
|
|
@ -1487,7 +1538,7 @@ impl Omnigraph {
|
|||
source_version: u64,
|
||||
active_branch: &str,
|
||||
) -> Result<SnapshotHandle> {
|
||||
table_ops::fork_dataset_from_entry_state(
|
||||
match table_ops::fork_dataset_from_entry_state(
|
||||
self,
|
||||
table_key,
|
||||
full_path,
|
||||
|
|
@ -1495,7 +1546,21 @@ impl Omnigraph {
|
|||
source_version,
|
||||
active_branch,
|
||||
)
|
||||
.await
|
||||
.await?
|
||||
{
|
||||
crate::storage_layer::ForkOutcome::Created(ds) => Ok(ds),
|
||||
crate::storage_layer::ForkOutcome::RefAlreadyExists => {
|
||||
table_ops::reclaim_orphaned_fork_and_refork(
|
||||
self,
|
||||
table_key,
|
||||
full_path,
|
||||
source_branch,
|
||||
source_version,
|
||||
active_branch,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn reopen_for_mutation(
|
||||
|
|
@ -1530,19 +1595,10 @@ impl Omnigraph {
|
|||
&self,
|
||||
table_key: &str,
|
||||
ds: &mut SnapshotHandle,
|
||||
) -> Result<()> {
|
||||
) -> Result<Vec<PendingIndex>> {
|
||||
table_ops::build_indices_on_dataset(self, table_key, ds).await
|
||||
}
|
||||
|
||||
pub(crate) async fn build_indices_on_dataset_for_catalog(
|
||||
&self,
|
||||
catalog: &Catalog,
|
||||
table_key: &str,
|
||||
ds: &mut SnapshotHandle,
|
||||
) -> Result<()> {
|
||||
table_ops::build_indices_on_dataset_for_catalog(self, catalog, table_key, ds).await
|
||||
}
|
||||
|
||||
// Used only by in-tree tests (`#[cfg(test)]`); the runtime path now
|
||||
// uses `commit_updates_on_branch_with_expected` exclusively.
|
||||
#[cfg(test)]
|
||||
|
|
@ -2498,25 +2554,49 @@ edge WorksAt: Person -> Company
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply_schema_adds_index_for_existing_property() {
|
||||
async fn test_apply_schema_defers_index_then_reconciler_builds_it() {
|
||||
// iss-848: schema apply records the @index intent but builds nothing
|
||||
// inline; a later ensure_indices materializes it once the table has
|
||||
// rows. (Use `age`, which is unindexed in TEST_SCHEMA — `name @key` is
|
||||
// already FTS-indexed at seed, so it can't show the deferral.)
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
seed_person_row(&mut db, "Alice", Some(30)).await;
|
||||
|
||||
let desired = TEST_SCHEMA.replace("name: String @key", "name: String @key @index");
|
||||
let desired = TEST_SCHEMA.replace("age: I32?", "age: I32? @index");
|
||||
db.apply_schema(&desired).await.unwrap();
|
||||
|
||||
// Apply built nothing — the BTREE on `age` is deferred.
|
||||
let snapshot = db.snapshot().await;
|
||||
let ds = db
|
||||
.storage()
|
||||
.open_snapshot_at_table(&snapshot, "node:Person")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(db.storage().has_fts_index(&ds, "name").await.unwrap());
|
||||
assert!(
|
||||
!db.storage().has_btree_index(&ds, "age").await.unwrap(),
|
||||
"apply must not build the index inline (deferred to the reconciler)"
|
||||
);
|
||||
|
||||
// The reconciler materializes it (Person has a row).
|
||||
db.ensure_indices().await.unwrap();
|
||||
let snapshot = db.snapshot().await;
|
||||
let ds = db
|
||||
.storage()
|
||||
.open_snapshot_at_table(&snapshot, "node:Person")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
db.storage().has_btree_index(&ds, "age").await.unwrap(),
|
||||
"ensure_indices must build the deferred index"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply_schema_rewrite_preserves_existing_indices() {
|
||||
async fn test_apply_schema_rewrite_defers_index_then_reconciler_restores() {
|
||||
// iss-848: an AddProperty rewrite writes a new dataset version without
|
||||
// rebuilding indexes inline (deferred); ensure_indices restores them.
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let initial_schema = TEST_SCHEMA.replace("name: String @key", "name: String @key @index");
|
||||
|
|
@ -2529,6 +2609,8 @@ edge WorksAt: Person -> Company
|
|||
);
|
||||
db.apply_schema(&desired).await.unwrap();
|
||||
|
||||
// After the rewrite the reconciler restores index coverage.
|
||||
db.ensure_indices().await.unwrap();
|
||||
let snapshot = db.snapshot().await;
|
||||
let ds = db
|
||||
.storage()
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ use lance::dataset::cleanup::{CleanupPolicy, RemovalStats};
|
|||
use lance::dataset::optimize::{
|
||||
CompactionMetrics, CompactionOptions, compact_files, plan_compaction,
|
||||
};
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance_index::optimize::OptimizeOptions;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
@ -138,6 +140,12 @@ pub struct TableOptimizeStats {
|
|||
/// Lance HEAD version observed by optimize for drift skips. `None` for
|
||||
/// normal compaction/no-op/blob skips.
|
||||
pub lance_head_version: Option<u64>,
|
||||
/// Declared `@index` columns on this table the reconciler could not build
|
||||
/// this run, each with the `reason` (today: a vector column with no
|
||||
/// trainable vectors yet). Empty on the common path. Reported, not fatal — a
|
||||
/// later `optimize` retries. This is the `list_indices`/`indisvalid` analog:
|
||||
/// it lets operators see which index is pending and why.
|
||||
pub pending_indexes: Vec<super::PendingIndex>,
|
||||
}
|
||||
|
||||
impl TableOptimizeStats {
|
||||
|
|
@ -151,6 +159,7 @@ impl TableOptimizeStats {
|
|||
skipped: None,
|
||||
manifest_version: None,
|
||||
lance_head_version: None,
|
||||
pending_indexes: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -164,6 +173,7 @@ impl TableOptimizeStats {
|
|||
skipped: Some(reason),
|
||||
manifest_version: None,
|
||||
lance_head_version: None,
|
||||
pending_indexes: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -181,6 +191,7 @@ impl TableOptimizeStats {
|
|||
skipped: Some(SkipReason::DriftNeedsRepair),
|
||||
manifest_version: Some(manifest_version),
|
||||
lance_head_version: Some(lance_head_version),
|
||||
pending_indexes: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -257,9 +268,7 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStat
|
|||
// the original row addresses on rewrite). The CSR/CSC graph topology index
|
||||
// is rebuilt only when an edge table moved. Mirrors schema_apply's
|
||||
// post-publish invalidation.
|
||||
let any_committed = stats
|
||||
.iter()
|
||||
.any(|s| matches!(s, Ok(st) if st.committed));
|
||||
let any_committed = stats.iter().any(|s| matches!(s, Ok(st) if st.committed));
|
||||
let edge_committed = stats
|
||||
.iter()
|
||||
.any(|s| matches!(s, Ok(st) if st.committed && st.table_key.starts_with("edge:")));
|
||||
|
|
@ -361,16 +370,34 @@ async fn optimize_one_table(
|
|||
}
|
||||
|
||||
// Precise "will it compact?" check — `plan_compaction` also accounts for
|
||||
// deletion materialization (which can rewrite even a single fragment). A
|
||||
// steady-state already-compacted table yields an empty plan and is never
|
||||
// pinned in a sidecar (a zero-commit pin would classify NoMovement on
|
||||
// recovery and force an all-or-nothing rollback). Uncovered pre-existing
|
||||
// drift is skipped above and must go through explicit repair.
|
||||
// deletion materialization (which can rewrite even a single fragment).
|
||||
let options = CompactionOptions::default();
|
||||
let plan = plan_compaction(&ds, &options)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
if plan.num_tasks() == 0 {
|
||||
let will_compact = plan.num_tasks() > 0;
|
||||
// Even when there is nothing to compact, the table may still have index
|
||||
// work: rows appended since the index was built (e.g. via `ingest --mode
|
||||
// merge`) are scanned unindexed until folded in (needs_reindex), OR a
|
||||
// declared `@index` was never built — schema apply records the intent but
|
||||
// defers the physical build (iss-848), so optimize is the operator-facing
|
||||
// reconciler that materializes it (needs_index_create). Any of the three is
|
||||
// enough to enter the publish path. If NONE, this table is a no-op and must
|
||||
// NOT be pinned in a sidecar — a zero-commit pin classifies NoMovement on
|
||||
// recovery and forces an all-or-nothing rollback of sibling tables'
|
||||
// legitimate work. Uncovered pre-existing manifest/HEAD drift is skipped
|
||||
// above and goes through explicit repair, so this only runs on a healthy
|
||||
// table under the per-table queue + sidecar.
|
||||
let needs_reindex = TableStore::has_unindexed_fragments(&ds).await?;
|
||||
// needs_index_work_* checks "a declared index is missing AND row_count > 0",
|
||||
// so empty tables stay no-ops (never pinned). It re-reads the head under the
|
||||
// queue we already hold, so it is consistent with `ds`.
|
||||
let needs_index_create = if let Some(type_name) = table_key.strip_prefix("node:") {
|
||||
super::table_ops::needs_index_work_node(db, type_name, &table_key, &full_path, None).await?
|
||||
} else {
|
||||
super::table_ops::needs_index_work_edge(db, &table_key, &full_path, None).await?
|
||||
};
|
||||
if !will_compact && !needs_reindex && !needs_index_create {
|
||||
return Ok(TableOptimizeStats::compacted(
|
||||
table_key,
|
||||
&CompactionMetrics::default(),
|
||||
|
|
@ -378,8 +405,9 @@ async fn optimize_one_table(
|
|||
));
|
||||
}
|
||||
|
||||
// Phase A: recovery sidecar BEFORE compaction advances the Lance HEAD, so a
|
||||
// crash before the manifest publish rolls forward on next open.
|
||||
// Phase A: recovery sidecar BEFORE any HEAD-advancing op (compaction or
|
||||
// index optimize), so a crash before the manifest publish rolls forward on
|
||||
// next open.
|
||||
let sidecar = crate::db::manifest::new_sidecar(
|
||||
crate::db::manifest::SidecarKind::Optimize,
|
||||
None,
|
||||
|
|
@ -398,12 +426,50 @@ async fn optimize_one_table(
|
|||
let handle =
|
||||
crate::db::manifest::write_sidecar(db.root_uri(), db.storage_adapter(), &sidecar).await?;
|
||||
|
||||
// Phase B: compaction (reserve-fragments + rewrite commits advance HEAD).
|
||||
// Phase B: compaction (if any) then incremental index optimize — both
|
||||
// advance Lance HEAD inside the sidecar window. `compact_files` rewrites
|
||||
// fragments and drops them from existing index segments' coverage;
|
||||
// `optimize_indices` folds the rewritten and any previously-unindexed
|
||||
// fragments back in (Lance's incremental merge, not a full retrain). This
|
||||
// is the same compact -> optimize_indices sequencing LanceDB's `optimize()`
|
||||
// uses. `optimize_indices` is an inline-commit residual: lance-6.0.1
|
||||
// exposes no uncommitted variant, so like `compact_files` it commits
|
||||
// directly and relies on the sidecar for recovery.
|
||||
let version_before = ds.version().version;
|
||||
let metrics: CompactionMetrics = compact_files(&mut ds, options, None)
|
||||
let metrics: CompactionMetrics = if will_compact {
|
||||
compact_files(&mut ds, options, None)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
} else {
|
||||
CompactionMetrics::default()
|
||||
};
|
||||
ds.optimize_indices(&OptimizeOptions::default())
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let version_after = ds.version().version;
|
||||
.map_err(|e| OmniError::Lance(format!("optimize_indices on {}: {}", table_key, e)))?;
|
||||
|
||||
// Materialize any declared-but-missing index over the just-compacted layout,
|
||||
// reusing the build chokepoint (idempotent: skips existing indexes; fault-
|
||||
// isolates an untrainable vector column into `pending` rather than failing).
|
||||
// Run it UNCONDITIONALLY now that we are past the no-op gate — not only when
|
||||
// `needs_index_create`. A table can enter this path for compaction or
|
||||
// reindex while its sole missing index is an untrainable Vector column
|
||||
// (which `needs_index_work_*` does not count as buildable work); calling the
|
||||
// build here is what surfaces that column in `pending_indexes`, so optimize
|
||||
// can't compact a table yet silently drop the deferred-index signal.
|
||||
// Idempotent + cheap when there is nothing to build. Vector index creation
|
||||
// is an inline-commit residual; the Optimize sidecar's loose post_commit_pin
|
||||
// covers the extra commits.
|
||||
let catalog = db.catalog();
|
||||
let mut snapshot = crate::storage_layer::SnapshotHandle::new(ds);
|
||||
let pending_indexes: Vec<super::PendingIndex> =
|
||||
super::table_ops::build_indices_on_dataset_for_catalog(
|
||||
db,
|
||||
&catalog,
|
||||
&table_key,
|
||||
&mut snapshot,
|
||||
)
|
||||
.await?;
|
||||
let version_after = snapshot.dataset().version().version;
|
||||
let committed = version_after != version_before;
|
||||
|
||||
// Pin the per-writer Phase B → Phase C residual for optimize: Lance HEAD has
|
||||
|
|
@ -414,9 +480,6 @@ async fn optimize_one_table(
|
|||
// expected = the version observed under the queue). On failure the sidecar
|
||||
// is intentionally left for the open-time recovery sweep to roll forward.
|
||||
if committed {
|
||||
// Re-wrap the post-compaction dataset to read its state through the
|
||||
// trait surface (`table_state` is a read; no HEAD advance).
|
||||
let snapshot = crate::storage_layer::SnapshotHandle::new(ds);
|
||||
let state = db.storage().table_state(&full_path, &snapshot).await?;
|
||||
let update = crate::db::SubTableUpdate {
|
||||
table_key: table_key.clone(),
|
||||
|
|
@ -443,7 +506,9 @@ async fn optimize_one_table(
|
|||
);
|
||||
}
|
||||
|
||||
Ok(TableOptimizeStats::compacted(table_key, &metrics, committed))
|
||||
let mut stat = TableOptimizeStats::compacted(table_key, &metrics, committed);
|
||||
stat.pending_indexes = pending_indexes;
|
||||
Ok(stat)
|
||||
}
|
||||
|
||||
/// Run Lance `cleanup_old_versions` on every node + edge table on `main`,
|
||||
|
|
@ -575,27 +640,37 @@ pub struct BranchReconcileStats {
|
|||
pub failures: Vec<(String, String)>,
|
||||
}
|
||||
|
||||
/// Drop every per-table and commit-graph Lance branch that the manifest no
|
||||
/// longer references.
|
||||
/// Drop every per-table and commit-graph Lance branch fork the manifest does
|
||||
/// not reference.
|
||||
///
|
||||
/// Orphaned forks arise when a `branch_delete` flips the manifest authority
|
||||
/// (atomic) but a downstream best-effort reclaim does not complete. They are
|
||||
/// unreachable through any snapshot — no manifest entry can name them — yet
|
||||
/// they pin their `tree/{branch}/` storage and can block reusing the branch
|
||||
/// name. This is the guaranteed convergence backstop: it is idempotent and
|
||||
/// derived purely from the manifest authority, so it no-ops once everything is
|
||||
/// reconciled, and it would harmlessly find nothing if a future Lance atomic
|
||||
/// multi-dataset branch op prevented orphans from forming.
|
||||
/// Two origins produce a manifest-unreferenced fork:
|
||||
/// 1. A `branch_delete` flips the manifest authority (atomic) but a
|
||||
/// downstream best-effort reclaim does not complete — the whole branch is
|
||||
/// gone from the manifest, but a `tree/{branch}/` ref lingers.
|
||||
/// 2. A first-write fork (or a merge fork) creates the branch ref before the
|
||||
/// manifest publish, then the writer dies / is cancelled — the branch is
|
||||
/// still a live manifest branch, but the manifest's snapshot of it does
|
||||
/// not place *this table* on the branch.
|
||||
///
|
||||
/// The keep-set is the full (unfiltered) manifest branch list, so system
|
||||
/// branches' forks are never reclaimed; `main`/default is not a named Lance
|
||||
/// branch and so is never a candidate. Referencing children are dropped before
|
||||
/// parents (Lance refuses to delete a referenced parent) by ordering longest
|
||||
/// branch names first.
|
||||
/// The write path self-heals (2) on the next write to the table
|
||||
/// (`reclaim_orphaned_fork_and_refork`); this is the guaranteed-convergence
|
||||
/// backstop that also covers (1) and any table the write path never revisits.
|
||||
///
|
||||
/// The orphan test is therefore **per-table**, not per-branch-name: a Lance
|
||||
/// branch `B` on table `T` is an orphan iff `B` is not a live manifest branch
|
||||
/// at all (origin 1) OR the manifest's branch-`B` snapshot does not place `T`
|
||||
/// on `B` (origin 2). A legitimately-forked table (`table_branch == Some(B)`)
|
||||
/// is kept. `main` and internal/system branches are never candidates. Lance
|
||||
/// refuses to force-delete a branch with referencing descendants, so children
|
||||
/// are dropped before parents (longest name first). Idempotent and authority-
|
||||
/// derived: no-ops once reconciled, and degrades to finding nothing if a future
|
||||
/// Lance atomic multi-dataset branch op prevents orphans from forming.
|
||||
pub async fn reconcile_orphaned_branches(db: &Omnigraph) -> Result<BranchReconcileStats> {
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
let keep: HashSet<String> = db
|
||||
// Live manifest branches: the set whose per-table placements are
|
||||
// authoritative. A branch absent here is a whole-branch (origin-1) orphan.
|
||||
let live_branches: HashSet<String> = db
|
||||
.coordinator
|
||||
.read()
|
||||
.await
|
||||
|
|
@ -616,6 +691,12 @@ pub async fn reconcile_orphaned_branches(db: &Omnigraph) -> Result<BranchReconci
|
|||
.collect();
|
||||
|
||||
let mut stats = BranchReconcileStats::default();
|
||||
// Per-branch snapshots are resolved once and cached across tables (few
|
||||
// branches in practice); origin-2 detection consults the branch's own view.
|
||||
// Failures are cached too: one branch-level read failure should not refetch
|
||||
// and append duplicate per-table noise for every table that lists the ref.
|
||||
let mut branch_snapshots: HashMap<String, crate::db::Snapshot> = HashMap::new();
|
||||
let mut failed_branch_snapshots: HashSet<String> = HashSet::new();
|
||||
|
||||
// Per-table fault isolation: one table's transient failure is recorded and
|
||||
// logged, never aborting the rest of the sweep.
|
||||
|
|
@ -634,7 +715,104 @@ pub async fn reconcile_orphaned_branches(db: &Omnigraph) -> Result<BranchReconci
|
|||
continue;
|
||||
}
|
||||
};
|
||||
for branch in orphan_branches(listed, &keep) {
|
||||
|
||||
// Decide per (table, branch) whether the fork is an orphan.
|
||||
let mut orphans: Vec<String> = Vec::new();
|
||||
for branch in listed {
|
||||
// `main` is not a named Lance branch; system/internal branches
|
||||
// (e.g. the schema-apply lock) own legitimate forks — never touch.
|
||||
if branch == "main" || crate::db::is_internal_system_branch(&branch) {
|
||||
continue;
|
||||
}
|
||||
let is_orphan = if !live_branches.contains(&branch) {
|
||||
true // origin 1: whole branch gone from the manifest
|
||||
} else {
|
||||
// origin 2: live branch, but does the manifest place THIS
|
||||
// table on it? Resolve (and cache) the branch's snapshot.
|
||||
if failed_branch_snapshots.contains(&branch) {
|
||||
continue;
|
||||
}
|
||||
if !branch_snapshots.contains_key(&branch) {
|
||||
let branch_snapshot =
|
||||
match crate::failpoints::maybe_fail("cleanup.resolve_branch_snapshot") {
|
||||
Ok(()) => db.snapshot_for_branch(Some(&branch)).await,
|
||||
Err(injected) => Err(injected),
|
||||
};
|
||||
match branch_snapshot {
|
||||
Ok(snap) => {
|
||||
branch_snapshots.insert(branch.clone(), snap);
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
target: "omnigraph::cleanup",
|
||||
table = %table_key,
|
||||
branch = %branch,
|
||||
error = %err,
|
||||
"resolving branch snapshot failed during reconcile; skipping",
|
||||
);
|
||||
stats.failures.push((table_key.clone(), err.to_string()));
|
||||
failed_branch_snapshots.insert(branch.clone());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
branch_snapshots[&branch]
|
||||
.entry(&table_key)
|
||||
.map(|e| e.table_branch.as_deref() != Some(branch.as_str()))
|
||||
.unwrap_or(true)
|
||||
};
|
||||
if is_orphan {
|
||||
orphans.push(branch);
|
||||
}
|
||||
}
|
||||
// Children before parents (longest name first) so Lance's referenced-
|
||||
// parent RefConflict cannot block reclamation.
|
||||
orphans.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));
|
||||
|
||||
for branch in orphans {
|
||||
// Serialize against in-process live writers before destroying a ref.
|
||||
// A first-write fork holds the per-(table, branch) write queue from
|
||||
// before the fork through the manifest publish; on a LIVE branch its
|
||||
// in-flight fork looks exactly like an origin-2 orphan (manifest not
|
||||
// yet advanced). Acquire the same queue so cleanup waits for any such
|
||||
// writer, then RE-VALIDATE under the queue with a fresh read: if the
|
||||
// writer published in the meantime (table now placed on the branch),
|
||||
// it is no longer an orphan — skip it. (Cross-process writers remain
|
||||
// the documented one-winner-CAS gap.) One key held at a time → no
|
||||
// lock-order inversion against multi-table `acquire_many` writers.
|
||||
let _guard = db
|
||||
.write_queue()
|
||||
.acquire(&(table_key.clone(), Some(branch.clone())))
|
||||
.await;
|
||||
// Decide under the queue from FRESH authority via the shared
|
||||
// classifier (same decision the write-path reclaim uses) — never
|
||||
// from the sweep-start `live_branches` capture. A branch created
|
||||
// AFTER that capture is absent from the stale set yet may already
|
||||
// carry a legitimately-published fork (an in-process writer held
|
||||
// this queue through its fork+publish; we just waited on it), so a
|
||||
// stale "origin-1 ⇒ delete" shortcut would destroy a live fork.
|
||||
// Only `Orphan` is reclaimed; `Indeterminate` (transient read) is
|
||||
// skipped and recorded so the next cleanup run retries it.
|
||||
match super::table_ops::classify_fork_ref(db, &table_key, &branch).await {
|
||||
super::table_ops::ForkRefStatus::Orphan => {}
|
||||
super::table_ops::ForkRefStatus::Legitimate => continue,
|
||||
super::table_ops::ForkRefStatus::Indeterminate => {
|
||||
tracing::warn!(
|
||||
target: "omnigraph::cleanup",
|
||||
table = %table_key,
|
||||
branch = %branch,
|
||||
"fresh re-check inconclusive during reconcile; skipping to avoid \
|
||||
destroying a possibly-live fork (will retry next cleanup)",
|
||||
);
|
||||
stats.failures.push((
|
||||
table_key.clone(),
|
||||
format!("indeterminate fork status for {branch}"),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let outcome = match crate::failpoints::maybe_fail("cleanup.reconcile_fork") {
|
||||
Ok(()) => storage.force_delete_branch(&full_path, &branch).await,
|
||||
Err(injected) => Err(injected),
|
||||
|
|
@ -655,15 +833,17 @@ pub async fn reconcile_orphaned_branches(db: &Omnigraph) -> Result<BranchReconci
|
|||
}
|
||||
}
|
||||
|
||||
// Commit-graph orphans (best-effort: the dataset may not exist on a graph
|
||||
// that has never committed; any failure is isolated and retried next time).
|
||||
if let Err(err) = reconcile_commit_graph_orphans(db, &keep, &mut stats).await {
|
||||
// Commit-graph orphans are whole-branch (not per-table), so the simple
|
||||
// "branch name not in the live set" test still applies there.
|
||||
if let Err(err) = reconcile_commit_graph_orphans(db, &live_branches, &mut stats).await {
|
||||
tracing::warn!(
|
||||
target: "omnigraph::cleanup",
|
||||
error = %err,
|
||||
"commit-graph orphan reconcile failed; will retry next cleanup",
|
||||
);
|
||||
stats.failures.push(("_graph_commits".to_string(), err.to_string()));
|
||||
stats
|
||||
.failures
|
||||
.push(("_graph_commits".to_string(), err.to_string()));
|
||||
}
|
||||
|
||||
Ok(stats)
|
||||
|
|
@ -691,7 +871,9 @@ async fn reconcile_commit_graph_orphans(
|
|||
error = %err,
|
||||
"reclaiming orphaned commit-graph branch failed; will retry next cleanup",
|
||||
);
|
||||
stats.failures.push(("_graph_commits".to_string(), err.to_string()));
|
||||
stats
|
||||
.failures
|
||||
.push(("_graph_commits".to_string(), err.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -720,3 +902,66 @@ pub(super) fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) ->
|
|||
keys.sort();
|
||||
keys
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "failpoints"))]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::failpoints::ScopedFailPoint;
|
||||
use crate::loader::{LoadMode, load_jsonl};
|
||||
|
||||
/// Test helper: build the dataset URI for a node table as
/// `<root>/nodes/<hash>`, where `<hash>` is the 64-bit FNV-1a hash of
/// `type_name`'s bytes rendered as 16 zero-padded lowercase hex digits.
/// Trailing `/` characters on `root` are dropped before joining.
///
/// NOTE(review): presumably mirrors the storage layer's node-table path
/// scheme, since the test below opens the Lance dataset at this URI directly —
/// confirm against the production path builder if that scheme changes.
fn node_table_uri(root: &str, type_name: &str) -> String {
    // Standard 64-bit FNV-1a parameters.
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply (wrapping) by the prime.
    let digest = type_name.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    let base = root.trim_end_matches('/');
    format!("{base}/nodes/{digest:016x}")
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reconcile_caches_live_branch_snapshot_resolution_failure() {
|
||||
let _scenario = fail::FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let schema = "node Person { name: String @key }\nnode Company { name: String @key }\n";
|
||||
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||
load_jsonl(
|
||||
&mut db,
|
||||
"{\"type\":\"Person\",\"data\":{\"name\":\"Alice\"}}\n\
|
||||
{\"type\":\"Company\",\"data\":{\"name\":\"Acme\"}}",
|
||||
LoadMode::Merge,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
db.branch_create("feature").await.unwrap();
|
||||
|
||||
for type_name in ["Person", "Company"] {
|
||||
let table_uri = node_table_uri(uri, type_name);
|
||||
let mut ds = lance::Dataset::open(&table_uri).await.unwrap();
|
||||
let base = ds.version().version;
|
||||
ds.create_branch("feature", base, None).await.unwrap();
|
||||
}
|
||||
|
||||
let _fp = ScopedFailPoint::new("cleanup.resolve_branch_snapshot", "return");
|
||||
let stats = reconcile_orphaned_branches(&db).await.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
stats.failures.len(),
|
||||
1,
|
||||
"one live-branch snapshot resolution failure should be reported once, \
|
||||
not once per table: {:?}",
|
||||
stats.failures
|
||||
);
|
||||
assert!(
|
||||
stats.failures[0]
|
||||
.1
|
||||
.contains("cleanup.resolve_branch_snapshot"),
|
||||
"the recorded failure should be the branch-snapshot resolution failure: {:?}",
|
||||
stats.failures
|
||||
);
|
||||
assert!(
|
||||
stats.reclaimed.is_empty(),
|
||||
"unreadable live-branch refs must be left for the next cleanup run"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -193,7 +193,6 @@ where
|
|||
let mut added_tables = BTreeSet::new();
|
||||
let mut renamed_tables = HashMap::new();
|
||||
let mut rewritten_tables = BTreeSet::new();
|
||||
let mut indexed_tables = BTreeSet::new();
|
||||
let mut dropped_tables = BTreeSet::new();
|
||||
// Hard-drop cleanup targets: (table_key, full_dataset_uri).
|
||||
// Populated for DropProperty { Hard } and DropType { Hard }; the
|
||||
|
|
@ -252,14 +251,14 @@ where
|
|||
.or_default()
|
||||
.insert(to.clone(), from.clone());
|
||||
}
|
||||
SchemaMigrationStep::AddConstraint {
|
||||
type_kind,
|
||||
type_name,
|
||||
..
|
||||
} => {
|
||||
indexed_tables.insert(schema_table_key(*type_kind, type_name));
|
||||
}
|
||||
SchemaMigrationStep::UpdateTypeMetadata { .. }
|
||||
// AddConstraint is only ever an `@index` addition (every other
|
||||
// added constraint plans as UnsupportedChange). It records intent
|
||||
// in the desired catalog/IR; the physical index is built off the
|
||||
// critical path by ensure_indices/optimize (iss-848), so the apply
|
||||
// does no table work for it — a pure metadata change like the two
|
||||
// metadata steps below.
|
||||
SchemaMigrationStep::AddConstraint { .. }
|
||||
| SchemaMigrationStep::UpdateTypeMetadata { .. }
|
||||
| SchemaMigrationStep::UpdatePropertyMetadata { .. } => {}
|
||||
SchemaMigrationStep::DropProperty {
|
||||
type_kind,
|
||||
|
|
@ -347,18 +346,15 @@ where
|
|||
let mut table_updates = HashMap::<String, crate::db::SubTableUpdate>::new();
|
||||
let mut table_tombstones = HashMap::<String, u64>::new();
|
||||
|
||||
// Recovery sidecar: protect the per-table commit_staged loop in
|
||||
// rewritten_tables + indexed_tables. The post_commit_pin we record
|
||||
// here is a lower bound (expected + 1); the classifier loose-matches
|
||||
// for SidecarKind::SchemaApply because the actual N depends on how
|
||||
// many indices need building. See classify_table's loose-match arm.
|
||||
// Recovery sidecar: protect the per-table `stage_overwrite` +
|
||||
// `commit_staged` in rewritten_tables — the only tables that advance Lance
|
||||
// HEAD inline now that index building is deferred to the reconciler
|
||||
// (iss-848). Each rewritten table is exactly one commit, so
|
||||
// `post_commit_pin = expected + 1` is now exact (it was a loose lower bound
|
||||
// when index builds added extra commits); the classifier's loose-match for
|
||||
// SidecarKind::SchemaApply still accepts it.
|
||||
let recovery_pins: Vec<crate::db::manifest::SidecarTablePin> = rewritten_tables
|
||||
.iter()
|
||||
.chain(indexed_tables.iter().filter(|t| {
|
||||
!rewritten_tables.contains(*t)
|
||||
&& !added_tables.contains(*t)
|
||||
&& !renamed_tables.contains_key(*t)
|
||||
}))
|
||||
.filter_map(|table_key| {
|
||||
let entry = snapshot.entry(table_key)?;
|
||||
Some(crate::db::manifest::SidecarTablePin {
|
||||
|
|
@ -432,10 +428,10 @@ where
|
|||
// manifest publish via `commit_changes_with_actor` below.
|
||||
//
|
||||
// Schema-apply already holds the graph-wide `__schema_apply_lock__`
|
||||
// sentinel branch, so under PR 1b's intermediate state these
|
||||
// per-table acquisitions are uncontended. They exist for symmetry
|
||||
// with future MR-870 recovery, which will need queue acquisition
|
||||
// before any `Dataset::restore` it issues for SchemaApply sidecars.
|
||||
// sentinel branch, so these per-table acquisitions are uncontended in
|
||||
// practice. They exist for symmetry with the recovery reconciler, which
|
||||
// acquires the same queues before any `Dataset::restore` it issues for
|
||||
// SchemaApply sidecars.
|
||||
let mut schema_apply_queue_keys: Vec<(String, Option<String>)> = recovery_pins
|
||||
.iter()
|
||||
.map(|pin| (pin.table_key.clone(), pin.table_branch.clone()))
|
||||
|
|
@ -490,10 +486,11 @@ where
|
|||
let table_path = table_path_for_table_key(table_key)?;
|
||||
let dataset_uri = db.storage().dataset_uri(&table_path);
|
||||
let schema = schema_for_table_key(&desired_catalog, table_key)?;
|
||||
let mut ds =
|
||||
let ds =
|
||||
SnapshotHandle::new(TableStore::create_empty_dataset(&dataset_uri, &schema).await?);
|
||||
db.build_indices_on_dataset_for_catalog(&desired_catalog, table_key, &mut ds)
|
||||
.await?;
|
||||
// Indexes for the new table are materialized off the critical path by
|
||||
// ensure_indices/optimize (iss-848); a 0-row table is never trainable
|
||||
// anyway. The @index intent is recorded in the persisted catalog/IR.
|
||||
let state = db.storage().table_state(&dataset_uri, &ds).await?;
|
||||
table_registrations.insert(table_key.clone(), table_path);
|
||||
table_updates.insert(
|
||||
|
|
@ -533,10 +530,9 @@ where
|
|||
.await?;
|
||||
let table_path = table_path_for_table_key(target_table_key)?;
|
||||
let dataset_uri = db.storage().dataset_uri(&table_path);
|
||||
let mut target_ds =
|
||||
let target_ds =
|
||||
SnapshotHandle::new(TableStore::write_dataset(&dataset_uri, batch).await?);
|
||||
db.build_indices_on_dataset_for_catalog(&desired_catalog, target_table_key, &mut target_ds)
|
||||
.await?;
|
||||
// Indexes on the renamed table are reconciled later (iss-848).
|
||||
let state = db.storage().table_state(&dataset_uri, &target_ds).await?;
|
||||
table_registrations.insert(target_table_key.clone(), table_path);
|
||||
table_updates.insert(
|
||||
|
|
@ -593,9 +589,10 @@ where
|
|||
.open_dataset_head_for_write(table_key, &dataset_uri, entry.table_branch.as_deref())
|
||||
.await?;
|
||||
let staged = db.storage().stage_overwrite(&existing, batch).await?;
|
||||
let mut target_ds = db.storage().commit_staged(existing, staged).await?;
|
||||
db.build_indices_on_dataset_for_catalog(&desired_catalog, table_key, &mut target_ds)
|
||||
.await?;
|
||||
let target_ds = db.storage().commit_staged(existing, staged).await?;
|
||||
// The rewrite drops the table's existing index coverage; it is
|
||||
// restored off the critical path by optimize's optimize_indices /
|
||||
// ensure_indices (iss-848). Reads scan uncovered fragments meanwhile.
|
||||
let state = db.storage().table_state(&dataset_uri, &target_ds).await?;
|
||||
table_updates.insert(
|
||||
table_key.clone(),
|
||||
|
|
@ -609,41 +606,12 @@ where
|
|||
);
|
||||
}
|
||||
|
||||
for table_key in &indexed_tables {
|
||||
if added_tables.contains(table_key)
|
||||
|| renamed_tables.contains_key(table_key)
|
||||
|| rewritten_tables.contains(table_key)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let entry = snapshot.entry(table_key).ok_or_else(|| {
|
||||
OmniError::manifest(format!(
|
||||
"missing table '{}' for schema index apply",
|
||||
table_key
|
||||
))
|
||||
})?;
|
||||
ensure_snapshot_entry_head_matches(db, entry).await?;
|
||||
let dataset_uri = db.storage().dataset_uri(&entry.table_path);
|
||||
let mut ds = db
|
||||
.storage()
|
||||
.open_dataset_head_for_write(table_key, &dataset_uri, entry.table_branch.as_deref())
|
||||
.await?;
|
||||
db.storage()
|
||||
.ensure_expected_version(&ds, table_key, entry.table_version)?;
|
||||
db.build_indices_on_dataset_for_catalog(&desired_catalog, table_key, &mut ds)
|
||||
.await?;
|
||||
let state = db.storage().table_state(&dataset_uri, &ds).await?;
|
||||
table_updates.insert(
|
||||
table_key.clone(),
|
||||
crate::db::SubTableUpdate {
|
||||
table_key: table_key.clone(),
|
||||
table_version: state.version,
|
||||
table_branch: None,
|
||||
row_count: state.row_count,
|
||||
version_metadata: state.version_metadata,
|
||||
},
|
||||
);
|
||||
}
|
||||
// Index-only changes (AddConstraint, i.e. adding an `@index`) are pure
|
||||
// metadata: the new `@index` intent is recorded in the desired catalog/IR
|
||||
// persisted below, and the physical index is materialized off the critical
|
||||
// path by `ensure_indices`/`optimize` (iss-848). Schema apply touches no
|
||||
// table data for them, so there is no per-table loop here and no recovery
|
||||
// pin (no Lance HEAD advances). Reads stay correct meanwhile via a scan.
|
||||
|
||||
let mut manifest_changes = Vec::new();
|
||||
for (table_key, table_path) in table_registrations {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ pub(super) async fn graph_index_for_resolved(
|
|||
db.runtime_cache.graph_index(resolved, &catalog).await
|
||||
}
|
||||
|
||||
pub(super) async fn ensure_indices(db: &Omnigraph) -> Result<()> {
|
||||
pub(super) async fn ensure_indices(db: &Omnigraph) -> Result<Vec<PendingIndex>> {
|
||||
let current_branch = db
|
||||
.coordinator
|
||||
.read()
|
||||
|
|
@ -31,7 +31,7 @@ pub(super) async fn ensure_indices(db: &Omnigraph) -> Result<()> {
|
|||
ensure_indices_for_branch(db, current_branch.as_deref()).await
|
||||
}
|
||||
|
||||
pub(super) async fn ensure_indices_on(db: &Omnigraph, branch: &str) -> Result<()> {
|
||||
pub(super) async fn ensure_indices_on(db: &Omnigraph, branch: &str) -> Result<Vec<PendingIndex>> {
|
||||
let branch = normalize_branch_name(branch)?;
|
||||
ensure_indices_for_branch(db, branch.as_deref()).await
|
||||
}
|
||||
|
|
@ -73,12 +73,16 @@ pub(super) async fn failpoint_publish_table_head_without_index_rebuild_for_test(
|
|||
.await
|
||||
}
|
||||
|
||||
pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&str>) -> Result<()> {
|
||||
pub(super) async fn ensure_indices_for_branch(
|
||||
db: &Omnigraph,
|
||||
branch: Option<&str>,
|
||||
) -> Result<Vec<PendingIndex>> {
|
||||
db.ensure_schema_state_valid().await?;
|
||||
db.ensure_schema_apply_idle("ensure_indices").await?;
|
||||
let resolved = db.resolved_branch_target(branch).await?;
|
||||
let snapshot = resolved.snapshot;
|
||||
let mut updates = Vec::new();
|
||||
let mut pending = Vec::new();
|
||||
let active_branch = resolved.branch;
|
||||
let catalog = db.catalog();
|
||||
|
||||
|
|
@ -160,9 +164,8 @@ pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&st
|
|||
// that needs index work. Held across the per-table commit loop and
|
||||
// the manifest publish at the end of this function. Sorted-order
|
||||
// acquisition prevents lock-order inversion against concurrent
|
||||
// multi-table writers (mutation finalize, branch_merge, future
|
||||
// MR-870 recovery). Under PR 1b's intermediate state (global server
|
||||
// RwLock still in place), this acquisition is uncontended.
|
||||
// multi-table writers (mutation finalize, branch_merge, the fork
|
||||
// path, recovery).
|
||||
let queue_keys: Vec<(String, Option<String>)> = recovery_pins
|
||||
.iter()
|
||||
.map(|pin| (pin.table_key.clone(), pin.table_branch.clone()))
|
||||
|
|
@ -217,7 +220,7 @@ pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&st
|
|||
};
|
||||
let row_count = db.storage().count_rows(&ds, None).await.unwrap_or(0);
|
||||
if row_count > 0 {
|
||||
build_indices_on_dataset(db, &table_key, &mut ds).await?;
|
||||
pending.extend(build_indices_on_dataset(db, &table_key, &mut ds).await?);
|
||||
}
|
||||
|
||||
let state = db.storage().table_state(&full_path, &ds).await?;
|
||||
|
|
@ -265,7 +268,7 @@ pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&st
|
|||
};
|
||||
let row_count = db.storage().count_rows(&ds, None).await.unwrap_or(0);
|
||||
if row_count > 0 {
|
||||
build_indices_on_dataset(db, &table_key, &mut ds).await?;
|
||||
pending.extend(build_indices_on_dataset(db, &table_key, &mut ds).await?);
|
||||
}
|
||||
|
||||
let state = db.storage().table_state(&full_path, &ds).await?;
|
||||
|
|
@ -307,7 +310,69 @@ pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&st
|
|||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(pending)
|
||||
}
|
||||
|
||||
/// Kind of physical index backing a one-column `@index`/`@key` node property.
///
/// `node_prop_index_kind` maps a property type onto one of these variants, or
/// onto `None` for columns that are not indexable here (list columns and
/// `Blob`). Both the index builder (`build_indices_on_dataset_for_catalog`) and
/// the coverage check (`needs_index_work_node`) dispatch on that single
/// mapping, so the two cannot disagree about which index a property should
/// have: anything the builder would give a BTREE is also reported as "needs
/// work" until that BTREE exists, which keeps the HEAD-advancing build under
/// recovery-sidecar cover.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NodePropIndexKind {
    /// Scalar BTREE index (enums and orderable scalars).
    Btree,
    /// Full-text inverted index (free-text `String` columns).
    Fts,
    /// Vector index (`Vector(_)` columns).
    Vector,
}
|
||||
|
||||
fn node_prop_index_kind(prop_type: &PropType) -> Option<NodePropIndexKind> {
|
||||
if prop_type.list {
|
||||
return None;
|
||||
}
|
||||
// Enums are physically `String` but filtered by equality, so they take a
|
||||
// scalar BTREE, not an FTS inverted index (Lance never consults an inverted
|
||||
// index for `=`/range). Free-text Strings keep FTS for
|
||||
// `search()`/`match_text`/`bm25`.
|
||||
let is_enum = prop_type.enum_values.is_some();
|
||||
match prop_type.scalar {
|
||||
ScalarType::String if !is_enum => Some(NodePropIndexKind::Fts),
|
||||
ScalarType::Vector(_) => Some(NodePropIndexKind::Vector),
|
||||
ScalarType::String
|
||||
| ScalarType::DateTime
|
||||
| ScalarType::Date
|
||||
| ScalarType::I32
|
||||
| ScalarType::I64
|
||||
| ScalarType::U32
|
||||
| ScalarType::U64
|
||||
| ScalarType::F32
|
||||
| ScalarType::F64
|
||||
| ScalarType::Bool => Some(NodePropIndexKind::Btree),
|
||||
ScalarType::Blob => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a vector column currently has at least one non-null vector — the
|
||||
/// minimum for Lance IVF k-means to train (the `ivf_flat(1)` index we build
|
||||
/// needs >=1 vector). Used identically by `needs_index_work_node` (so an
|
||||
/// untrainable column is not pinned for recovery — avoiding a zero-commit pin
|
||||
/// that would roll back a sibling's index work) and by the vector build arm (so
|
||||
/// `create_vector_index` is only attempted when it can succeed, keeping its
|
||||
/// genuine errors fatal instead of swallowed as pending). If index params
|
||||
/// become size-aware (dev-graph iss-687), this threshold moves with them.
|
||||
async fn vector_column_trainable(
|
||||
db: &Omnigraph,
|
||||
ds: &SnapshotHandle,
|
||||
column: &str,
|
||||
) -> Result<bool> {
|
||||
Ok(db
|
||||
.storage()
|
||||
.count_rows(ds, Some(format!("{column} IS NOT NULL")))
|
||||
.await?
|
||||
> 0)
|
||||
}
|
||||
|
||||
/// Returns true if the node table is missing at least one declared
|
||||
|
|
@ -318,12 +383,13 @@ pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&st
|
|||
/// would force `NoMovement` classification on recovery and trigger the
|
||||
/// all-or-nothing rollback of sibling tables' legitimate index work).
|
||||
///
|
||||
/// Per the actual `build_indices_on_dataset_for_catalog` implementation
|
||||
/// (this file, ~line 419-491), nodes get BTree (id) + per-prop FTS
|
||||
/// (@search String) + per-prop Vector indices; edges get BTree only
|
||||
/// (id, src, dst). The two helpers mirror that asymmetry — see the
|
||||
/// `needs_index_work_edge` doc comment.
|
||||
async fn needs_index_work_node(
|
||||
/// Per `build_indices_on_dataset_for_catalog`, nodes get BTree (id) plus, for
|
||||
/// each one-column `@index`/`@key` property, the index `node_prop_index_kind`
|
||||
/// assigns: a scalar BTREE for enums and orderable scalars
|
||||
/// (DateTime/Date/numeric/Bool), FTS for free-text Strings, or a Vector index.
|
||||
/// Edges get BTree only (id, src, dst). This helper and the builder share
|
||||
/// `node_prop_index_kind` so they cannot drift — see its doc comment.
|
||||
pub(super) async fn needs_index_work_node(
|
||||
db: &Omnigraph,
|
||||
type_name: &str,
|
||||
table_key: &str,
|
||||
|
|
@ -359,14 +425,30 @@ async fn needs_index_work_node(
|
|||
let Some(prop_type) = node_type.properties.get(prop_name) else {
|
||||
continue;
|
||||
};
|
||||
if matches!(prop_type.scalar, ScalarType::String) && !prop_type.list {
|
||||
if !db.storage().has_fts_index(&ds, prop_name).await? {
|
||||
return Ok(true);
|
||||
match node_prop_index_kind(prop_type) {
|
||||
Some(NodePropIndexKind::Fts) => {
|
||||
if !db.storage().has_fts_index(&ds, prop_name).await? {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
} else if matches!(prop_type.scalar, ScalarType::Vector(_)) && !prop_type.list {
|
||||
if !db.storage().has_vector_index(&ds, prop_name).await? {
|
||||
return Ok(true);
|
||||
Some(NodePropIndexKind::Vector) => {
|
||||
// Only count a missing vector index as buildable *work* when the
|
||||
// column is trainable (>=1 non-null vector). An untrainable
|
||||
// column would defer in the build and commit nothing; pinning it
|
||||
// for recovery would be a zero-commit pin that classifies
|
||||
// NoMovement and rolls back a sibling table's index work.
|
||||
if !db.storage().has_vector_index(&ds, prop_name).await?
|
||||
&& vector_column_trainable(db, &ds, prop_name).await?
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
Some(NodePropIndexKind::Btree) => {
|
||||
if !db.storage().has_btree_index(&ds, prop_name).await? {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
|
|
@ -382,7 +464,7 @@ async fn needs_index_work_node(
|
|||
///
|
||||
/// Empty edge tables are skipped by the ensure_indices loop the same
|
||||
/// way node tables are; see `needs_index_work_node`.
|
||||
async fn needs_index_work_edge(
|
||||
pub(super) async fn needs_index_work_edge(
|
||||
db: &Omnigraph,
|
||||
table_key: &str,
|
||||
full_path: &str,
|
||||
|
|
@ -499,8 +581,14 @@ pub(super) async fn open_owned_dataset_for_branch_write(
|
|||
));
|
||||
}
|
||||
}
|
||||
fork_dataset_from_entry_state(
|
||||
db,
|
||||
// The fork advances Lance state before the manifest publish. The
|
||||
// caller holds the per-(table, active_branch) write queue from
|
||||
// before this fork through the publish, so a leftover ref is a
|
||||
// manifest-unreferenced fork (interrupted prior fork, or
|
||||
// delete+recreate), not a live in-process fork. The wrapper
|
||||
// self-heals it (reclaim + re-fork); see
|
||||
// `Omnigraph::fork_dataset_from_entry_state`.
|
||||
db.fork_dataset_from_entry_state(
|
||||
table_key,
|
||||
full_path,
|
||||
source_branch,
|
||||
|
|
@ -528,7 +616,7 @@ pub(super) async fn fork_dataset_from_entry_state(
|
|||
source_branch: Option<&str>,
|
||||
source_version: u64,
|
||||
active_branch: &str,
|
||||
) -> Result<SnapshotHandle> {
|
||||
) -> Result<crate::storage_layer::ForkOutcome<SnapshotHandle>> {
|
||||
db.storage()
|
||||
.fork_branch_from_state(
|
||||
full_path,
|
||||
|
|
@ -540,6 +628,172 @@ pub(super) async fn fork_dataset_from_entry_state(
|
|||
.await
|
||||
}
|
||||
|
||||
/// How a Lance branch ref `B` on table `T` relates to FRESH manifest authority.
///
/// This is the one decision shared by both fork-ref reclaim sites: the
/// write-path reclaim ([`reclaim_orphaned_fork_and_refork`]) and the cleanup
/// reconciler (`optimize::reconcile_orphaned_branches`). Routing both through
/// a single classification keeps the two destructive sites from drifting
/// apart, which is what happened when each was hardened on its own.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ForkRefStatus {
    /// The manifest places `T` on `B`: a legitimate fork that must never be
    /// destroyed.
    Legitimate,
    /// The manifest does not reference this fork, either because `T` is not on
    /// `B` or because `B` is absent from the manifest entirely. Reclaimable.
    Orphan,
    /// Fresh authority could not be established (a transient read failure on a
    /// live branch). Ambiguous, so nothing is destroyed; the caller retries or
    /// converges later.
    Indeterminate,
}
|
||||
|
||||
/// Classify a fork ref from FRESH manifest authority (bypasses the coordinator
|
||||
/// cache). MUST be called with the per-`(table, branch)` write queue held, so
|
||||
/// the classification is stable against in-process writers for the caller's
|
||||
/// critical section. Both reclaim sites map the result to their own action
|
||||
/// (write path: reclaim vs retryable; cleanup: delete vs skip), but the
|
||||
/// destroy-only-on-`Orphan` rule is enforced here, once.
|
||||
pub(crate) async fn classify_fork_ref(
|
||||
db: &Omnigraph,
|
||||
table_key: &str,
|
||||
branch: &str,
|
||||
) -> ForkRefStatus {
|
||||
// `classify.fresh_read` failpoint: simulate a transient failure of the
|
||||
// fresh-authority read (no-op without the `failpoints` feature). Lets a
|
||||
// test exercise the Indeterminate path — a read failure on a live branch
|
||||
// must classify as Indeterminate (skip), never Orphan (destroy).
|
||||
let fresh = match crate::failpoints::maybe_fail("classify.fresh_read") {
|
||||
Ok(()) => db.fresh_snapshot_for_branch(Some(branch)).await,
|
||||
Err(injected) => Err(injected),
|
||||
};
|
||||
match fresh {
|
||||
Ok(snap) => {
|
||||
let placed = snap
|
||||
.entry(table_key)
|
||||
.map(|e| e.table_branch.as_deref() == Some(branch))
|
||||
.unwrap_or(false);
|
||||
if placed {
|
||||
ForkRefStatus::Legitimate
|
||||
} else {
|
||||
// Branch resolves but the manifest does not place this table on
|
||||
// it — a manifest-unreferenced fork.
|
||||
ForkRefStatus::Orphan
|
||||
}
|
||||
}
|
||||
// Branch did not resolve. `all_branches` lists `_refs/branches/` live, so
|
||||
// absent there = genuinely no such manifest branch (origin-1 orphan);
|
||||
// present (or a list error) = transient read — never destroy on that.
|
||||
Err(_) => match db.coordinator.read().await.all_branches().await {
|
||||
Ok(fresh) if !fresh.iter().any(|b| b == branch) => ForkRefStatus::Orphan,
|
||||
_ => ForkRefStatus::Indeterminate,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Reclaim a manifest-unreferenced fork and re-fork in its place.
|
||||
///
|
||||
/// Reached when `fork_branch_from_state` reports `RefAlreadyExists`. This is a
|
||||
/// destructive op (it force-deletes a Lance branch ref), so it owns its own
|
||||
/// safety precondition rather than trusting the caller's: it re-derives, via
|
||||
/// [`classify_fork_ref`], that the manifest does not place this table on
|
||||
/// `active_branch`. The caller's earlier proof may have come from the
|
||||
/// coordinator's *cached* branch snapshot (`resolved_branch_target` returns
|
||||
/// the cache when the handle is bound to `active_branch` — an embedded handle
|
||||
/// on the branch, or `branch_merge`'s target swap); trusting it could
|
||||
/// force-delete a fork a concurrent writer just legitimately published. Only
|
||||
/// once fresh authority confirms the ref is unreferenced does it drop the ref
|
||||
/// (idempotent `force_delete_branch`) and re-fork, exactly once.
|
||||
///
|
||||
/// If fresh authority shows the table IS on `active_branch` (a legitimate
|
||||
/// concurrent fork), or a second collision occurs after reclaim (a foreign-
|
||||
/// process writer recreated the ref — the documented one-winner-CAS gap), it
|
||||
/// surfaces a retryable conflict; on retry the winner's fork is visible and
|
||||
/// the no-fork path runs.
|
||||
pub(super) async fn reclaim_orphaned_fork_and_refork(
|
||||
db: &Omnigraph,
|
||||
table_key: &str,
|
||||
full_path: &str,
|
||||
source_branch: Option<&str>,
|
||||
source_version: u64,
|
||||
active_branch: &str,
|
||||
) -> Result<SnapshotHandle> {
|
||||
// Self-validate against FRESH authority before destroying anything. Only an
|
||||
// Orphan is reclaimable; a Legitimate status (a concurrent writer published
|
||||
// a real fork despite the caller's possibly-cached proof) or an
|
||||
// Indeterminate one (transient read) surfaces a retryable conflict rather
|
||||
// than stranding the manifest at a version the recreated ref won't have.
|
||||
match classify_fork_ref(db, table_key, active_branch).await {
|
||||
ForkRefStatus::Orphan => {}
|
||||
ForkRefStatus::Legitimate => {
|
||||
let actual = db
|
||||
.fresh_snapshot_for_branch(Some(active_branch))
|
||||
.await
|
||||
.ok()
|
||||
.and_then(|s| s.entry(table_key).map(|e| e.table_version))
|
||||
.unwrap_or(source_version);
|
||||
return Err(OmniError::manifest_expected_version_mismatch(
|
||||
table_key,
|
||||
source_version,
|
||||
actual,
|
||||
));
|
||||
}
|
||||
ForkRefStatus::Indeterminate => {
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"could not verify whether branch '{active_branch}' still owns an orphaned \
|
||||
fork for table '{table_key}' because fresh manifest authority was \
|
||||
unavailable; refresh and retry"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
crate::failpoints::maybe_fail("fork.before_reclaim")?;
|
||||
db.storage()
|
||||
.force_delete_branch(full_path, active_branch)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
// Lance refuses to delete a branch with dependent child branches
|
||||
// even under force (RefConflict). Unreachable for a leaf first-write
|
||||
// fork (the cleanup reconciler also drops children before parents),
|
||||
// but surface it actionably if it ever happens. We match loosely on
|
||||
// "referenc" rather than the exact prose, which is not a Lance API
|
||||
// contract; a typed RefConflict variant through `force_delete_branch`
|
||||
// is the durable follow-up.
|
||||
if e.to_string().contains("referenc") {
|
||||
OmniError::manifest_conflict(format!(
|
||||
"branch '{active_branch}' cannot reclaim the leftover fork for \
|
||||
table '{table_key}' because it has dependent child branches; \
|
||||
delete the child branches (or run `omnigraph cleanup`) first"
|
||||
))
|
||||
} else {
|
||||
e
|
||||
}
|
||||
})?;
|
||||
|
||||
match fork_dataset_from_entry_state(
|
||||
db,
|
||||
table_key,
|
||||
full_path,
|
||||
source_branch,
|
||||
source_version,
|
||||
active_branch,
|
||||
)
|
||||
.await?
|
||||
{
|
||||
crate::storage_layer::ForkOutcome::Created(ds) => Ok(ds),
|
||||
crate::storage_layer::ForkOutcome::RefAlreadyExists => {
|
||||
let live = db.fresh_snapshot_for_branch(Some(active_branch)).await?;
|
||||
let actual = live
|
||||
.entry(table_key)
|
||||
.map(|e| e.table_version)
|
||||
.unwrap_or(source_version);
|
||||
Err(OmniError::manifest_expected_version_mismatch(
|
||||
table_key,
|
||||
source_version,
|
||||
actual,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn reopen_for_mutation(
|
||||
db: &Omnigraph,
|
||||
table_key: &str,
|
||||
|
|
@ -580,11 +834,25 @@ pub(super) async fn open_dataset_at_state(
|
|||
.await
|
||||
}
|
||||
|
||||
/// A declared index the builder could not materialize on this pass.
///
/// Today the only such case is a vector (IVF) column with no trainable vectors
/// yet (KMeans needs >=1 vector), e.g. the load-before-embed window. It is
/// reported, not fatal: a later `ensure_indices`/`optimize` retries once the
/// column is buildable, and reads stay correct via brute-force meanwhile.
/// Surfacing pending index *status* rather than failing the operation is the
/// database norm (Postgres `indisvalid`, LanceDB `list_indices`).
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare the pending lists
/// returned by the index builders directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PendingIndex {
    /// Key of the table the index belongs to, as passed to the builder.
    pub table_key: String,
    /// Name of the property/column whose index was deferred.
    pub column: String,
    /// Human-readable explanation of why the index was not built.
    pub reason: String,
}
|
||||
|
||||
pub(super) async fn build_indices_on_dataset(
|
||||
db: &Omnigraph,
|
||||
table_key: &str,
|
||||
ds: &mut SnapshotHandle,
|
||||
) -> Result<()> {
|
||||
) -> Result<Vec<PendingIndex>> {
|
||||
let catalog = db.catalog();
|
||||
build_indices_on_dataset_for_catalog(db, &catalog, table_key, ds).await
|
||||
}
|
||||
|
|
@ -594,8 +862,9 @@ pub(super) async fn build_indices_on_dataset_for_catalog(
|
|||
catalog: &Catalog,
|
||||
table_key: &str,
|
||||
ds: &mut SnapshotHandle,
|
||||
) -> Result<()> {
|
||||
) -> Result<Vec<PendingIndex>> {
|
||||
if let Some(type_name) = table_key.strip_prefix("node:") {
|
||||
let mut pending = Vec::new();
|
||||
if !db.storage().has_btree_index(ds, "id").await? {
|
||||
stage_and_commit_btree(db, table_key, ds, &["id"]).await?;
|
||||
}
|
||||
|
|
@ -615,35 +884,79 @@ pub(super) async fn build_indices_on_dataset_for_catalog(
|
|||
}
|
||||
let prop_name = &index_cols[0];
|
||||
if let Some(prop_type) = node_type.properties.get(prop_name) {
|
||||
if matches!(prop_type.scalar, ScalarType::String) && !prop_type.list {
|
||||
if !db.storage().has_fts_index(ds, prop_name).await? {
|
||||
stage_and_commit_inverted(db, table_key, ds, prop_name.as_str())
|
||||
.await?;
|
||||
match node_prop_index_kind(prop_type) {
|
||||
Some(NodePropIndexKind::Fts) => {
|
||||
if !db.storage().has_fts_index(ds, prop_name).await? {
|
||||
stage_and_commit_inverted(db, table_key, ds, prop_name.as_str())
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
} else if matches!(prop_type.scalar, ScalarType::Vector(_)) && !prop_type.list {
|
||||
if !db.storage().has_vector_index(ds, prop_name).await? {
|
||||
// Inline-commit residual: lance-6.0.1 does not
|
||||
// expose `build_index_metadata_from_segments` as
|
||||
// `pub`, so vector indices cannot be staged from
|
||||
// outside the lance crate. Document at the call
|
||||
// site; companion ticket to lance-format/lance#6658.
|
||||
let new_snap = db
|
||||
.storage_inline_residual()
|
||||
.create_vector_index(ds.clone(), prop_name.as_str())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
OmniError::Lance(format!(
|
||||
"create Vector index on {}({}): {}",
|
||||
table_key, prop_name, e
|
||||
))
|
||||
})?;
|
||||
*ds = new_snap;
|
||||
Some(NodePropIndexKind::Vector) => {
|
||||
if !db.storage().has_vector_index(ds, prop_name).await? {
|
||||
// A vector (IVF) index trains k-means over the column,
|
||||
// so it needs >=1 non-null vector (KMeans errors
|
||||
// "cannot train N centroids with 0 vectors"). Precheck
|
||||
// trainability: a column with no vectors yet (e.g. rows
|
||||
// loaded before `embed`) is recorded as a *pending*
|
||||
// index and skipped — deferred, not failed. The SAME
|
||||
// predicate gates `needs_index_work_node`, so an
|
||||
// untrainable column is never pinned for recovery (no
|
||||
// zero-commit pin that would roll back a sibling
|
||||
// table's index work). This function is the chokepoint
|
||||
// every write path funnels through (load/mutate, schema
|
||||
// apply, ensure_indices, optimize, merge), realizing
|
||||
// the governing principle — physical index state never
|
||||
// fails a logical operation. Only when trainable do we
|
||||
// attempt the build, and then we PROPAGATE any error: a
|
||||
// genuine I/O/manifest/Lance failure must stay fatal,
|
||||
// not be hidden as pending. (Vector creation is an
|
||||
// inline-commit residual until lance#6666; iss-951.)
|
||||
if vector_column_trainable(db, ds, prop_name).await? {
|
||||
let new_snap = db
|
||||
.storage_inline_residual()
|
||||
.create_vector_index(ds.clone(), prop_name.as_str())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
OmniError::Lance(format!(
|
||||
"create Vector index on {}({}): {}",
|
||||
table_key, prop_name, e
|
||||
))
|
||||
})?;
|
||||
*ds = new_snap;
|
||||
} else {
|
||||
tracing::info!(
|
||||
target: "omnigraph::index",
|
||||
table = %table_key,
|
||||
column = %prop_name,
|
||||
"deferring Vector index: column has no \
|
||||
trainable vectors yet",
|
||||
);
|
||||
pending.push(PendingIndex {
|
||||
table_key: table_key.to_string(),
|
||||
column: prop_name.clone(),
|
||||
reason: "column has no non-null vectors to \
|
||||
train on yet"
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
// Enum + orderable scalars (DateTime/Date/numeric/Bool)
|
||||
// get a BTREE so `=`, range, IN, and IS NULL are index-
|
||||
// accelerated instead of degrading to a full scan.
|
||||
Some(NodePropIndexKind::Btree) => {
|
||||
if !db.storage().has_btree_index(ds, prop_name).await? {
|
||||
stage_and_commit_btree(db, table_key, ds, &[prop_name.as_str()])
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
// List or Blob column: not indexable as a scalar here.
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
return Ok(pending);
|
||||
}
|
||||
|
||||
if table_key.starts_with("edge:") {
|
||||
|
|
@ -656,7 +969,9 @@ pub(super) async fn build_indices_on_dataset_for_catalog(
|
|||
if !db.storage().has_btree_index(ds, "dst").await? {
|
||||
stage_and_commit_btree(db, table_key, ds, &["dst"]).await?;
|
||||
}
|
||||
return Ok(());
|
||||
// Edge tables only get BTree (id/src/dst), which build at any
|
||||
// cardinality; no pending state is possible here.
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
Err(OmniError::manifest(format!(
|
||||
|
|
@ -778,7 +1093,11 @@ async fn prepare_updates_for_commit(
|
|||
crate::db::MutationOpKind::SchemaRewrite,
|
||||
)
|
||||
.await?;
|
||||
build_indices_on_dataset(db, &prepared_update.table_key, &mut ds).await?;
|
||||
// Any column not yet buildable (e.g. a vector column whose rows
|
||||
// have null embeddings) is deferred and logged inside
|
||||
// build_indices; a later ensure_indices/optimize materializes it.
|
||||
// The load/mutate/merge commit must not fail on it.
|
||||
let _pending = build_indices_on_dataset(db, &prepared_update.table_key, &mut ds).await?;
|
||||
let state = db.storage().table_state(&full_path, &ds).await?;
|
||||
prepared_update.table_version = state.version;
|
||||
prepared_update.row_count = state.row_count;
|
||||
|
|
@ -979,3 +1298,78 @@ pub(super) async fn ensure_commit_graph_initialized(db: &Omnigraph) -> Result<()
|
|||
pub(super) async fn invalidate_graph_index(db: &Omnigraph) {
|
||||
db.runtime_cache.invalidate_all().await;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod classify_fork_ref_tests {
|
||||
//! Direct coverage of [`classify_fork_ref`] — the single fresh-authority
|
||||
//! decision both fork-ref reclaim sites (write-path reclaim + cleanup
|
||||
//! reconciler) route through. Pins each deterministic status so reverting
|
||||
//! the fresh-authority logic at either site fails here. (The `Indeterminate`
|
||||
//! arm needs an injected transient read and is covered under the
|
||||
//! `failpoints` suite.)
|
||||
use super::*;
|
||||
use crate::db::Omnigraph;
|
||||
use crate::loader::LoadMode;
|
||||
|
||||
const SCHEMA: &str = "node Person { name: String @key }\nnode Company { name: String @key }\n";
|
||||
|
||||
/// On-disk dataset path for a node table, taken from the manifest entry
|
||||
/// (the same path the engine uses) so the test forges against the real ref.
|
||||
async fn node_path(db: &Omnigraph, branch: &str, table_key: &str) -> String {
|
||||
let snap = db.snapshot_for_branch(Some(branch)).await.unwrap();
|
||||
let entry = snap.entry(table_key).unwrap();
|
||||
format!("{}/{}", db.root_uri, entry.table_path)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn classify_distinguishes_legitimate_unreferenced_and_ghost() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let db = Omnigraph::init(dir.path().to_str().unwrap(), SCHEMA)
|
||||
.await
|
||||
.unwrap();
|
||||
db.branch_create("feature").await.unwrap();
|
||||
|
||||
// Legitimate: a real write forks Company onto `feature`, and the
|
||||
// manifest places Company on `feature`.
|
||||
db.load_as(
|
||||
"feature",
|
||||
None,
|
||||
r#"{"type":"Company","data":{"name":"Acme"}}"#,
|
||||
LoadMode::Merge,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
classify_fork_ref(&db, "node:Company", "feature").await,
|
||||
ForkRefStatus::Legitimate,
|
||||
"a manifest-placed fork must classify as Legitimate (never destroyed)"
|
||||
);
|
||||
|
||||
// Orphan (manifest-unreferenced): forge a `feature` ref on Person, which
|
||||
// the manifest's `feature` snapshot still places on main.
|
||||
let person = node_path(&db, "feature", "node:Person").await;
|
||||
{
|
||||
let mut ds = lance::Dataset::open(&person).await.unwrap();
|
||||
let v = ds.version().version;
|
||||
ds.create_branch("feature", v, None).await.unwrap();
|
||||
}
|
||||
assert_eq!(
|
||||
classify_fork_ref(&db, "node:Person", "feature").await,
|
||||
ForkRefStatus::Orphan,
|
||||
"a ref the manifest does not place on the branch must classify as Orphan"
|
||||
);
|
||||
|
||||
// Orphan (ghost): a ref for a branch the manifest does not have at all.
|
||||
{
|
||||
let mut ds = lance::Dataset::open(&person).await.unwrap();
|
||||
let v = ds.version().version;
|
||||
ds.create_branch("ghost", v, None).await.unwrap();
|
||||
}
|
||||
assert_eq!(
|
||||
classify_fork_ref(&db, "node:Person", "ghost").await,
|
||||
ForkRefStatus::Orphan,
|
||||
"a ref for a branch absent from the manifest must classify as Orphan"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -189,6 +189,8 @@ async fn create_recoveries_dataset(root_uri: &str) -> Result<Dataset> {
|
|||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
match Dataset::write(reader, &uri as &str, Some(params)).await {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,15 @@
|
|||
//! Per-`(table_key, branch)` writer queues — MR-686 scaffolding.
|
||||
//! Per-`(table_key, branch)` writer queues.
|
||||
//!
|
||||
//! Today every server-layer write serializes on the global
|
||||
//! `Arc<RwLock<Omnigraph>>` in `AppState`. MR-686 replaces that with
|
||||
//! per-`(table_key, branch_ref)` queues so disjoint-key writes proceed
|
||||
//! concurrently. This module owns the queue data structure; callers in
|
||||
//! `MutationStaging::commit_all`, `branch_merge`, `schema_apply`,
|
||||
//! `ensure_indices`, `delete_where`, and the future MR-870 recovery
|
||||
//! reconciler acquire guards before any per-table Lance commit.
|
||||
//! These queues are the engine's write-serialization mechanism: the server
|
||||
//! holds the engine as a lockless `Arc<Omnigraph>` (writes are `&self`), so
|
||||
//! disjoint-key writes proceed concurrently and only writes to the same
|
||||
//! `(table_key, branch_ref)` serialize here. This module owns the queue
|
||||
//! data structure; callers in `MutationStaging::commit_all`, `branch_merge`,
|
||||
//! `schema_apply`, `ensure_indices`, `delete_where`, the fork path (first
|
||||
//! write to a table on a branch — acquired before the fork, held through the
|
||||
//! manifest publish), and the recovery reconciler acquire guards before any
|
||||
//! per-table Lance commit. Serialization is in-process only; cross-process
|
||||
//! writers on one graph remain one-winner-CAS at the manifest publish.
|
||||
//!
|
||||
//! ## Why exclusive `tokio::sync::Mutex<()>` per key
|
||||
//!
|
||||
|
|
|
|||
|
|
@ -8,29 +8,157 @@ use tokio::time::sleep;
|
|||
|
||||
use crate::error::{OmniError, Result};
|
||||
|
||||
const GEMINI_EMBED_MODEL: &str = "gemini-embedding-2-preview";
|
||||
const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1";
|
||||
const DEFAULT_OPENROUTER_MODEL: &str = "openai/text-embedding-3-large";
|
||||
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
|
||||
const DEFAULT_OPENAI_MODEL: &str = "text-embedding-3-large";
|
||||
const DEFAULT_GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
|
||||
const DEFAULT_GEMINI_MODEL: &str = "gemini-embedding-2";
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 30_000;
|
||||
const DEFAULT_RETRY_ATTEMPTS: usize = 4;
|
||||
const DEFAULT_RETRY_BACKOFF_MS: u64 = 200;
|
||||
const QUERY_TASK_TYPE: &str = "RETRIEVAL_QUERY";
|
||||
const DOCUMENT_TASK_TYPE: &str = "RETRIEVAL_DOCUMENT";
|
||||
const DEFAULT_DEADLINE_MS: u64 = 60_000;
|
||||
const GEMINI_QUERY_TASK_TYPE: &str = "RETRIEVAL_QUERY";
|
||||
const GEMINI_DOCUMENT_TASK_TYPE: &str = "RETRIEVAL_DOCUMENT";
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum EmbeddingTransport {
|
||||
/// Which embedding API a client speaks. Each variant owns its request shape,
|
||||
/// auth, and response parsing; everything else (retry, deadline, normalization,
|
||||
/// tracing) is provider-independent.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum Provider {
|
||||
/// OpenAI-compatible (`POST {base}/embeddings`, bearer auth,
|
||||
/// `{model, input, dimensions}`). Covers OpenRouter (the default gateway),
|
||||
/// OpenAI direct, and self-hosted endpoints (vLLM/Ollama/LM Studio).
|
||||
OpenAiCompatible,
|
||||
/// Google Gemini `generativelanguage` (`POST {base}/models/{model}:embedContent`,
|
||||
/// `x-goog-api-key`), with `RETRIEVAL_QUERY` / `RETRIEVAL_DOCUMENT` task types.
|
||||
Gemini,
|
||||
/// Deterministic, offline. No network, no key.
|
||||
Mock,
|
||||
Gemini {
|
||||
}
|
||||
|
||||
/// Whether the text being embedded is a search query or a stored document.
|
||||
/// Only Gemini distinguishes these (`RETRIEVAL_QUERY` vs `RETRIEVAL_DOCUMENT`);
|
||||
/// OpenAI-compatible providers and Mock produce the identical request for both,
|
||||
/// which is also the same-space property a query relies on.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
enum EmbedRole {
|
||||
Query,
|
||||
Document,
|
||||
}
|
||||
|
||||
/// The single source of truth for how embedding text becomes a vector:
|
||||
/// provider + model + endpoint + key. Resolved once (from env for direct
|
||||
/// engine/CLI callers, or from an applied cluster `providers.embedding` profile
|
||||
/// at server boot) and shared by the query path and the offline CLI so stored
|
||||
/// and query vectors stay same-space by construction.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct EmbeddingConfig {
|
||||
pub provider: Provider,
|
||||
pub model: String,
|
||||
pub base_url: String,
|
||||
pub api_key: String,
|
||||
}
|
||||
|
||||
impl EmbeddingConfig {
|
||||
/// Resolve from the environment. Precedence:
|
||||
/// 1. `OMNIGRAPH_EMBEDDINGS_MOCK` → Mock.
|
||||
/// 2. `OMNIGRAPH_EMBED_PROVIDER` (`openai-compatible`|`openai`|`gemini`|`mock`);
|
||||
/// unset defaults to `openai-compatible` (OpenRouter).
|
||||
/// 3. `OMNIGRAPH_EMBED_BASE_URL` else the provider default.
|
||||
/// 4. `OMNIGRAPH_EMBED_MODEL` else the provider default.
|
||||
/// 5. provider api-key env (`OPENROUTER_API_KEY`/`OPENAI_API_KEY`, or `GEMINI_API_KEY`).
|
||||
pub fn from_env() -> Result<Self> {
|
||||
if env_flag("OMNIGRAPH_EMBEDDINGS_MOCK") {
|
||||
return Ok(Self::mock());
|
||||
}
|
||||
|
||||
let alias = env_string("OMNIGRAPH_EMBED_PROVIDER");
|
||||
if alias.as_deref() == Some("mock") {
|
||||
return Ok(Self::mock());
|
||||
}
|
||||
|
||||
let (provider, default_base, default_model, key_envs) = provider_profile(alias.as_deref())?;
|
||||
let base_url = env_string("OMNIGRAPH_EMBED_BASE_URL")
|
||||
.unwrap_or_else(|| default_base.to_string())
|
||||
.trim_end_matches('/')
|
||||
.to_string();
|
||||
let model =
|
||||
env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_else(|| default_model.to_string());
|
||||
|
||||
let api_key = key_envs.iter().copied().find_map(env_string).ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
"{} is required for the {} embedding provider",
|
||||
key_envs.join(" or "),
|
||||
alias.as_deref().unwrap_or("openai-compatible")
|
||||
))
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
provider,
|
||||
model,
|
||||
base_url,
|
||||
api_key,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a config from explicit parts — the cluster `providers.embedding` profile path
|
||||
/// (RFC-012 Phase 5). `provider`/`base_url`/`model` default exactly as
|
||||
/// `from_env` does (shared `provider_profile`); `api_key` is already resolved
|
||||
/// (the cluster path resolves a `${NAME}` ref before calling this).
|
||||
pub fn from_parts(
|
||||
provider: Option<&str>,
|
||||
base_url: Option<String>,
|
||||
model: Option<String>,
|
||||
api_key: String,
|
||||
base_url: String,
|
||||
http: Client,
|
||||
},
|
||||
) -> Result<Self> {
|
||||
if provider == Some("mock") {
|
||||
// An explicit `model` (e.g. a cluster `providers.embedding` profile) is
|
||||
// authoritative — it is what the same-space check compares against —
|
||||
// so honor it; fall back to `mock()`'s env-based model only when the
|
||||
// caller supplied none. Without this, a profile's `model` is silently
|
||||
// dropped and the same-space check resolves to OMNIGRAPH_EMBED_MODEL.
|
||||
let mut config = Self::mock();
|
||||
if let Some(model) = model {
|
||||
config.model = model;
|
||||
}
|
||||
return Ok(config);
|
||||
}
|
||||
let (provider, default_base, default_model, _key_envs) = provider_profile(provider)?;
|
||||
let base_url = base_url
|
||||
.unwrap_or_else(|| default_base.to_string())
|
||||
.trim_end_matches('/')
|
||||
.to_string();
|
||||
let model = model.unwrap_or_else(|| default_model.to_string());
|
||||
Ok(Self {
|
||||
provider,
|
||||
model,
|
||||
base_url,
|
||||
api_key,
|
||||
})
|
||||
}
|
||||
|
||||
fn mock() -> Self {
|
||||
Self {
|
||||
provider: Provider::Mock,
|
||||
// Honor OMNIGRAPH_EMBED_MODEL so the same-space check is exercisable
|
||||
// under mock; the mock vectors themselves don't depend on the model.
|
||||
model: env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_default(),
|
||||
base_url: String::new(),
|
||||
api_key: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct EmbeddingClient {
|
||||
config: EmbeddingConfig,
|
||||
http: Client,
|
||||
retry_attempts: usize,
|
||||
retry_backoff_ms: u64,
|
||||
transport: EmbeddingTransport,
|
||||
/// Total wall-clock budget for one embed call, across all retries
|
||||
/// (`OMNIGRAPH_EMBED_DEADLINE_MS`). `0` = unbounded.
|
||||
deadline_ms: u64,
|
||||
}
|
||||
|
||||
struct EmbedCallError {
|
||||
|
|
@ -58,35 +186,39 @@ struct GoogleErrorBody {
|
|||
message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiEmbeddingResponse {
|
||||
data: Vec<OpenAiEmbeddingDatum>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiEmbeddingDatum {
|
||||
index: usize,
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiErrorEnvelope {
|
||||
error: OpenAiErrorBody,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiErrorBody {
|
||||
message: String,
|
||||
}
|
||||
|
||||
impl EmbeddingClient {
|
||||
pub fn from_env() -> Result<Self> {
|
||||
Self::new(EmbeddingConfig::from_env()?)
|
||||
}
|
||||
|
||||
pub fn new(config: EmbeddingConfig) -> Result<Self> {
|
||||
let retry_attempts =
|
||||
parse_env_usize("OMNIGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS);
|
||||
let retry_backoff_ms =
|
||||
parse_env_u64("OMNIGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS);
|
||||
|
||||
if env_flag("OMNIGRAPH_EMBEDDINGS_MOCK") {
|
||||
return Ok(Self {
|
||||
retry_attempts,
|
||||
retry_backoff_ms,
|
||||
transport: EmbeddingTransport::Mock,
|
||||
});
|
||||
}
|
||||
|
||||
let api_key = std::env::var("GEMINI_API_KEY")
|
||||
.ok()
|
||||
.map(|v| v.trim().to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.ok_or_else(|| {
|
||||
OmniError::manifest_internal(
|
||||
"GEMINI_API_KEY is required when nearest() needs a string embedding",
|
||||
)
|
||||
})?;
|
||||
let base_url = std::env::var("OMNIGRAPH_GEMINI_BASE_URL")
|
||||
.ok()
|
||||
.map(|v| v.trim_end_matches('/').to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.unwrap_or_else(|| DEFAULT_GEMINI_BASE_URL.to_string());
|
||||
let deadline_ms =
|
||||
parse_env_u64_allow_zero("OMNIGRAPH_EMBED_DEADLINE_MS", DEFAULT_DEADLINE_MS);
|
||||
let timeout_ms = parse_env_u64("OMNIGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
|
||||
let http = Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
|
|
@ -96,39 +228,36 @@ impl EmbeddingClient {
|
|||
})?;
|
||||
|
||||
Ok(Self {
|
||||
config,
|
||||
http,
|
||||
retry_attempts,
|
||||
retry_backoff_ms,
|
||||
transport: EmbeddingTransport::Gemini {
|
||||
api_key,
|
||||
base_url,
|
||||
http,
|
||||
},
|
||||
deadline_ms,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn config(&self) -> &EmbeddingConfig {
|
||||
&self.config
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn mock_for_tests() -> Self {
|
||||
Self {
|
||||
retry_attempts: DEFAULT_RETRY_ATTEMPTS,
|
||||
retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS,
|
||||
transport: EmbeddingTransport::Mock,
|
||||
}
|
||||
Self::new(EmbeddingConfig::mock()).expect("mock client builds")
|
||||
}
|
||||
|
||||
pub async fn embed_query_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
|
||||
self.embed_text(input, expected_dim, QUERY_TASK_TYPE).await
|
||||
self.embed_text(input, expected_dim, EmbedRole::Query).await
|
||||
}
|
||||
|
||||
pub async fn embed_document_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
|
||||
self.embed_text(input, expected_dim, DOCUMENT_TASK_TYPE)
|
||||
.await
|
||||
self.embed_text(input, expected_dim, EmbedRole::Document).await
|
||||
}
|
||||
|
||||
async fn embed_text(
|
||||
&self,
|
||||
input: &str,
|
||||
expected_dim: usize,
|
||||
task_type: &'static str,
|
||||
role: EmbedRole,
|
||||
) -> Result<Vec<f32>> {
|
||||
if expected_dim == 0 {
|
||||
return Err(OmniError::manifest_internal(
|
||||
|
|
@ -136,10 +265,71 @@ impl EmbeddingClient {
|
|||
));
|
||||
}
|
||||
|
||||
match &self.transport {
|
||||
EmbeddingTransport::Mock => Ok(mock_embedding(input, expected_dim)),
|
||||
EmbeddingTransport::Gemini { .. } => {
|
||||
self.with_retry(|| self.embed_text_gemini_once(input, expected_dim, task_type))
|
||||
let started = std::time::Instant::now();
|
||||
let result = self
|
||||
.run_with_deadline(self.embed_text_inner(input, expected_dim, role))
|
||||
.await;
|
||||
let elapsed_ms = started.elapsed().as_millis() as u64;
|
||||
|
||||
match &result {
|
||||
Ok(_) => tracing::info!(
|
||||
target: "omnigraph::embedding",
|
||||
provider = ?self.config.provider,
|
||||
model = %self.config.model,
|
||||
dim = expected_dim,
|
||||
elapsed_ms,
|
||||
outcome = "ok",
|
||||
"embedding succeeded"
|
||||
),
|
||||
Err(err) => tracing::warn!(
|
||||
target: "omnigraph::embedding",
|
||||
provider = ?self.config.provider,
|
||||
model = %self.config.model,
|
||||
dim = expected_dim,
|
||||
elapsed_ms,
|
||||
outcome = "error",
|
||||
error = %err,
|
||||
"embedding failed"
|
||||
),
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Bound the whole embed operation (all retries + backoff) by `deadline_ms`,
|
||||
/// so a degraded provider can never hang the caller for the full retry
|
||||
/// envelope. Applies to every embed call (query and document). `0` =
|
||||
/// unbounded. Embedding has no Lance/manifest side effects, so cancelling the
|
||||
/// in-flight request future on elapse is safe.
|
||||
async fn run_with_deadline<F>(&self, fut: F) -> Result<Vec<f32>>
|
||||
where
|
||||
F: Future<Output = Result<Vec<f32>>>,
|
||||
{
|
||||
if self.deadline_ms == 0 {
|
||||
return fut.await;
|
||||
}
|
||||
match tokio::time::timeout(Duration::from_millis(self.deadline_ms), fut).await {
|
||||
Ok(res) => res,
|
||||
Err(_elapsed) => Err(OmniError::manifest_internal(format!(
|
||||
"embedding deadline exceeded after {} ms (provider={:?}, model={})",
|
||||
self.deadline_ms, self.config.provider, self.config.model
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
async fn embed_text_inner(
|
||||
&self,
|
||||
input: &str,
|
||||
expected_dim: usize,
|
||||
role: EmbedRole,
|
||||
) -> Result<Vec<f32>> {
|
||||
match self.config.provider {
|
||||
Provider::Mock => Ok(mock_embedding(input, expected_dim)),
|
||||
Provider::Gemini => {
|
||||
self.with_retry(|| self.embed_gemini_once(input, expected_dim, role))
|
||||
.await
|
||||
}
|
||||
Provider::OpenAiCompatible => {
|
||||
self.with_retry(|| self.embed_openai_once(input, expected_dim))
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
|
@ -160,6 +350,14 @@ impl EmbeddingClient {
|
|||
if !err.retryable || attempt >= max_attempt {
|
||||
return Err(OmniError::manifest_internal(err.message));
|
||||
}
|
||||
tracing::warn!(
|
||||
target: "omnigraph::embedding",
|
||||
provider = ?self.config.provider,
|
||||
model = %self.config.model,
|
||||
attempt,
|
||||
error = %err.message,
|
||||
"embedding attempt failed, retrying"
|
||||
);
|
||||
let shift = (attempt - 1).min(10) as u32;
|
||||
let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift);
|
||||
sleep(Duration::from_millis(delay)).await;
|
||||
|
|
@ -168,25 +366,27 @@ impl EmbeddingClient {
|
|||
}
|
||||
}
|
||||
|
||||
async fn embed_text_gemini_once(
|
||||
async fn embed_gemini_once(
|
||||
&self,
|
||||
input: &str,
|
||||
expected_dim: usize,
|
||||
task_type: &'static str,
|
||||
role: EmbedRole,
|
||||
) -> std::result::Result<Vec<f32>, EmbedCallError> {
|
||||
let (api_key, base_url, http) = match &self.transport {
|
||||
EmbeddingTransport::Gemini {
|
||||
api_key,
|
||||
base_url,
|
||||
http,
|
||||
} => (api_key, base_url, http),
|
||||
EmbeddingTransport::Mock => unreachable!("mock transport should not call Gemini"),
|
||||
let task_type = match role {
|
||||
EmbedRole::Query => GEMINI_QUERY_TASK_TYPE,
|
||||
EmbedRole::Document => GEMINI_DOCUMENT_TASK_TYPE,
|
||||
};
|
||||
|
||||
let response = http
|
||||
.post(gemini_endpoint(base_url))
|
||||
.header("x-goog-api-key", api_key)
|
||||
.json(&build_gemini_request(input, expected_dim, task_type))
|
||||
let response = self
|
||||
.http
|
||||
.post(gemini_endpoint(&self.config.base_url, &self.config.model))
|
||||
.header("x-goog-api-key", &self.config.api_key)
|
||||
.json(&build_gemini_request(
|
||||
&self.config.model,
|
||||
input,
|
||||
expected_dim,
|
||||
task_type,
|
||||
))
|
||||
.send()
|
||||
.await;
|
||||
let response = match response {
|
||||
|
|
@ -205,10 +405,7 @@ impl EmbeddingClient {
|
|||
Ok(body) => body,
|
||||
Err(err) => {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding response read failed (status {}): {}",
|
||||
status, err
|
||||
),
|
||||
message: format!("embedding response read failed (status {}): {}", status, err),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
|
|
@ -217,10 +414,7 @@ impl EmbeddingClient {
|
|||
if !status.is_success() {
|
||||
let message = parse_google_error_message(&body).unwrap_or(body);
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding request failed with status {}: {}",
|
||||
status, message
|
||||
),
|
||||
message: format!("embedding request failed with status {}: {}", status, message),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
|
|
@ -238,19 +432,85 @@ impl EmbeddingClient {
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
async fn embed_openai_once(
|
||||
&self,
|
||||
input: &str,
|
||||
expected_dim: usize,
|
||||
) -> std::result::Result<Vec<f32>, EmbedCallError> {
|
||||
let response = self
|
||||
.http
|
||||
.post(format!("{}/embeddings", self.config.base_url))
|
||||
.bearer_auth(&self.config.api_key)
|
||||
.json(&build_openai_request(&self.config.model, input, expected_dim))
|
||||
.send()
|
||||
.await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(err) => {
|
||||
let retryable = err.is_timeout() || err.is_connect() || err.is_request();
|
||||
return Err(EmbedCallError {
|
||||
message: format!("embedding request failed: {}", err),
|
||||
retryable,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let status = response.status();
|
||||
let body = match response.text().await {
|
||||
Ok(body) => body,
|
||||
Err(err) => {
|
||||
return Err(EmbedCallError {
|
||||
message: format!("embedding response read failed (status {}): {}", status, err),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
if !status.is_success() {
|
||||
let message = parse_openai_error_message(&body).unwrap_or(body);
|
||||
return Err(EmbedCallError {
|
||||
message: format!("embedding request failed with status {}: {}", status, message),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
|
||||
let parsed: OpenAiEmbeddingResponse =
|
||||
serde_json::from_str(&body).map_err(|err| EmbedCallError {
|
||||
message: format!("embedding response decode failed: {}", err),
|
||||
retryable: false,
|
||||
})?;
|
||||
|
||||
// The query path embeds exactly one string, so expect one datum at index 0.
|
||||
let datum = parsed
|
||||
.data
|
||||
.into_iter()
|
||||
.find(|d| d.index == 0)
|
||||
.ok_or_else(|| EmbedCallError {
|
||||
message: "embedding response missing data[0]".to_string(),
|
||||
retryable: false,
|
||||
})?;
|
||||
|
||||
validate_and_normalize_embedding(datum.embedding, expected_dim).map_err(|message| {
|
||||
EmbedCallError {
|
||||
message,
|
||||
retryable: false,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn gemini_endpoint(base_url: &str) -> String {
|
||||
fn gemini_endpoint(base_url: &str, model: &str) -> String {
|
||||
format!(
|
||||
"{}/models/{}:embedContent",
|
||||
base_url.trim_end_matches('/'),
|
||||
GEMINI_EMBED_MODEL
|
||||
model
|
||||
)
|
||||
}
|
||||
|
||||
fn build_gemini_request(input: &str, expected_dim: usize, task_type: &'static str) -> Value {
|
||||
fn build_gemini_request(model: &str, input: &str, expected_dim: usize, task_type: &str) -> Value {
|
||||
json!({
|
||||
"model": format!("models/{}", GEMINI_EMBED_MODEL),
|
||||
"model": format!("models/{}", model),
|
||||
"content": {
|
||||
"parts": [
|
||||
{
|
||||
|
|
@ -263,6 +523,14 @@ fn build_gemini_request(input: &str, expected_dim: usize, task_type: &'static st
|
|||
})
|
||||
}
|
||||
|
||||
fn build_openai_request(model: &str, input: &str, expected_dim: usize) -> Value {
|
||||
json!({
|
||||
"model": model,
|
||||
"input": [input],
|
||||
"dimensions": expected_dim,
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_and_normalize_embedding(
|
||||
values: Vec<f32>,
|
||||
expected_dim: usize,
|
||||
|
|
@ -298,6 +566,57 @@ fn parse_google_error_message(body: &str) -> Option<String> {
|
|||
.filter(|msg| !msg.trim().is_empty())
|
||||
}
|
||||
|
||||
fn parse_openai_error_message(body: &str) -> Option<String> {
|
||||
serde_json::from_str::<OpenAiErrorEnvelope>(body)
|
||||
.ok()
|
||||
.map(|e| e.error.message)
|
||||
.filter(|msg| !msg.trim().is_empty())
|
||||
}
|
||||
|
||||
/// Map a provider alias to `(provider, default base URL, default model, ordered
|
||||
/// api-key envs)`. Shared by `from_env` and `from_parts` so both apply identical
|
||||
/// defaults: `openai-compatible`/unset → the OpenRouter gateway, `openai` →
|
||||
/// OpenAI's own host. `mock` is handled by callers before this is reached. The
|
||||
/// `Provider` enum alone would collapse the two openai aliases, so the alias
|
||||
/// (not the enum) determines the key-env order here.
|
||||
fn provider_profile(
|
||||
alias: Option<&str>,
|
||||
) -> Result<(Provider, &'static str, &'static str, &'static [&'static str])> {
|
||||
Ok(match alias {
|
||||
None | Some("openai-compatible") => (
|
||||
Provider::OpenAiCompatible,
|
||||
DEFAULT_OPENROUTER_BASE_URL,
|
||||
DEFAULT_OPENROUTER_MODEL,
|
||||
&["OPENROUTER_API_KEY", "OPENAI_API_KEY"],
|
||||
),
|
||||
Some("openai") => (
|
||||
Provider::OpenAiCompatible,
|
||||
DEFAULT_OPENAI_BASE_URL,
|
||||
DEFAULT_OPENAI_MODEL,
|
||||
&["OPENAI_API_KEY"],
|
||||
),
|
||||
Some("gemini") => (
|
||||
Provider::Gemini,
|
||||
DEFAULT_GEMINI_BASE_URL,
|
||||
DEFAULT_GEMINI_MODEL,
|
||||
&["GEMINI_API_KEY"],
|
||||
),
|
||||
Some(other) => {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"unknown embedding provider '{}' (expected openai-compatible|openai|gemini|mock)",
|
||||
other
|
||||
)));
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn env_string(name: &str) -> Option<String> {
|
||||
std::env::var(name)
|
||||
.ok()
|
||||
.map(|v| v.trim().to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
}
|
||||
|
||||
fn parse_env_usize(name: &str, default: usize) -> usize {
|
||||
std::env::var(name)
|
||||
.ok()
|
||||
|
|
@ -314,6 +633,15 @@ fn parse_env_u64(name: &str, default: u64) -> u64 {
|
|||
.unwrap_or(default)
|
||||
}
|
||||
|
||||
/// Like [`parse_env_u64`] but accepts `0` as a meaningful value (the deadline
|
||||
/// uses `0` for "unbounded").
|
||||
fn parse_env_u64_allow_zero(name: &str, default: u64) -> u64 {
|
||||
std::env::var(name)
|
||||
.ok()
|
||||
.and_then(|v| v.trim().parse::<u64>().ok())
|
||||
.unwrap_or(default)
|
||||
}
|
||||
|
||||
fn env_flag(name: &str) -> bool {
|
||||
std::env::var(name)
|
||||
.ok()
|
||||
|
|
@ -395,6 +723,25 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
// Every test that calls `EmbeddingConfig::from_env` clears the full set of
|
||||
// embedding env vars first so the host environment can't leak in.
|
||||
const EMBED_ENV: &[&str] = &[
|
||||
"OMNIGRAPH_EMBEDDINGS_MOCK",
|
||||
"OMNIGRAPH_EMBED_PROVIDER",
|
||||
"OMNIGRAPH_EMBED_BASE_URL",
|
||||
"OMNIGRAPH_EMBED_MODEL",
|
||||
"OPENROUTER_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"GEMINI_API_KEY",
|
||||
];
|
||||
|
||||
fn cleared_env(extra: &[(&'static str, Option<&str>)]) -> EnvGuard {
|
||||
let mut vars: Vec<(&'static str, Option<&str>)> =
|
||||
EMBED_ENV.iter().map(|n| (*n, None)).collect();
|
||||
vars.extend_from_slice(extra);
|
||||
EnvGuard::set(&vars)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn mock_embeddings_are_deterministic() {
|
||||
let client = EmbeddingClient::mock_for_tests();
|
||||
|
|
@ -407,18 +754,30 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn gemini_request_uses_preview_model_retrieval_query_and_dimension() {
|
||||
let request = build_gemini_request("alpha", 4, QUERY_TASK_TYPE);
|
||||
assert_eq!(request["model"], "models/gemini-embedding-2-preview");
|
||||
assert_eq!(request["taskType"], QUERY_TASK_TYPE);
|
||||
fn gemini_request_uses_model_retrieval_query_and_dimension() {
|
||||
let request =
|
||||
build_gemini_request("gemini-embedding-2", "alpha", 4, GEMINI_QUERY_TASK_TYPE);
|
||||
assert_eq!(request["model"], "models/gemini-embedding-2");
|
||||
assert_eq!(request["taskType"], GEMINI_QUERY_TASK_TYPE);
|
||||
assert_eq!(request["outputDimensionality"], 4);
|
||||
assert_eq!(request["content"]["parts"][0]["text"], "alpha");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gemini_document_request_uses_retrieval_document_task_type() {
|
||||
let request = build_gemini_request("alpha", 4, DOCUMENT_TASK_TYPE);
|
||||
assert_eq!(request["taskType"], DOCUMENT_TASK_TYPE);
|
||||
let request =
|
||||
build_gemini_request("gemini-embedding-2", "alpha", 4, GEMINI_DOCUMENT_TASK_TYPE);
|
||||
assert_eq!(request["taskType"], GEMINI_DOCUMENT_TASK_TYPE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openai_request_uses_model_input_array_and_dimensions() {
|
||||
let request = build_openai_request("openai/text-embedding-3-large", "alpha", 4);
|
||||
assert_eq!(request["model"], "openai/text-embedding-3-large");
|
||||
assert_eq!(request["input"][0], "alpha");
|
||||
assert!(request["input"].is_array());
|
||||
assert_eq!(request["dimensions"], 4);
|
||||
assert!(request.get("taskType").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -475,15 +834,202 @@ mod tests {
|
|||
assert!(err.to_string().contains("do not retry"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_with_deadline_aborts_slow_future() {
|
||||
let mut client = EmbeddingClient::mock_for_tests();
|
||||
client.deadline_ms = 20;
|
||||
let slow = async {
|
||||
tokio::time::sleep(Duration::from_secs(5)).await;
|
||||
Ok(vec![0.0_f32])
|
||||
};
|
||||
let err = client.run_with_deadline(slow).await.unwrap_err();
|
||||
assert!(err.to_string().contains("deadline exceeded"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_with_deadline_passes_through_fast_future() {
|
||||
let client = EmbeddingClient::mock_for_tests();
|
||||
let ok = client
|
||||
.run_with_deadline(async { Ok(vec![1.0_f32, 2.0]) })
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(ok, vec![1.0, 2.0]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_with_deadline_zero_is_unbounded() {
|
||||
let mut client = EmbeddingClient::mock_for_tests();
|
||||
client.deadline_ms = 0;
|
||||
let ok = client
|
||||
.run_with_deadline(async { Ok(vec![3.0_f32]) })
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(ok, vec![3.0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial]
|
||||
fn from_env_requires_gemini_api_key_when_not_mocking() {
|
||||
let _guard = EnvGuard::set(&[
|
||||
("OMNIGRAPH_EMBEDDINGS_MOCK", None),
|
||||
("GEMINI_API_KEY", None),
|
||||
]);
|
||||
fn from_env_defaults_to_openai_compatible_openrouter() {
|
||||
let _guard = cleared_env(&[("OPENROUTER_API_KEY", Some("sk-test"))]);
|
||||
let config = EmbeddingConfig::from_env().unwrap();
|
||||
assert_eq!(config.provider, Provider::OpenAiCompatible);
|
||||
assert_eq!(config.base_url, DEFAULT_OPENROUTER_BASE_URL);
|
||||
assert_eq!(config.model, DEFAULT_OPENROUTER_MODEL);
|
||||
assert_eq!(config.api_key, "sk-test");
|
||||
}
|
||||
|
||||
let err = EmbeddingClient::from_env().unwrap_err();
|
||||
assert!(err.to_string().contains("GEMINI_API_KEY"));
|
||||
#[test]
|
||||
#[serial]
|
||||
fn from_env_openai_alias_uses_openai_host_not_openrouter() {
|
||||
let _guard = cleared_env(&[
|
||||
("OMNIGRAPH_EMBED_PROVIDER", Some("openai")),
|
||||
("OPENAI_API_KEY", Some("k")),
|
||||
]);
|
||||
let config = EmbeddingConfig::from_env().unwrap();
|
||||
assert_eq!(config.provider, Provider::OpenAiCompatible);
|
||||
assert_eq!(config.base_url, DEFAULT_OPENAI_BASE_URL); // api.openai.com, not OpenRouter
|
||||
assert_eq!(config.model, DEFAULT_OPENAI_MODEL); // text-embedding-3-large, no openai/ prefix
|
||||
assert_eq!(config.api_key, "k");
|
||||
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_openai_alias_prefers_openai_key_over_openrouter() {
    // The `openai` alias points at api.openai.com, so when both keys are
    // present the OpenRouter key must not be the one that gets sent there.
    let _env = cleared_env(&[
        ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")),
        ("OPENROUTER_API_KEY", Some("router")),
        ("OPENAI_API_KEY", Some("openai")),
    ]);

    let resolved = EmbeddingConfig::from_env().unwrap();

    assert_eq!(resolved.base_url, DEFAULT_OPENAI_BASE_URL);
    assert_eq!(resolved.api_key, "openai");
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_openai_alias_errors_when_only_openrouter_key_is_set() {
    // `openai` is requested but only an OpenRouter key exists: resolution
    // must fail and the error must name the missing OPENAI_API_KEY.
    let _env = cleared_env(&[
        ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")),
        ("OPENROUTER_API_KEY", Some("router")),
    ]);

    let failure = EmbeddingConfig::from_env().unwrap_err();

    assert!(
        failure.to_string().contains("OPENAI_API_KEY"),
        "got: {failure}"
    );
}
|
||||
|
||||
#[test]
fn from_parts_applies_provider_defaults_and_overrides() {
    // No provider given: the OpenAI-compatible provider with OpenRouter defaults.
    let defaulted = EmbeddingConfig::from_parts(None, None, None, "k".to_string()).unwrap();
    assert_eq!(defaulted.provider, Provider::OpenAiCompatible);
    assert_eq!(defaulted.base_url, DEFAULT_OPENROUTER_BASE_URL);
    assert_eq!(defaulted.model, DEFAULT_OPENROUTER_MODEL);
    assert_eq!(defaulted.api_key, "k");

    // Explicit gemini provider picks up the Gemini base URL.
    let gemini_cfg =
        EmbeddingConfig::from_parts(Some("gemini"), None, None, "g".to_string()).unwrap();
    assert_eq!(gemini_cfg.provider, Provider::Gemini);
    assert_eq!(gemini_cfg.base_url, DEFAULT_GEMINI_BASE_URL);

    // Explicit base URL and model replace the provider defaults.
    let custom = EmbeddingConfig::from_parts(
        Some("openai"),
        Some("https://x/v1/".to_string()),
        Some("custom".to_string()),
        "k".to_string(),
    )
    .unwrap();
    assert_eq!(custom.base_url, "https://x/v1"); // trailing slash trimmed
    assert_eq!(custom.model, "custom");

    // An unrecognized provider name is rejected.
    let failure =
        EmbeddingConfig::from_parts(Some("cohere"), None, None, "k".to_string()).unwrap_err();
    assert!(
        failure.to_string().contains("unknown embedding provider"),
        "got: {failure}"
    );
}
|
||||
|
||||
#[test]
#[serial]
fn from_parts_mock_honors_an_explicit_model() {
    // A cluster `providers.embedding` profile that sets `kind: mock, model: X`
    // must resolve to model X — that is the value the query-time same-space
    // check compares against. The env is cleared so only the argument matters.
    let _env = cleared_env(&[]);

    let with_model = EmbeddingConfig::from_parts(
        Some("mock"),
        None,
        Some("recorded-x".to_string()),
        String::new(),
    )
    .unwrap();
    assert_eq!(with_model.provider, Provider::Mock);
    assert_eq!(with_model.model, "recorded-x");

    // Without an explicit model, mock falls back to its env-based default,
    // which is empty here because the env is cleared.
    let without_model =
        EmbeddingConfig::from_parts(Some("mock"), None, None, String::new()).unwrap();
    assert_eq!(without_model.provider, Provider::Mock);
    assert_eq!(without_model.model, "");
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_openai_compatible_prefers_openrouter_key() {
    // With no provider override and both keys set, the OpenRouter key wins.
    let _env = cleared_env(&[
        ("OPENROUTER_API_KEY", Some("router")),
        ("OPENAI_API_KEY", Some("openai")),
    ]);

    let resolved = EmbeddingConfig::from_env().unwrap();

    assert_eq!(resolved.api_key, "router");
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_explicit_gemini_provider() {
    // Selecting gemini explicitly yields the Gemini defaults and key.
    let _env = cleared_env(&[
        ("OMNIGRAPH_EMBED_PROVIDER", Some("gemini")),
        ("GEMINI_API_KEY", Some("g-key")),
    ]);

    let resolved = EmbeddingConfig::from_env().unwrap();

    assert_eq!(resolved.provider, Provider::Gemini);
    assert_eq!(resolved.base_url, DEFAULT_GEMINI_BASE_URL);
    assert_eq!(resolved.model, DEFAULT_GEMINI_MODEL);
    assert_eq!(resolved.api_key, "g-key");
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_base_url_and_model_overrides_apply() {
    // Base URL and model env overrides replace the provider defaults.
    let _env = cleared_env(&[
        ("OMNIGRAPH_EMBED_PROVIDER", Some("openai-compatible")),
        ("OMNIGRAPH_EMBED_BASE_URL", Some("https://example.test/v1/")),
        ("OMNIGRAPH_EMBED_MODEL", Some("custom/model")),
        ("OPENAI_API_KEY", Some("k")),
    ]);

    let resolved = EmbeddingConfig::from_env().unwrap();

    assert_eq!(resolved.base_url, "https://example.test/v1"); // trailing slash trimmed
    assert_eq!(resolved.model, "custom/model");
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_unknown_provider_errors() {
    // An unrecognized provider name in the env must be rejected.
    let _guard = cleared_env(&[("OMNIGRAPH_EMBED_PROVIDER", Some("cohere"))]);
    let err = EmbeddingConfig::from_env().unwrap_err();
    // Print the actual error on failure, as the sibling error tests do.
    assert!(
        err.to_string().contains("unknown embedding provider"),
        "got: {err}"
    );
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_errors_when_no_key_present() {
    // With a fully cleared env there is no API key to resolve, so the error
    // must name the keys that were looked for.
    let _guard = cleared_env(&[]);
    let err = EmbeddingConfig::from_env().unwrap_err();
    // Print the actual error on failure, as the sibling error tests do.
    assert!(
        err.to_string().contains("OPENROUTER_API_KEY or OPENAI_API_KEY"),
        "got: {err}"
    );
}
|
||||
|
||||
#[test]
#[serial]
fn from_env_mock_flag_wins() {
    // The mock flag takes precedence over an explicit provider selection.
    let _env = cleared_env(&[
        ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")),
        ("OMNIGRAPH_EMBED_PROVIDER", Some("gemini")),
    ]);

    let resolved = EmbeddingConfig::from_env().unwrap();

    assert_eq!(resolved.provider, Provider::Mock);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1323,9 +1323,9 @@ impl Omnigraph {
|
|||
// branch_merge writes only to the target branch.
|
||||
//
|
||||
// Held across the per-table publish loop and the manifest
|
||||
// commit + record_merge_commit calls below. Under PR 1b's
|
||||
// intermediate state (global server RwLock still in place),
|
||||
// this acquisition is uncontended.
|
||||
// commit + record_merge_commit calls below, so no concurrent
|
||||
// writer to a touched (table, target_branch) can interleave
|
||||
// between our commit_staged and our publish.
|
||||
let active_branch_for_keys = self.active_branch().await;
|
||||
let merge_queue_keys: Vec<(String, Option<String>)> = ordered_table_keys
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -741,14 +741,45 @@ impl Omnigraph {
|
|||
// tables. Branch is threaded explicitly — no coordinator swap.
|
||||
let mut staging = MutationStaging::default();
|
||||
|
||||
// Lower + validate up front so the touched-table set is known before
|
||||
// execution. A lowering/validation error returns exactly as it did
|
||||
// when this happened inside execute_named_mutation.
|
||||
let ir = self.lower_named_mutation(query_source, query_name)?;
|
||||
|
||||
// Up-front fork-queue acquisition (see the loader for the full
|
||||
// rationale): if this mutation will fork any touched table onto a
|
||||
// non-main branch, acquire the per-(table, branch) write queues for
|
||||
// every touched table before the first fork and hold them through the
|
||||
// publish, so the orphan-fork reclaim can't race a concurrent
|
||||
// in-process fork. The touched set is derived from the lowered IR.
|
||||
let fork_queue_guards: Option<(
|
||||
Vec<(String, Option<String>)>,
|
||||
Vec<tokio::sync::OwnedMutexGuard<()>>,
|
||||
)> = if let Some(active) = requested.as_deref() {
|
||||
let snapshot = self.snapshot_for_branch(Some(active)).await?;
|
||||
let touched: Vec<(String, Option<String>)> = self
|
||||
.touched_table_keys(&ir)
|
||||
.into_iter()
|
||||
.map(|k| (k, Some(active.to_string())))
|
||||
.collect();
|
||||
let needs_fork = touched.iter().any(|(table_key, _)| {
|
||||
snapshot
|
||||
.entry(table_key)
|
||||
.map(|e| e.table_branch.as_deref() != Some(active))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
if needs_fork {
|
||||
let guards = self.write_queue().acquire_many(&touched).await;
|
||||
Some((touched, guards))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let exec_result = self
|
||||
.execute_named_mutation(
|
||||
query_source,
|
||||
query_name,
|
||||
&resolved_params,
|
||||
requested.as_deref(),
|
||||
&mut staging,
|
||||
)
|
||||
.execute_named_mutation(&ir, &resolved_params, requested.as_deref(), &mut staging)
|
||||
.await;
|
||||
|
||||
match exec_result {
|
||||
|
|
@ -768,6 +799,7 @@ impl Omnigraph {
|
|||
requested.as_deref(),
|
||||
crate::db::manifest::SidecarKind::Mutation,
|
||||
actor_id,
|
||||
fork_queue_guards,
|
||||
)
|
||||
.await?;
|
||||
// Failpoint that wedges the documented finalize→publisher
|
||||
|
|
@ -817,14 +849,19 @@ impl Omnigraph {
|
|||
}
|
||||
}
|
||||
|
||||
async fn execute_named_mutation(
|
||||
/// Lower + validate a named mutation query into its IR.
|
||||
///
|
||||
/// Hoisted out of [`Self::execute_named_mutation`] so the caller can
|
||||
/// inspect the IR before execution — specifically to compute the
|
||||
/// touched-table set (see [`Self::touched_table_keys`]) for up-front
|
||||
/// write-queue acquisition. Performs the same find → typecheck → lower
|
||||
/// → D₂ checks that execution previously did inline, so error behavior
|
||||
/// is unchanged.
|
||||
fn lower_named_mutation(
|
||||
&self,
|
||||
query_source: &str,
|
||||
query_name: &str,
|
||||
params: &ParamMap,
|
||||
branch: Option<&str>,
|
||||
staging: &mut MutationStaging,
|
||||
) -> Result<MutationResult> {
|
||||
) -> Result<omnigraph_compiler::ir::MutationIR> {
|
||||
let query_decl = omnigraph_compiler::find_named_query(query_source, query_name)
|
||||
.map_err(|e| OmniError::manifest(e.to_string()))?;
|
||||
|
||||
|
|
@ -841,7 +878,61 @@ impl Omnigraph {
|
|||
let ir = lower_mutation_query(&query_decl)?;
|
||||
// D₂: reject mixed insert/update + delete before any I/O.
|
||||
enforce_no_mixed_destructive_constructive(&ir)?;
|
||||
Ok(ir)
|
||||
}
|
||||
|
||||
/// The COMPLETE set of `(node|edge):{type}` table keys a mutation IR can
|
||||
/// touch at execution time, keyed as `MutationStaging`/`commit_all` key
|
||||
/// them. Must be a superset of everything execution forks/commits, since
|
||||
/// it drives the up-front fork-queue acquisition and `commit_all`'s
|
||||
/// held-guard coverage check — a miss means an unserialized fork/commit.
|
||||
///
|
||||
/// The set is a pure function of (IR ops + catalog). For each op it mirrors
|
||||
/// the execute path's node-vs-edge dispatch (`node_types` first, then
|
||||
/// `edge_types`). A `delete <Node>` additionally **cascades** to every edge
|
||||
/// type whose endpoint is that node (see `execute_delete_node`), forking
|
||||
/// those edge tables during execution — so they are included here, derived
|
||||
/// the same way the executor derives them (`from_type`/`to_type` match).
|
||||
/// Unknown types are skipped (the execute path surfaces the error).
|
||||
/// Sorted + deduped for one-shot `acquire_many`.
|
||||
fn touched_table_keys(&self, ir: &omnigraph_compiler::ir::MutationIR) -> Vec<String> {
|
||||
use omnigraph_compiler::ir::MutationOpIR;
|
||||
let catalog = self.catalog();
|
||||
let mut keys: Vec<String> = Vec::new();
|
||||
for op in &ir.ops {
|
||||
let type_name = match op {
|
||||
MutationOpIR::Insert { type_name, .. }
|
||||
| MutationOpIR::Update { type_name, .. }
|
||||
| MutationOpIR::Delete { type_name, .. } => type_name,
|
||||
};
|
||||
if catalog.node_types.contains_key(type_name) {
|
||||
keys.push(format!("node:{type_name}"));
|
||||
// A node delete cascades to every edge touching this node type,
|
||||
// forking those edge tables. Include them so the up-front
|
||||
// acquisition covers the cascade (mirrors execute_delete_node).
|
||||
if matches!(op, MutationOpIR::Delete { .. }) {
|
||||
for (edge_name, edge_type) in &catalog.edge_types {
|
||||
if edge_type.from_type == *type_name || edge_type.to_type == *type_name {
|
||||
keys.push(format!("edge:{edge_name}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if catalog.edge_types.contains_key(type_name) {
|
||||
keys.push(format!("edge:{type_name}"));
|
||||
}
|
||||
}
|
||||
keys.sort();
|
||||
keys.dedup();
|
||||
keys
|
||||
}
|
||||
|
||||
async fn execute_named_mutation(
|
||||
&self,
|
||||
ir: &omnigraph_compiler::ir::MutationIR,
|
||||
params: &ParamMap,
|
||||
branch: Option<&str>,
|
||||
staging: &mut MutationStaging,
|
||||
) -> Result<MutationResult> {
|
||||
let mut total = MutationResult::default();
|
||||
for op in &ir.ops {
|
||||
let result = match op {
|
||||
|
|
|
|||
|
|
@ -72,7 +72,11 @@ fn evaluate_expr(batch: &RecordBatch, expr: &IRExpr, params: &ParamMap) -> Resul
|
|||
}
|
||||
|
||||
/// Create a constant array from a literal value.
|
||||
fn literal_to_array(lit: &Literal, num_rows: usize) -> Result<ArrayRef> {
|
||||
///
|
||||
/// `pub(super)` so the pushdown arm (`query.rs::literal_to_typed_expr`) can build
|
||||
/// a literal in the same natural Arrow type and cast it to the column type through
|
||||
/// the identical `arrow_cast` path used here, keeping the two filter arms in sync.
|
||||
pub(super) fn literal_to_array(lit: &Literal, num_rows: usize) -> Result<ArrayRef> {
|
||||
Ok(match lit {
|
||||
Literal::Null => arrow_array::new_null_array(&DataType::Utf8, num_rows),
|
||||
Literal::String(s) => Arc::new(StringArray::from(vec![s.as_str(); num_rows])) as ArrayRef,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,30 @@ use super::*;
|
|||
|
||||
use super::projection::{apply_filter, apply_ordering, project_return};
|
||||
|
||||
/// Bundles the per-handle embedding client cell with the optional injected
|
||||
/// config (RFC-012 Phase 5) so the lazy init uses the injected config when
|
||||
/// present, else `EmbeddingClient::from_env()`. Threaded through the query path
|
||||
/// in place of the bare cell, preserving laziness (a graph that never embeds
|
||||
/// builds no client and needs no key).
|
||||
pub(crate) struct EmbeddingResolver<'a> {
|
||||
cell: &'a tokio::sync::OnceCell<EmbeddingClient>,
|
||||
config: Option<&'a crate::embedding::EmbeddingConfig>,
|
||||
}
|
||||
|
||||
impl EmbeddingResolver<'_> {
|
||||
async fn resolve(&self) -> Result<&EmbeddingClient> {
|
||||
let config = self.config.cloned();
|
||||
self.cell
|
||||
.get_or_try_init(|| async move {
|
||||
match config {
|
||||
Some(cfg) => EmbeddingClient::new(cfg),
|
||||
None => EmbeddingClient::from_env(),
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl Omnigraph {
|
||||
/// Run a named query against an explicit branch or snapshot target.
|
||||
pub async fn query(
|
||||
|
|
@ -31,7 +55,18 @@ impl Omnigraph {
|
|||
GraphIndexHandle::none()
|
||||
};
|
||||
|
||||
execute_query(&ir, params, &resolved.snapshot, &graph_index, &catalog).await
|
||||
execute_query(
|
||||
&ir,
|
||||
params,
|
||||
&resolved.snapshot,
|
||||
&graph_index,
|
||||
&catalog,
|
||||
&EmbeddingResolver {
|
||||
cell: self.embedding_cell(),
|
||||
config: self.embedding_config_ref(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Run a named query against the graph as it existed at a prior manifest version.
|
||||
|
|
@ -72,7 +107,18 @@ impl Omnigraph {
|
|||
GraphIndexHandle::none()
|
||||
};
|
||||
|
||||
execute_query(&ir, params, &snapshot, &graph_index, &catalog).await
|
||||
execute_query(
|
||||
&ir,
|
||||
params,
|
||||
&snapshot,
|
||||
&graph_index,
|
||||
&catalog,
|
||||
&EmbeddingResolver {
|
||||
cell: self.embedding_cell(),
|
||||
config: self.embedding_config_ref(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -102,6 +148,7 @@ async fn extract_search_mode(
|
|||
ir: &QueryIR,
|
||||
params: &ParamMap,
|
||||
catalog: &Catalog,
|
||||
embedding: &EmbeddingResolver<'_>,
|
||||
) -> Result<SearchMode> {
|
||||
if ir.order_by.is_empty() {
|
||||
return Ok(SearchMode::default());
|
||||
|
|
@ -114,7 +161,8 @@ async fn extract_search_mode(
|
|||
query,
|
||||
} => {
|
||||
let vec =
|
||||
resolve_nearest_query_vec(ir, catalog, variable, property, query, params).await?;
|
||||
resolve_nearest_query_vec(ir, catalog, variable, property, query, params, embedding)
|
||||
.await?;
|
||||
let k = ir.limit.ok_or_else(|| {
|
||||
OmniError::manifest("nearest() ordering requires a limit clause".to_string())
|
||||
})? as usize;
|
||||
|
|
@ -157,9 +205,10 @@ async fn extract_search_mode(
|
|||
.unwrap_or(60) as u32;
|
||||
|
||||
let primary_mode =
|
||||
extract_sub_search_mode(ir, primary, params, catalog, ir.limit).await?;
|
||||
extract_sub_search_mode(ir, primary, params, catalog, ir.limit, embedding).await?;
|
||||
let secondary_mode =
|
||||
extract_sub_search_mode(ir, secondary, params, catalog, ir.limit).await?;
|
||||
extract_sub_search_mode(ir, secondary, params, catalog, ir.limit, embedding)
|
||||
.await?;
|
||||
|
||||
Ok(SearchMode {
|
||||
rrf: Some(RrfMode {
|
||||
|
|
@ -182,6 +231,7 @@ async fn extract_sub_search_mode(
|
|||
params: &ParamMap,
|
||||
catalog: &Catalog,
|
||||
limit: Option<u64>,
|
||||
embedding: &EmbeddingResolver<'_>,
|
||||
) -> Result<SearchMode> {
|
||||
match expr {
|
||||
IRExpr::Nearest {
|
||||
|
|
@ -190,7 +240,8 @@ async fn extract_sub_search_mode(
|
|||
query,
|
||||
} => {
|
||||
let vec =
|
||||
resolve_nearest_query_vec(ir, catalog, variable, property, query, params).await?;
|
||||
resolve_nearest_query_vec(ir, catalog, variable, property, query, params, embedding)
|
||||
.await?;
|
||||
let k = limit.unwrap_or(100) as usize;
|
||||
Ok(SearchMode {
|
||||
nearest: Some((variable.clone(), property.clone(), vec, k)),
|
||||
|
|
@ -229,15 +280,34 @@ async fn resolve_nearest_query_vec(
|
|||
property: &str,
|
||||
expr: &IRExpr,
|
||||
params: &ParamMap,
|
||||
embedding: &EmbeddingResolver<'_>,
|
||||
) -> Result<Vec<f32>> {
|
||||
let lit = resolve_literal_or_param(expr, params)?;
|
||||
match lit {
|
||||
Literal::List(_) => literal_to_f32_vec(&lit),
|
||||
Literal::String(text) => {
|
||||
let expected_dim = nearest_property_dimension(ir, catalog, variable, property)?;
|
||||
EmbeddingClient::from_env()?
|
||||
.embed_query_text(&text, expected_dim)
|
||||
.await
|
||||
let (expected_dim, recorded_model) =
|
||||
nearest_property_dim_and_model(ir, catalog, variable, property)?;
|
||||
// Lazily resolve the per-handle client once, then reuse it across
|
||||
// queries (keeps the provider connection pool warm); a graph that
|
||||
// never embeds never builds a client and needs no provider key.
|
||||
let client = embedding.resolve().await?;
|
||||
// Same-space guarantee: if the property recorded the model that
|
||||
// produced its stored vectors (`@embed("…", model="…")`), the query
|
||||
// embedder must resolve to that same model — otherwise the comparison
|
||||
// is across vector spaces. Reject loudly instead of ranking garbage.
|
||||
if let Some(recorded) = &recorded_model {
|
||||
let resolved = &client.config().model;
|
||||
if resolved != recorded {
|
||||
return Err(OmniError::manifest(format!(
|
||||
"nearest() on '{property}': its stored vectors were embedded with model \
|
||||
'{recorded}', but the query embedder resolves to '{resolved}'. Set \
|
||||
OMNIGRAPH_EMBED_MODEL='{recorded}' (and the matching provider) or re-embed \
|
||||
the stored vectors."
|
||||
)));
|
||||
}
|
||||
}
|
||||
client.embed_query_text(&text, expected_dim).await
|
||||
}
|
||||
_ => Err(OmniError::manifest(
|
||||
"nearest query must be a string or list of floats".to_string(),
|
||||
|
|
@ -279,12 +349,14 @@ fn literal_to_f32_vec(lit: &Literal) -> Result<Vec<f32>> {
|
|||
}
|
||||
}
|
||||
|
||||
fn nearest_property_dimension(
|
||||
/// Resolve the nearest() target property's vector dimension and the embedding
|
||||
/// model recorded for it via `@embed("…", model="…")` (`None` if unrecorded).
|
||||
fn nearest_property_dim_and_model(
|
||||
ir: &QueryIR,
|
||||
catalog: &Catalog,
|
||||
variable: &str,
|
||||
property: &str,
|
||||
) -> Result<usize> {
|
||||
) -> Result<(usize, Option<String>)> {
|
||||
let type_name = resolve_binding_type_name(&ir.pipeline, variable).ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
"nearest() variable '${}' is not bound to a node type in the lowered pipeline",
|
||||
|
|
@ -303,13 +375,20 @@ fn nearest_property_dimension(
|
|||
type_name, property
|
||||
))
|
||||
})?;
|
||||
match prop.scalar {
|
||||
ScalarType::Vector(dim) if !prop.list => Ok(dim as usize),
|
||||
_ => Err(OmniError::manifest_internal(format!(
|
||||
"nearest() property '{}.{}' is not a scalar vector",
|
||||
type_name, property
|
||||
))),
|
||||
}
|
||||
let dim = match prop.scalar {
|
||||
ScalarType::Vector(dim) if !prop.list => dim as usize,
|
||||
_ => {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"nearest() property '{}.{}' is not a scalar vector",
|
||||
type_name, property
|
||||
)));
|
||||
}
|
||||
};
|
||||
let recorded_model = node_type
|
||||
.embed_sources
|
||||
.get(property)
|
||||
.and_then(|embed| embed.model.clone());
|
||||
Ok((dim, recorded_model))
|
||||
}
|
||||
|
||||
fn resolve_binding_type_name<'a>(pipeline: &'a [IROp], variable: &str) -> Option<&'a str> {
|
||||
|
|
@ -341,8 +420,9 @@ pub async fn execute_query(
|
|||
snapshot: &Snapshot,
|
||||
graph_index: &GraphIndexHandle<'_>,
|
||||
catalog: &Catalog,
|
||||
embedding: &EmbeddingResolver<'_>,
|
||||
) -> Result<QueryResult> {
|
||||
let search_mode = extract_search_mode(ir, params, catalog).await?;
|
||||
let search_mode = extract_search_mode(ir, params, catalog, embedding).await?;
|
||||
|
||||
// RRF requires forked execution
|
||||
if let Some(ref rrf) = search_mode.rrf {
|
||||
|
|
@ -763,7 +843,7 @@ fn traversal_indexed_override() -> Option<bool> {
|
|||
|
||||
/// Max source-row frontier for which Expand uses the BTREE-indexed path.
|
||||
/// Larger frontiers fall back to the in-memory CSR (dense / whole-graph). See
|
||||
/// `docs/user/constants.md`.
|
||||
/// `docs/user/reference/constants.md`.
|
||||
const DEFAULT_EXPAND_INDEXED_MAX_FRONTIER: usize = 1024;
|
||||
/// Max hop count for the indexed path (each hop is one indexed scan; very deep
|
||||
/// traversals fan out toward whole-graph and are better served by CSR).
|
||||
|
|
@ -1289,10 +1369,12 @@ async fn expand_hydrate_and_align(
|
|||
params: &ParamMap,
|
||||
) -> Result<()> {
|
||||
// Pushable destination filters are applied by `hydrate_nodes`; the rest
|
||||
// (`ir_filter_to_expr` → None) are applied in memory after hconcat.
|
||||
// (`ir_filter_to_expr` → None) are applied in memory after hconcat. The
|
||||
// schema arg only affects a pushable literal's TYPE, never Some-vs-None, so
|
||||
// `None` here yields the same pushable/non-pushable split as `hydrate_nodes`.
|
||||
let non_pushable: Vec<&IRFilter> = dst_filters
|
||||
.iter()
|
||||
.filter(|f| ir_filter_to_expr(f, params).is_none())
|
||||
.filter(|f| ir_filter_to_expr(f, params, None).is_none())
|
||||
.collect();
|
||||
|
||||
// Unique destination ids (first-seen order) for one batched hydration.
|
||||
|
|
@ -1506,7 +1588,8 @@ async fn hydrate_nodes(
|
|||
// `id IN (ids)` AND any pushable destination filters, as a structured Expr.
|
||||
let id_list: Vec<datafusion::prelude::Expr> = ids.iter().map(|id| lit(id.clone())).collect();
|
||||
let mut filter_expr = col("id").in_list(id_list, false);
|
||||
if let Some(dst_expr) = build_lance_filter_expr(dst_filters, params) {
|
||||
if let Some(dst_expr) = build_lance_filter_expr(dst_filters, params, Some(&node_type.arrow_schema))
|
||||
{
|
||||
filter_expr = filter_expr.and(dst_expr);
|
||||
}
|
||||
|
||||
|
|
@ -1747,21 +1830,23 @@ async fn execute_node_scan(
|
|||
let table_key = format!("node:{}", type_name);
|
||||
let ds = snapshot.open(&table_key).await?;
|
||||
|
||||
let node_type = &catalog.node_types[type_name];
|
||||
|
||||
// Lower the IR filters to a DataFusion `Expr` and apply via
|
||||
// `Scanner::filter_expr` inside the configure closure. The string
|
||||
// pushdown path (`build_lance_filter` → `scanner.filter(&str)`) is
|
||||
// gone for node scans — structured Expr unlocks `CompOp::Contains`
|
||||
// pushdown (via `array_has`) and lets DF 53's optimizer rules
|
||||
// (vectorized IN-list, PhysicalExprSimplifier, CASE-NULL shortcut)
|
||||
// reach our predicates. Other call sites that still take string SQL
|
||||
// (hydrate_nodes for the Expand pushdown, count_rows, the mutation
|
||||
// delete path) migrate in follow-up MRs.
|
||||
let filter_expr = build_lance_filter_expr(filters, params);
|
||||
// reach our predicates. Passing the node's `arrow_schema` lets the lowering
|
||||
// coerce literals to each column's exact type so narrow-numeric BTREEs are
|
||||
// used. Other call sites that still take string SQL (count_rows, the
|
||||
// mutation delete path) migrate in follow-up MRs.
|
||||
let filter_expr = build_lance_filter_expr(filters, params, Some(&node_type.arrow_schema));
|
||||
|
||||
// Blob columns must be excluded from scan when a filter is present
|
||||
// (Lance bug: BlobsDescriptions + filter triggers a projection assertion).
|
||||
// We exclude blob columns and add metadata post-scan via take_blobs_by_indices.
|
||||
let node_type = &catalog.node_types[type_name];
|
||||
let has_blobs = !node_type.blob_properties.is_empty();
|
||||
let non_blob_cols: Vec<&str> = node_type
|
||||
.arrow_schema
|
||||
|
|
@ -1990,13 +2075,14 @@ pub(super) fn literal_to_sql(lit: &Literal) -> String {
|
|||
pub(super) fn build_lance_filter_expr(
|
||||
filters: &[IRFilter],
|
||||
params: &ParamMap,
|
||||
schema: Option<&Schema>,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::logical_expr::Operator;
|
||||
use datafusion::prelude::Expr;
|
||||
|
||||
let mut acc: Option<Expr> = None;
|
||||
for f in filters {
|
||||
let Some(e) = ir_filter_to_expr(f, params) else {
|
||||
let Some(e) = ir_filter_to_expr(f, params, schema) else {
|
||||
continue;
|
||||
};
|
||||
acc = Some(match acc {
|
||||
|
|
@ -2017,6 +2103,7 @@ pub(super) fn build_lance_filter_expr(
|
|||
pub(super) fn ir_filter_to_expr(
|
||||
filter: &IRFilter,
|
||||
params: &ParamMap,
|
||||
schema: Option<&Schema>,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::functions_nested::expr_fn::array_has;
|
||||
|
||||
|
|
@ -2027,14 +2114,22 @@ pub(super) fn ir_filter_to_expr(
|
|||
// List-contains: `prop CONTAINS value` lowers to `array_has(prop, value)`.
|
||||
// This is the case the old SQL-string pushdown had to return None for
|
||||
// ("Can't pushdown list contains"); with structured Expr it pushes down fine.
|
||||
// (Element-type coercion for the contained value is deferred — list columns
|
||||
// are not scalar-indexed, so the index-eligibility concern below does not apply.)
|
||||
if matches!(filter.op, CompOp::Contains) {
|
||||
let left = ir_expr_to_expr(&filter.left, params)?;
|
||||
let right = ir_expr_to_expr(&filter.right, params)?;
|
||||
let left = ir_expr_to_expr(&filter.left, params, None)?;
|
||||
let right = ir_expr_to_expr(&filter.right, params, None)?;
|
||||
return Some(array_has(left, right));
|
||||
}
|
||||
|
||||
let left = ir_expr_to_expr(&filter.left, params)?;
|
||||
let right = ir_expr_to_expr(&filter.right, params)?;
|
||||
// A literal/param operand is coerced to the OTHER operand's column type so
|
||||
// the predicate stays a direct `col OP literal` and the scalar index is used.
|
||||
// Without this, DataFusion widens a narrow column (`CAST(col AS Int64)`),
|
||||
// which defeats the BTREE (validated by `probe_scalar_index_use_under_literal_type`).
|
||||
let left_col_type = prop_data_type(&filter.left, schema);
|
||||
let right_col_type = prop_data_type(&filter.right, schema);
|
||||
let left = ir_expr_to_expr(&filter.left, params, right_col_type.as_ref())?;
|
||||
let right = ir_expr_to_expr(&filter.right, params, left_col_type.as_ref())?;
|
||||
Some(match filter.op {
|
||||
CompOp::Eq => left.eq(right),
|
||||
CompOp::Ne => left.not_eq(right),
|
||||
|
|
@ -2052,19 +2147,91 @@ pub(super) fn ir_filter_to_expr(
|
|||
pub(super) fn ir_expr_to_expr(
|
||||
expr: &IRExpr,
|
||||
params: &ParamMap,
|
||||
target: Option<&arrow_schema::DataType>,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::{col, lit};
|
||||
use datafusion::prelude::col;
|
||||
match expr {
|
||||
IRExpr::PropAccess { property, .. } => Some(col(property)),
|
||||
IRExpr::Literal(l) => literal_to_expr(l),
|
||||
IRExpr::Param(name) => params.get(name).and_then(literal_to_expr),
|
||||
IRExpr::Literal(l) => literal_to_expr_coerced(l, target),
|
||||
IRExpr::Param(name) => params
|
||||
.get(name)
|
||||
.and_then(|l| literal_to_expr_coerced(l, target)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a Literal to a DataFusion `Expr`. Returns `None` for List
|
||||
/// (which the existing SQL path also can't pushdown — falls through to
|
||||
/// post-scan in-memory application).
|
||||
/// The Arrow type of a `PropAccess` operand, looked up in the scan's schema, or
|
||||
/// `None` if the expr is not a column or the schema/field is unavailable.
|
||||
fn prop_data_type(expr: &IRExpr, schema: Option<&Schema>) -> Option<arrow_schema::DataType> {
|
||||
match expr {
|
||||
IRExpr::PropAccess { property, .. } => schema?
|
||||
.field_with_name(property)
|
||||
.ok()
|
||||
.map(|f| f.data_type().clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Lower a literal for pushdown, coercing it to `target` (the comparison
|
||||
/// column's Arrow type) when known. Falls back to the natural-type
|
||||
/// `literal_to_expr` on a missing target or any coercion failure, so a filter is
|
||||
/// never demoted to `None` by coercion (a node scan has no in-memory fallback for
|
||||
/// inline filters — see `execute_node_scan`).
|
||||
fn literal_to_expr_coerced(
|
||||
lit: &Literal,
|
||||
target: Option<&arrow_schema::DataType>,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
if let Some(target) = target {
|
||||
if let Some(e) = literal_to_typed_expr(lit, target) {
|
||||
return Some(e);
|
||||
}
|
||||
}
|
||||
literal_to_expr(lit)
|
||||
}
|
||||
|
||||
/// Build a literal as a typed Arrow scalar matching `target`, reusing the same
|
||||
/// `literal_to_array` + `arrow_cast` path as the in-memory arm
|
||||
/// (`projection.rs::evaluate_filter`) so the two arms agree. Returns `None` on
|
||||
/// any failure (unbuildable literal, incompatible cast) — the caller then falls
|
||||
/// back to the natural-type literal.
|
||||
///
|
||||
/// Lossless-only for integer targets: typecheck permits numeric cross-type
|
||||
/// comparisons (`types_compatible`), so a fractional float or out-of-range
|
||||
/// integer can reach here. Casting those to a narrower integer would truncate
|
||||
/// (`2.7 -> 2`) or overflow to null, silently changing which rows match. We
|
||||
/// round-trip the cast and, on mismatch, return `None` so the caller keeps the
|
||||
/// natural literal — correct via DataFusion coercion, the index just goes unused
|
||||
/// for that out-of-domain predicate. Float targets are exempt: narrowing
|
||||
/// `F64 -> F32` is the column's own precision domain, not a value error.
|
||||
fn literal_to_typed_expr(
|
||||
lit: &Literal,
|
||||
target: &arrow_schema::DataType,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::lit as df_lit;
|
||||
use datafusion::scalar::ScalarValue;
|
||||
|
||||
let arr = super::projection::literal_to_array(lit, 1).ok()?;
|
||||
if arr.data_type() == target {
|
||||
return Some(df_lit(ScalarValue::try_from_array(&arr, 0).ok()?));
|
||||
}
|
||||
let casted = arrow_cast::cast::cast(&arr, target).ok()?;
|
||||
if target.is_integer() {
|
||||
let back = arrow_cast::cast::cast(&casted, arr.data_type()).ok()?;
|
||||
let original = ScalarValue::try_from_array(&arr, 0).ok()?;
|
||||
let round_tripped = ScalarValue::try_from_array(&back, 0).ok()?;
|
||||
if original != round_tripped {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(df_lit(ScalarValue::try_from_array(&casted, 0).ok()?))
|
||||
}
|
||||
|
||||
/// Convert a Literal to a DataFusion `Expr` in its NATURAL Arrow type. This is
|
||||
/// the fallback used when the comparison column's type is unknown (no schema) or
|
||||
/// when coercion to it fails; the typed, column-matched coercion that keeps
|
||||
/// scalar indexes usable lives in `literal_to_typed_expr`. Returns `None` for
|
||||
/// List (the SQL path also could not push it down — falls through to post-scan
|
||||
/// in-memory application).
|
||||
fn literal_to_expr(lit: &Literal) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::lit as df_lit;
|
||||
Some(match lit {
|
||||
|
|
@ -2073,9 +2240,12 @@ fn literal_to_expr(lit: &Literal) -> Option<datafusion::prelude::Expr> {
|
|||
Literal::Integer(n) => df_lit(*n),
|
||||
Literal::Float(f) => df_lit(*f),
|
||||
Literal::Bool(b) => df_lit(*b),
|
||||
// Date/DateTime stored as strings; pass through as string literals
|
||||
// — Lance/DataFusion handles the comparison against typed columns
|
||||
// via implicit cast, matching the existing string-SQL behavior.
|
||||
// Date/DateTime pass through as strings here. Against a typed Date
|
||||
// column DataFusion casts the LITERAL (`CAST(Utf8 AS Date32)`), which is
|
||||
// index-safe (proven by `scalar_index_use_requires_matched_literal_type`).
|
||||
// At real pushdown sites the schema is known, so `literal_to_typed_expr`
|
||||
// produces a typed Date32/Date64 anyway; this branch is only the
|
||||
// no-schema fallback.
|
||||
Literal::Date(s) => df_lit(s.clone()),
|
||||
Literal::DateTime(s) => df_lit(s.clone()),
|
||||
Literal::List(_) => return None,
|
||||
|
|
@ -2285,3 +2455,205 @@ mod expand_chooser_tests {
|
|||
assert_eq!(choose_expand_mode(&i), ExpandMode::Csr);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod literal_lowering_tests {
    use super::*;
    use arrow_schema::{DataType, Field};
    use datafusion::prelude::Expr;
    use datafusion::scalar::ScalarValue;

    // Date literals: a known column type yields a typed Date32/Date64 scalar;
    // with no target the literal stays in its natural Utf8 form.
    #[test]
    fn date_literals_coerce_to_typed_arrow_scalars() {
        let datetime_literal = Literal::DateTime("2024-06-01T12:00:00Z".into());
        let date_literal = Literal::Date("2024-06-01".into());

        let dt = literal_to_expr_coerced(&datetime_literal, Some(&DataType::Date64)).unwrap();
        let dt_is_date64 = matches!(dt, Expr::Literal(ScalarValue::Date64(Some(_)), ..));
        assert!(
            dt_is_date64,
            "DateTime vs Date64 column must coerce to a typed Date64, got {dt:?}"
        );

        let d = literal_to_expr_coerced(&date_literal, Some(&DataType::Date32)).unwrap();
        let d_is_date32 = matches!(d, Expr::Literal(ScalarValue::Date32(Some(_)), ..));
        assert!(
            d_is_date32,
            "Date vs Date32 column must coerce to a typed Date32, got {d:?}"
        );

        let nat = literal_to_expr_coerced(&date_literal, None).unwrap();
        let nat_is_utf8 = matches!(nat, Expr::Literal(ScalarValue::Utf8(Some(_)), ..));
        assert!(
            nat_is_utf8,
            "no target should keep the natural Utf8 date literal, got {nat:?}"
        );
    }

    // An unparseable date cannot be coerced; the result must still be a
    // usable (Utf8) literal rather than no expression at all.
    #[test]
    fn malformed_date_literal_falls_back_to_string() {
        let garbage = Literal::DateTime("not-a-date".into());
        let bad = literal_to_expr_coerced(&garbage, Some(&DataType::Date64)).unwrap();
        let fell_back = matches!(bad, Expr::Literal(ScalarValue::Utf8(Some(_)), ..));
        assert!(
            fell_back,
            "malformed DateTime literal should fall back to a Utf8 literal, got {bad:?}"
        );
    }

    // Integer literals take the column's exact width/signedness when a target
    // is supplied, instead of their natural Int64.
    #[test]
    fn integer_literal_coerces_to_narrow_column_type() {
        let five = Literal::Integer(5);
        let i32_lit = literal_to_expr_coerced(&five, Some(&DataType::Int32)).unwrap();
        let is_i32_five = matches!(i32_lit, Expr::Literal(ScalarValue::Int32(Some(5)), ..));
        assert!(
            is_i32_five,
            "integer literal vs Int32 column must lower to Int32, got {i32_lit:?}"
        );

        let seven = Literal::Integer(7);
        let u32_lit = literal_to_expr_coerced(&seven, Some(&DataType::UInt32)).unwrap();
        let is_u32_seven = matches!(u32_lit, Expr::Literal(ScalarValue::UInt32(Some(7)), ..));
        assert!(
            is_u32_seven,
            "integer literal vs UInt32 column must lower to UInt32, got {u32_lit:?}"
        );
    }

    // Float literals narrow to a Float32 column's type.
    #[test]
    fn float_literal_coerces_to_f32_column_type() {
        let one_and_a_half = Literal::Float(1.5);
        let f32_lit = literal_to_expr_coerced(&one_and_a_half, Some(&DataType::Float32)).unwrap();
        let is_f32 = matches!(f32_lit, Expr::Literal(ScalarValue::Float32(Some(_)), ..));
        assert!(
            is_f32,
            "float literal vs Float32 column must lower to Float32, got {f32_lit:?}"
        );
    }

    // 2.7 has no exact Int32 representation, so the literal must remain a
    // Float64 instead of being truncated to 2.
    #[test]
    fn fractional_float_vs_int_column_falls_back_not_truncate() {
        let fractional = Literal::Float(2.7);
        let e = literal_to_expr_coerced(&fractional, Some(&DataType::Int32)).unwrap();
        let stayed_f64 = matches!(e, Expr::Literal(ScalarValue::Float64(Some(_)), ..));
        assert!(
            stayed_f64,
            "fractional float vs Int32 must fall back to natural Float64, got {e:?}"
        );
    }

    // 2.0 converts to Int32 without loss, so it is coerced.
    #[test]
    fn whole_float_vs_int_column_coerces() {
        let whole = Literal::Float(2.0);
        let e = literal_to_expr_coerced(&whole, Some(&DataType::Int32)).unwrap();
        let became_i32_two = matches!(e, Expr::Literal(ScalarValue::Int32(Some(2)), ..));
        assert!(
            became_i32_two,
            "whole-number float vs Int32 is lossless and must coerce to Int32(2), got {e:?}"
        );
    }

    // A value beyond Int32's range must stay Int64 rather than overflow.
    #[test]
    fn out_of_range_int_vs_narrow_column_falls_back() {
        let too_big = Literal::Integer(3_000_000_000);
        let e = literal_to_expr_coerced(&too_big, Some(&DataType::Int32)).unwrap();
        let stayed_i64 = matches!(
            e,
            Expr::Literal(ScalarValue::Int64(Some(3_000_000_000)), ..)
        );
        assert!(
            stayed_i64,
            "out-of-range integer vs Int32 must fall back to natural Int64, got {e:?}"
        );
    }

    // Float targets skip the lossless check: 0.1 is coerced to Float32 even
    // though it is not exactly representable there.
    #[test]
    fn float_vs_f32_column_coerces_even_when_not_exactly_representable() {
        let tenth = Literal::Float(0.1);
        let e = literal_to_expr_coerced(&tenth, Some(&DataType::Float32)).unwrap();
        let is_f32 = matches!(e, Expr::Literal(ScalarValue::Float32(Some(_)), ..));
        assert!(
            is_f32,
            "float target must coerce 0.1 to Float32 (exempt from lossless guard), got {e:?}"
        );
    }

    // Without a target type the literal keeps its natural Int64 width.
    #[test]
    fn literal_without_target_keeps_natural_width() {
        let five = Literal::Integer(5);
        let nat = literal_to_expr_coerced(&five, None).unwrap();
        let is_i64_five = matches!(nat, Expr::Literal(ScalarValue::Int64(Some(5)), ..));
        assert!(
            is_i64_five,
            "no target should keep the natural Int64 width, got {nat:?}"
        );
    }

    // Reports whether a binary expression has a non-null Int32 literal as
    // either operand; any non-binary expression yields false.
    fn binary_has_int32_literal(e: &Expr) -> bool {
        match e {
            Expr::BinaryExpr(bin) => {
                let is_int32 = |operand: &Expr| {
                    matches!(operand, Expr::Literal(ScalarValue::Int32(Some(_)), ..))
                };
                is_int32(bin.left.as_ref()) || is_int32(bin.right.as_ref())
            }
            _ => false,
        }
    }

    // Single-column schema: a nullable Int32 field named `count`.
    fn int32_schema() -> arrow_schema::Schema {
        let count_field = Field::new("count", DataType::Int32, true);
        arrow_schema::Schema::new(vec![count_field])
    }

    // IR expression for `m.count`.
    fn count_prop() -> IRExpr {
        let variable = "m".into();
        let property = "count".into();
        IRExpr::PropAccess { variable, property }
    }

    // The literal of a range comparison (`count >= 2`) is coerced to the
    // column type, same as for equality.
    #[test]
    fn ir_filter_coerces_literal_for_range_op() {
        let filter = IRFilter {
            left: count_prop(),
            op: CompOp::Ge,
            right: IRExpr::Literal(Literal::Integer(2)),
        };
        let schema = int32_schema();
        let params = ParamMap::new();
        let expr = ir_filter_to_expr(&filter, &params, Some(&schema)).unwrap();
        assert!(
            binary_has_int32_literal(&expr),
            "range-op literal must coerce to the Int32 column type, got {expr:?}"
        );
    }

    // Operand order does not matter: with the column on the right
    // (`2 < count`) the left-hand literal is still coerced.
    #[test]
    fn ir_filter_coerces_literal_when_column_is_on_the_right() {
        let filter = IRFilter {
            left: IRExpr::Literal(Literal::Integer(2)),
            op: CompOp::Lt,
            right: count_prop(),
        };
        let schema = int32_schema();
        let params = ParamMap::new();
        let expr = ir_filter_to_expr(&filter, &params, Some(&schema)).unwrap();
        assert!(
            binary_has_int32_literal(&expr),
            "reversed-operand literal must coerce to the Int32 column type, got {expr:?}"
        );
    }
}
|
||||
|
|
|
|||
|
|
@ -463,12 +463,28 @@ impl StagedMutation {
|
|||
/// unreferenced (cleaned by `cleanup_old_versions`'s age sweep)
|
||||
/// rather than being committed and creating a Lance-HEAD-ahead
|
||||
/// residual.
|
||||
/// `held_guards`: when the caller already holds the per-`(table_key,
|
||||
/// branch)` write queues for every touched table (the fork path acquires
|
||||
/// them up front, before the fork, and holds them through the manifest
|
||||
/// publish), it passes `(acquired_keys, guards)` here so `commit_all`
|
||||
/// reuses them instead of re-acquiring — the queue is a non-re-entrant
|
||||
/// `tokio::Mutex`, so re-acquiring a held key would self-deadlock.
|
||||
/// `None` (the steady-state path) means `commit_all` acquires them
|
||||
/// itself. `acquired_keys` must cover every key `commit_all` would
|
||||
/// acquire (enforced in all builds below, not just debug) — the guards from `acquire_many` don't
|
||||
/// carry their keys, so the caller hands the key set alongside them. The
|
||||
/// fork path guarantees coverage by keying every touched table uniformly
|
||||
/// by the resolved target branch.
|
||||
pub(crate) async fn commit_all(
|
||||
self,
|
||||
db: &crate::db::Omnigraph,
|
||||
branch: Option<&str>,
|
||||
sidecar_kind: SidecarKind,
|
||||
actor_id: Option<&str>,
|
||||
held_guards: Option<(
|
||||
Vec<(String, Option<String>)>,
|
||||
Vec<tokio::sync::OwnedMutexGuard<()>>,
|
||||
)>,
|
||||
) -> Result<(
|
||||
Vec<SubTableUpdate>,
|
||||
HashMap<String, u64>,
|
||||
|
|
@ -483,21 +499,18 @@ impl StagedMutation {
|
|||
op_kinds,
|
||||
} = self;
|
||||
|
||||
// Acquire per-(table_key, branch) queues for every touched
|
||||
// table — both staged and inline-committed. Sorted by
|
||||
// `acquire_many` internally so all multi-table writers
|
||||
// (mutation, branch_merge, schema_apply, future MR-870
|
||||
// recovery) agree on acquisition order — prevents lock-order
|
||||
// inversion deadlock.
|
||||
// Per-(table_key, branch) queues for every touched table — both
|
||||
// staged and inline-committed. Sorted by `acquire_many` internally
|
||||
// so all multi-table writers (mutation, branch_merge, schema_apply,
|
||||
// the fork path, recovery) agree on acquisition order — prevents
|
||||
// lock-order inversion deadlock.
|
||||
//
|
||||
// For inline-committed tables (delete-only mutations), Lance
|
||||
// HEAD has already advanced inside `delete_where` before
|
||||
// `commit_all` runs. Holding the queue here doesn't prevent
|
||||
// that interleaving (commit 6 will move queue acquisition into
|
||||
// `delete_where`'s call site); it does prevent another writer
|
||||
// from interleaving between our delete and our publish, which
|
||||
// would otherwise leave a Lance-HEAD-ahead residual the
|
||||
// delete-only sidecar (added below) would have to recover.
|
||||
// For inline-committed tables (delete-only mutations), Lance HEAD
|
||||
// has already advanced inside `delete_where` before `commit_all`
|
||||
// runs. Holding the queue here prevents another writer from
|
||||
// interleaving between our delete and our publish, which would
|
||||
// otherwise leave a Lance-HEAD-ahead residual the delete-only
|
||||
// sidecar (added below) would have to recover.
|
||||
let mut queue_keys: Vec<(String, Option<String>)> =
|
||||
Vec::with_capacity(staged.len() + inline_committed.len());
|
||||
for entry in &staged {
|
||||
|
|
@ -512,7 +525,30 @@ impl StagedMutation {
|
|||
})?;
|
||||
queue_keys.push((table_key.clone(), path.table_branch.clone()));
|
||||
}
|
||||
let guards = db.write_queue().acquire_many(&queue_keys).await;
|
||||
// Reuse the caller's guards (fork path) when handed in, else acquire
|
||||
// our own. When reusing, every key we would acquire MUST already be
|
||||
// covered — re-acquiring a held non-re-entrant key would deadlock, and
|
||||
// a key we'd need but DON'T hold would commit unserialized. This is a
|
||||
// load-bearing safety invariant, so it is checked in ALL builds (not a
|
||||
// debug_assert) and fails the write loudly+safely rather than silently
|
||||
// proceeding unguarded if a future execution path ever touches a table
|
||||
// outside the caller's pre-computed set.
|
||||
let guards = match held_guards {
|
||||
Some((acquired_keys, guards)) => {
|
||||
let held: std::collections::HashSet<&(String, Option<String>)> =
|
||||
acquired_keys.iter().collect();
|
||||
if let Some(missing) = queue_keys.iter().find(|k| !held.contains(k)) {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"commit_all: pre-held write-queue guards do not cover touched table \
|
||||
'{}' on branch {:?} — the caller's up-front acquisition set diverged \
|
||||
from the staged/inline set (a touched-table-set bug)",
|
||||
missing.0, missing.1
|
||||
)));
|
||||
}
|
||||
guards
|
||||
}
|
||||
None => db.write_queue().acquire_many(&queue_keys).await,
|
||||
};
|
||||
|
||||
// Re-capture manifest pins under the queue (PR 2 / MR-686).
|
||||
//
|
||||
|
|
|
|||
|
|
@ -418,6 +418,45 @@ async fn load_jsonl_reader<R: BufRead>(
|
|||
LoadMode::Overwrite => crate::db::MutationOpKind::SchemaRewrite,
|
||||
};
|
||||
|
||||
// Up-front fork-queue acquisition. The first write to a table on a
|
||||
// non-main branch forks it (create_branch), which advances Lance state
|
||||
// before the manifest publish; the reclaim of any manifest-unreferenced
|
||||
// leftover (`reclaim_orphaned_fork_and_refork`) must not race a concurrent
|
||||
// in-process fork. So when this load will fork at least one touched table,
|
||||
// acquire the per-(table, branch) write queues for ALL touched tables up
|
||||
// front (one sorted `acquire_many`, keyed uniformly by the target branch
|
||||
// so it covers what `commit_all` recomputes) and hold them through the
|
||||
// publish. Main-branch loads never fork; branch loads where every touched
|
||||
// table is already forked skip this and let `commit_all` acquire at commit.
|
||||
let fork_queue_guards: Option<(
|
||||
Vec<(String, Option<String>)>,
|
||||
Vec<tokio::sync::OwnedMutexGuard<()>>,
|
||||
)> = if let Some(active) = branch {
|
||||
let touched: Vec<(String, Option<String>)> = node_rows
|
||||
.keys()
|
||||
.map(|t| (format!("node:{t}"), Some(active.to_string())))
|
||||
.chain(
|
||||
edge_rows
|
||||
.keys()
|
||||
.map(|e| (format!("edge:{e}"), Some(active.to_string()))),
|
||||
)
|
||||
.collect();
|
||||
let needs_fork = touched.iter().any(|(table_key, _)| {
|
||||
snapshot
|
||||
.entry(table_key)
|
||||
.map(|e| e.table_branch.as_deref() != Some(active))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
if needs_fork {
|
||||
let guards = db.write_queue().acquire_many(&touched).await;
|
||||
Some((touched, guards))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Phase 2a: build and validate every node batch up front. Cheap and
|
||||
// synchronous — surfaces validation errors before any S3 traffic.
|
||||
let mut prepared_nodes: Vec<(String, String, RecordBatch, usize)> =
|
||||
|
|
@ -551,7 +590,13 @@ async fn load_jsonl_reader<R: BufRead>(
|
|||
// across the manifest publish below — see exec/mutation.rs for
|
||||
// the rationale (interleaving prevention).
|
||||
let (updates, expected_versions, sidecar_handle, _queue_guards) = staged
|
||||
.commit_all(db, branch, crate::db::manifest::SidecarKind::Load, actor_id)
|
||||
.commit_all(
|
||||
db,
|
||||
branch,
|
||||
crate::db::manifest::SidecarKind::Load,
|
||||
actor_id,
|
||||
fork_queue_guards,
|
||||
)
|
||||
.await?;
|
||||
// Same finalize → publisher residual as mutations: per-table
|
||||
// staged commits have advanced Lance HEAD, but the manifest
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use object_store::aws::AmazonS3Builder;
|
|||
use object_store::local::LocalFileSystem;
|
||||
use object_store::memory::InMemory;
|
||||
use object_store::path::Path as ObjectPath;
|
||||
use object_store::{DynObjectStore, ObjectStore, PutMode, PutPayload};
|
||||
use object_store::{DynObjectStore, ObjectStore, ObjectStoreExt, PutMode, PutPayload};
|
||||
use url::Url;
|
||||
|
||||
use crate::error::{OmniError, Result};
|
||||
|
|
|
|||
|
|
@ -184,6 +184,26 @@ pub(crate) fn staged_handles_as_writes(handles: &[StagedHandle]) -> Vec<StagedWr
|
|||
handles.iter().map(|h| h.inner.clone()).collect()
|
||||
}
|
||||
|
||||
/// Outcome of a per-table branch fork (`fork_branch_from_state`).
|
||||
///
|
||||
/// `RefAlreadyExists` means a Lance branch ref for the target already exists
|
||||
/// on the dataset, so `create_branch` could not create it cleanly. By the
|
||||
/// fork caller's contract — the caller re-checks the live manifest under the
|
||||
/// held per-`(table, branch)` write queue and only forks when the manifest
|
||||
/// does *not* place the table on the branch — such a ref is a
|
||||
/// manifest-unreferenced fork (the residue of an interrupted prior fork, or a
|
||||
/// delete+recreate), which the caller reclaims and re-forks. The fork
|
||||
/// operation does not editorialize ("incomplete prior delete"); it returns
|
||||
/// this typed signal and lets the db layer decide.
|
||||
// `pub` (not `pub(crate)`) to match the visibility of the sealed
|
||||
// `TableStorage::fork_branch_from_state` that returns it (and the already-`pub`
|
||||
// `SnapshotHandle`); avoids a private-interfaces warning. The trait is sealed,
|
||||
// so this widening does not let external code construct or branch on it.
|
||||
// The derives are bounded on `D`, so they cost nothing for payload types that
// lack them while making the outcome printable/comparable in diagnostics and
// tests (e.g. `unwrap_err()` on a `Result<ForkOutcome<_>>` needs `Debug`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ForkOutcome<D> {
    /// The branch ref was created; carries the value opened on the new
    /// branch (a dataset or snapshot handle at the call sites in this crate).
    Created(D),
    /// A ref for the target branch already existed on the dataset, so no
    /// fork was created. The caller decides whether to reclaim and re-fork.
    RefAlreadyExists,
}
|
||||
|
||||
// ─── TableStorage trait ────────────────────────────────────────────────────
|
||||
|
||||
/// Engine-internal trait covering every Lance dataset operation an
|
||||
|
|
@ -231,7 +251,7 @@ pub trait TableStorage: sealed::Sealed + Send + Sync + Debug {
|
|||
table_key: &str,
|
||||
source_version: u64,
|
||||
target_branch: &str,
|
||||
) -> Result<SnapshotHandle>;
|
||||
) -> Result<ForkOutcome<SnapshotHandle>>;
|
||||
|
||||
async fn delete_branch(&self, dataset_uri: &str, branch: &str) -> Result<()>;
|
||||
|
||||
|
|
@ -497,17 +517,22 @@ impl TableStorage for TableStore {
|
|||
table_key: &str,
|
||||
source_version: u64,
|
||||
target_branch: &str,
|
||||
) -> Result<SnapshotHandle> {
|
||||
TableStore::fork_branch_from_state(
|
||||
self,
|
||||
dataset_uri,
|
||||
source_branch,
|
||||
table_key,
|
||||
source_version,
|
||||
target_branch,
|
||||
) -> Result<ForkOutcome<SnapshotHandle>> {
|
||||
Ok(
|
||||
match TableStore::fork_branch_from_state(
|
||||
self,
|
||||
dataset_uri,
|
||||
source_branch,
|
||||
table_key,
|
||||
source_version,
|
||||
target_branch,
|
||||
)
|
||||
.await?
|
||||
{
|
||||
ForkOutcome::Created(ds) => ForkOutcome::Created(SnapshotHandle::new(ds)),
|
||||
ForkOutcome::RefAlreadyExists => ForkOutcome::RefAlreadyExists,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map(SnapshotHandle::new)
|
||||
}
|
||||
|
||||
async fn delete_branch(&self, dataset_uri: &str, branch: &str) -> Result<()> {
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ use std::sync::Arc;
|
|||
use crate::db::manifest::{TableVersionMetadata, open_table_head_for_write};
|
||||
use crate::db::{Snapshot, SubTableEntry};
|
||||
use crate::error::{OmniError, Result};
|
||||
use crate::storage_layer::ForkOutcome;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TableState {
|
||||
|
|
@ -285,7 +286,7 @@ impl TableStore {
|
|||
table_key: &str,
|
||||
source_version: u64,
|
||||
target_branch: &str,
|
||||
) -> Result<Dataset> {
|
||||
) -> Result<ForkOutcome<Dataset>> {
|
||||
let mut source_ds = self
|
||||
.open_dataset_head(dataset_uri, source_branch)
|
||||
.await?
|
||||
|
|
@ -294,31 +295,49 @@ impl TableStore {
|
|||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
self.ensure_expected_version(&source_ds, table_key, source_version)?;
|
||||
|
||||
if source_ds
|
||||
if let Err(create_err) = source_ds
|
||||
.create_branch(target_branch, source_version, None)
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
// The target branch ref already exists. The caller
|
||||
// (`open_owned_dataset_for_branch_write`) re-reads the live manifest
|
||||
// before forking and returns a retryable error when a concurrent
|
||||
// writer legitimately holds the fork, so reaching here means the
|
||||
// manifest does NOT reference this fork: it is an orphan from an
|
||||
// incomplete prior `branch_delete`. Surface the actionable cleanup
|
||||
// error rather than guessing from Lance branch versions.
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"branch '{}' has orphaned table state for '{}' from an incomplete \
|
||||
prior delete; run `omnigraph cleanup` to reclaim it before reusing \
|
||||
this branch name",
|
||||
target_branch, table_key
|
||||
)));
|
||||
// Disambiguate the failure: only a genuinely pre-existing ref is a
|
||||
// reclaim candidate. Mapping EVERY create_branch failure to
|
||||
// `RefAlreadyExists` would route a transient I/O / version / Lance
|
||||
// internal error into the destructive reclaim path. So check whether
|
||||
// the ref actually exists; if not, the failure is real — propagate
|
||||
// it (preserving error fidelity) rather than force-deleting.
|
||||
//
|
||||
// `list_branches` reads `_refs/branches/` from the store, so it sees
|
||||
// a fully-formed manifest-unreferenced fork (our common case — a
|
||||
// create_branch that completed but whose manifest publish did not).
|
||||
// It does NOT see a phase-1-only Lance "zombie" (tree dir written,
|
||||
// no BranchContents) — but neither does `cleanup`'s reconciler, also
|
||||
// list_branches-based. A zombie only forms if create_branch is
|
||||
// interrupted *between its two internal phases* (a far narrower
|
||||
// window than the manifest-publish gap), and it surfaces here as the
|
||||
// propagated create error requiring manual reclaim. We deliberately
|
||||
// do NOT force-delete on a not-found-ref failure: it is
|
||||
// indistinguishable from a transient error on a fresh create, and
|
||||
// force-deleting there is the destructive overreach this guard
|
||||
// removes. The caller holds the per-(table, branch) write queue, so
|
||||
// no in-process writer races this fork; a cross-process create
|
||||
// between our check and now is the documented one-winner-CAS gap and
|
||||
// propagates as a retryable error.
|
||||
let ref_exists = source_ds
|
||||
.list_branches()
|
||||
.await
|
||||
.map(|b| b.contains_key(target_branch))
|
||||
.unwrap_or(false);
|
||||
if ref_exists {
|
||||
return Ok(ForkOutcome::RefAlreadyExists);
|
||||
}
|
||||
return Err(OmniError::Lance(create_err.to_string()));
|
||||
}
|
||||
|
||||
let ds = self
|
||||
.open_dataset_head(dataset_uri, Some(target_branch))
|
||||
.await?;
|
||||
self.ensure_expected_version(&ds, table_key, source_version)?;
|
||||
Ok(ds)
|
||||
Ok(ForkOutcome::Created(ds))
|
||||
}
|
||||
|
||||
pub async fn scan_batches(&self, ds: &Dataset) -> Result<Vec<RecordBatch>> {
|
||||
|
|
@ -705,6 +724,36 @@ impl TableStore {
|
|||
Ok(IndexCoverage::Indexed)
|
||||
}
|
||||
|
||||
/// True if any non-system index on `ds` leaves at least one current
|
||||
/// fragment uncovered, i.e. rows that the index does not yet account for
|
||||
/// (appended after the index was built, or rewritten by compaction). Such
|
||||
/// fragments are scanned unindexed until a reindex (`optimize_indices`)
|
||||
/// folds them in. Returns false when every index covers every fragment, or
|
||||
/// when the table has no (non-system) indices to optimize. A `None`
|
||||
/// `fragment_bitmap` means Lance cannot report coverage for that index, so
|
||||
/// we do not treat it as uncovered (mirrors `key_column_index_coverage`).
|
||||
///
|
||||
/// Used by `optimize` to decide whether an otherwise-already-compacted
|
||||
/// table still has index work to do.
|
||||
pub async fn has_unindexed_fragments(ds: &Dataset) -> Result<bool> {
|
||||
let indices = ds
|
||||
.load_indices()
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let frag_ids: Vec<u32> = ds.fragments().iter().map(|f| f.id as u32).collect();
|
||||
for index in indices.iter() {
|
||||
if is_system_index(index) {
|
||||
continue;
|
||||
}
|
||||
if let Some(bitmap) = index.fragment_bitmap.as_ref() {
|
||||
if frag_ids.iter().any(|id| !bitmap.contains(*id)) {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub async fn count_rows(&self, ds: &Dataset, filter: Option<String>) -> Result<usize> {
|
||||
ds.count_rows(filter)
|
||||
.await
|
||||
|
|
@ -745,6 +794,8 @@ impl TableStore {
|
|||
let params = WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
ds.append(reader, Some(params))
|
||||
|
|
@ -764,6 +815,8 @@ impl TableStore {
|
|||
let params = WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
ds.append(reader, Some(params))
|
||||
|
|
@ -777,6 +830,8 @@ impl TableStore {
|
|||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
Dataset::write(reader, dataset_uri, Some(params))
|
||||
|
|
@ -867,6 +922,8 @@ impl TableStore {
|
|||
let params = WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
let transaction = InsertBuilder::new(Arc::new(ds.clone()))
|
||||
|
|
@ -1040,7 +1097,16 @@ impl TableStore {
|
|||
ds: Arc<Dataset>,
|
||||
transaction: Transaction,
|
||||
) -> Result<Dataset> {
|
||||
// Skip Lance's auto-cleanup hook on every commit. OmniGraph owns version
|
||||
// GC explicitly (optimize.rs::cleanup_all_tables); Lance's hook fires off
|
||||
// the *dataset's stored* `lance.auto_cleanup.*` config, which graphs
|
||||
// created before the v7 bump (6.0.1 defaulted auto_cleanup ON) still
|
||||
// carry — so `WriteParams::auto_cleanup = None` alone does NOT stop it on
|
||||
// upgraded graphs. Skipping here covers the staged write path (the main
|
||||
// data path) for new and legacy datasets alike, preventing Lance from
|
||||
// GC'ing versions the __manifest still pins for snapshots/time-travel.
|
||||
CommitBuilder::new(ds)
|
||||
.with_skip_auto_cleanup(true)
|
||||
.execute(transaction)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||
|
|
@ -1087,6 +1153,8 @@ impl TableStore {
|
|||
mode: WriteMode::Overwrite,
|
||||
enable_stable_row_ids: true,
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
let transaction = InsertBuilder::new(Arc::new(ds.clone()))
|
||||
|
|
@ -1503,6 +1571,8 @@ impl TableStore {
|
|||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
allow_external_blob_outside_bases: true,
|
||||
auto_cleanup: None,
|
||||
skip_auto_cleanup: true,
|
||||
..Default::default()
|
||||
};
|
||||
Dataset::write(reader, dataset_uri, Some(params))
|
||||
|
|
|
|||
|
|
@ -5,7 +5,9 @@ mod helpers;
|
|||
use fail::FailScenario;
|
||||
use futures::FutureExt;
|
||||
use omnigraph::db::Omnigraph;
|
||||
use omnigraph::error::{ManifestErrorKind, OmniError};
|
||||
use omnigraph::failpoints::ScopedFailPoint;
|
||||
use omnigraph::loader::LoadMode;
|
||||
|
||||
use helpers::recovery::{
|
||||
FollowUpMutation, RecoveryExpectation, TableExpectation, assert_post_recovery_invariants,
|
||||
|
|
@ -127,12 +129,12 @@ async fn branch_delete_partial_failure_converges_via_cleanup() {
|
|||
}
|
||||
|
||||
// Reusing a branch name whose delete left an orphaned fork (before `cleanup`
|
||||
// reconciles it) must fail with a clear, actionable error pointing at
|
||||
// `cleanup`, not the opaque `ExpectedVersionMismatch` that leaks from the fork
|
||||
// path. The recreate itself succeeds; the first write to the previously-forked
|
||||
// table is where the stale orphan collides.
|
||||
// reconciles it) must SELF-HEAL on the next write — the write reclaims the
|
||||
// manifest-unreferenced fork and re-forks, rather than wedging with "incomplete
|
||||
// prior delete; run cleanup". (This test was the inverse before the fork-as-
|
||||
// idempotent-reconcile fix; its flip is the signal the bug class is closed.)
|
||||
#[tokio::test]
|
||||
async fn recreate_over_orphaned_fork_before_cleanup_is_actionable() {
|
||||
async fn recreate_over_orphaned_fork_self_heals_without_cleanup() {
|
||||
let _scenario = FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap().to_string();
|
||||
|
|
@ -158,10 +160,10 @@ async fn recreate_over_orphaned_fork_before_cleanup_is_actionable() {
|
|||
}
|
||||
|
||||
// Recreate the name and write to the previously-forked table WITHOUT a
|
||||
// cleanup in between.
|
||||
// cleanup in between. The write must self-heal the stale orphan fork.
|
||||
main.branch_create("feature").await.unwrap();
|
||||
let mut feature2 = Omnigraph::open(&uri).await.unwrap();
|
||||
let err = helpers::mutate_branch(
|
||||
helpers::mutate_branch(
|
||||
&mut feature2,
|
||||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
|
|
@ -169,20 +171,83 @@ async fn recreate_over_orphaned_fork_before_cleanup_is_actionable() {
|
|||
&mixed_params(&[("$name", "Frank")], &[("$age", 41)]),
|
||||
)
|
||||
.await
|
||||
.expect_err("write should collide with the stale orphaned fork");
|
||||
.expect("recreate-over-orphan write must self-heal, not require cleanup");
|
||||
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("cleanup")
|
||||
&& (msg.contains("orphan") || msg.contains("incomplete prior delete")),
|
||||
"expected an actionable orphaned-fork error pointing at cleanup, got: {msg}"
|
||||
);
|
||||
assert!(
|
||||
!msg.contains("expected manifest table version"),
|
||||
"should not surface the opaque ExpectedVersionMismatch, got: {msg}"
|
||||
// The recreated branch forks FRESH from main: the deleted branch's Eve is
|
||||
// gone and only the new Frank is added on top of main's seed. A count of
|
||||
// main + 2 would mean Eve resurrected from the stale fork (the bug).
|
||||
let main_people = helpers::count_rows(&main, "node:Person").await;
|
||||
let feature_people = helpers::count_rows_branch(&feature2, "feature", "node:Person").await;
|
||||
assert_eq!(
|
||||
feature_people,
|
||||
main_people + 1,
|
||||
"self-healed feature must fork fresh from main (+Frank only); \
|
||||
main={main_people}, feature={feature_people} (main+2 ⇒ Eve resurrected)"
|
||||
);
|
||||
}
|
||||
|
||||
// The write-path orphan reclaim shares the same fresh-authority classifier as
|
||||
// cleanup. If that classifier is Indeterminate (transient read on a live
|
||||
// branch), the write must return a clear retryable authority-read conflict and
|
||||
// leave the ref in place. It must not squeeze the ambiguity through
|
||||
// ExpectedVersionMismatch with expected == actual, which lies about the cause.
|
||||
#[tokio::test]
|
||||
async fn recreate_over_orphaned_fork_reports_indeterminate_authority_read() {
|
||||
let _scenario = FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap().to_string();
|
||||
let db = helpers::init_and_load(&dir).await;
|
||||
db.branch_create("feature").await.unwrap();
|
||||
|
||||
let person_uri = node_table_uri(&uri, "Person");
|
||||
{
|
||||
let mut ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||
let base = ds.version().version;
|
||||
ds.create_branch("feature", base, None).await.unwrap();
|
||||
}
|
||||
|
||||
let row = r#"{"type":"Person","data":{"name":"Grace","age":37}}"#;
|
||||
{
|
||||
let _fp = ScopedFailPoint::new("classify.fresh_read", "return");
|
||||
let err = db
|
||||
.load_as("feature", None, row, LoadMode::Merge, None)
|
||||
.await
|
||||
.expect_err("indeterminate authority read must fail retryably");
|
||||
|
||||
match &err {
|
||||
OmniError::Manifest(manifest) => {
|
||||
assert_eq!(manifest.kind, ManifestErrorKind::Conflict);
|
||||
assert!(
|
||||
manifest.details.is_none(),
|
||||
"indeterminate authority read is not an expected-version mismatch: {manifest:?}"
|
||||
);
|
||||
}
|
||||
other => panic!("expected manifest conflict, got {other:?}"),
|
||||
}
|
||||
let message = err.to_string();
|
||||
assert!(
|
||||
message.contains("could not verify")
|
||||
&& message.contains("fresh manifest authority was unavailable")
|
||||
&& message.contains("refresh and retry"),
|
||||
"error should name the unavailable authority read, got: {message}"
|
||||
);
|
||||
assert!(
|
||||
!message.contains("expected manifest table version"),
|
||||
"indeterminate authority must not be reported as a version mismatch: {message}"
|
||||
);
|
||||
|
||||
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||
assert!(
|
||||
ds.list_branches().await.unwrap().contains_key("feature"),
|
||||
"ambiguous orphan status must leave the fork for a later retry"
|
||||
);
|
||||
}
|
||||
|
||||
db.load_as("feature", None, row, LoadMode::Merge, None)
|
||||
.await
|
||||
.expect("when fresh authority is available, the orphan is reclaimed and write converges");
|
||||
}
|
||||
|
||||
// cleanup is the guaranteed convergence backstop, so one table's transient
|
||||
// failure must not abort the whole sweep. Inject a one-shot version-GC failure
|
||||
// for a single table and assert: cleanup still succeeds, the failure is
|
||||
|
|
@ -330,6 +395,68 @@ async fn cleanup_reclaims_orphaned_commit_graph_branch() {
|
|||
}
|
||||
}
|
||||
|
||||
// `classify_fork_ref` returns `Indeterminate` when the fresh-authority read
|
||||
// fails on a LIVE branch — and a destructive caller must SKIP, never delete, on
|
||||
// that ambiguity. Here the reconciler has a genuine origin-2 orphan candidate
|
||||
// (a manifest-unreferenced Person fork on the live `feature` branch), but the
|
||||
// `classify.fresh_read` failpoint makes the fresh re-check fail: cleanup must
|
||||
// leave the ref in place (cannot confirm it is unreferenced), then reclaim it on
|
||||
// the next run once the read succeeds. This pins the Indeterminate arm and the
|
||||
// don't-destroy-on-ambiguity rule end-to-end through cleanup.
|
||||
#[tokio::test]
|
||||
async fn reconcile_skips_fork_when_fresh_recheck_is_unavailable_then_converges() {
|
||||
let _scenario = FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap().to_string();
|
||||
let mut db = helpers::init_and_load(&dir).await;
|
||||
db.branch_create("feature").await.unwrap();
|
||||
|
||||
// Forge a manifest-unreferenced Person fork on the live `feature` branch —
|
||||
// a genuine orphan the reconciler would normally reclaim.
|
||||
let person_uri = node_table_uri(&uri, "Person");
|
||||
{
|
||||
let mut ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||
let base = ds.version().version;
|
||||
ds.create_branch("feature", base, None).await.unwrap();
|
||||
assert!(
|
||||
ds.list_branches().await.unwrap().contains_key("feature"),
|
||||
"precondition: forged orphan fork present"
|
||||
);
|
||||
}
|
||||
|
||||
// With the fresh re-check failing, the fork's status is Indeterminate (the
|
||||
// branch is live but unreadable) → cleanup must SKIP it, not delete.
|
||||
{
|
||||
let _fp = ScopedFailPoint::new("classify.fresh_read", "return");
|
||||
db.cleanup(omnigraph::db::CleanupPolicyOptions {
|
||||
keep_versions: Some(1),
|
||||
older_than: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||
assert!(
|
||||
ds.list_branches().await.unwrap().contains_key("feature"),
|
||||
"reconcile must NOT delete a fork whose fresh re-check is inconclusive"
|
||||
);
|
||||
}
|
||||
|
||||
// Read succeeds now → cleanup confirms the orphan and reclaims it (converges).
|
||||
db.cleanup(omnigraph::db::CleanupPolicyOptions {
|
||||
keep_versions: Some(1),
|
||||
older_than: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
{
|
||||
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||
assert!(
|
||||
!ds.list_branches().await.unwrap().contains_key("feature"),
|
||||
"next cleanup (fresh read available) must reclaim the confirmed orphan"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// A branch_delete whose best-effort commit-graph reclaim fails leaves a
|
||||
// commit-graph "zombie" branch. Recreating that name must heal the zombie and
|
||||
// succeed (branch_create force-deletes a stale commit-graph ref since the
|
||||
|
|
@ -2619,69 +2746,66 @@ async fn finalize_publisher_residual_does_not_drift_untouched_tables() {
|
|||
}
|
||||
|
||||
/// Acceptance test: a stage-step failure in the staged-index path
|
||||
/// (`stage_create_btree_index` succeeded; `commit_staged` not yet
|
||||
/// called) leaves NO Lance-HEAD drift on the existing tables.
|
||||
/// Subsequent operations against those tables succeed without
|
||||
/// `ExpectedVersionMismatch`.
|
||||
/// (`stage_create_btree_index` succeeded; `commit_staged` not yet called)
|
||||
/// leaves NO Lance-HEAD drift, so other tables stay writable.
|
||||
///
|
||||
/// Path: `apply_schema(v1 → v2)` adds a new node type. The
|
||||
/// `added_tables` loop in `schema_apply` creates the empty dataset and
|
||||
/// then calls `build_indices_on_dataset_for_catalog` →
|
||||
/// `stage_and_commit_btree(..., &["id"])`. The failpoint fires
|
||||
/// between `stage_create_btree_index` and `commit_staged`, so the
|
||||
/// staged segments are written under `_indices/<uuid>/` but Lance HEAD
|
||||
/// on the new dataset is unchanged at v=1. The schema-apply lock
|
||||
/// branch is released by `apply_schema`'s outer match. Existing
|
||||
/// tables (e.g. `node:Person`) are completely untouched by the new
|
||||
/// node's added_tables iteration — they're outside the failed apply
|
||||
/// path entirely — and we assert that mutations against them continue
|
||||
/// to work.
|
||||
///
|
||||
/// The orphan empty dataset from the failed apply is acceptable
|
||||
/// residual: it's unreferenced by `__manifest` and will be reclaimed
|
||||
/// by `cleanup_old_versions` (or removed when a future apply at the
|
||||
/// same target path resolves the rename).
|
||||
/// Under iss-848 schema apply no longer builds indexes inline — the build
|
||||
/// happens in the reconciler (`ensure_indices`/`optimize`) and at load. So this
|
||||
/// fires the failpoint where it lives now: an `ensure_indices` build of a BTREE
|
||||
/// that a prior apply declared (`@index`) but deferred. The failpoint fires
|
||||
/// between `stage_create_btree_index` and `commit_staged`, so the staged
|
||||
/// segment is written under `_indices/<uuid>/` but `node:Person`'s Lance HEAD is
|
||||
/// unchanged. `ensure_indices` fails and its EnsureIndices sidecar pins only
|
||||
/// Person at NoMovement (a clean no-op on the next open). A write to a
|
||||
/// different, unpinned table (`node:Company`) is unaffected: mutations/loads run
|
||||
/// a roll-forward-only heal and proceed — they do not refuse on a pending
|
||||
/// sidecar the way `optimize`/`repair` do — so the write succeeds with no drift.
|
||||
#[tokio::test]
|
||||
async fn ensure_indices_stage_btree_failure_leaves_existing_tables_writable() {
|
||||
let _scenario = FailScenario::setup();
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap().to_string();
|
||||
|
||||
// Init with TEST_SCHEMA which declares Person + Knows. Indices on
|
||||
// those tables get built during init.
|
||||
let mut db = Omnigraph::init(&uri, helpers::TEST_SCHEMA).await.unwrap();
|
||||
|
||||
// Apply a schema that adds a new node type. The added_tables loop
|
||||
// will hit the failpoint between stage and commit on the new
|
||||
// node:Project table's btree-on-id build. (TEST_SCHEMA already
|
||||
// has Person + Company + Knows + WorksAt — pick a name that isn't
|
||||
// already declared.)
|
||||
let extended_schema = format!(
|
||||
"{}\nnode Project {{ name: String @key }}\n",
|
||||
helpers::TEST_SCHEMA
|
||||
);
|
||||
|
||||
{
|
||||
let _failpoint =
|
||||
ScopedFailPoint::new("ensure_indices.post_stage_pre_commit_btree", "return");
|
||||
let err = db.apply_schema(&extended_schema).await.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("ensure_indices.post_stage_pre_commit_btree"),
|
||||
"schema apply should fail with the synthetic failpoint error, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
// Existing tables stayed at their pre-apply versions; subsequent
|
||||
// mutations against them succeed (no Lance-HEAD drift).
|
||||
// Seed a Person row — the load builds Person's id BTREE + name FTS.
|
||||
mutate_main(
|
||||
&mut db,
|
||||
helpers::MUTATION_QUERIES,
|
||||
"insert_person",
|
||||
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
|
||||
&mixed_params(&[("$name", "Alice")], &[("$age", 30)]),
|
||||
)
|
||||
.await
|
||||
.expect("Person mutation must succeed after the failed schema apply — existing tables are not drifted");
|
||||
.expect("seed Person");
|
||||
|
||||
// Add `@index` on `age`: schema apply records the intent but defers the
|
||||
// physical build (iss-848), so the BTREE on `age` is unbuilt.
|
||||
let indexed_schema = helpers::TEST_SCHEMA.replace("age: I32?", "age: I32? @index");
|
||||
db.apply_schema(&indexed_schema)
|
||||
.await
|
||||
.expect("adding an @index is metadata-only and succeeds");
|
||||
|
||||
{
|
||||
// ensure_indices builds the deferred `age` BTREE on Person; the failpoint
|
||||
// fires between stage and commit, so Person's Lance HEAD does not move.
|
||||
let _failpoint =
|
||||
ScopedFailPoint::new("ensure_indices.post_stage_pre_commit_btree", "return");
|
||||
let err = db.ensure_indices().await.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("ensure_indices.post_stage_pre_commit_btree"),
|
||||
"ensure_indices should fail with the synthetic failpoint error, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
// A different, unpinned table is untouched by the failed index build.
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
load_jsonl(
|
||||
&mut db,
|
||||
r#"{"type": "Company", "data": {"name": "Acme"}}"#,
|
||||
LoadMode::Append,
|
||||
)
|
||||
.await
|
||||
.expect("Company write on a table untouched by the failed ensure_indices should succeed");
|
||||
}
|
||||
|
||||
fn assert_no_staging_files(graph: &std::path::Path) {
|
||||
|
|
|
|||
|
|
@ -54,6 +54,19 @@ pub async fn init_and_load(dir: &tempfile::TempDir) -> Omnigraph {
|
|||
db
|
||||
}
|
||||
|
||||
/// On-disk Lance dataset URI for a node type, mirroring the engine's
/// `nodes/{fnv1a(type)}` layout. Used by tests that reach the raw Lance
/// dataset to forge or inspect branch state. (Local copies exist in
/// `failpoints.rs` / `maintenance.rs`; this is the shared one for new tests.)
///
/// `root` may carry any number of trailing `/`; they are stripped before the
/// `/nodes/<hash>` suffix is appended. The hash is the 64-bit FNV-1a digest of
/// the UTF-8 bytes of `type_name`, rendered as 16 lowercase hex digits.
pub fn node_table_uri(root: &str, type_name: &str) -> String {
    // Standard 64-bit FNV-1a parameters.
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply (wrapping) by the prime.
    let hash = type_name.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    format!("{}/nodes/{hash:016x}", root.trim_end_matches('/'))
}
|
||||
|
||||
/// Read all rows from a sub-table by table_key.
|
||||
pub async fn read_table(db: &Omnigraph, table_key: &str) -> Vec<RecordBatch> {
|
||||
let snap = snapshot_main(db).await.unwrap();
|
||||
|
|
|
|||
|
|
@ -32,10 +32,14 @@ use lance::dataset::builder::DatasetBuilder;
|
|||
use lance::dataset::optimize::{CompactionOptions, compact_files};
|
||||
use lance::dataset::transaction::Operation;
|
||||
use lance::dataset::write::delete::DeleteResult;
|
||||
use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams};
|
||||
use lance::dataset::{
|
||||
CommitBuilder, InsertBuilder, MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode,
|
||||
WriteParams,
|
||||
};
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance_file::version::LanceFileVersion;
|
||||
use lance_index::IndexType;
|
||||
use lance_index::optimize::OptimizeOptions;
|
||||
use lance_index::scalar::ScalarIndexParams;
|
||||
use lance_namespace::LanceNamespace;
|
||||
use lance_table::io::commit::ManifestNamingScheme;
|
||||
|
|
@ -541,3 +545,449 @@ async fn fragment_deletion_metadata_is_available() {
|
|||
per-fragment deletions and would need to read the deletion vector.",
|
||||
);
|
||||
}
|
||||
|
||||
// --- Guard 14: Dataset::optimize_indices signature ----------------------------
|
||||
//
|
||||
// `db/omnigraph/optimize.rs::optimize_one_table` calls
|
||||
// `ds.optimize_indices(&OptimizeOptions::default())` (via `DatasetIndexExt`) to
|
||||
// fold appended/compacted fragments back into existing indexes. If Lance
|
||||
// changes the receiver, the options type, or the return shape, this fails to
|
||||
// compile. Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_optimize_indices_signature() -> lance::Result<()> {
|
||||
let mut ds: Dataset = unimplemented!();
|
||||
let options = OptimizeOptions::default();
|
||||
// `&mut self`, `&OptimizeOptions`, returns `Result<()>` (mutates in place
|
||||
// and commits — there is no uncommitted variant in this Lance, which is why
|
||||
// optimize treats it as an inline-commit residual under a recovery sidecar).
|
||||
let _: () = ds.optimize_indices(&options).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 15: optimize_indices extends fragment coverage ----------------------
|
||||
//
|
||||
// PR3's reindex assumes `optimize_indices` folds fragments appended AFTER an
|
||||
// index was built into that index (incremental merge, not retrain). This pins
|
||||
// that Lance behavior at the surface layer so a regression turns red here, the
|
||||
// first smoke check on a Lance bump, before the slower engine suite.
|
||||
|
||||
#[tokio::test]
|
||||
async fn optimize_indices_extends_fragment_coverage() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().join("guard_optimize_indices.lance");
|
||||
let uri = uri.to_str().unwrap();
|
||||
|
||||
// Fragment 0: alice, bob. Build a BTREE over `value` covering only it.
|
||||
let mut ds = fresh_dataset(uri).await;
|
||||
ds.create_index_builder(&["value"], IndexType::BTree, &ScalarIndexParams::default())
|
||||
.replace(true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Append a second fragment the existing index does not cover.
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("value", DataType::Int32, false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringArray::from(vec!["carol"])),
|
||||
Arc::new(Int32Array::from(vec![3])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
..Default::default()
|
||||
};
|
||||
Dataset::write(reader, uri, Some(params)).await.unwrap();
|
||||
|
||||
let mut ds = Dataset::open(uri).await.unwrap();
|
||||
assert!(
|
||||
value_index_uncovered_count(&ds).await > 0,
|
||||
"appended fragment should be uncovered by the BTREE before optimize_indices"
|
||||
);
|
||||
|
||||
ds.optimize_indices(&OptimizeOptions::default())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
value_index_uncovered_count(&ds).await,
|
||||
0,
|
||||
"optimize_indices must fold the appended fragment into the existing index \
|
||||
(incremental coverage); if this regresses, PR3's reindex no longer keeps \
|
||||
coverage current — revisit db/omnigraph/optimize.rs and docs/dev/lance.md."
|
||||
);
|
||||
}
|
||||
|
||||
/// Count current fragments not covered by the single-column `value` BTREE —
|
||||
/// mirrors `TableStore::has_unindexed_fragments` (load_indices +
|
||||
/// `fragment_bitmap.contains`), pinned by Guard 11.
|
||||
async fn value_index_uncovered_count(ds: &Dataset) -> usize {
|
||||
let indices = ds.load_indices().await.unwrap();
|
||||
let frag_ids: Vec<u32> = ds.fragments().iter().map(|f| f.id as u32).collect();
|
||||
let value_fid = ds.schema().field("value").unwrap().id;
|
||||
for index in indices.iter() {
|
||||
if index.fields.len() == 1 && index.fields[0] == value_fid {
|
||||
if let Some(bitmap) = index.fragment_bitmap.as_ref() {
|
||||
return frag_ids.iter().filter(|id| !bitmap.contains(**id)).count();
|
||||
}
|
||||
}
|
||||
}
|
||||
// No `value` index found — treat as fully uncovered so a missing index
|
||||
// is never mistaken for full coverage.
|
||||
frag_ids.len()
|
||||
}
|
||||
|
||||
// --- Guard 16: scalar index use requires a literal matching the column type ---
|
||||
//
|
||||
// Pins the substrate behavior the pushdown literal-coercion fix relies on
|
||||
// (`query.rs::literal_to_typed_expr`): Lance uses the BTREE only when the filter
|
||||
// is `column OP literal` with a matching type. A width-mismatched literal makes
|
||||
// DataFusion widen and cast the COLUMN (`CAST(n32 AS Int64)`), which drops the
|
||||
// scalar index and full-scans. Temporal columns are immune (DataFusion casts the
|
||||
// Utf8 LITERAL to the date type, not the column). If a Lance/DataFusion bump
|
||||
// changes either coercion direction, this turns red — re-validate the fix.
|
||||
#[tokio::test]
|
||||
async fn scalar_index_use_requires_matched_literal_type() {
|
||||
use datafusion::physical_plan::displayable;
|
||||
use datafusion::prelude::{col, lit};
|
||||
use datafusion::scalar::ScalarValue;
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().join("probe_literal_type.lance");
|
||||
let uri = uri.to_str().unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("n32", DataType::Int32, false),
|
||||
Field::new("d32", DataType::Date32, false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
|
||||
Arc::new(Int32Array::from(vec![1, 5, 9, 13])),
|
||||
Arc::new(arrow_array::Date32Array::from(vec![19000, 19723, 20000, 20500])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
..Default::default()
|
||||
};
|
||||
let mut ds = Dataset::write(reader, uri, Some(params)).await.unwrap();
|
||||
for c in ["n32", "d32"] {
|
||||
ds.create_index_builder(&[c], IndexType::BTree, &ScalarIndexParams::default())
|
||||
.replace(true)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn plan_str(ds: &Dataset, filter: datafusion::prelude::Expr) -> String {
|
||||
let mut scanner = ds.scan();
|
||||
scanner.filter_expr(filter);
|
||||
let plan = scanner.create_plan().await.unwrap();
|
||||
format!("{}", displayable(plan.as_ref()).indent(true))
|
||||
}
|
||||
|
||||
// (label, filter, expect_index_used)
|
||||
let cases = [
|
||||
("n32 = 5i32 (matched Int32)", col("n32").eq(lit(5i32)), true),
|
||||
("n32 = 5i64 (widened Int64)", col("n32").eq(lit(5i64)), false),
|
||||
(
|
||||
"d32 = Date32 (matched)",
|
||||
col("d32").eq(lit(ScalarValue::Date32(Some(19723)))),
|
||||
true,
|
||||
),
|
||||
(
|
||||
"d32 = '2024-01-01' (Utf8 vs Date32)",
|
||||
col("d32").eq(lit("2024-01-01")),
|
||||
true,
|
||||
),
|
||||
];
|
||||
|
||||
for (label, filter, expect_index) in cases {
|
||||
let s = plan_str(&ds, filter).await;
|
||||
let uses_index = s.contains("ScalarIndexQuery");
|
||||
assert_eq!(
|
||||
uses_index, expect_index,
|
||||
"[{label}] expected scalar-index use = {expect_index}, got {uses_index}.\n\
|
||||
A change here means Lance/DataFusion shifted its coercion or index \
|
||||
pushdown; re-validate query.rs::literal_to_typed_expr.\nplan:\n{s}"
|
||||
);
|
||||
}
|
||||
|
||||
// The widened case must show the index-defeating column CAST (the precise
|
||||
// shape the fix avoids by coercing the literal to the column type).
|
||||
let widened = plan_str(&ds, col("n32").eq(lit(5i64))).await;
|
||||
assert!(
|
||||
widened.contains("CAST(n32 AS Int64)"),
|
||||
"expected a column-side cast in the widened plan, got:\n{widened}"
|
||||
);
|
||||
}
|
||||
|
||||
// --- Guard 17: BTREE scalar-index range-boundary correctness (lance#6796) -----
|
||||
//
|
||||
// lance#6796 (issue #6792) fixed a BTREE range-query bound-inclusiveness bug:
|
||||
// `price <= 10 AND price > 5` returned the wrong boundary row (5.0 instead of
|
||||
// 10.0). OmniGraph today builds BTREE only on string `@key` columns and queries
|
||||
// them by equality/IN, not range, so its current patterns do not hit this — the
|
||||
// guard protects any future BTREE-range path. It reproduces the exact #6792 shape
|
||||
// (5 rows + an explicit BTREE drives the index path even on tiny data, per the
|
||||
// upstream repro) and pins the corrected inclusive-`<=` / exclusive-`>` semantics.
|
||||
#[tokio::test]
|
||||
async fn btree_range_query_boundary_is_correct() {
|
||||
use arrow_array::Float64Array;
|
||||
use futures::TryStreamExt;
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().join("guard17.lance");
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("price", DataType::Float64, false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])),
|
||||
Arc::new(Float64Array::from(vec![1.0, 5.0, 10.0, 15.0, 20.0])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
..Default::default()
|
||||
};
|
||||
let mut ds = Dataset::write(reader, uri.to_str().unwrap(), Some(params))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Build the BTREE on the numeric column so the range filter resolves through
|
||||
// the scalar index (the path lance#6796 fixed).
|
||||
ds.create_index_builder(&["price"], IndexType::BTree, &ScalarIndexParams::default())
|
||||
.replace(true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut scanner = ds.scan();
|
||||
scanner.filter("price <= 10.0 AND price > 5.0").unwrap();
|
||||
let batches: Vec<RecordBatch> = scanner
|
||||
.try_into_stream()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect()
|
||||
.await
|
||||
.unwrap();
|
||||
let mut got: Vec<f64> = Vec::new();
|
||||
for b in &batches {
|
||||
let col = b
|
||||
.column_by_name("price")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<Float64Array>()
|
||||
.unwrap();
|
||||
for i in 0..col.len() {
|
||||
got.push(col.value(i));
|
||||
}
|
||||
}
|
||||
got.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
assert_eq!(
|
||||
got,
|
||||
vec![10.0],
|
||||
"BTREE range `price <= 10 AND price > 5` must return exactly [10.0] \
|
||||
(lance#6796 / issue #6792 boundary fix); got {got:?}. If this regressed, \
|
||||
Lance reintroduced the range-bound inclusiveness bug.",
|
||||
);
|
||||
}
|
||||
|
||||
// --- Guard 18: skip_auto_cleanup suppresses version GC (lance#6755 / PR #229) --
|
||||
//
|
||||
// After the v7 bump, OmniGraph relies on `CommitBuilder::with_skip_auto_cleanup`
|
||||
// (`commit_staged`) and `MergeInsertBuilder::skip_auto_cleanup` (the `__manifest`
|
||||
// publisher) to stop Lance's per-commit auto-cleanup hook from GC'ing versions
|
||||
// the `__manifest` pins for snapshots/time-travel. This is load-bearing for
|
||||
// graphs created BEFORE the bump: 6.0.1 defaulted `WriteParams::auto_cleanup` ON,
|
||||
// so those datasets carry `lance.auto_cleanup.*` config that `auto_cleanup = None`
|
||||
// on new writes cannot retroactively clear — only the per-commit skip stops it.
|
||||
//
|
||||
// Pins both halves: WITHOUT the skip the aggressive config GCs v1; WITH the skip
|
||||
// (the exact call `commit_staged` makes) v1 survives.
|
||||
#[tokio::test]
|
||||
async fn skip_auto_cleanup_suppresses_version_gc() {
|
||||
use std::collections::HashMap;
|
||||
|
||||
// The cleanup config 6.0.1 stored by default, made aggressive: fire on every
|
||||
// commit, delete anything older than now.
|
||||
async fn set_legacy_cleanup(ds: &mut Dataset) {
|
||||
let mut cfg = HashMap::new();
|
||||
cfg.insert("lance.auto_cleanup.interval".to_string(), "1".to_string());
|
||||
cfg.insert("lance.auto_cleanup.older_than".to_string(), "0ms".to_string());
|
||||
ds.update_config(cfg).await.unwrap();
|
||||
}
|
||||
fn row(i: i32) -> (Arc<Schema>, RecordBatch) {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("value", DataType::Int32, false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringArray::from(vec![format!("k{i}")])),
|
||||
Arc::new(Int32Array::from(vec![i])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
(schema, batch)
|
||||
}
|
||||
|
||||
// Negative control: WITHOUT skip, the legacy config GCs the pinned v1.
|
||||
let ctrl = tempfile::tempdir().unwrap();
|
||||
let curi = ctrl.path().join("g18_ctrl.lance");
|
||||
let curi = curi.to_str().unwrap();
|
||||
let mut ds = fresh_dataset(curi).await;
|
||||
let v1 = ds.version().version;
|
||||
set_legacy_cleanup(&mut ds).await;
|
||||
for i in 0..5 {
|
||||
let (schema, batch) = row(i);
|
||||
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||
ds.append(
|
||||
reader,
|
||||
Some(WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
..Default::default()
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
assert!(
|
||||
ds.checkout_version(v1).await.is_err(),
|
||||
"negative control: without skip_auto_cleanup, the legacy auto_cleanup \
|
||||
config should have GC'd pinned v{v1}; if this fails the config is not \
|
||||
firing and the positive assertion below proves nothing."
|
||||
);
|
||||
|
||||
// The guarantee: WITH the per-commit skip, v1 survives. Mirrors
|
||||
// `TableStore::commit_staged` (InsertBuilder::execute_uncommitted +
|
||||
// CommitBuilder::with_skip_auto_cleanup(true)).
|
||||
let keep = tempfile::tempdir().unwrap();
|
||||
let kuri = keep.path().join("g18.lance");
|
||||
let kuri = kuri.to_str().unwrap();
|
||||
let mut ds = fresh_dataset(kuri).await;
|
||||
let v1 = ds.version().version;
|
||||
set_legacy_cleanup(&mut ds).await;
|
||||
for i in 0..5 {
|
||||
let (_schema, batch) = row(i);
|
||||
let tx = InsertBuilder::new(Arc::new(ds.clone()))
|
||||
.with_params(&WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
..Default::default()
|
||||
})
|
||||
.execute_uncommitted(vec![batch])
|
||||
.await
|
||||
.unwrap();
|
||||
ds = CommitBuilder::new(Arc::new(ds.clone()))
|
||||
.with_skip_auto_cleanup(true)
|
||||
.execute(tx)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
assert!(
|
||||
ds.checkout_version(v1).await.is_ok(),
|
||||
"v{v1} was GC'd despite CommitBuilder::with_skip_auto_cleanup(true) — the \
|
||||
commit_staged / publisher skip is the only thing protecting \
|
||||
__manifest-pinned versions on upgraded (pre-bump) graphs."
|
||||
);
|
||||
}
|
||||
|
||||
// --- Guard 19: unenforced primary key is immutable once set (lance v7) ------
|
||||
//
|
||||
// Lance 7 (`lance::dataset::transaction`) makes the unenforced PK reserved:
|
||||
// once `lance-schema:unenforced-primary-key` is set on a field, any later write
|
||||
// that touches that reserved key — even re-applying the SAME value — errors
|
||||
// "the unenforced primary key is a reserved key and cannot be changed once set".
|
||||
//
|
||||
// This is the upstream behavior that broke
|
||||
// `db/manifest/migrations.rs::migrate_v1_to_v2`'s crash-idempotency: a
|
||||
// pre-v0.4.0 graph that crashed after the field-set but before the stamp bump
|
||||
// re-enters the migration with the PK already present, and on Lance 6 the
|
||||
// re-apply was a no-op. The migration now guards the set on the manifest's
|
||||
// unenforced-PK field (`["object_id"]` → no-op, `[]` → set, anything else →
|
||||
// loud refusal). If Lance ever relaxes immutability (a re-set becomes a no-op
|
||||
// again), this guard goes red — revisit whether that field-guard is still
|
||||
// needed, and re-pin docs/dev/lance.md.
|
||||
#[tokio::test]
|
||||
async fn unenforced_primary_key_is_immutable_once_set() {
|
||||
use lance::datatypes::LANCE_UNENFORCED_PRIMARY_KEY;
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().join("g19.lance");
|
||||
let mut ds = fresh_dataset(uri.to_str().unwrap()).await;
|
||||
|
||||
// Precondition: no unenforced PK yet (mirrors a genuine pre-v0.4.0 manifest).
|
||||
assert!(
|
||||
ds.schema().unenforced_primary_key().is_empty(),
|
||||
"fresh dataset should carry no unenforced primary key"
|
||||
);
|
||||
|
||||
// First set succeeds — the genuine pre-v0.4.0 migration path. (Discard the
|
||||
// returned &Schema so the &mut borrow ends before the next call.)
|
||||
ds.update_field_metadata()
|
||||
.update(
|
||||
"id",
|
||||
[(LANCE_UNENFORCED_PRIMARY_KEY.to_string(), "true".to_string())],
|
||||
)
|
||||
.unwrap()
|
||||
.await
|
||||
.unwrap();
|
||||
let pk: Vec<String> = ds
|
||||
.schema()
|
||||
.unenforced_primary_key()
|
||||
.iter()
|
||||
.map(|field| field.name.clone())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
pk,
|
||||
["id"],
|
||||
"first set should install `id` as the unenforced PK"
|
||||
);
|
||||
|
||||
// Re-applying the SAME reserved key must still error. Normalize the sync
|
||||
// validation stage (`.update()`) and the async commit stage (`.await`) into
|
||||
// one Result so the actionable diagnostic below fires whichever stage Lance
|
||||
// enforces immutability at — and even if a future Lance relaxes it to `Ok`.
|
||||
// Bare `.unwrap()` / `.unwrap_err()` would instead panic with a generic
|
||||
// message in those cases, defeating the guard's purpose.
|
||||
let outcome: lance::Result<()> = match ds.update_field_metadata().update(
|
||||
"id",
|
||||
[(LANCE_UNENFORCED_PRIMARY_KEY.to_string(), "true".to_string())],
|
||||
) {
|
||||
Ok(builder) => builder.await.map(|_| ()),
|
||||
Err(e) => Err(e),
|
||||
};
|
||||
assert!(
|
||||
matches!(&outcome, Err(e) if e.to_string().contains("cannot be changed once set")),
|
||||
"Lance no longer rejects re-setting the unenforced PK as immutable \
|
||||
(got: {outcome:?}); immutability relaxed or moved off the commit path \
|
||||
— revisit migrate_v1_to_v2's field-guard and re-pin docs/dev/lance.md."
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -191,14 +191,16 @@ async fn lance_merge_insert_new_row_stamps_created_at_version() {
|
|||
let eve = rows.iter().find(|r| r.0 == "eve").unwrap();
|
||||
eprintln!("Eve: created_at_version={}, v1={}, v2={}", eve.2, v1, v2);
|
||||
|
||||
// Lance behavior (as of 3.0.1): merge_insert stamps new rows with
|
||||
// _row_created_at_version = dataset_creation_version (v1), NOT the
|
||||
// merge_insert commit version (v2). This is why Omnigraph's change
|
||||
// detection uses _row_last_updated_at_version + ID set membership
|
||||
// to classify inserts vs updates, not _row_created_at_version alone.
|
||||
// Lance behavior (7.0.0, lance#6774): merge_insert stamps new INSERT
|
||||
// rows with _row_created_at_version = the commit version (v2). Earlier
|
||||
// Lance used a fallback of the dataset creation version; #6774 changed
|
||||
// it so created_at reflects when the row actually entered the dataset.
|
||||
// Omnigraph's change detection keys on _row_last_updated_at_version + ID
|
||||
// set membership (see changes/mod.rs), so this stamping change leaves
|
||||
// insert-vs-update classification unaffected.
|
||||
assert_eq!(
|
||||
eve.2, v1,
|
||||
"Lance merge_insert stamps new rows with created_at = dataset creation version, not commit version"
|
||||
eve.2, v2,
|
||||
"Lance merge_insert stamps new rows with created_at = commit version (lance#6774)"
|
||||
);
|
||||
assert_eq!(
|
||||
eve.3, v2,
|
||||
|
|
@ -258,11 +260,24 @@ async fn lance_merge_insert_update_preserves_created_at_version() {
|
|||
assert_eq!(alice.2, v1, "alice created_at should still be v1");
|
||||
assert_eq!(alice.3, v1, "alice updated_at should still be v1");
|
||||
|
||||
// Bob: updated via merge_insert
|
||||
// created_at should be preserved (v1), updated_at should be bumped (v2)
|
||||
// Bob: updated via merge_insert.
|
||||
eprintln!(
|
||||
"Bob: created_at={}, updated_at={}, v1={}, v2={}",
|
||||
bob.2, bob.3, v1, v2
|
||||
);
|
||||
assert_eq!(bob.1, 99, "bob's value should be updated to 99");
|
||||
// created_at is preserved across an UPDATE (lance#6774 only changed the
|
||||
// INSERT-row stamping), which is what this test's name promises.
|
||||
assert_eq!(
|
||||
bob.2, v1,
|
||||
"bob created_at must be preserved across a merge_insert UPDATE"
|
||||
);
|
||||
// updated_at bumps to the commit version on UPDATE — the change-feed
|
||||
// invariant OmniGraph's insert/update classification relies on
|
||||
// (changes/mod.rs keys on _row_last_updated_at_version). If this regresses,
|
||||
// the diff/change feed silently misses updates.
|
||||
assert_eq!(
|
||||
bob.3, v2,
|
||||
"bob updated_at must bump to the commit version on a merge_insert UPDATE"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ node Metric {
|
|||
name: String @key
|
||||
score: F64?
|
||||
ratio: F32?
|
||||
count: I32?
|
||||
active: Bool?
|
||||
born: Date?
|
||||
seen: DateTime?
|
||||
|
|
@ -26,10 +27,10 @@ node Metric {
|
|||
"#;
|
||||
|
||||
// Seeds partition every predicate, so a dropped filter returns all 4 rows.
|
||||
const DATA: &str = r#"{"type":"Metric","data":{"name":"m1","score":2.5,"ratio":0.5,"active":true,"born":"2024-06-01","seen":"2024-06-01T12:00:00Z"}}
|
||||
{"type":"Metric","data":{"name":"m2","score":1.0,"ratio":0.25,"active":false,"born":"2023-01-01","seen":"2023-01-01T00:00:00Z"}}
|
||||
{"type":"Metric","data":{"name":"m3","score":3.0,"ratio":0.75,"active":true,"born":"2025-01-01","seen":"2025-01-01T00:00:00Z"}}
|
||||
{"type":"Metric","data":{"name":"m4","score":0.5,"ratio":0.1,"active":false,"born":"2022-12-31","seen":"2022-01-01T00:00:00Z"}}"#;
|
||||
const DATA: &str = r#"{"type":"Metric","data":{"name":"m1","score":2.5,"ratio":0.5,"count":1,"active":true,"born":"2024-06-01","seen":"2024-06-01T12:00:00Z"}}
|
||||
{"type":"Metric","data":{"name":"m2","score":1.0,"ratio":0.25,"count":2,"active":false,"born":"2023-01-01","seen":"2023-01-01T00:00:00Z"}}
|
||||
{"type":"Metric","data":{"name":"m3","score":3.0,"ratio":0.75,"count":3,"active":true,"born":"2025-01-01","seen":"2025-01-01T00:00:00Z"}}
|
||||
{"type":"Metric","data":{"name":"m4","score":0.5,"ratio":0.1,"count":4,"active":false,"born":"2022-12-31","seen":"2022-01-01T00:00:00Z"}}"#;
|
||||
|
||||
async fn metric_db(dir: &tempfile::TempDir) -> Omnigraph {
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
|
@ -67,6 +68,50 @@ query inline() { match { $m: Metric { score: 3.0 } } return { $m.name } }
|
|||
assert_eq!(sorted_metric_names(&mut db, q, "inline").await, vec!["m3"]);
|
||||
}
|
||||
|
||||
// Inline-binding equality is the Lance-pushdown arm. With the literal coerced to
|
||||
// the column's exact Arrow type, a narrow-numeric column (I32) and an F32 column
|
||||
// must still select the right rows — the coercion changes the literal's type, not
|
||||
// the result set. (The index-use win this enables is pinned at the Lance-surface
|
||||
// layer by `lance_surface_guards::scalar_index_use_requires_matched_literal_type`.)
|
||||
#[tokio::test]
|
||||
async fn int_and_f32_literal_pushdown_coercion() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = metric_db(&dir).await;
|
||||
let q = r#"
|
||||
query count_eq() { match { $m: Metric { count: 2 } } return { $m.name } }
|
||||
query ratio_eq() { match { $m: Metric { ratio: 0.25 } } return { $m.name } }
|
||||
query count_ge() { match { $m: Metric $m.count >= 3 } return { $m.name } }
|
||||
"#;
|
||||
// I32 column, integer literal coerced Int64 -> Int32: count == 2 is m2 only.
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "count_eq").await, vec!["m2"]);
|
||||
// F32 column, float literal coerced Float64 -> Float32: ratio == 0.25 is m2.
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "ratio_eq").await, vec!["m2"]);
|
||||
// Range on the I32 column: count 3,4 >= 3 -> m3, m4 (coercion is op-independent).
|
||||
assert_eq!(
|
||||
sorted_metric_names(&mut db, q, "count_ge").await,
|
||||
vec!["m3", "m4"]
|
||||
);
|
||||
}
|
||||
|
||||
// A fractional float against an integer column must not be truncated by the
|
||||
// pushdown coercion (`2.7 -> 2` would wrongly match the count=2 row). The
|
||||
// lossless guard falls back to the natural Float64 literal, so `count = 2.7`
|
||||
// matches no integer and returns no rows.
|
||||
#[tokio::test]
|
||||
async fn fractional_float_equality_on_int_column_returns_no_rows() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = metric_db(&dir).await;
|
||||
let q = r#"
|
||||
query count_frac() { match { $m: Metric { count: 2.7 } } return { $m.name } }
|
||||
"#;
|
||||
assert!(
|
||||
sorted_metric_names(&mut db, q, "count_frac")
|
||||
.await
|
||||
.is_empty(),
|
||||
"count = 2.7 must match no integer rows (no truncation to count = 2)"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bool_literal_filters_execute() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -88,9 +133,15 @@ async fn date_and_datetime_literal_filters_execute() {
|
|||
let q = r#"
|
||||
query born_ge() { match { $m: Metric $m.born >= date("2024-01-01") } return { $m.name } }
|
||||
query seen_lt() { match { $m: Metric $m.seen < datetime("2024-01-01T00:00:00Z") } return { $m.name } }
|
||||
query born_eq() { match { $m: Metric { born: date("2024-06-01") } } return { $m.name } }
|
||||
query seen_eq() { match { $m: Metric { seen: datetime("2024-06-01T12:00:00Z") } } return { $m.name } }
|
||||
"#;
|
||||
// born: m1 2024-06, m3 2025 >= 2024-01-01
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "born_ge").await, vec!["m1", "m3"]);
|
||||
// seen: m2 2023, m4 2022 < 2024-01-01
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "seen_lt").await, vec!["m2", "m4"]);
|
||||
// Inline-binding equality exercises the Lance-pushdown arm with a typed
|
||||
// Date32/Date64 literal: the epoch conversion must select exactly m1.
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "born_eq").await, vec!["m1"]);
|
||||
assert_eq!(sorted_metric_names(&mut db, q, "seen_eq").await, vec!["m1"]);
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue