Merge branch 'main' into andrew/datafusion-future-improvements-doc

This commit is contained in:
Andrew Altshuler 2026-06-06 19:36:35 +03:00 committed by GitHub
commit 2d6591dfb3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
134 changed files with 16450 additions and 2643 deletions

14
.github/CODEOWNERS vendored
View file

@ -8,11 +8,11 @@
# CI fails if this file drifts from its source, and rejects PRs that
# edit this file directly without also editing the yml.
* @aaltshuler
* @ragnorc
crates/** @aaltshuler
docs/** @aaltshuler @ragnorc
README.md @aaltshuler @ragnorc
AGENTS.md @aaltshuler @ragnorc
CLAUDE.md @aaltshuler @ragnorc
SECURITY.md @aaltshuler @ragnorc
crates/** @ragnorc
docs/** @ragnorc
README.md @ragnorc
AGENTS.md @ragnorc
CLAUDE.md @ragnorc
SECURITY.md @ragnorc

View file

@ -7,8 +7,8 @@
"Check AGENTS.md Links",
"Test Workspace",
"Test omnigraph-server --features aws",
"CODEOWNERS / drift",
"CODEOWNERS / noedit"
"CODEOWNERS matches source",
"CODEOWNERS not hand-edited"
]
},
"enforce_admins": false,

View file

@ -19,18 +19,15 @@ roles:
engineering:
description: >
All production code under crates/**. Engine, CLI, server,
compiler. Single owner; review must come from this person.
compiler.
members:
- aaltshuler
- ragnorc
docs:
description: >
Documentation under docs/**, plus repo-level docs (README.md,
AGENTS.md, CLAUDE.md symlink, SECURITY.md). Either named member
can approve; both are listed so reviews can route to whoever is
available.
AGENTS.md, CLAUDE.md symlink, SECURITY.md).
members:
- aaltshuler
- ragnorc
# Path → role mapping. GitHub CODEOWNERS uses "last match wins"

View file

@ -1,10 +1,14 @@
#!/usr/bin/env python3
"""Render .github/CODEOWNERS from .github/codeowners-roles.yml.
"""Render .github/CODEOWNERS and the ownership tables in
docs/dev/codeowners.md from .github/codeowners-roles.yml.
The yml is the source of truth — editing CODEOWNERS directly is
rejected by CI (see .github/workflows/codeowners.yml). This script
expands the role-based yml into the flat path→owners format GitHub
expects.
The yml is the source of truth. This script expands the role-based yml
into (1) the flat path→owners format GitHub expects in
`.github/CODEOWNERS`, and (2) the "who owns what" markdown tables spliced
between the generated-region markers in `docs/dev/codeowners.md`. Both are
derived artifacts; CI re-renders them on every PR (see
.github/workflows/codeowners.yml) and auto-commits the result on same-repo
PRs, so the source of truth and the human-readable view never drift.
Usage:
python3 .github/scripts/render-codeowners.py
@ -16,6 +20,7 @@ Exits non-zero on:
one owner; otherwise CODEOWNERS would assign nobody and GitHub
would silently fall back to "no required reviewer", which
defeats the purpose).
- Missing generated-region markers in docs/dev/codeowners.md.
"""
from __future__ import annotations
@ -34,6 +39,13 @@ except ImportError:
REPO_ROOT = Path(__file__).resolve().parents[2]
SOURCE = REPO_ROOT / ".github" / "codeowners-roles.yml"
OUTPUT = REPO_ROOT / ".github" / "CODEOWNERS"
DOCS = REPO_ROOT / "docs" / "dev" / "codeowners.md"
# The "who owns what" tables in docs/dev/codeowners.md are spliced between
# these markers so the human-readable view never drifts from the source of
# truth. Edit codeowners-roles.yml and re-render — never the table by hand.
DOCS_BEGIN = "<!-- BEGIN GENERATED OWNERSHIP — edit codeowners-roles.yml + run render-codeowners.py -->"
DOCS_END = "<!-- END GENERATED OWNERSHIP -->"
BANNER = """\
# AUTOGENERATED from .github/codeowners-roles.yml. Do not edit by hand.
@ -75,6 +87,62 @@ def owners_for(role_names: list[str], roles: dict) -> list[str]:
return seen
def _oneline(text: str) -> str:
"""Collapse a folded/multi-line YAML description into one cell of text."""
return " ".join((text or "").split())
def ownership_tables(spec: dict, roles: dict) -> str:
    """Build the markdown "who owns what" section that goes into the docs.

    Two tables are produced:

    * path → owners: the ``*`` catch-all row first (only when ``spec`` has a
      ``default`` key), followed by one row per entry of ``spec["paths"]`` in
      the order the mapping yields them;
    * role → members: one row per entry of ``roles`` with its members
      rendered as ``@handle`` and its description collapsed to one line.

    Returns the whole section as a single newline-joined string ending in a
    trailing newline.
    """

    def path_row(pattern, role_names):
        # One row of the path table: resolved owner handles plus the role
        # names that produced them.
        handles = " ".join(owners_for(role_names, roles))
        return f"| `{pattern}` | {handles} | {', '.join(role_names)} |"

    lines = [
        "**Path → owners** (GitHub applies *last match wins*; the `*` "
        "catch-all is listed first and is overridden by the specific "
        "patterns below it):",
        "",
        "| Path | Owners | Role(s) |",
        "|---|---|---|",
    ]

    if "default" in spec:
        lines.append(path_row("*", spec["default"]))
    path_map = spec.get("paths") or {}
    for pattern, role_names in path_map.items():
        lines.append(path_row(pattern, role_names))

    lines.extend([
        "",
        "**Roles**:",
        "",
        "| Role | Members | Description |",
        "|---|---|---|",
    ])
    for role_name, role in roles.items():
        member_list = role.get("members") or []
        handles = " ".join(f"@{member}" for member in member_list)
        description = _oneline(role.get("description", ""))
        lines.append(f"| `{role_name}` | {handles} | {description} |")

    # Empty final element gives the joined text its trailing newline.
    lines.append("")
    return "\n".join(lines)
def splice_docs(table_md: str) -> None:
    """Replace the generated ownership region of the docs file with ``table_md``.

    Everything outside the DOCS_BEGIN / DOCS_END markers is left untouched;
    the markers themselves are kept and the region between them is rewritten
    as a blank line, ``table_md``, and a newline.

    Exits the process (via ``sys.exit``) with an error message when the docs
    file does not exist, when either marker is missing, or when the END
    marker does not come after the BEGIN marker.
    """
    if not DOCS.exists():
        sys.exit(f"error: docs file not found: {DOCS}")

    # The markers and the generated tables contain non-ASCII characters
    # ("—", "→"), so read and write explicitly as UTF-8 rather than relying
    # on the platform's locale encoding.
    text = DOCS.read_text(encoding="utf-8")

    if DOCS_BEGIN not in text or DOCS_END not in text:
        sys.exit(
            f"error: ownership markers not found in {DOCS.relative_to(REPO_ROOT)}. "
            f"Add the lines:\n  {DOCS_BEGIN}\n  {DOCS_END}\n"
            f"around the generated table region."
        )

    head, _, rest = text.partition(DOCS_BEGIN)
    _, end_marker, tail = rest.partition(DOCS_END)
    if not end_marker:
        # Both markers exist but END precedes BEGIN; report it instead of
        # failing with an unpacking traceback.
        sys.exit(
            f"error: ownership markers are out of order in "
            f"{DOCS.relative_to(REPO_ROOT)}: the END marker must come after "
            f"the BEGIN marker."
        )

    new = f"{head}{DOCS_BEGIN}\n\n{table_md}\n{DOCS_END}{tail}"
    DOCS.write_text(new, encoding="utf-8")
def main() -> int:
if not SOURCE.exists():
sys.exit(f"error: source file not found: {SOURCE}")
@ -127,6 +195,9 @@ def main() -> int:
OUTPUT.write_text(rendered)
print(f"wrote {OUTPUT.relative_to(REPO_ROOT)}")
splice_docs(ownership_tables(spec, roles))
print(f"updated {DOCS.relative_to(REPO_ROOT)}")
return 0

View file

@ -111,6 +111,18 @@ jobs:
- name: Verify AGENTS.md ↔ docs/ cross-links
run: bash scripts/check-agents-md.sh
# Job: checks out the repository and runs the container entrypoint's shell
# test script on ubuntu-latest. Needs only read access to repository contents.
entrypoint_test:
name: Container Entrypoint
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout source
uses: actions/checkout@v5.0.1
# The test script is invoked with plain `sh`, not bash.
- name: Verify omnigraph-server entrypoint arg composition
run: sh docker/entrypoint_test.sh
test:
name: Test Workspace
needs: classify_changes
@ -249,6 +261,63 @@ jobs:
if: needs.classify_changes.outputs.run_full_ci == 'true'
run: cargo test --locked -p omnigraph-server --features aws
# Job: builds the CLI and server in release mode on windows-latest, smoke-runs
# both executables, and parse-checks the PowerShell installer.
# The job has no job-level `if`; instead every real step is gated on
# classify_changes.outputs.run_full_ci == 'true', and the first step only
# prints a notice when that output is anything else.
test_windows_binaries:
name: Test Windows release binaries
needs: classify_changes
runs-on: windows-latest
timeout-minutes: 75
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
steps:
- name: Skip for text-only changes
if: needs.classify_changes.outputs.run_full_ci != 'true'
run: Write-Host "Text-only change detected; skipping Windows binary build."
- name: Checkout source
if: needs.classify_changes.outputs.run_full_ci == 'true'
uses: actions/checkout@v5.0.1
# protoc is installed through Chocolatey before the Rust build.
- name: Install system dependencies
if: needs.classify_changes.outputs.run_full_ci == 'true'
run: choco install protoc -y
- name: Install Rust stable
if: needs.classify_changes.outputs.run_full_ci == 'true'
uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable
# Fixed cache key dedicated to this job's release build.
- name: Cache Rust build data
if: needs.classify_changes.outputs.run_full_ci == 'true'
uses: Swatinem/rust-cache@v2
with:
workspaces: |
. -> target
key: windows-release-binaries
- name: Build Windows binaries
if: needs.classify_changes.outputs.run_full_ci == 'true'
run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server
# Invokes each built executable once: `version` for the CLI, `--help` for
# the server.
- name: Smoke test Windows binaries
if: needs.classify_changes.outputs.run_full_ci == 'true'
run: |
& ./target/release/omnigraph.exe version
& ./target/release/omnigraph-server.exe --help
# Parses scripts/install.ps1 with the PowerShell language parser and fails
# the step (exit 1) when the parser reports any errors; the errors are
# printed with Format-List first.
- name: Check PowerShell installer syntax
if: needs.classify_changes.outputs.run_full_ci == 'true'
run: |
$tokens = $null
$errors = $null
[System.Management.Automation.Language.Parser]::ParseFile("scripts/install.ps1", [ref]$tokens, [ref]$errors) | Out-Null
if ($errors.Count -gt 0) {
$errors | Format-List
exit 1
}
rustfs_integration:
name: RustFS S3 Integration
needs:

View file

@ -1,19 +1,24 @@
name: CODEOWNERS
# Runs on EVERY pull request (no paths filter). The two jobs below are
# required status checks on `main`; a path-filtered required check never
# reports for PRs outside the filter and leaves them permanently "pending"
# (the trap that forced admin-override merges). Always-run + cheap
# short-circuit is what keeps them honest.
on:
pull_request:
paths:
- '.github/codeowners-roles.yml'
- '.github/CODEOWNERS'
- '.github/scripts/render-codeowners.py'
- '.github/workflows/codeowners.yml'
workflow_dispatch:
# Read-only; we never push from this workflow.
# `drift` auto-commits the regenerated artifacts back to same-repo PR
# branches, so it needs write access.
permissions:
contents: read
contents: write
jobs:
# NOTE: the job `name:` values below ("CODEOWNERS matches source" /
# "CODEOWNERS not hand-edited") ARE the status-check contexts that
# .github/branch-protection.json must list verbatim. Renaming a job here
# is a branch-protection change — update the JSON and re-apply.
drift:
name: CODEOWNERS matches source
runs-on: ubuntu-latest
@ -28,19 +33,56 @@ jobs:
- name: Install PyYAML
run: pip install pyyaml
- name: Re-render CODEOWNERS
- name: Re-render CODEOWNERS + ownership docs
run: python3 .github/scripts/render-codeowners.py
- name: Reject drift
# Same-repo PR: push the regenerated artifacts back so contributors
# never have to run the script locally. Mirrors the openapi.json
# auto-commit in ci.yml (separate shallow clone of the head branch so
# the pushed commit carries only the regenerated files).
- name: Commit regenerated artifacts to PR branch
if: |
github.event_name == 'pull_request' &&
github.event.pull_request.head.repo.full_name == github.repository
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
if ! git diff --quiet .github/CODEOWNERS; then
echo "::error::.github/CODEOWNERS is out of sync with .github/codeowners-roles.yml."
echo "::error::Run \`python3 .github/scripts/render-codeowners.py\` locally and commit the result."
if git diff --quiet -- .github/CODEOWNERS docs/dev/codeowners.md; then
echo "CODEOWNERS and ownership docs already in sync."
exit 0
fi
tmp=$(mktemp -d)
git clone --depth 1 --branch "${{ github.head_ref }}" \
"https://x-access-token:${GITHUB_TOKEN}@github.com/${{ github.repository }}.git" \
"$tmp"
cp .github/CODEOWNERS "$tmp/.github/CODEOWNERS"
cp docs/dev/codeowners.md "$tmp/docs/dev/codeowners.md"
cd "$tmp"
if git diff --quiet -- .github/CODEOWNERS docs/dev/codeowners.md; then
echo "Head branch already matches; nothing to push."
exit 0
fi
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add .github/CODEOWNERS docs/dev/codeowners.md
git commit -m "chore: regenerate CODEOWNERS + ownership docs"
git push
# Fork PR / workflow_dispatch: cannot push back, so enforce drift
# strictly. The contributor runs the script and commits the result.
- name: Verify in sync (forks / manual runs)
if: |
!(github.event_name == 'pull_request' &&
github.event.pull_request.head.repo.full_name == github.repository)
run: |
if ! git diff --quiet -- .github/CODEOWNERS docs/dev/codeowners.md; then
echo "::error::Generated CODEOWNERS / ownership docs are out of sync with .github/codeowners-roles.yml."
echo "::error::Run \`python3 .github/scripts/render-codeowners.py\` and commit the result."
echo "--- diff ---"
git --no-pager diff .github/CODEOWNERS
git --no-pager diff -- .github/CODEOWNERS docs/dev/codeowners.md
exit 1
fi
echo "CODEOWNERS is in sync with its source."
echo "Generated artifacts are in sync with their source."
noedit:
name: CODEOWNERS not hand-edited
@ -52,6 +94,8 @@ jobs:
fetch-depth: 0
- name: Reject hand-edits to generated file
# Only meaningful for PRs (needs a base to diff against).
if: github.event_name == 'pull_request'
run: |
base="origin/${{ github.base_ref }}"
git fetch origin "${{ github.base_ref }}" --quiet

View file

@ -80,8 +80,15 @@ jobs:
version=$(cargo metadata --format-version=1 --no-deps \
| jq -r --arg c "$crate" '.packages[] | select(.name==$c) | .version')
# crates.io API requires a User-Agent header — without it the
# API responds 403 and the skip check below would silently
# fall through to a real publish attempt that errors with
# "already exists on crates.io index" when re-running after a
# partial publish. Send a UA naming the workflow.
local current
current=$(curl -fsSL "https://crates.io/api/v1/crates/${crate}" \
current=$(curl -fsSL \
-A 'ModernRelay-omnigraph-ci (https://github.com/ModernRelay/omnigraph)' \
"https://crates.io/api/v1/crates/${crate}" \
| jq -r '.crate.max_version' || echo "")
if [[ "$current" == "$version" ]]; then
@ -90,10 +97,28 @@ jobs:
fi
echo "==> publishing ${crate} ${version} (current crates.io: ${current:-none})"
cargo publish -p "$crate" --locked
# Defense in depth: if the skip check missed an existing
# version (e.g. crates.io API hiccup), cargo publish errors
# with "already exists on crates.io index". Treat that as
# success so the workflow can be re-run idempotently.
local output
if ! output=$(cargo publish -p "$crate" --locked 2>&1); then
echo "$output"
if echo "$output" | grep -q "already exists on crates.io"; then
echo "==> ${crate} ${version} was already published; treating as success"
return 0
fi
return 1
fi
echo "$output"
}
# Order matters: each crate must precede anything that depends on it.
# omnigraph-compiler and omnigraph-policy have no internal deps;
# omnigraph-engine depends on both; server depends on engine + the
# two leaf crates; cli depends on everything.
publish_if_new omnigraph-compiler
publish_if_new omnigraph-policy
publish_if_new omnigraph-engine
publish_if_new omnigraph-server
publish_if_new omnigraph-cli

View file

@ -43,6 +43,8 @@ jobs:
asset_name: omnigraph-linux-x86_64
- runner: macos-14
asset_name: omnigraph-macos-arm64
- runner: windows-latest
asset_name: omnigraph-windows-x86_64
env:
CARGO_TERM_COLOR: always
steps:
@ -59,6 +61,10 @@ jobs:
if: runner.os == 'macOS'
run: brew install protobuf
- name: Install Windows dependencies
if: runner.os == 'Windows'
run: choco install protoc -y
- name: Install Rust stable
uses: dtolnay/rust-toolchain@stable
with:
@ -73,7 +79,8 @@ jobs:
- name: Build release binaries
run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server
- name: Package release archive
- name: Package Unix release archive
if: runner.os != 'Windows'
run: |
mkdir -p release
install -m 0755 target/release/omnigraph release/omnigraph
@ -81,6 +88,22 @@ jobs:
tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server
shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256"
- name: Package Windows release archive
if: runner.os == 'Windows'
run: |
New-Item -ItemType Directory -Force -Path release | Out-Null
Copy-Item target/release/omnigraph.exe release/omnigraph.exe
Copy-Item target/release/omnigraph-server.exe release/omnigraph-server.exe
Compress-Archive -Path release/omnigraph.exe, release/omnigraph-server.exe -DestinationPath "${{ matrix.asset_name }}.zip" -Force
$hash = (Get-FileHash "${{ matrix.asset_name }}.zip" -Algorithm SHA256).Hash.ToLowerInvariant()
"$hash ${{ matrix.asset_name }}.zip" | Out-File -FilePath "${{ matrix.asset_name }}.sha256" -Encoding ascii
New-Item -ItemType Directory -Force -Path verify | Out-Null
Expand-Archive -Path "${{ matrix.asset_name }}.zip" -DestinationPath verify -Force
$items = Get-ChildItem -Path verify -File
if ($items.Count -ne 2 -or !(Test-Path verify/omnigraph.exe) -or !(Test-Path verify/omnigraph-server.exe)) {
throw "Windows release archive is missing expected binaries"
}
- name: Publish edge release assets
uses: softprops/action-gh-release@v2.5.0
with:
@ -91,5 +114,22 @@ jobs:
body: |
Rolling prerelease from `${{ github.sha }}`.
files: |
${{ matrix.asset_name }}.tar.gz
${{ matrix.asset_name }}.sha256
${{ matrix.asset_name }}.*
# Job: runs after build_release on windows-latest. Installs via
# scripts/install.ps1 using the `edge` release channel into a directory under
# RUNNER_TEMP, then invokes both installed executables once.
smoke_windows_installer:
name: Smoke Windows installer
needs: build_release
runs-on: windows-latest
permissions:
contents: read
steps:
- name: Checkout source
uses: actions/checkout@v5.0.1
- name: Install from edge release
run: ./scripts/install.ps1 -ReleaseChannel edge -InstallDir "$env:RUNNER_TEMP/omnigraph-bin"
# `version` for the CLI, `--help` for the server, both from the install dir.
- name: Smoke installed binaries
run: |
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph.exe" version
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph-server.exe" --help

View file

@ -20,6 +20,8 @@ jobs:
asset_name: omnigraph-linux-x86_64
- runner: macos-14
asset_name: omnigraph-macos-arm64
- runner: windows-latest
asset_name: omnigraph-windows-x86_64
env:
CARGO_TERM_COLOR: always
steps:
@ -36,6 +38,10 @@ jobs:
if: runner.os == 'macOS'
run: brew install protobuf
- name: Install Windows dependencies
if: runner.os == 'Windows'
run: choco install protoc -y
- name: Install Rust stable
uses: dtolnay/rust-toolchain@stable
with:
@ -50,7 +56,8 @@ jobs:
- name: Build release binaries
run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server
- name: Package release archive
- name: Package Unix release archive
if: runner.os != 'Windows'
run: |
mkdir -p release
install -m 0755 target/release/omnigraph release/omnigraph
@ -58,12 +65,27 @@ jobs:
tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server
shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256"
- name: Package Windows release archive
if: runner.os == 'Windows'
run: |
New-Item -ItemType Directory -Force -Path release | Out-Null
Copy-Item target/release/omnigraph.exe release/omnigraph.exe
Copy-Item target/release/omnigraph-server.exe release/omnigraph-server.exe
Compress-Archive -Path release/omnigraph.exe, release/omnigraph-server.exe -DestinationPath "${{ matrix.asset_name }}.zip" -Force
$hash = (Get-FileHash "${{ matrix.asset_name }}.zip" -Algorithm SHA256).Hash.ToLowerInvariant()
"$hash ${{ matrix.asset_name }}.zip" | Out-File -FilePath "${{ matrix.asset_name }}.sha256" -Encoding ascii
New-Item -ItemType Directory -Force -Path verify | Out-Null
Expand-Archive -Path "${{ matrix.asset_name }}.zip" -DestinationPath verify -Force
$items = Get-ChildItem -Path verify -File
if ($items.Count -ne 2 -or !(Test-Path verify/omnigraph.exe) -or !(Test-Path verify/omnigraph-server.exe)) {
throw "Windows release archive is missing expected binaries"
}
- name: Publish GitHub release assets
uses: softprops/action-gh-release@v2.5.0
with:
files: |
${{ matrix.asset_name }}.tar.gz
${{ matrix.asset_name }}.sha256
${{ matrix.asset_name }}.*
update_homebrew_tap:
name: Update Homebrew tap
@ -99,6 +121,31 @@ jobs:
run: |
./scripts/update-homebrew-formula.sh "${GITHUB_REF_NAME}" homebrew-tap/Formula/omnigraph.rb
# Diagnostic only: brew is not on PATH on the ubuntu runner by default, so
# set it up explicitly. Both this setup and the audit below are best-effort
# canaries, not gates — continue-on-error on each keeps a failed/flaky brew
# (the action is pinned to a moving @master ref) from skipping the actual
# tap publish below. The formula is correct by construction
# (update-homebrew-formula.sh), so brew tooling must never block the push.
- name: Set up Homebrew
if: env.HOMEBREW_TAP_SKIP != '1'
continue-on-error: true
uses: Homebrew/actions/setup-homebrew@master
- name: Audit generated formula
if: env.HOMEBREW_TAP_SKIP != '1'
continue-on-error: true
run: |
# Audit the checked-out tap by name (brew audit rejects bare paths
# and needs tap context). Symlink the checkout into Homebrew's Taps
# tree so `modernrelay/tap/omnigraph` resolves to it. Offline audit
# (no --online) keeps it deterministic; it still catches the
# ComponentsOrder/structure class of problems.
tap_dir="$(brew --repository)/Library/Taps/modernrelay/homebrew-tap"
mkdir -p "$(dirname "$tap_dir")"
ln -sfn "$PWD/homebrew-tap" "$tap_dir"
brew audit --strict modernrelay/tap/omnigraph
- name: Commit and push formula update
if: env.HOMEBREW_TAP_SKIP != '1'
working-directory: homebrew-tap
@ -113,3 +160,22 @@ jobs:
git add Formula/omnigraph.rb
git commit -m "Update Omnigraph formula to ${GITHUB_REF_NAME}"
git push origin HEAD:main
# Job: runs after build_release on windows-latest, and only when the ref is a
# tag starting with `v`. Installs via scripts/install.ps1, passing the ref name
# as -Version, into a directory under RUNNER_TEMP, then invokes both installed
# executables once.
smoke_windows_installer:
name: Smoke Windows installer
needs: build_release
if: startsWith(github.ref, 'refs/tags/v')
runs-on: windows-latest
permissions:
contents: read
steps:
- name: Checkout source
uses: actions/checkout@v5.0.1
- name: Install from tagged release
run: ./scripts/install.ps1 -Version "$env:GITHUB_REF_NAME" -InstallDir "$env:RUNNER_TEMP/omnigraph-bin"
# `version` for the CLI, `--help` for the server, both from the install dir.
- name: Smoke installed binaries
run: |
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph.exe" version
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph-server.exe" --help

1
.gitignore vendored
View file

@ -16,6 +16,7 @@ __pycache__/
*.pyc
demo/*.omni/
.omnigraph-rustfs-demo/
/docs/internal
# Local-only working files (not for the public repo)
.claude/

View file

@ -1,6 +1,6 @@
# OmniGraph — Agent Guide
This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this repo. It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer.
This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this codebase. It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer.
**Required reading every session, every change:**
@ -16,8 +16,8 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th
`CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`.
**Version surveyed:** 0.4.2
**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-cli`, `omnigraph-server`
**Version surveyed:** 0.6.1
**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-policy`, `omnigraph-cli`, `omnigraph-server`
**Storage substrate:** Lance 6.x (columnar, versioned, branchable)
**License:** MIT
**Toolchain:** Rust stable, edition 2024
@ -33,7 +33,7 @@ OmniGraph is a typed property-graph engine built as a coordination layer over ma
- **Multi-modal querying**: vector ANN (`nearest`), full-text (`search`/`fuzzy`/`match_text`/`bm25`), Reciprocal Rank Fusion (`rrf`), and graph traversal (`Expand`, anti-join `not { … }`) in one runtime.
- **Branches and commits across the whole graph**: Git-style — every successful publish appends to a commit DAG; merges are three-way at the row level.
- **Atomic per-query writes**: `mutate_as` and `load` accumulate insert/update batches into an in-memory `MutationStaging.pending` per touched table; one `stage_*` + `commit_staged` per table runs at end-of-query, then `ManifestBatchPublisher::publish` commits the manifest atomically with per-table `expected_table_versions` CAS. A mid-query failure leaves Lance HEAD untouched on staged tables — no drift, no run state machine, no staging branches. Deletes still inline-commit; D₂ at parse time prevents inserts/updates and deletes from coexisting in one query.
- **HTTP server**: Axum + utoipa OpenAPI, bearer auth (SHA-256 hashed, optional AWS Secrets Manager). Cedar policy enforcement is engine-wide — every `_as` writer calls `Omnigraph::enforce(action, scope, actor)`, so HTTP, CLI, and embedded SDK consumers all hit the same gate.
- **HTTP server**: Axum + utoipa OpenAPI, bearer auth (SHA-256 hashed, optional AWS Secrets Manager). Cedar policy enforcement is engine-wide — every `_as` writer calls `Omnigraph::enforce(action, scope, actor)`, so HTTP, CLI, and embedded SDK consumers all hit the same gate. **Two modes** (v0.6.0+): single-graph (legacy flat routes) and multi-graph (`/graphs/{graph_id}/...` cluster routes + read-only `GET /graphs` enumeration). Per-graph + server-level Cedar policies. Runtime add/remove (`POST /graphs`, `DELETE /graphs/{id}`) is not exposed — operators edit `omnigraph.yaml` and restart.
- **CLI** driven by a single `omnigraph.yaml`; multi-format output (json/jsonl/csv/kv/table).
Throughout the docs, capabilities are split into **L1 — Inherited from Lance** vs **L2 — Added by OmniGraph**.
@ -50,7 +50,7 @@ CLI (omnigraph) HTTP Server (omnigraph-server, Axum)
omnigraph-compiler ── Pest grammars, catalog, IR, lowering, lint, migration plan
omnigraph (engine) ── ManifestRepo, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec
omnigraph (engine) ── ManifestCoordinator, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec
Lance 6.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes
@ -81,7 +81,7 @@ Full diagram and concurrency model: [docs/dev/architecture.md](docs/dev/architec
| Embeddings (compiler + engine clients, env vars, `@embed`) | [docs/user/embeddings.md](docs/user/embeddings.md) |
| Branches, commit graph, snapshots, system branches | [docs/user/branches-commits.md](docs/user/branches-commits.md) |
| Transactions and atomicity (per-query atomic; branches as multi-query transactions) | [docs/user/transactions.md](docs/user/transactions.md) |
| Direct-publish writes (the former Run state machine, now demoted to publisher CAS) | [docs/dev/runs.md](docs/dev/runs.md) |
| Direct-publish write path (staging, D2, recovery sidecars; the former Run state machine) | [docs/dev/writes.md](docs/dev/writes.md) |
| Three-way merge and conflict kinds | [docs/dev/merge.md](docs/dev/merge.md) |
| Diff / change feed (`diff_between`, `diff_commits`) | [docs/user/changes.md](docs/user/changes.md) |
| Query execution, mutation execution, bulk loader, `load` vs `ingest` | [docs/dev/execution.md](docs/dev/execution.md) |
@ -164,38 +164,64 @@ If a proposal fits one of these, the burden is on the proposer to justify why th
---
## Build, test, lint
Rust stable workspace (edition 2024). `protoc` is a build dependency (`brew install protobuf` / `apt-get install protobuf-compiler libprotobuf-dev`). **Crate dir ≠ package name** for the engine: the directory is `crates/omnigraph` but its Cargo package is `omnigraph-engine` (use that in `-p`). The CLI binary built from `omnigraph-cli` is named `omnigraph`.
```bash
cargo build --workspace --locked # build everything
cargo test --workspace --locked # the canonical CI gate (matches CI exactly)
cargo run -p omnigraph-cli -- <args> # run the `omnigraph` CLI from source
cargo run -p omnigraph-server -- <uri> --bind 0.0.0.0:8080 # run the server from source
# Run one crate / one test file / one test fn
cargo test -p omnigraph-engine --test traversal # one integration-test file (see docs/dev/testing.md)
cargo test -p omnigraph-engine --test writes concurrent # one test fn by name substring
cargo test -p omnigraph-engine some_inline_test -- --nocapture # show stdout
# Feature-gated suites (each is its own job in CI, not part of the default run)
cargo test -p omnigraph-engine --features failpoints --test failpoints # fault injection
cargo build -p omnigraph-server --features aws # AWS Secrets Manager bearer-token source
```
S3-backed tests (`s3_storage`, and the S3 paths in server/CLI system tests) **skip** unless `OMNIGRAPH_S3_TEST_BUCKET` + `AWS_*` (incl. `AWS_ENDPOINT_URL_S3` for non-AWS) are set; CI runs them against containerized RustFS. `scripts/local-rustfs-bootstrap.sh` stands up a local S3 environment.
CI does **not** run `clippy` or `rustfmt` as gates — but `cargo test --workspace --locked` is the exact gate, so run it before pushing. Two non-test CI checks: `scripts/check-agents-md.sh` (doc cross-link integrity — run it after moving/renaming docs) and OpenAPI drift (`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json`; set `OMNIGRAPH_UPDATE_OPENAPI=1` to update the checked-in copy when a server/API change is intentional).
---
## Quick-reference flows
```bash
# Initialize an S3-backed repo
omnigraph init --schema ./schema.pg s3://my-bucket/repo.omni
# Initialize an S3-backed graph
omnigraph init --schema ./schema.pg s3://my-bucket/graph.omni
# Bulk load
omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/repo.omni
omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/graph.omni
# Branch + ingest a review batch
omnigraph branch create --from main review/2026-04-25 s3://my-bucket/repo.omni
omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/repo.omni
omnigraph branch create --from main review/2026-04-25 s3://my-bucket/graph.omni
omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/graph.omni
# Run a hybrid (vector + BM25) query
omnigraph read --query ./queries.gq --name find_similar \
--params '{"q":"trends in AI safety"}' --format table s3://my-bucket/repo.omni
--params '{"q":"trends in AI safety"}' --format table s3://my-bucket/graph.omni
# Plan + apply schema migration
omnigraph schema plan --schema ./next.pg s3://my-bucket/repo.omni
omnigraph schema apply --schema ./next.pg s3://my-bucket/repo.omni --json
omnigraph schema plan --schema ./next.pg s3://my-bucket/graph.omni
omnigraph schema apply --schema ./next.pg s3://my-bucket/graph.omni --json
# Merge review branch back
omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/repo.omni
omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/graph.omni
# Compact + GC (preview, then confirm)
omnigraph optimize s3://my-bucket/repo.omni
omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/repo.omni
omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/repo.omni
omnigraph optimize s3://my-bucket/graph.omni
omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/graph.omni
omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/graph.omni
# Stand up the HTTP server (token from env)
OMNIGRAPH_SERVER_BEARER_TOKEN=xxxx \
omnigraph-server s3://my-bucket/repo.omni --bind 0.0.0.0:8080
omnigraph-server s3://my-bucket/graph.omni --bind 0.0.0.0:8080
# Cedar policy explain
omnigraph policy explain --actor act-alice --action change --branch main
@ -211,7 +237,7 @@ omnigraph policy explain --actor act-alice --action change --branch main
| Per-dataset versioning + time travel | ✅ | `snapshot_at_version`, `entity_at`, snapshot-pinned reads across many tables |
| Per-dataset branches | ✅ | **Graph-level** branches (atomic across all sub-tables), lazy fork, system branch filtering |
| Atomic single-dataset commits | ✅ | **Multi-table publish via three layers**, NOT a single Lance primitive: (1) per-table Lance `commit_staged` for the data write, (2) `__manifest` row-level CAS via `ManifestBatchPublisher` for cross-table ordering, (3) the open-time recovery sweep for the residual gap between (1) and (2). All three layers ship; the four migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) write a `__recovery/{ulid}.json` sidecar before Phase B and delete it after Phase C. The next `Omnigraph::open` (gated on `OpenMode::ReadWrite`) runs the sweep in `db/manifest/recovery.rs`: classify, decide all-or-nothing per sidecar, roll forward via single `ManifestBatchPublisher::publish` or roll back via `Dataset::restore`, and record an audit row in `_graph_commit_recoveries.lance` (queryable via `omnigraph commit list --filter actor=omnigraph:recovery`). Continuous in-process recovery (no restart needed between Phase B failure and recovery) is the goal of a future background reconciler. Engine writes route through a sealed `TableStorage` trait exposing `stage_*` + `commit_staged` as the canonical staged-write surface; documented inline-commit residuals (`delete_where`, `create_vector_index`, plus legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` / `create_*_index`) remain on the trait until upstream Lance ships a public two-phase API ([#6658](https://github.com/lance-format/lance/issues/6658), [#6666](https://github.com/lance-format/lance/issues/6666)) and the migration of every call site completes. |
| Compaction (`compact_files`) | ✅ | `omnigraph optimize` orchestrates over all node/edge tables, bounded concurrency |
| Compaction (`compact_files`) | ✅ | `omnigraph optimize` orchestrates over all node/edge tables, bounded concurrency; **skips blob-bearing tables** (reported via `TableOptimizeStats.skipped`, not silent), gated on `LANCE_SUPPORTS_BLOB_COMPACTION` until the upstream blob-v2 compaction-decode bug is fixed (see [docs/dev/invariants.md](docs/dev/invariants.md) Known Gaps) |
| Cleanup (`cleanup_old_versions`) | ✅ | `omnigraph cleanup` with `--keep` / `--older-than` policy |
| BTREE / inverted (FTS) / vector indexes | ✅ | `ensure_indices` builds them on every relevant column; idempotent; lazy across branches |
| `merge_insert` upsert | ✅ | `LoadMode::Merge`, mutation `update`/`insert`/`delete` lowering |
@ -222,12 +248,12 @@ omnigraph policy explain --actor act-alice --action change --branch main
| Schema language | — | `.pg` + Pest grammar + catalog + interfaces + constraints + annotations |
| Query language | — | `.gq` + Pest grammar + IR + lowering + linter |
| Schema migration planning | — | `plan_schema_migration` + `apply_schema` step types + `__schema_apply_lock__` |
| Commit graph (DAG) across whole repo | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map |
| Commit graph (DAG) across whole graph | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map |
| Per-query atomic writes | — | In-memory `MutationStaging.pending` accumulator + `stage_*` / `commit_staged` per touched table at end-of-query + publisher CAS via `commit_with_expected` (single manifest commit per `mutate_as` / `load`); D₂ parse-time rule keeps inserts/updates and deletes from mixing |
| Three-way row-level merge | — | `OrderedTableCursor` + `StagedTableWriter`, structured `MergeConflictKind` |
| Change feeds | — | `diff_between` / `diff_commits` with manifest fast path + ID streaming |
| Cedar policy | — | 8 actions, branch / target_branch / protected scopes, validate/test/explain CLI. **Engine-wide enforcement** (MR-722): every `_as` writer (`apply_schema_as`, `mutate_as`, `load_as`, `ingest_as`, `branch_create_as` / `branch_create_from_as`, `branch_delete_as`, `branch_merge_as`) calls `Omnigraph::enforce(action, scope, actor)` — HTTP, CLI, embedded SDK all hit the same gate. |
| HTTP server | — | Axum, OpenAPI via utoipa, bearer auth (SHA-256, AWS Secrets Manager option), `authorize_request` at the HTTP boundary (resolves bearer→actor, applies admission control), NDJSON streaming export |
| Cedar policy | — | Per-graph actions plus server-scoped actions (see [docs/user/policy.md](docs/user/policy.md) for the current list), branch / target_branch / protected scopes, validate/test/explain CLI. **Engine-wide enforcement** (MR-722): every `_as` writer (`apply_schema_as`, `mutate_as`, `load_as`, `ingest_as`, `branch_create_as` / `branch_create_from_as`, `branch_delete_as`, `branch_merge_as`) calls `Omnigraph::enforce(action, scope, actor)` — HTTP, CLI, embedded SDK all hit the same gate. |
| HTTP server | — | Axum, OpenAPI via utoipa, bearer auth (SHA-256, AWS Secrets Manager option), `authorize_request` at the HTTP boundary (resolves bearer→actor, applies admission control), NDJSON streaming export, **multi-graph mode (v0.6.0+) with cluster routes + read-only `GET /graphs` enumeration + per-graph + server-level Cedar policies. Add/remove graphs by editing `omnigraph.yaml` and restarting.** |
| CLI with config | — | `omnigraph.yaml`, aliases, multi-format output (json/jsonl/csv/kv/table) |
| Audit / actor tracking | — | `_as` write APIs + actor map in commit graph |
| Local RustFS bootstrap | — | `scripts/local-rustfs-bootstrap.sh` one-shot S3-backed dev environment |

13
Cargo.lock generated
View file

@ -4543,7 +4543,7 @@ dependencies = [
[[package]]
name = "omnigraph-cli"
version = "0.4.2"
version = "0.6.1"
dependencies = [
"assert_cmd",
"clap",
@ -4565,7 +4565,7 @@ dependencies = [
[[package]]
name = "omnigraph-compiler"
version = "0.4.2"
version = "0.6.1"
dependencies = [
"ahash",
"arrow-array",
@ -4586,7 +4586,7 @@ dependencies = [
[[package]]
name = "omnigraph-engine"
version = "0.4.2"
version = "0.6.1"
dependencies = [
"arc-swap",
"arrow-array",
@ -4627,7 +4627,7 @@ dependencies = [
[[package]]
name = "omnigraph-policy"
version = "0.4.2"
version = "0.6.1"
dependencies = [
"cedar-policy",
"clap",
@ -4640,8 +4640,9 @@ dependencies = [
[[package]]
name = "omnigraph-server"
version = "0.4.2"
version = "0.6.1"
dependencies = [
"arc-swap",
"async-trait",
"aws-config",
"aws-sdk-secretsmanager",
@ -4655,6 +4656,7 @@ dependencies = [
"omnigraph-compiler",
"omnigraph-engine",
"omnigraph-policy",
"regex",
"serde",
"serde_json",
"serde_yaml",
@ -4662,6 +4664,7 @@ dependencies = [
"sha2",
"subtle",
"tempfile",
"thiserror",
"tokio",
"tower",
"tower-http",

View file

@ -5,33 +5,35 @@
[![Crates.io](https://img.shields.io/crates/v/omnigraph-cli.svg)](https://crates.io/crates/omnigraph-cli)
[![CI](https://github.com/ModernRelay/omnigraph/actions/workflows/ci.yml/badge.svg)](https://github.com/ModernRelay/omnigraph/actions/workflows/ci.yml)
**Object-storage native graph engine with git-style workflows. Designed for agents as first-class operators.**
**Lakehouse native graph engine built for context assembly**
Branch, commit, and merge typed graph data like source code. Multi-modal, self-hosted, open source.
Omnigraph acts as the operational state & coordination layer for agents.
Built on Rust, Arrow, DataFusion and Lance.
Join the [Omnigraph Slack community](https://join.slack.com/t/omnigraphworkspace/shared_invite/zt-3wfpglyxj-lHvJGhuySPfqLtN35uJZNw)
## Use Cases
- Company brains / [Second brains](https://github.com/ModernRelay/omnigraph-cookbooks/tree/main/second-brain)
- Context graphs
- Backbone for multi-agent research
- Incident response graphs
- Compliance & audit graphs
- Enterprise knowledge systems
## Capabilities
- Typed schema, typed queries, and typed mutations
- Git-style versioning & branching
- Multimodal retrieval (graph+vector/fts+filters) optimized for context assembly
- Object storage native (S3, RustFS)
- Native blob-as-data support (docs, images, videos, etc)
- Schema-as-code, query validation and linting
- Git-style graph workflows: branches, commits, merges, and transactional runs
- Local, on-prem & cloud S3-native storage with snapshot-pinned reads
- Graph traversal + text, fuzzy, BM25, vector, and RRF search in one runtime
- Policy-as-code for server-side access control
- Single CLI for multiple deployments
- VPC, On-prem, hybrid deployment
- [`Lance`](https://github.com/lance-format/lance) format as open storage layer
| AS CODE | What it means |
|---|---|
| **Schema AS CODE** | Typed `.pg` schemas, planned, applied, enforced |
| **Context AS CODE** | Linted queries & agentic nudges, versioned and reusable |
| **Security AS CODE** | Cedar policies enforced server-side on every mutation |
| **Dashboards AS CODE** | Declarative views & controls over the graph *(coming)* |
## Core Use Cases
| Use case | What it's for |
|---|---|
| **Company brain** | Org knowledge unified into one queryable graph |
| **Context graph** | Decision traces and codified tribal knowledge |
| **Agentic memory** | Durable, versioned memory for long-running agents |
| **Dev graph** | Issues & dependency model for coding agents |
| **R&D data layer** | Experiments & trials data written into branches |
| **ML workflows** | Versioned, branchable graphs for training & eval |
| **Karpathy's LLM wiki** | A living, agent-updatable knowledge base |
## Quick Install
@ -60,7 +62,7 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/
That bootstrap:
- starts RustFS on `127.0.0.1:9000`
- creates a bucket and S3-backed repo
- creates a bucket and S3-backed graph
- loads the checked-in context fixture
- launches `omnigraph-server` on `127.0.0.1:8080`
@ -69,8 +71,8 @@ Docker must be installed and running first.
The RustFS bootstrap prefers the rolling `edge` binaries and only falls back to
source builds when release assets are unavailable.
If a previous run left objects under the same repo prefix but did not finish
initializing the repo, rerun with `RESET_REPO=1` or set `PREFIX` to a new
If a previous run left objects under the same graph prefix but did not finish
initializing the graph, rerun with `RESET_REPO=1` or set `PREFIX` to a new
value.
## Common Commands
@ -78,20 +80,37 @@ value.
The same URI works for local paths, `s3://…`, or `http://host:port`.
```bash
omnigraph init --schema ./schema.pg ./repo.omni
omnigraph load --data ./data.jsonl ./repo.omni
omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./repo.omni
omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./repo.omni
omnigraph branch create --from main feature-x ./repo.omni
omnigraph branch merge feature-x --into main ./repo.omni
omnigraph init --schema ./schema.pg ./graph.omni
omnigraph load --data ./data.jsonl ./graph.omni
omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./graph.omni
omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./graph.omni
omnigraph branch create --from main feature-x ./graph.omni
omnigraph branch merge feature-x --into main ./graph.omni
```
See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, ingest, runs, and policy commands.
See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, ingest, commits, and policy commands.
## Clients
For programmatic access to a running `omnigraph-server`:
- **TypeScript SDK** — [`@modernrelay/omnigraph`](https://www.npmjs.com/package/@modernrelay/omnigraph) ([source](https://github.com/ModernRelay/omnigraph-ts/tree/main/packages/sdk)). Instance-per-client, typed errors, camelCase types, async-iterator streaming export.
```bash
npm install @modernrelay/omnigraph
```
- **Model Context Protocol server** — [`@modernrelay/omnigraph-mcp`](https://www.npmjs.com/package/@modernrelay/omnigraph-mcp) ([source](https://github.com/ModernRelay/omnigraph-ts/tree/main/packages/mcp)). Bridges Omnigraph to LLM hosts (Claude Desktop, Claude Code, …) over stdio. Exposes tools and resources for schema, branches, queries, mutations, ingest, and bundles curated best-practices guidance from the cookbook.
```bash
npm install -g @modernrelay/omnigraph-mcp
```
Both packages are versioned in lockstep with `omnigraph-server` on major.minor: `@modernrelay/omnigraph@X.Y.*` targets `omnigraph-server@X.Y.*`. See [`ModernRelay/omnigraph-ts`](https://github.com/ModernRelay/omnigraph-ts) for the monorepo.
## Docs
- [Install guide](docs/user/install.md)
- [CLI guide](docs/user/cli.md)
- [Deployment guide](docs/user/deployment.md)
## Build And Test
@ -113,8 +132,8 @@ Notes:
- `crates/omnigraph-compiler`: shared schema/query parser, typechecker, catalog, and IR lowering
- `crates/omnigraph`: storage/runtime, branching, merge, change detection, and query execution
- `crates/omnigraph-cli`: CLI for init/load/ingest/read/change/branch/snapshot/export/policy operations
- `crates/omnigraph-server`: Axum HTTP server for remote reads, changes, ingest, export, branches, commits, and runs
- `crates/omnigraph-cli`: CLI for graph lifecycle (init/load/ingest), query/mutate, branch/commit/merge, schema/lint, snapshot/export, policy, and maintenance (optimize/cleanup)
- `crates/omnigraph-server`: Axum HTTP server for remote reads, changes, ingest, export, branches, and commits
## Contributing

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-cli"
version = "0.4.2"
version = "0.6.1"
edition = "2024"
description = "CLI for the Omnigraph graph database."
license = "MIT"
@ -13,10 +13,10 @@ name = "omnigraph"
path = "src/main.rs"
[dependencies]
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
omnigraph-server = { path = "../omnigraph-server", version = "0.4.2" }
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
omnigraph-server = { path = "../omnigraph-server", version = "0.6.1" }
clap = { workspace = true }
color-eyre = { workspace = true }
serde = { workspace = true }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -52,7 +52,7 @@ pub fn fixture(name: &str) -> PathBuf {
.join(name)
}
pub fn repo_path(root: &Path) -> PathBuf {
pub fn graph_path(root: &Path) -> PathBuf {
root.join("demo.omni")
}
@ -86,14 +86,14 @@ pub fn parse_stdout_json(output: &Output) -> Value {
serde_json::from_slice(&output.stdout).unwrap()
}
pub fn init_repo(repo: &Path) {
pub fn init_graph(graph: &Path) {
let schema = fixture("test.pg");
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(repo));
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(graph));
}
pub fn load_fixture(repo: &Path) {
pub fn load_fixture(graph: &Path) {
let data = fixture("test.jsonl");
output_success(cli().arg("load").arg("--data").arg(&data).arg(repo));
output_success(cli().arg("load").arg("--data").arg(&data).arg(graph));
}
pub fn write_jsonl(path: &Path, rows: &str) {
@ -116,7 +116,7 @@ fn yaml_string(value: &str) -> String {
format!("'{}'", value.replace('\'', "''"))
}
pub fn local_yaml_config(repo: &Path) -> String {
pub fn local_yaml_config(graph: &Path) -> String {
format!(
"\
graphs:
@ -130,7 +130,7 @@ query:
- .
policy: {{}}
",
yaml_string(&repo.to_string_lossy())
yaml_string(&graph.to_string_lossy())
)
}
@ -200,9 +200,9 @@ fn spawn_server_process(mut command: StdCommand) -> TestServer {
panic!("server did not become healthy");
}
pub fn spawn_server(repo: &Path) -> TestServer {
pub fn spawn_server(graph: &Path) -> TestServer {
let mut command = server_process();
command.arg(repo);
command.arg(graph);
spawn_server_process(command)
}
@ -221,58 +221,57 @@ pub fn spawn_server_with_config_env(config: &Path, envs: &[(&str, &str)]) -> Tes
spawn_server_process(command)
}
pub struct SystemRepo {
pub struct SystemGraph {
_temp: TempDir,
repo: PathBuf,
graph: PathBuf,
}
impl SystemRepo {
impl SystemGraph {
pub fn initialized() -> Self {
let temp = tempdir().unwrap();
let repo = repo_path(temp.path());
init_repo(&repo);
Self { _temp: temp, repo }
let graph = graph_path(temp.path());
init_graph(&graph);
Self { _temp: temp, graph }
}
pub fn loaded() -> Self {
let temp = tempdir().unwrap();
let repo = repo_path(temp.path());
init_repo(&repo);
load_fixture(&repo);
Self { _temp: temp, repo }
let graph = graph_path(temp.path());
init_graph(&graph);
load_fixture(&graph);
Self { _temp: temp, graph }
}
pub fn path(&self) -> &Path {
&self.repo
&self.graph
}
pub fn write_query(&self, name: &str, source: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_query_file(&path, source);
path
}
pub fn write_jsonl(&self, name: &str, rows: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_jsonl(&path, rows);
path
}
pub fn write_config(&self, name: &str, source: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_config(&path, source);
path
}
pub fn write_file(&self, name: &str, source: &str) -> PathBuf {
let path = self.repo.parent().unwrap().join(name);
let path = self.graph.parent().unwrap().join(name);
write_file(&path, source);
path
}
pub fn spawn_server(&self) -> TestServer {
spawn_server(&self.repo)
spawn_server(&self.graph)
}
pub fn spawn_server_with_config(&self, config: &Path) -> TestServer {

View file

@ -66,7 +66,7 @@ fn yaml_string(value: &str) -> String {
format!("'{}'", value.replace('\'', "''"))
}
fn local_policy_config(repo: &SystemRepo) -> String {
fn local_policy_config(graph: &SystemGraph) -> String {
format!(
"\
project:
@ -74,21 +74,43 @@ project:
graphs:
local:
uri: {}
policy:
file: ./policy.yaml
cli:
graph: local
branch: main
query:
roots:
- .
policy:
file: ./policy.yaml
",
yaml_string(&repo.path().to_string_lossy())
yaml_string(&graph.path().to_string_lossy())
)
}
fn insert_person_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
repo.write_query(
/// Renders the text of a test config that declares a `local` graph, a
/// `policy` entry pointing at `./policy.yaml`, and a `server` section that
/// selects `graph: local`.
///
/// The single `{}` placeholder is filled with the graph's path, lossily
/// converted to a string and passed through `yaml_string`.
///
/// NOTE(review): the template's indentation is not visible in this view, so
/// the exact YAML nesting (e.g. whether `policy` sits under `graphs.local`)
/// should be confirmed against the real file.
fn local_policy_server_graph_config(graph: &SystemGraph) -> String {
format!(
"\
project:
name: policy-e2e-local
graphs:
local:
uri: {}
policy:
file: ./policy.yaml
server:
graph: local
cli:
branch: main
query:
roots:
- .
",
yaml_string(&graph.path().to_string_lossy())
)
}
fn insert_person_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
graph.write_query(
name,
r#"
query insert_person($name: String, $age: I32) {
@ -98,8 +120,8 @@ query insert_person($name: String, $age: I32) {
)
}
fn add_friend_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
repo.write_query(
fn add_friend_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
graph.write_query(
name,
r#"
query add_friend($from: String, $to: String) {
@ -109,13 +131,13 @@ query add_friend($from: String, $to: String) {
)
}
fn snapshot_table_row_count(repo: &SystemRepo, table_key: &str) -> u64 {
snapshot_table_row_count_at(repo.path(), table_key)
fn snapshot_table_row_count(graph: &SystemGraph, table_key: &str) -> u64 {
snapshot_table_row_count_at(graph.path(), table_key)
}
fn snapshot_table_row_count_at(repo: &std::path::Path, table_key: &str) -> u64 {
fn snapshot_table_row_count_at(graph: &std::path::Path, table_key: &str) -> u64 {
let payload = parse_stdout_json(&output_success(
cli().arg("snapshot").arg(repo).arg("--json"),
cli().arg("snapshot").arg(graph).arg("--json"),
));
payload["tables"]
.as_array()
@ -178,7 +200,7 @@ fn format_vector(values: &[f32]) -> String {
.join(", ")
}
fn s3_test_repo_uri(suite: &str) -> Option<String> {
fn s3_test_graph_uri(suite: &str) -> Option<String> {
let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX")
.ok()
@ -193,21 +215,21 @@ fn s3_test_repo_uri(suite: &str) -> Option<String> {
#[test]
fn local_cli_end_to_end_init_load_read_change_read_flow() {
let repo = SystemRepo::initialized();
let mutation_file = insert_person_query(&repo, "system-local-init-change.gq");
let graph = SystemGraph::initialized();
let mutation_file = insert_person_query(&graph, "system-local-init-change.gq");
output_success(
cli()
.arg("load")
.arg("--data")
.arg(fixture("test.jsonl"))
.arg(repo.path()),
.arg(graph.path()),
);
let read_before = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -222,7 +244,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
let change_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(&mutation_file)
.arg("--params")
@ -235,7 +257,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
let read_after = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -246,19 +268,50 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
));
assert_eq!(read_after["row_count"], 1);
assert_eq!(read_after["rows"][0]["p.name"], "Eve");
// Inline-source variants of the same read/change flow (CLI `-e` /
// `--query-string`). Confirms that file-less invocations reach the
// engine identically, including param binding and `branch=main` defaults.
let inline_change = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(graph.path())
.arg("-e")
.arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }")
.arg("--params")
.arg(r#"{"name":"Inline","age":42}"#)
.arg("--json"),
));
assert_eq!(inline_change["branch"], "main");
assert_eq!(inline_change["query_name"], "add");
assert_eq!(inline_change["affected_nodes"], 1);
let inline_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(graph.path())
.arg("--query-string")
.arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }")
.arg("--params")
.arg(r#"{"name":"Inline"}"#)
.arg("--json"),
));
assert_eq!(inline_read["row_count"], 1);
assert_eq!(inline_read["rows"][0]["p.name"], "Inline");
assert_eq!(inline_read["rows"][0]["p.age"], 42);
}
#[test]
fn local_cli_end_to_end_branch_change_merge_flow() {
let repo = SystemRepo::loaded();
let mutation_file = insert_person_query(&repo, "system-local-change.gq");
let graph = SystemGraph::loaded();
let mutation_file = insert_person_query(&graph, "system-local-change.gq");
output_success(
cli()
.arg("branch")
.arg("create")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("--from")
.arg("main")
.arg("feature"),
@ -267,7 +320,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
let change_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(&mutation_file)
.arg("--branch")
@ -282,7 +335,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
let feature_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -301,7 +354,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
.arg("branch")
.arg("merge")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("feature")
.arg("--json"),
));
@ -310,7 +363,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
let main_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -327,7 +380,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
cli()
.arg("commit")
.arg("list")
.arg(repo.path())
.arg(graph.path())
.arg("--branch")
.arg("main")
.arg("--json"),
@ -337,8 +390,8 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
#[test]
fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let repo = SystemRepo::loaded();
let ingest_data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let ingest_data = graph.write_jsonl(
"system-local-ingest.jsonl",
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
{"type":"Person","data":{"name":"Bob","age":26}}"#,
@ -351,7 +404,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
.arg(&ingest_data)
.arg("--branch")
.arg("feature-ingest")
.arg(repo.path())
.arg(graph.path())
.arg("--json"),
));
assert_eq!(ingest_payload["branch"], "feature-ingest");
@ -364,7 +417,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let feature_snapshot = parse_stdout_json(&output_success(
cli()
.arg("snapshot")
.arg(repo.path())
.arg(graph.path())
.arg("--branch")
.arg("feature-ingest")
.arg("--json"),
@ -374,7 +427,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let zoe = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -391,7 +444,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
let bob = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -408,20 +461,20 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
#[test]
fn local_cli_export_round_trips_full_branch_graph() {
let repo = SystemRepo::loaded();
let graph = SystemGraph::loaded();
output_success(
cli()
.arg("branch")
.arg("create")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("--from")
.arg("main")
.arg("feature"),
);
let feature_data = repo.write_jsonl(
let feature_data = graph.write_jsonl(
"system-local-export-feature.jsonl",
r#"{"type":"Person","data":{"name":"Eve","age":29}}
{"edge":"Knows","from":"Alice","to":"Eve"}"#,
@ -435,53 +488,56 @@ fn local_cli_export_round_trips_full_branch_graph() {
.arg("feature")
.arg("--mode")
.arg("append")
.arg(repo.path()),
.arg(graph.path()),
);
let exported = stdout_string(&output_success(
cli()
.arg("export")
.arg(repo.path())
.arg(graph.path())
.arg("--branch")
.arg("feature")
.arg("--jsonl"),
));
let export_path = repo.write_jsonl("system-local-exported.jsonl", &exported);
let imported_repo = repo.path().parent().unwrap().join("imported-export.omni");
let export_path = graph.write_jsonl("system-local-exported.jsonl", &exported);
let imported_graph = graph.path().parent().unwrap().join("imported-export.omni");
output_success(
cli()
.arg("init")
.arg("--schema")
.arg(fixture("test.pg"))
.arg(&imported_repo),
.arg(&imported_graph),
);
output_success(
cli()
.arg("load")
.arg("--data")
.arg(&export_path)
.arg(&imported_repo),
.arg(&imported_graph),
);
assert_eq!(
snapshot_table_row_count_at(&imported_repo, "node:Person"),
snapshot_table_row_count_at(&imported_graph, "node:Person"),
5
);
assert_eq!(
snapshot_table_row_count_at(&imported_repo, "node:Company"),
snapshot_table_row_count_at(&imported_graph, "node:Company"),
2
);
assert_eq!(snapshot_table_row_count_at(&imported_repo, "edge:Knows"), 4);
assert_eq!(
snapshot_table_row_count_at(&imported_repo, "edge:WorksAt"),
snapshot_table_row_count_at(&imported_graph, "edge:Knows"),
4
);
assert_eq!(
snapshot_table_row_count_at(&imported_graph, "edge:WorksAt"),
2
);
let eve = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&imported_repo)
.arg(&imported_graph)
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -496,7 +552,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
let friends = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&imported_repo)
.arg(&imported_graph)
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -510,7 +566,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
#[test]
fn local_cli_s3_end_to_end_init_load_read_flow() {
let Some(repo_uri) = s3_test_repo_uri("cli-local") else {
let Some(graph_uri) = s3_test_graph_uri("cli-local") else {
eprintln!("skipping s3 cli test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};
@ -535,7 +591,7 @@ query:
- .
policy: {{}}
",
repo_uri
graph_uri
),
);
@ -544,14 +600,14 @@ policy: {{}}
.arg("init")
.arg("--schema")
.arg(fixture("test.pg"))
.arg(&repo_uri),
.arg(&graph_uri),
);
output_success(
cli()
.arg("load")
.arg("--data")
.arg(fixture("test.jsonl"))
.arg(&repo_uri),
.arg(&graph_uri),
);
let read = parse_stdout_json(&output_success(
@ -584,13 +640,13 @@ policy: {{}}
#[test]
fn local_cli_failed_load_keeps_target_state_unchanged() {
let repo = SystemRepo::loaded();
let bad_data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let bad_data = graph.write_jsonl(
"system-bad-load.jsonl",
r#"{"edge":"Knows","from":"Alice","to":"Missing"}"#,
);
let person_rows_before = snapshot_table_row_count(&repo, "node:Person");
let knows_rows_before = snapshot_table_row_count(&repo, "edge:Knows");
let person_rows_before = snapshot_table_row_count(&graph, "node:Person");
let knows_rows_before = snapshot_table_row_count(&graph, "edge:Knows");
let output = output_failure(
cli()
@ -599,17 +655,17 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
.arg(&bad_data)
.arg("--mode")
.arg("append")
.arg(repo.path()),
.arg(graph.path()),
);
let stderr = String::from_utf8(output.stderr).unwrap();
assert!(stderr.contains("not found") || stderr.contains("Missing"));
assert_eq!(
snapshot_table_row_count(&repo, "node:Person"),
snapshot_table_row_count(&graph, "node:Person"),
person_rows_before
);
assert_eq!(
snapshot_table_row_count(&repo, "edge:Knows"),
snapshot_table_row_count(&graph, "edge:Knows"),
knows_rows_before
);
// Failed loads leave no run record (the run lifecycle has been
@ -618,13 +674,13 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
#[test]
fn local_cli_failed_change_keeps_target_state_unchanged() {
let repo = SystemRepo::loaded();
let mutation_file = add_friend_query(&repo, "system-invalid-change.gq");
let graph = SystemGraph::loaded();
let mutation_file = add_friend_query(&graph, "system-invalid-change.gq");
let output = output_failure(
cli()
.arg("change")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(&mutation_file)
.arg("--params")
@ -636,7 +692,7 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
let friends_payload = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -652,8 +708,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
#[test]
fn local_cli_resolves_relative_query_against_config_base_dir() {
let repo = SystemRepo::loaded();
let root = repo.path().parent().unwrap();
let graph = SystemGraph::loaded();
let root = graph.path().parent().unwrap();
let config_dir = root.join("config");
let query_dir = config_dir.join("queries");
let ambient_dir = root.join("ambient");
@ -676,7 +732,7 @@ query:
- queries
policy: {{}}
",
repo.path().display()
graph.path().display()
),
);
write_query_file(
@ -730,7 +786,7 @@ query get_person($name: String) {
#[test]
fn local_cli_datetime_and_list_types_round_trip_through_load_read_and_change() {
let temp = tempfile::tempdir().unwrap();
let repo = repo_path(temp.path());
let graph = graph_path(temp.path());
let schema = temp.path().join("datatypes.pg");
let data = temp.path().join("datatypes.jsonl");
let queries = temp.path().join("datatypes.gq");
@ -805,13 +861,13 @@ query get_task($slug: String) {
"#,
);
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
let filtered = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -836,7 +892,7 @@ query get_task($slug: String) {
let insert_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -852,7 +908,7 @@ query get_task($slug: String) {
let update_payload = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -866,7 +922,7 @@ query get_task($slug: String) {
let gamma = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -893,7 +949,7 @@ query get_task($slug: String) {
#[ignore = "requires GEMINI_API_KEY and network access"]
fn local_cli_real_gemini_string_nearest_query_returns_expected_match() {
let temp = tempfile::tempdir().unwrap();
let repo = repo_path(temp.path());
let graph = graph_path(temp.path());
let schema = temp.path().join("gemini.pg");
let data = temp.path().join("gemini.jsonl");
let queries = temp.path().join("gemini.gq");
@ -935,13 +991,13 @@ query vector_search($q: String) {
"#,
);
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
let result = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&repo)
.arg(&graph)
.arg("--query")
.arg(&queries)
.arg("--name")
@ -957,7 +1013,7 @@ query vector_search($q: String) {
// The publisher CAS conflict shape is verified end-to-end at the engine
// level in
// `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
// `crates/omnigraph/tests/writes.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
// and at the HTTP boundary in
// `crates/omnigraph-server/tests/server.rs::change_conflict_returns_manifest_conflict_409`.
// A CLI-level race would be timing-dependent; with direct-publish the
@ -966,49 +1022,55 @@ query vector_search($q: String) {
#[test]
fn local_cli_policy_tooling_is_end_to_end() {
// Sanity check for the read-only policy CLI surfaces. These don't
// mutate the graph — they just parse and evaluate the policy file —
// so they don't depend on PR #4's engine-side enforcement.
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
repo.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
let validate = output_success(
cli()
.arg("policy")
.arg("validate")
.arg("--config")
.arg(&config),
// mutate the graph; they parse and evaluate the effective policy for
// named graph selections, including per-graph policy files.
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
let server_graph_config = graph.write_config(
"omnigraph-policy-server.yaml",
&local_policy_server_graph_config(&graph),
);
assert!(stdout_string(&validate).contains("policy valid:"));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
let tests = output_success(cli().arg("policy").arg("test").arg("--config").arg(&config));
assert!(stdout_string(&tests).contains("policy tests passed: 2 cases"));
for config in [&config, &server_graph_config] {
let validate = output_success(
cli()
.arg("policy")
.arg("validate")
.arg("--config")
.arg(config),
);
assert!(stdout_string(&validate).contains("policy valid:"));
let explain = output_success(
cli()
.arg("policy")
.arg("explain")
.arg("--config")
.arg(&config)
.arg("--actor")
.arg("act-bruno")
.arg("--action")
.arg("change")
.arg("--branch")
.arg("main"),
);
let explain_stdout = stdout_string(&explain);
assert!(explain_stdout.contains("decision: deny"));
assert!(explain_stdout.contains("branch: main"));
let tests = output_success(cli().arg("policy").arg("test").arg("--config").arg(config));
assert!(stdout_string(&tests).contains("policy tests passed: 2 cases"));
let explain = output_success(
cli()
.arg("policy")
.arg("explain")
.arg("--config")
.arg(config)
.arg("--actor")
.arg("act-bruno")
.arg("--action")
.arg("change")
.arg("--branch")
.arg("main"),
);
let explain_stdout = stdout_string(&explain);
assert!(explain_stdout.contains("decision: deny"));
assert!(explain_stdout.contains("branch: main"));
}
}
#[test]
fn local_cli_change_enforces_engine_layer_policy() {
// Asserts MR-722 PR #4: when `policy.file` is configured in
// `omnigraph.yaml`, the CLI loads PolicyEngine into Omnigraph and
// every direct-engine write hits `enforce(action, scope, actor)` —
// identical to what the HTTP server gets, regardless of transport.
// Asserts MR-722 PR #4: when the selected graph has a configured
// policy file, the CLI loads PolicyEngine into Omnigraph and every
// direct-engine write hits `enforce(action, scope, actor)` — identical
// to what the HTTP server gets, regardless of transport.
//
// Three cases, each discriminating:
//
@ -1022,10 +1084,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
// 3. Policy installed, `--as act-ragnor`, change on main →
// Cedar permits (admins-write rule). Write succeeds and the
// inserted row is readable.
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&repo, "system-local-policy-change.gq");
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&graph, "system-local-policy-change.gq");
// Case 1: policy configured, no actor threaded → footgun guard.
let no_actor = output_failure(
@ -1088,7 +1150,7 @@ fn local_cli_change_enforces_engine_layer_policy() {
let verify = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -1101,6 +1163,32 @@ fn local_cli_change_enforces_engine_layer_policy() {
assert_eq!(verify["rows"][0]["p.name"], "RagnorOnMain");
}
// Pins that a graph addressed by an explicit URI is not governed by the
// policy attached to the config's default graph selection.
//
// The same config/policy pair is used by
// `local_cli_policy_tooling_is_end_to_end`, where `policy explain` for
// `act-bruno` + `change` on `main` reports `decision: deny`. Here the same
// actor issues a `change` while pointing at the graph by URI, and the write
// is expected to succeed and be attributed to `act-bruno`.
//
// NOTE(review): the test name says "positional", but the URI is passed via
// the `--uri` flag rather than as a positional argument — confirm which
// form this test is meant to cover.
#[test]
fn local_cli_positional_uri_does_not_inherit_default_graph_policy() {
let graph = SystemGraph::loaded();
// Install a config that wires `policy.yaml` to the default graph, then
// the policy file itself.
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&graph, "system-local-policy-positional.gq");
// `--config` is still supplied, so the policy-bearing config is loaded;
// only the graph selection differs (`--uri` instead of the named graph).
let allowed = parse_stdout_json(&output_success(
cli()
.arg("--as")
.arg("act-bruno")
.arg("change")
.arg("--config")
.arg(&config)
.arg("--uri")
.arg(graph.path())
.arg("--query")
.arg(&mutation_file)
.arg("--params")
.arg(r#"{"name":"PositionalUriBruno","age":4}"#)
.arg("--json"),
));
// One row inserted, and the actor is still threaded through to the result.
assert_eq!(allowed["affected_nodes"], 1);
assert_eq!(allowed["actor_id"], "act-bruno");
}
// ─── MR-722 PR A: CLI×writer matrix ───────────────────────────────────────
//
// The change writer is covered above by `local_cli_change_enforces_engine_layer_policy`.
@ -1114,10 +1202,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
#[test]
fn local_cli_load_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let data = graph.write_jsonl(
"system-local-policy-load.jsonl",
r#"{"type":"Person","data":{"name":"LoadPolicy","age":11}}"#,
);
@ -1158,10 +1246,10 @@ fn local_cli_load_enforces_engine_layer_policy() {
#[test]
fn local_cli_ingest_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let data = graph.write_jsonl(
"system-local-policy-ingest.jsonl",
r#"{"type":"Person","data":{"name":"IngestPolicy","age":12}}"#,
);
@ -1211,16 +1299,19 @@ fn local_cli_ingest_enforces_engine_layer_policy() {
#[test]
fn local_cli_schema_apply_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
// Additive: add a nullable property; SDK-compatible with the fixture
// schema. Uses the schema-apply scope (TargetBranch("main")).
let new_schema = std::fs::read_to_string(fixture("test.pg"))
.unwrap()
.replace(" age: I32?\n}", " age: I32?\n nickname: String?\n}");
let schema_path = repo.path().join("policy-additive.pg");
.replace(
" age: I32?\n}",
" age: I32?\n nickname: String?\n}",
);
let schema_path = graph.path().join("policy-additive.pg");
std::fs::write(&schema_path, &new_schema).unwrap();
let denied = output_failure(
@ -1256,11 +1347,67 @@ fn local_cli_schema_apply_enforces_engine_layer_policy() {
assert_eq!(allowed["applied"], true);
}
// `schema apply` must refuse a schema change that would break a stored
// query registered in the config, and must leave the published schema
// untouched when it does.
//
// Setup: a stored query `find_person` that returns `$p.age`, registered
// under the `local` graph's `queries:` map. The candidate schema renames
// `age` to `years` (with `@rename_from("age")`), so the stored query's
// `$p.age` projection no longer matches the candidate schema.
#[test]
fn local_cli_schema_apply_rejects_stored_query_breakage_before_publish() {
let graph = SystemGraph::loaded();
graph.write_query(
"stored-find-person.gq",
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
);
// NOTE(review): the YAML literal below shows no nesting indentation in
// this view (e.g. `local:` under `graphs:`); presumably whitespace was
// lost in extraction — confirm against the real file.
let config = graph.write_config(
"omnigraph-stored-query-schema.yaml",
&format!(
"\
graphs:
local:
uri: {}
queries:
find_person:
file: ./stored-find-person.gq
cli:
graph: local
branch: main
query:
roots:
- .
policy: {{}}
",
yaml_string(&graph.path().to_string_lossy())
),
);
// Candidate schema: the fixture schema with `age` renamed to `years`.
let renamed_schema = std::fs::read_to_string(fixture("test.pg"))
.unwrap()
.replace("age: I32?", "years: I32? @rename_from(\"age\")");
let schema_path = graph.write_file("stored-query-breaks.pg", &renamed_schema);
let rejected = output_failure(
cli()
.arg("schema")
.arg("apply")
.arg("--config")
.arg(&config)
.arg("--schema")
.arg(&schema_path)
.arg("--json"),
);
// The failure must name the offending stored query and identify itself
// as a schema-check error.
let stderr = String::from_utf8_lossy(&rejected.stderr);
assert!(
stderr.contains("find_person") && stderr.contains("schema check"),
"schema apply should reject the stored-query breakage before publish; stderr: {stderr}"
);
// "Before publish": the schema reported afterwards is still the original
// one (`age` present, `years` absent).
let schema = stdout_string(&output_success(
cli().arg("schema").arg("show").arg("--config").arg(&config),
));
assert!(schema.contains("age: I32?"));
assert!(!schema.contains("years: I32?"));
}
#[test]
fn local_cli_branch_create_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let denied = output_failure(
cli()
@ -1296,9 +1443,9 @@ fn local_cli_branch_create_enforces_engine_layer_policy() {
#[test]
fn local_cli_branch_delete_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
// Pre-create the branch as ragnor so there's something to delete.
output_success(
@ -1344,9 +1491,9 @@ fn local_cli_branch_delete_enforces_engine_layer_policy() {
#[test]
fn local_cli_branch_merge_enforces_engine_layer_policy() {
let repo = SystemRepo::loaded();
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let graph = SystemGraph::loaded();
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
graph.write_config("policy.yaml", POLICY_E2E_YAML);
// Pre-create a feature branch as ragnor (admins-branch-ops covers it).
output_success(
@ -1400,7 +1547,7 @@ fn local_cli_branch_merge_enforces_engine_layer_policy() {
// pin the precedence rule that `main.rs::resolve_cli_actor` implements:
// `--as` flag > `cli.actor` from `omnigraph.yaml` > None.
fn local_policy_config_with_actor(repo: &SystemRepo, actor: &str) -> String {
fn local_policy_config_with_actor(graph: &SystemGraph, actor: &str) -> String {
// Mirrors `local_policy_config` but adds `cli.actor` so the
// config-only precedence path is exercised. The `cli:` block
// already has `graph` and `branch`; appending `actor` here.
@ -1411,6 +1558,8 @@ project:
graphs:
local:
uri: {}
policy:
file: ./policy.yaml
cli:
graph: local
branch: main
@ -1418,10 +1567,8 @@ cli:
query:
roots:
- .
policy:
file: ./policy.yaml
",
yaml_string(&repo.path().to_string_lossy()),
yaml_string(&graph.path().to_string_lossy()),
actor,
)
}
@ -1431,13 +1578,13 @@ fn local_cli_actor_from_config_used_when_no_flag() {
// cli.actor: act-ragnor in omnigraph.yaml, no --as flag → change
// permitted via admins-write rule. Proves the config-only path
// works; previously the only proof was structural.
let repo = SystemRepo::loaded();
let config = repo.write_config(
let graph = SystemGraph::loaded();
let config = graph.write_config(
"omnigraph-policy.yaml",
&local_policy_config_with_actor(&repo, "act-ragnor"),
&local_policy_config_with_actor(&graph, "act-ragnor"),
);
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&repo, "system-local-cli-actor.gq");
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&graph, "system-local-cli-actor.gq");
let allowed = parse_stdout_json(&output_success(
cli()
@ -1459,13 +1606,13 @@ fn local_cli_actor_flag_overrides_config_actor() {
// cli.actor: act-ragnor in config + --as act-bruno on CLI → change
// denied. Flag wins per the precedence rule. Without this test, a
// future change that reverses precedence would ride through silently.
let repo = SystemRepo::loaded();
let config = repo.write_config(
let graph = SystemGraph::loaded();
let config = graph.write_config(
"omnigraph-policy.yaml",
&local_policy_config_with_actor(&repo, "act-ragnor"),
&local_policy_config_with_actor(&graph, "act-ragnor"),
);
repo.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&repo, "system-local-cli-actor-override.gq");
graph.write_config("policy.yaml", POLICY_E2E_YAML);
let mutation_file = insert_person_query(&graph, "system-local-cli-actor-override.gq");
let denied = output_failure(
cli()

View file

@ -37,11 +37,22 @@ rules:
target_branch_scope: protected
"#;
// Server-scoped policy used by `graphs_list_against_multi_graph_server`:
// a single `admins` group containing `act-admin`, and one rule allowing
// that group the `graph_list` action.
// NOTE(review): the YAML body shows no nesting indentation in this view;
// presumably whitespace was lost in extraction — confirm against the real file.
const GRAPH_LIST_SERVER_POLICY_YAML: &str = r#"
version: 1
groups:
admins: [act-admin]
rules:
- id: admins-can-list-graphs
allow:
actors: { group: admins }
actions: [graph_list]
"#;
/// Render `value` as a YAML single-quoted scalar.
///
/// Inside a single-quoted YAML scalar the only character that needs
/// escaping is the single quote itself, which is written doubled (`''`).
fn yaml_string(value: &str) -> String {
    let escaped = value.replace('\'', "''");
    format!("'{escaped}'")
}
fn remote_policy_server_config(repo: &SystemRepo) -> String {
fn remote_policy_server_config(graph: &SystemGraph) -> String {
format!(
"\
project:
@ -49,12 +60,12 @@ project:
graphs:
local:
uri: {}
policy:
file: ./policy.yaml
server:
graph: local
policy:
file: ./policy.yaml
",
yaml_string(&repo.path().to_string_lossy())
yaml_string(&graph.path().to_string_lossy())
)
}
@ -81,10 +92,10 @@ auth:
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_server_and_cli_end_to_end_flow() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = graph.write_query(
"system-remote-change.gq",
r#"
query insert_person($name: String, $age: I32) {
@ -105,7 +116,7 @@ query insert_person($name: String, $age: I32) {
assert_eq!(health["status"], "ok");
let local_snapshot = parse_stdout_json(&output_success(
cli().arg("snapshot").arg(repo.path()).arg("--json"),
cli().arg("snapshot").arg(graph.path()).arg("--json"),
));
let snapshot = parse_stdout_json(&output_success(
cli()
@ -120,7 +131,7 @@ query insert_person($name: String, $age: I32) {
let local_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -180,7 +191,7 @@ query insert_person($name: String, $age: I32) {
let local_verify = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(repo.path())
.arg(graph.path())
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -192,6 +203,67 @@ query insert_person($name: String, $age: I32) {
assert_eq!(local_verify["row_count"], 1);
assert_eq!(local_verify["rows"][0]["p.name"], "Mina");
// CLI `-e` over the HTTP transport (--config points at remote server).
// Confirms inline source survives the remote-execution path identically
// to file-based queries, and exercises `POST /query` end-to-end via the
// change-then-read round trip we just established.
let inline_remote_read = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg("--config")
.arg(&config)
.arg("-e")
.arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }")
.arg("--params")
.arg(r#"{"name":"Mina"}"#)
.arg("--json"),
));
assert_eq!(inline_remote_read["row_count"], 1);
assert_eq!(inline_remote_read["rows"][0]["p.name"], "Mina");
let inline_remote_change = parse_stdout_json(&output_success(
cli()
.arg("change")
.arg("--config")
.arg(&config)
.arg("--query-string")
.arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }")
.arg("--params")
.arg(r#"{"name":"Inline","age":42}"#)
.arg("--json"),
));
assert_eq!(inline_remote_change["affected_nodes"], 1);
// `POST /query` happy path directly: a hand-rolled HTTP body using the
// new clean field names.
let http_query = client
.post(format!("{}/query", server.base_url))
.json(&json!({
"branch": "main",
"query": "query find($name: String) { match { $p: Person { name: $name } } return { $p.name } }",
"params": { "name": "Inline" }
}))
.send()
.unwrap()
.error_for_status()
.unwrap()
.json::<serde_json::Value>()
.unwrap();
assert_eq!(http_query["row_count"], 1);
assert_eq!(http_query["rows"][0]["p.name"], "Inline");
// `POST /query` rejects mutations with 400.
let http_query_mutation = client
.post(format!("{}/query", server.base_url))
.json(&json!({
"branch": "main",
"query": "query bad($name: String, $age: I32) { insert Person { name: $name, age: $age } }",
"params": { "name": "Nope", "age": 1 }
}))
.send()
.unwrap();
assert_eq!(http_query_mutation.status(), reqwest::StatusCode::BAD_REQUEST);
// `run publish` / `run list` removed. Direct-to-target writes
// already landed via the change call above; the commit graph is now
// the audit surface (verified separately by `commit list`).
@ -199,11 +271,11 @@ query insert_person($name: String, $age: I32) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_schema_apply_via_cli_updates_repo() {
let repo = SystemRepo::initialized();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = repo.write_file(
fn remote_schema_apply_via_cli_updates_graph() {
let graph = SystemGraph::initialized();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = graph.write_file(
"next.pg",
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
" age: I32?\n}",
@ -225,7 +297,7 @@ fn remote_schema_apply_via_cli_updates_repo() {
let db = tokio::runtime::Runtime::new()
.unwrap()
.block_on(Omnigraph::open(repo.path().to_string_lossy().as_ref()))
.block_on(Omnigraph::open(graph.path().to_string_lossy().as_ref()))
.unwrap();
assert!(
db.catalog().node_types["Person"]
@ -237,10 +309,10 @@ fn remote_schema_apply_via_cli_updates_repo() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_schema_apply_rejects_unsupported_plan() {
let repo = SystemRepo::initialized();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let breaking_schema = repo.write_file(
let graph = SystemGraph::initialized();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let breaking_schema = graph.write_file(
"breaking.pg",
&fs::read_to_string(fixture("test.pg"))
.unwrap()
@ -263,7 +335,7 @@ fn remote_schema_apply_rejects_unsupported_plan() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_schema_apply_rejects_when_non_main_branch_exists() {
let repo = SystemRepo::initialized();
let graph = SystemGraph::initialized();
output_success(
cli()
.arg("branch")
@ -271,12 +343,12 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
.arg("--from")
.arg("main")
.arg("--uri")
.arg(repo.path())
.arg(graph.path())
.arg("feature"),
);
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = repo.write_file(
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let next_schema = graph.write_file(
"next.pg",
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
" age: I32?\n}",
@ -294,16 +366,16 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
.arg(&next_schema),
);
let stderr = String::from_utf8_lossy(&output.stderr);
assert!(stderr.contains("schema apply requires a repo with only main"));
assert!(stderr.contains("schema apply requires a graph with only main"));
}
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_read_preserves_projection_order_in_json_and_csv() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ordered_query = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ordered_query = graph.write_query(
"ordered-remote.gq",
r#"
query ordered_person($name: String) {
@ -358,10 +430,10 @@ query ordered_person($name: String) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_branch_create_list_merge_flow() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = graph.write_query(
"system-remote-branch-change.gq",
r#"
query insert_person($name: String, $age: I32) {
@ -455,9 +527,9 @@ query insert_person($name: String, $age: I32) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_branch_delete_removes_branch() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
parse_stdout_json(&output_success(
cli()
@ -496,10 +568,10 @@ fn remote_branch_delete_removes_branch() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_export_round_trips_full_branch_graph() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = repo.write_query(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let mutation_file = graph.write_query(
"system-remote-export-change.gq",
r#"
query insert_person($name: String, $age: I32) {
@ -563,8 +635,8 @@ query add_friend($from: String, $to: String) {
.arg("feature")
.arg("--jsonl"),
));
let export_path = repo.write_jsonl("system-remote-exported.jsonl", &exported);
let imported_repo = repo
let export_path = graph.write_jsonl("system-remote-exported.jsonl", &exported);
let imported_graph = graph
.path()
.parent()
.unwrap()
@ -575,18 +647,18 @@ query add_friend($from: String, $to: String) {
.arg("init")
.arg("--schema")
.arg(fixture("test.pg"))
.arg(&imported_repo),
.arg(&imported_graph),
);
output_success(
cli()
.arg("load")
.arg("--data")
.arg(&export_path)
.arg(&imported_repo),
.arg(&imported_graph),
);
let snapshot = parse_stdout_json(&output_success(
cli().arg("snapshot").arg(&imported_repo).arg("--json"),
cli().arg("snapshot").arg(&imported_graph).arg("--json"),
));
assert_eq!(
snapshot["tables"]
@ -610,7 +682,7 @@ query add_friend($from: String, $to: String) {
let eve = parse_stdout_json(&output_success(
cli()
.arg("read")
.arg(&imported_repo)
.arg(&imported_graph)
.arg("--query")
.arg(fixture("test.gq"))
.arg("--name")
@ -626,10 +698,10 @@ query add_friend($from: String, $to: String) {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ingest_data = repo.write_jsonl(
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let ingest_data = graph.write_jsonl(
"system-remote-ingest.jsonl",
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
{"type":"Person","data":{"name":"Bob","age":26}}"#,
@ -686,9 +758,9 @@ fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_ingest_reuses_existing_branch_and_merges_updates() {
let repo = SystemRepo::loaded();
let server = repo.spawn_server();
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
let graph = SystemGraph::loaded();
let server = graph.spawn_server();
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
output_success(
cli()
@ -701,7 +773,7 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
.arg("feature-ingest"),
);
let ingest_data = repo.write_jsonl(
let ingest_data = graph.write_jsonl(
"system-remote-ingest-merge.jsonl",
r#"{"type":"Person","data":{"name":"Bob","age":26}}
{"type":"Person","data":{"name":"Zoe","age":33}}"#,
@ -767,23 +839,23 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn remote_policy_enforces_branch_first_cli_workflow() {
let repo = SystemRepo::loaded();
let graph = SystemGraph::loaded();
let server_config =
repo.write_config("server-policy.yaml", &remote_policy_server_config(&repo));
repo.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
let server = repo.spawn_server_with_config_env(
graph.write_config("server-policy.yaml", &remote_policy_server_config(&graph));
graph.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
let server = graph.spawn_server_with_config_env(
&server_config,
&[(
"OMNIGRAPH_SERVER_BEARER_TOKENS_JSON",
r#"{"act-bruno":"team-token","act-ragnor":"admin-token"}"#,
)],
);
let client_config = repo.write_config(
let client_config = graph.write_config(
"omnigraph-policy.yaml",
&remote_policy_client_config(&server.base_url),
);
repo.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
let mutation_file = repo.write_query(
graph.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
let mutation_file = graph.write_query(
"system-remote-policy-change.gq",
r#"
query insert_person($name: String, $age: I32) {
@ -888,3 +960,112 @@ query insert_person($name: String, $age: I32) {
assert_eq!(verify["row_count"], 1);
assert_eq!(verify["rows"][0]["p.name"], "PolicyRemote");
}
// ─── MR-668 PR 8 — omnigraph graphs list end-to-end ────────────────────────
/// Multi-graph server + CLI `omnigraph graphs list` end-to-end.
///
/// Steps:
/// 1. Init a graph `alpha` on disk and write an `omnigraph.yaml`
/// whose `graphs:` map references it, plus a `server-policy.yaml`
/// allowing `act-admin` the `graph_list` action.
/// 2. Spawn the server with that config and a bearer-token map for
/// `act-admin` in `OMNIGRAPH_SERVER_BEARER_TOKENS_JSON`.
/// 3. Write a client config whose `dev` graph points at the server's
/// base URL and takes its token from `.env.omni`.
/// 4. `omnigraph graphs list --json` — expect exactly `alpha`.
///
/// Ignored by default — spawning servers needs loopback socket
/// permissions some sandboxes lack.
#[test]
#[ignore = "requires loopback socket permissions in sandboxed runners"]
fn graphs_list_against_multi_graph_server() {
let cfg_dir = tempfile::tempdir().unwrap();
let schema_path = fixture("test.pg");
// Init `alpha` on disk.
let alpha_uri = cfg_dir.path().join("alpha.omni");
// `Omnigraph::init` is async; drive it on a throwaway runtime.
tokio::runtime::Runtime::new().unwrap().block_on(async {
Omnigraph::init(
alpha_uri.to_str().unwrap(),
&fs::read_to_string(&schema_path).unwrap(),
)
.await
.unwrap();
});
// Server-level policy file, referenced as `./server-policy.yaml` by the
// server config written below (same directory).
fs::write(
cfg_dir.path().join("server-policy.yaml"),
GRAPH_LIST_SERVER_POLICY_YAML,
)
.unwrap();
// Server config with `graphs:` map and no `server.graph` selector
// — multi mode (rule 4 of the inference matrix). `GET /graphs` is a
// server-scoped action, so the success path needs an explicit server
// policy and bearer token.
// NOTE(review): the YAML literals in this test show no nesting
// indentation in this view; presumably whitespace was lost in
// extraction — confirm against the real file.
let server_config_path = cfg_dir.path().join("omnigraph.yaml");
fs::write(
&server_config_path,
format!(
"\
server:
policy:
file: ./server-policy.yaml
graphs:
alpha:
uri: {}
",
yaml_string(&alpha_uri.to_string_lossy())
),
)
.unwrap();
// The token map pairs actor `act-admin` (the only member of the policy's
// `admins` group) with the token the client sends below.
let server = spawn_server_with_config_env(
&server_config_path,
&[(
"OMNIGRAPH_SERVER_BEARER_TOKENS_JSON",
r#"{"act-admin":"admin-token"}"#,
)],
);
// Client config — `cli.graph: dev` selects the `dev` entry, whose `uri`
// is `server.base_url`; the bearer token is read from the
// `GRAPH_LIST_TOKEN` variable supplied by `./.env.omni`.
let client_config_path = cfg_dir.path().join("client.yaml");
fs::write(
&client_config_path,
format!(
"\
graphs:
dev:
uri: {}
bearer_token_env: GRAPH_LIST_TOKEN
cli:
graph: dev
auth:
env_file: ./.env.omni
",
yaml_string(&server.base_url)
),
)
.unwrap();
fs::write(
cfg_dir.path().join(".env.omni"),
"GRAPH_LIST_TOKEN=admin-token\n",
)
.unwrap();
// `graphs list` lists `alpha`.
let payload = parse_stdout_json(&output_success(
cli()
.arg("graphs")
.arg("list")
.arg("--config")
.arg(&client_config_path)
.arg("--json"),
));
// Collect every `graph_id` so the assertion checks the exact set, not
// just the presence of `alpha`.
let ids: Vec<&str> = payload["graphs"]
.as_array()
.unwrap()
.iter()
.map(|g| g["graph_id"].as_str().unwrap())
.collect();
assert_eq!(ids, vec!["alpha"]);
// Explicitly release the server handle once the assertions are done;
// presumably its Drop impl stops the spawned process (not visible here).
drop(server);
}

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-compiler"
version = "0.4.2"
version = "0.6.1"
edition = "2024"
description = "Schema/query compiler for Omnigraph. Zero Lance dependency."
license = "MIT"

View file

@ -150,9 +150,7 @@ impl SchemaMigrationStep {
/// non-`UnsupportedChange` variant).
pub fn diagnostic(&self) -> Option<&'static crate::lint::DiagnosticCode> {
match self {
Self::UnsupportedChange {
code: Some(c), ..
} => crate::lint::lookup(c),
Self::UnsupportedChange { code: Some(c), .. } => crate::lint::lookup(c),
_ => None,
}
}
@ -1037,10 +1035,7 @@ node Person {
.unwrap();
let plan = plan_schema_migration(&accepted, &desired).unwrap();
assert!(
plan.supported,
"drop-type plan must be supported: {plan:?}"
);
assert!(plan.supported, "drop-type plan must be supported: {plan:?}");
assert!(
plan.steps.iter().any(|step| matches!(
step,
@ -1182,8 +1177,7 @@ node Person @description("new") {
for step in steps {
let json = serde_json::to_string(&step).expect("serialize");
let round_trip: SchemaMigrationStep =
serde_json::from_str(&json).expect("deserialize");
let round_trip: SchemaMigrationStep = serde_json::from_str(&json).expect("deserialize");
assert_eq!(step, round_trip, "round-trip mismatch on {json}");
}
}

View file

@ -271,9 +271,7 @@ fn lower_clauses(
.traversals
.iter()
.find(|rt| {
rt.src == traversal.src
&& rt.dst == traversal.dst
&& rt.edge_type == edge.name
rt.src == traversal.src && rt.dst == traversal.dst && rt.edge_type == edge.name
})
.map(|rt| rt.direction)
.unwrap_or(Direction::Out);

View file

@ -205,12 +205,8 @@ insert Knows { from: $name, to: $friend }
let ir = lower_mutation_query(&qf.queries[0]).unwrap();
assert_eq!(ir.ops.len(), 2);
assert!(
matches!(&ir.ops[0], MutationOpIR::Insert { type_name, .. } if type_name == "Person")
);
assert!(
matches!(&ir.ops[1], MutationOpIR::Insert { type_name, .. } if type_name == "Knows")
);
assert!(matches!(&ir.ops[0], MutationOpIR::Insert { type_name, .. } if type_name == "Person"));
assert!(matches!(&ir.ops[1], MutationOpIR::Insert { type_name, .. } if type_name == "Knows"));
}
/// Destination binding is deferred: NodeScan + Expand + Filter (no cross-join).

View file

@ -18,9 +18,9 @@ pub use catalog::schema_ir::{
pub use catalog::schema_plan::{
DropMode, SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, plan_schema_migration,
};
pub use lint::{DiagnosticCode, Family, SafetyTier, Severity};
pub use ir::ParamMap;
pub use ir::lower::{lower_mutation_query, lower_query};
pub use lint::{DiagnosticCode, Family, SafetyTier, Severity};
pub use query::ast::Literal;
pub use query::lint::{
QueryLintFinding, QueryLintOutput, QueryLintQueryKind, QueryLintQueryResult,

View file

@ -116,7 +116,13 @@ pub const ALL_CODES: &[DiagnosticCode] = &[
];
/// Codes actually emitted by the planner in v0 (i.e. not reserved).
pub const EMITTED_IN_V0: &[&str] = &["OG-DS-102", "OG-DS-103", "OG-DS-104", "OG-MF-103", "OG-MF-106"];
pub const EMITTED_IN_V0: &[&str] = &[
"OG-DS-102",
"OG-DS-103",
"OG-DS-104",
"OG-MF-103",
"OG-MF-106",
];
/// Look up a code by its string identifier.
pub fn lookup(code: &str) -> Option<&'static DiagnosticCode> {

View file

@ -24,5 +24,5 @@
pub mod codes;
pub mod diagnostic;
pub use codes::{lookup, DiagnosticCode, ALL_CODES};
pub use codes::{ALL_CODES, DiagnosticCode, lookup};
pub use diagnostic::{Family, SafetyTier, Severity};

View file

@ -38,7 +38,7 @@ pub enum QueryLintQueryKind {
#[serde(rename_all = "lowercase")]
pub enum QueryLintSchemaSourceKind {
File,
Repo,
Graph,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
@ -59,9 +59,9 @@ impl QueryLintSchemaSource {
}
}
pub fn repo(uri: impl Into<String>) -> Self {
pub fn graph(uri: impl Into<String>) -> Self {
Self {
kind: QueryLintSchemaSourceKind::Repo,
kind: QueryLintSchemaSourceKind::Graph,
path: None,
uri: Some(uri.into()),
}

View file

@ -137,12 +137,11 @@ fn parse_query_decl(pair: pest::iterators::Pair<Rule>) -> Result<QueryDecl> {
Rule::mutation_body => {
for mutation_pair in body.into_inner() {
if let Rule::mutation_stmt = mutation_pair.as_rule() {
let stmt =
mutation_pair.into_inner().next().ok_or_else(|| {
NanoError::Parse(
"mutation statement cannot be empty".to_string(),
)
})?;
let stmt = mutation_pair.into_inner().next().ok_or_else(|| {
NanoError::Parse(
"mutation statement cannot be empty".to_string(),
)
})?;
mutations.push(parse_mutation_stmt(stmt)?);
}
}

View file

@ -271,9 +271,9 @@ age: I32?
match &schema.declarations[0] {
SchemaDecl::Node(n) => {
assert!(
n.constraints.iter().any(
|c| matches!(c, Constraint::Range { property, .. } if property == "age")
)
n.constraints
.iter()
.any(|c| matches!(c, Constraint::Range { property, .. } if property == "age"))
);
}
_ => panic!("expected Node"),

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-policy"
version = "0.4.2"
version = "0.6.1"
edition = "2024"
description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum."
license = "MIT"

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-server"
version = "0.4.2"
version = "0.6.1"
edition = "2024"
description = "HTTP server for the Omnigraph graph database."
license = "MIT"
@ -19,9 +19,9 @@ default = []
aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]
[dependencies]
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
axum = { workspace = true }
clap = { workspace = true }
color-eyre = { workspace = true }
@ -37,7 +37,10 @@ futures = { workspace = true }
sha2 = { workspace = true }
subtle = { workspace = true }
async-trait = { workspace = true }
arc-swap = { workspace = true }
dashmap = "6"
regex = { workspace = true }
thiserror = { workspace = true }
aws-config = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio", "credentials-process", "sso"] }
aws-sdk-secretsmanager = { version = "1", optional = true, default-features = false, features = ["rustls", "rt-tokio"] }

View file

@ -199,8 +199,8 @@ async fn drive_light_actor(
let mut other = 0usize;
for op_idx in 0..ops {
let request_body = ChangeRequest {
query_source: "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}".to_string(),
query_name: Some("insert_person".to_string()),
query: "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}".to_string(),
name: Some("insert_person".to_string()),
params: Some(serde_json::json!({
"name": format!("light-{actor_idx}-{op_idx}"),
"age": op_idx as i32,
@ -259,10 +259,10 @@ async fn main() {
}
let temp = tempfile::tempdir().expect("tempdir");
let repo = temp.path().join("bench.omni");
Omnigraph::init(repo.to_str().unwrap(), SCHEMA)
let graph = temp.path().join("bench.omni");
Omnigraph::init(graph.to_str().unwrap(), SCHEMA)
.await
.expect("init repo");
.expect("init graph");
// Build bearer tokens: one for the heavy actor + one per light actor.
let mut tokens: Vec<(String, String)> =
@ -270,21 +270,17 @@ async fn main() {
for i in 0..args.light_actors {
tokens.push((format!("act-light-{i}"), format!("light-token-{i}")));
}
let db = Omnigraph::open(repo.to_str().unwrap())
let db = Omnigraph::open(graph.to_str().unwrap())
.await
.expect("open repo");
.expect("open graph");
// Construct a custom WorkloadController with the requested caps and
// pass it through `AppState::new_with_workload`. Avoids the
// `unsafe { std::env::set_var(...) }` antipattern that violates
// `setenv`'s thread-safety precondition once the multi-thread tokio
// runtime is up.
let workload = WorkloadController::new(args.inflight_cap, args.byte_cap);
let state = AppState::new_with_workload(
repo.to_string_lossy().to_string(),
db,
tokens,
workload,
);
let state =
AppState::new_with_workload(graph.to_string_lossy().to_string(), db, tokens, workload);
let app = build_app(state);
eprintln!(

View file

@ -121,8 +121,8 @@ async fn drive_actor(
for op_idx in 0..ops {
let table_idx = pick_table(actor_idx, op_idx, mode, num_tables);
let request_body = ChangeRequest {
query_source: build_query_source(table_idx),
query_name: Some("insert_item".to_string()),
query: build_query_source(table_idx),
name: Some("insert_item".to_string()),
params: Some(serde_json::json!({
"name": format!("a{actor_idx}_o{op_idx}"),
"value": op_idx as i32,
@ -152,7 +152,9 @@ async fn drive_actor(
errors += 1;
// Drain body for logging on the first few failures.
if errors <= 3 {
let body = to_bytes(response.into_body(), 64 * 1024).await.unwrap_or_default();
let body = to_bytes(response.into_body(), 64 * 1024)
.await
.unwrap_or_default();
eprintln!(
"actor {actor_idx} op {op_idx} status {status} body {}",
String::from_utf8_lossy(&body)
@ -173,13 +175,13 @@ async fn main() {
}
let temp = tempfile::tempdir().expect("tempdir");
let repo = temp.path().join("bench.omni");
let graph = temp.path().join("bench.omni");
let schema = build_schema(args.tables);
Omnigraph::init(repo.to_str().unwrap(), &schema)
Omnigraph::init(graph.to_str().unwrap(), &schema)
.await
.expect("init repo");
.expect("init graph");
let state = AppState::open(repo.to_string_lossy().to_string())
let state = AppState::open(graph.to_string_lossy().to_string())
.await
.expect("open AppState");
let app = build_app(state);

View file

@ -1,8 +1,11 @@
use omnigraph::db::{GraphCommit, MergeOutcome, ReadTarget, SchemaApplyResult, Snapshot};
use omnigraph::error::{MergeConflict, MergeConflictKind};
use omnigraph::loader::{IngestResult, LoadMode};
use crate::queries::StoredQuery;
use omnigraph_compiler::SchemaMigrationStep;
use omnigraph_compiler::query::ast::Param;
use omnigraph_compiler::result::QueryResult;
use omnigraph_compiler::types::{PropType, ScalarType};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use utoipa::{IntoParams, ToSchema};
@ -235,7 +238,9 @@ pub struct CommitListOutput {
pub struct ReadRequest {
/// GQ query source. May declare one or more named queries; pick one with
/// `query_name` if there is more than one.
#[schema(example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}")]
#[schema(
example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}"
)]
pub query_source: String,
/// Name of the query to run when `query_source` declares multiple. Optional
/// when only one query is declared.
@ -248,25 +253,219 @@ pub struct ReadRequest {
pub snapshot: Option<String>,
}
/// Inline read-query request for `POST /query`.
///
/// Friendlier-named alternative to [`ReadRequest`] for ad-hoc reads and
/// AI-agent integration. Mutations are rejected with 400 — use `POST
/// /mutate` (or its deprecated alias `POST /change`) for write queries.
/// Field names are deliberately short (`query`, `name`) to match the GQ
/// keyword and the CLI `-e` flag.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct QueryRequest {
/// GQ read-query source. May declare one or more named queries; pick one
/// with `name` when more than one is declared. Mutations
/// (`insert`/`update`/`delete`) get 400 — use `POST /mutate` (or its
/// deprecated alias `POST /change`) instead.
#[schema(example = "query get_person($name: String) {\n match {\n $p: Person { name: $name }\n }\n return { $p.name, $p.age }\n}")]
pub query: String,
/// Name of the query to run when `query` declares multiple. Optional when
/// only one query is declared.
pub name: Option<String>,
/// JSON object whose keys match the query's declared parameters.
pub params: Option<Value>,
/// Branch to read from. Mutually exclusive with `snapshot`. Defaults to `main`.
pub branch: Option<String>,
/// Snapshot id to read from. Mutually exclusive with `branch`.
pub snapshot: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ChangeRequest {
/// GQ mutation source containing `insert`, `update`, or `delete` statements.
/// May declare multiple named mutations; pick one with `query_name`.
#[schema(example = "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}")]
pub query_source: String,
/// Name of the mutation to run when `query_source` declares multiple.
pub query_name: Option<String>,
/// May declare multiple named mutations; pick one with `name`.
///
/// Accepts the legacy field name `query_source` as a deserialization alias.
#[schema(
example = "query insert_person($name: String, $age: I32) {\n insert Person { name: $name, age: $age }\n}"
)]
#[serde(alias = "query_source")]
pub query: String,
/// Name of the mutation to run when `query` declares multiple.
///
/// Accepts the legacy field name `query_name` as a deserialization alias.
#[serde(default, alias = "query_name")]
pub name: Option<String>,
/// JSON object whose keys match the mutation's declared parameters.
#[serde(default)]
pub params: Option<Value>,
/// Target branch. Defaults to `main`.
#[serde(default)]
pub branch: Option<String>,
}
/// Body for `POST /queries/{name}` — invokes the server-side stored query
/// named in the path. The query source and name come from the registry,
/// never the body; only the runtime inputs are supplied here.
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
pub struct InvokeStoredQueryRequest {
/// JSON object whose keys match the stored query's declared parameters.
#[serde(default)]
pub params: Option<Value>,
/// Branch to run against. Defaults to `main`; for a stored mutation the
/// write targets this branch.
#[serde(default)]
pub branch: Option<String>,
/// Snapshot id to read from (read queries only — rejected for a stored
/// mutation). Mutually exclusive with `branch`.
#[serde(default)]
pub snapshot: Option<String>,
}
/// Response for `POST /queries/{name}`: the read envelope for a stored
/// read, or the mutation envelope for a stored mutation. Serialized
/// **untagged**, so the wire shape is exactly [`ReadOutput`] or
/// [`ChangeOutput`] — classification follows the stored query, not a
/// wrapper field.
///
/// Only `Serialize` is derived (no `Deserialize`): this type is written
/// by the server, never parsed back into this enum.
#[derive(Debug, Serialize, ToSchema)]
#[serde(untagged)]
pub enum InvokeStoredQueryResponse {
/// Envelope emitted when the stored query is a read.
Read(ReadOutput),
/// Envelope emitted when the stored query is a mutation.
Change(ChangeOutput),
}
/// The kind of a stored-query parameter, decomposed so a client (e.g. an
/// MCP server) can build a typed input schema with a closed `match` and
/// never re-parse omnigraph's type spelling. `bigint`/`date`/`datetime`/
/// `blob` are carried as JSON strings on the wire: a 64-bit integer past
/// 2^53 loses precision as a JSON number, and Date/DateTime are ISO
/// strings, Blob a blob-URI string.
///
/// Variants serialize in `snake_case`; `BigInt` and `DateTime` carry an
/// explicit rename because the automatic conversion would insert an
/// underscore (`big_int`, `date_time`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "snake_case")]
pub enum ParamKind {
/// Produced for `ScalarType::String`; also the fallback kind when a
/// parameter's type name cannot be resolved.
String,
/// Produced for `ScalarType::Bool`.
Bool,
/// Produced for the 32-bit integer scalars (`I32`, `U32`).
Int,
/// Produced for the 64-bit integer scalars (`I64`, `U64`).
#[serde(rename = "bigint")]
BigInt,
/// Produced for both float widths (`F32`, `F64`).
Float,
/// Produced for `ScalarType::Date`.
Date,
/// Produced for `ScalarType::DateTime`.
#[serde(rename = "datetime")]
DateTime,
/// Produced for `ScalarType::Blob`.
Blob,
/// Produced for `ScalarType::Vector(_)`; the dimension is reported
/// separately in `ParamDescriptor::vector_dim`.
Vector,
/// Produced when the resolved parameter type is a list; the element
/// kind is reported separately in `ParamDescriptor::item_kind`.
List,
}
/// One declared parameter of a stored query, projected for the catalog.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ParamDescriptor {
pub name: String,
pub kind: ParamKind,
/// Element kind when `kind == list` (always a scalar — the grammar
/// forbids lists of vectors or nested lists).
#[serde(skip_serializing_if = "Option::is_none")]
pub item_kind: Option<ParamKind>,
/// Dimension when `kind == vector`.
#[serde(skip_serializing_if = "Option::is_none")]
pub vector_dim: Option<u32>,
/// `false` → the caller must supply it; `true` → optional.
pub nullable: bool,
}
/// One entry in the stored-query catalog (`GET /queries`).
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct QueryCatalogEntry {
/// Registry key / invoke path segment (`POST /queries/{name}`).
pub name: String,
/// MCP tool id (the `tool_name` override, else `name`).
pub tool_name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub instruction: Option<String>,
/// `true` for a stored mutation → an MCP read-only hint of `false`.
pub mutation: bool,
pub params: Vec<ParamDescriptor>,
}
/// Response for `GET /queries`: the `mcp.expose` subset of a graph's
/// stored-query registry, each with typed parameters.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct QueriesCatalogOutput {
pub queries: Vec<QueryCatalogEntry>,
}
/// Translate a resolved scalar type into the kind advertised in the
/// stored-query catalog.
///
/// The match deliberately has no wildcard arm: adding a `ScalarType`
/// variant must fail to compile here until it is given a catalog kind.
/// A vector's dimension is discarded; callers that need it read it from
/// the `ScalarType::Vector` payload themselves.
fn scalar_kind(scalar: ScalarType) -> ParamKind {
    match scalar {
        ScalarType::String => ParamKind::String,
        ScalarType::Bool => ParamKind::Bool,
        // Both 32-bit integer widths fit a JSON number.
        ScalarType::I32 => ParamKind::Int,
        ScalarType::U32 => ParamKind::Int,
        // 64-bit integers get their own kind (string-encoded on the wire).
        ScalarType::I64 => ParamKind::BigInt,
        ScalarType::U64 => ParamKind::BigInt,
        ScalarType::F32 => ParamKind::Float,
        ScalarType::F64 => ParamKind::Float,
        ScalarType::Date => ParamKind::Date,
        ScalarType::DateTime => ParamKind::DateTime,
        ScalarType::Blob => ParamKind::Blob,
        ScalarType::Vector(_) => ParamKind::Vector,
    }
}
/// Project one declared query parameter into its catalog descriptor.
///
/// The parameter's type name is resolved through
/// `PropType::from_param_type_name`; the result decides only the three
/// type-shaped fields (`kind`, `item_kind`, `vector_dim`). `name` and
/// `nullable` are always copied straight from `param`.
fn param_descriptor(param: &Param) -> ParamDescriptor {
    let (kind, item_kind, vector_dim) =
        match PropType::from_param_type_name(&param.type_name, param.nullable) {
            // List parameter: report the element kind, never a dimension.
            Some(resolved) if resolved.list => {
                (ParamKind::List, Some(scalar_kind(resolved.scalar)), None)
            }
            // Scalar parameter: only a vector carries a dimension.
            Some(resolved) => match resolved.scalar {
                ScalarType::Vector(dim) => (ParamKind::Vector, None, Some(dim)),
                other => (scalar_kind(other), None, None),
            },
            // Unreachable for a parsed query (every declared param type is
            // grammatical); fall back to an opaque string so the field is
            // still usable rather than dropped.
            None => (ParamKind::String, None, None),
        };

    ParamDescriptor {
        name: param.name.clone(),
        kind,
        item_kind,
        vector_dim,
        nullable: param.nullable,
    }
}
/// Project a loaded stored query into its catalog entry (typed params,
/// MCP tool name, read/mutate flag, description/instruction).
pub fn query_catalog_entry(query: &StoredQuery) -> QueryCatalogEntry {
QueryCatalogEntry {
name: query.name.clone(),
tool_name: query.effective_tool_name().to_string(),
description: query.decl.description.clone(),
instruction: query.decl.instruction.clone(),
mutation: query.is_mutation(),
params: query.decl.params.iter().map(param_descriptor).collect(),
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
pub struct SchemaApplyRequest {
/// Project schema in `.pg` source form. The diff against the current
/// schema produces the migration steps that will be applied.
#[schema(example = "node Person {\n name: String @key\n age: I32?\n}\n\nedge Knows: Person -> Person")]
#[schema(
example = "node Person {\n name: String @key\n age: I32?\n}\n\nedge Knows: Person -> Person"
)]
pub schema_source: String,
/// When true, promote every `DropMode::Soft` step in the plan to
/// `DropMode::Hard`, making the prior column data unreachable
@ -303,7 +502,9 @@ pub struct IngestRequest {
pub mode: Option<LoadMode>,
/// NDJSON payload: one record per line, each shaped
/// `{"type": "<TypeName>", "data": {...}}`.
#[schema(example = "{\"type\": \"Person\", \"data\": {\"name\": \"Alice\", \"age\": 30}}\n{\"type\": \"Person\", \"data\": {\"name\": \"Bob\", \"age\": 25}}")]
#[schema(
example = "{\"type\": \"Person\", \"data\": {\"name\": \"Alice\", \"age\": 30}}\n{\"type\": \"Person\", \"data\": {\"name\": \"Bob\", \"age\": 25}}"
)]
pub data: String,
}
@ -344,6 +545,11 @@ pub enum ErrorCode {
Forbidden,
BadRequest,
NotFound,
/// 405 Method Not Allowed — the route exists but the active server
/// mode doesn't serve this method (e.g. `GET /graphs` in single-graph
/// mode). Distinct from 404 so clients can tell "wrong context" from
/// "no such resource."
MethodNotAllowed,
Conflict,
/// 429 Too Many Requests — per-actor admission cap exceeded.
/// Clients should respect the `Retry-After` header.
@ -467,3 +673,23 @@ pub fn read_target_output(target: &ReadTarget) -> ReadTargetOutput {
},
}
}
// ─── MR-668 — management endpoint shapes ──────────────────────────────────
/// One entry in the response from `GET /graphs`. Cluster operators
/// consume this list to discover which graphs the server is currently
/// serving. The shape is intentionally minimal — `graph_id` and `uri`
/// are the only fields a routing client needs.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct GraphInfo {
pub graph_id: String,
pub uri: String,
}
/// Response from `GET /graphs`. Lists every graph registered with the
/// server in alphabetical order by `graph_id` (sorted server-side so
/// clients get deterministic output across requests).
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct GraphListResponse {
pub graphs: Vec<GraphInfo>,
}

View file

@ -119,7 +119,10 @@ pub(crate) fn parse_json_secret_payload(payload: &str) -> Result<Vec<(String, St
bail!("bearer-token secret contains a blank actor id");
}
if token.is_empty() {
bail!("bearer-token secret has a blank token for actor '{}'", actor);
bail!(
"bearer-token secret has a blank token for actor '{}'",
actor
);
}
pairs.push((actor, token));
}
@ -151,8 +154,7 @@ pub mod aws {
/// Construct a new source. Resolves AWS credentials + region via the
/// default chain — no explicit configuration needed on EC2/ECS/EKS.
pub async fn new(secret_id: impl Into<String>) -> Result<Self> {
let config =
aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
let config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
let client = aws_sdk_secretsmanager::Client::new(&config);
Ok(Self {
client,
@ -200,8 +202,8 @@ pub use aws::SecretsManagerTokenSource;
#[cfg(test)]
mod tests {
use super::*;
use std::env;
use serial_test::serial;
use std::env;
fn clear_env() {
unsafe {
@ -232,7 +234,10 @@ mod tests {
unsafe {
env::remove_var("OMNIGRAPH_SERVER_BEARER_TOKEN");
}
assert_eq!(tokens, vec![("default".to_string(), "some-token".to_string())]);
assert_eq!(
tokens,
vec![("default".to_string(), "some-token".to_string())]
);
}
#[tokio::test]

View file

@ -6,8 +6,16 @@ use std::path::{Path, PathBuf};
use clap::ValueEnum;
use color_eyre::eyre::{Result, bail};
use serde::{Deserialize, Serialize};
pub const DEFAULT_CONFIG_FILE: &str = "omnigraph.yaml";
/// Pick the resource id for a graph selection.
///
/// Returns the selected graph name when one was given, otherwise the
/// normalized URI. The result is always an owned `String`.
pub fn graph_resource_id_for_selection(
    selected_graph: Option<&str>,
    normalized_uri: &str,
) -> String {
    match selected_graph {
        Some(name) => name.to_owned(),
        None => normalized_uri.to_owned(),
    }
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ProjectConfig {
pub name: Option<String>,
@ -17,6 +25,20 @@ pub struct ProjectConfig {
pub struct TargetConfig {
pub uri: String,
pub bearer_token_env: Option<String>,
/// Per-graph Cedar policy file (MR-668). In single-graph mode this
/// field is unused — the top-level `policy.file` applies. In
/// multi-graph mode, each `graphs.<id>.policy.file` governs that
/// graph's HTTP-layer Cedar enforcement.
#[serde(default)]
pub policy: PolicySettings,
/// Per-graph stored-query registry: an inline `name -> entry`
/// map. Mirrors the per-graph `policy` shape — each
/// `graphs.<id>.queries` declares that graph's stored queries. Absent
/// (or empty) = no stored queries for the graph. v1 is inline-only;
/// an external `queries.yaml` manifest indirection is a deferred
/// convenience.
#[serde(default)]
pub queries: BTreeMap<String, QueryEntry>,
}
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Serialize, Deserialize, ValueEnum)]
@ -59,6 +81,12 @@ pub struct ServerDefaults {
#[serde(rename = "graph")]
pub graph: Option<String>,
pub bind: Option<String>,
/// Server-level Cedar policy (MR-668). Governs management endpoints
/// — currently `GET /graphs`; future runtime add/remove endpoints
/// will plug in here too. In single-graph mode this is unused — the
/// top-level `policy.file` covers the single graph.
#[serde(default)]
pub policy: PolicySettings,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
@ -77,10 +105,63 @@ pub struct PolicySettings {
pub file: Option<String>,
}
/// One stored-query registry entry. The map **key** is the query's
/// identity — it must equal the `query <name>` symbol declared inside
/// the referenced `.gq` file (asserted when the registry loads).
/// Renaming the key (or the symbol) is a breaking change to callers, by
/// design.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryEntry {
/// Path to the `.gq` file (relative to the config's `base_dir`). The
/// file may declare several queries; the registry selects the one
/// whose symbol matches the map key.
pub file: String,
#[serde(default)]
pub mcp: McpSettings,
}
/// MCP exposure for a stored query. A *deployment* concern (the same
/// `.gq` may be exposed in one graph and hidden in another), so it lives
/// in YAML rather than in the `.gq` source. **Default `expose: true`** —
/// declaring a query in the manifest *is* the opt-in, so it appears in the
/// MCP tool catalog (`GET /queries`) by default; set `expose: false` to
/// keep a query HTTP/service-callable but hidden from the agent tool list.
/// `expose` governs catalog membership only — it is **not** an
/// authorization gate (invocation is gated by `invoke_query`), so a hidden
/// query is still invocable by name with the right permission.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpSettings {
// Whether the query is listed in the catalog. When the key is omitted
// from YAML, serde fills it from `mcp_expose_default` (i.e. `true`)
// rather than `bool::default()`.
#[serde(default = "mcp_expose_default")]
pub expose: bool,
// Optional override for the MCP tool id.
// NOTE(review): presumably the registry key is used when this is
// `None` — confirm against `StoredQuery::effective_tool_name`.
pub tool_name: Option<String>,
}
/// Default for `McpSettings::expose`: referenced by that field's
/// `#[serde(default = ...)]` attribute and by `McpSettings::default`, so
/// an omitted `expose` key and a wholly omitted `mcp` block agree.
fn mcp_expose_default() -> bool {
true
}
impl Default for McpSettings {
fn default() -> Self {
Self {
expose: mcp_expose_default(),
tool_name: None,
}
}
}
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AliasCommand {
/// Read alias (canonical: `query`). The legacy spelling `read` is
/// kept as the variant name for back-compat with serialized configs
/// and external SDK callers; `query` is accepted on the wire via the
/// serde alias.
#[serde(alias = "query")]
Read,
/// Mutation alias (canonical: `mutate`). The legacy spelling `change`
/// is kept as the variant name for back-compat; `mutate` is accepted
/// on the wire via the serde alias.
#[serde(alias = "mutate")]
Change,
}
@ -115,6 +196,12 @@ pub struct OmnigraphConfig {
pub aliases: BTreeMap<String, AliasConfig>,
#[serde(default)]
pub policy: PolicySettings,
/// Top-level stored-query registry, used in single-graph
/// mode — mirrors how the top-level `policy` applies to the single
/// graph. In multi-graph mode this is unused; each graph's
/// `graphs.<id>.queries` applies instead.
#[serde(default)]
pub queries: BTreeMap<String, QueryEntry>,
#[serde(skip)]
base_dir: PathBuf,
}
@ -130,6 +217,7 @@ impl Default for OmnigraphConfig {
query: QueryDefaults::default(),
aliases: BTreeMap::new(),
policy: PolicySettings::default(),
queries: BTreeMap::new(),
base_dir: PathBuf::new(),
}
}
@ -197,23 +285,164 @@ impl OmnigraphConfig {
}
pub fn resolve_auth_env_file(&self) -> Option<PathBuf> {
let path = self.auth.env_file.as_deref()?;
let path = Path::new(path);
Some(if path.is_absolute() {
path.to_path_buf()
} else {
self.base_dir.join(path)
})
self.auth
.env_file
.as_deref()
.map(|path| self.resolve_config_path(path))
}
pub fn resolve_policy_file(&self) -> Option<PathBuf> {
let path = self.policy.file.as_deref()?;
let path = Path::new(path);
Some(if path.is_absolute() {
path.to_path_buf()
} else {
self.base_dir.join(path)
})
self.policy
.file
.as_deref()
.map(|path| self.resolve_config_path(path))
}
/// Look up the named target's own `policy.file` and resolve it against
/// the config file's `base_dir`.
///
/// Yields `None` when `target_name` is not a key of `graphs`, or when
/// that target has no `policy.file` configured.
pub fn resolve_target_policy_file(&self, target_name: &str) -> Option<PathBuf> {
    let configured = self.graphs.get(target_name)?.policy.file.as_deref()?;
    Some(self.resolve_config_path(configured))
}
/// The top-level stored-query registry entries (single-graph mode).
pub fn query_entries(&self) -> &BTreeMap<String, QueryEntry> {
&self.queries
}
/// Borrow the stored-query registry declared under `graphs.<target_name>`
/// (multi-graph mode). Yields `None` when no such target exists.
pub fn target_query_entries(
    &self,
    target_name: &str,
) -> Option<&BTreeMap<String, QueryEntry>> {
    let target = self.graphs.get(target_name)?;
    Some(&target.queries)
}
/// Decide which `queries:` block governs a graph selection. This is the
/// one place that rule is defined, so server boot and the CLI cannot
/// disagree about it.
///
/// * A name that is a key of `graphs:` → that graph's own block.
/// * Anything else (no selection, or a name that is not a known graph,
///   e.g. a bare URI) → the top-level block (single-graph mode).
pub fn query_entries_for(&self, graph: Option<&str>) -> &BTreeMap<String, QueryEntry> {
    graph
        .and_then(|name| self.graphs.get(name))
        .map_or(&self.queries, |target| &target.queries)
}
/// Validate a raw graph selection — the fallible counterpart to the
/// infallible [`OmnigraphConfig::query_entries_for`]. Both `queries`
/// subcommands funnel their selection through this gate so neither can
/// skip a check the other (or server boot) applies.
///
/// Outcomes:
/// * `None` (an anonymous selection, e.g. a bare URI) stays `None`; it
///   resolves to the top-level registry downstream.
/// * A name that is not a key of `graphs:` is an error, worded the same
///   way [`OmnigraphConfig::resolve_target_uri`] words it, so commands
///   that open no URI reject an unknown `--target` like the URI-resolving
///   commands do.
/// * A known name is returned unchanged, but only after
///   [`OmnigraphConfig::ensure_top_level_blocks_honored`] accepts it — a
///   named graph combined with a populated top-level block is rejected.
pub fn resolve_graph_selection<'a>(&self, graph: Option<&'a str>) -> Result<Option<&'a str>> {
    let Some(name) = graph else {
        return Ok(None);
    };
    if !self.graphs.contains_key(name) {
        bail!("graph '{}' not found in {}", name, DEFAULT_CONFIG_FILE);
    }
    self.ensure_top_level_blocks_honored(Some(name))?;
    Ok(Some(name))
}
/// Graph selection used by policy tooling: the CLI graph name if there
/// is one, otherwise the server graph name, validated through
/// [`OmnigraphConfig::resolve_graph_selection`].
///
/// Returns `Ok(None)` when neither name is set, and an error when the
/// chosen name fails that validation.
pub fn resolve_policy_tooling_graph_selection(&self) -> Result<Option<&str>> {
self.resolve_graph_selection(self.cli_graph_name().or_else(|| self.server_graph_name()))
}
/// Policy-file sibling of [`OmnigraphConfig::query_entries_for`]: policy
/// and queries are resolved by the same identity rule.
///
/// * A name that is a key of `graphs:` → that graph's `policy.file`, with
///   **no** fallback to the top level. A named graph without its own
///   policy therefore has no policy, which keeps the boot-time coherence
///   check meaningful.
/// * Anything else (no selection, or a bare URI) → the top-level
///   `policy.file`.
pub fn resolve_policy_file_for(&self, graph: Option<&str>) -> Option<PathBuf> {
    if let Some(name) = graph {
        if self.graphs.contains_key(name) {
            return self.resolve_target_policy_file(name);
        }
    }
    self.resolve_policy_file()
}
/// List the top-level config blocks that are populated, by label:
/// `"policy.file"` when a top-level policy file is set and `"queries"`
/// when the top-level registry is non-empty, always in that order.
///
/// Feeds the boot-time coherence check: when a **named** graph is served
/// (single-mode by name, or multi-mode) the top-level blocks are not
/// honored, so a populated one is a configuration error rather than a
/// silent no-op.
pub fn populated_top_level_blocks(&self) -> Vec<&'static str> {
    let candidates = [
        (self.policy.file.is_some(), "policy.file"),
        (!self.queries.is_empty(), "queries"),
    ];
    candidates
        .into_iter()
        .filter_map(|(populated, label)| populated.then_some(label))
        .collect()
}
/// Reject configs in which a populated top-level block would be silently
/// ignored. A named graph reads its own `graphs.<name>` block, so any
/// populated top-level block alongside it is a config error.
///
/// This is the single definition of that rule, shared by server boot and
/// the CLI selection gate ([`OmnigraphConfig::resolve_graph_selection`])
/// so the two cannot drift. An anonymous selection (`None`, e.g. a bare
/// URI) legitimately honors the top-level blocks and always passes.
pub fn ensure_top_level_blocks_honored(&self, selected: Option<&str>) -> Result<()> {
    let Some(name) = selected else {
        return Ok(());
    };
    let unhonored = self.populated_top_level_blocks();
    if unhonored.is_empty() {
        return Ok(());
    }
    // Keep the sentence grammatical for one block vs. several.
    let verb = if unhonored.len() == 1 { "is" } else { "are" };
    bail!(
        "named graph '{name}' uses its own `graphs.{name}.…` block, but top-level {} \
         {} set and would be ignored. Move it to `graphs.{name}` (e.g. \
         `graphs.{name}.policy.file`, `graphs.{name}.queries`).",
        unhonored.join(" and "),
        verb,
    )
}
/// Resolve a stored-query `.gq` file path (from a registry entry),
/// relative to the config's `base_dir`. Mirrors policy-file
/// resolution; the registry loader calls this to turn each entry's
/// `file:` value into an absolute path.
///
/// Delegates to `resolve_config_path`: an absolute `value` is returned
/// unchanged, a relative one is joined onto `base_dir`. The file is not
/// opened or checked for existence here.
pub fn resolve_query_file(&self, value: &str) -> PathBuf {
self.resolve_config_path(value)
}
/// Locate the server-level policy file (the one used by management
/// endpoints).
///
/// Yields `None` when `server.policy.file` is unset; otherwise the
/// configured value resolved through `resolve_config_path`.
pub fn resolve_server_policy_file(&self) -> Option<PathBuf> {
    let configured = self.server.policy.file.as_deref()?;
    Some(self.resolve_config_path(configured))
}
/// Resolve a raw config-supplied URI (which may be relative) to its
/// absolute form. URIs containing `://` are passed through as-is;
/// relative paths are joined with the config file's `base_dir`.
///
/// Public wrapper around the private `resolve_config_uri`, which holds
/// the actual resolution logic.
pub fn resolve_uri_value(&self, value: &str) -> String {
self.resolve_config_uri(value)
}
pub fn resolve_policy_tests_file(&self) -> Option<PathBuf> {
@ -282,6 +511,15 @@ impl OmnigraphConfig {
self.base_dir.join(path).to_string_lossy().to_string()
}
}
/// Turn a config-supplied filesystem path into a `PathBuf`.
///
/// Absolute paths are returned as given; relative paths are joined onto
/// the config's `base_dir`. No filesystem access is performed.
fn resolve_config_path(&self, value: &str) -> PathBuf {
    let candidate = Path::new(value);
    if !candidate.is_absolute() {
        return self.base_dir.join(candidate);
    }
    candidate.to_path_buf()
}
}
pub fn default_config_path() -> PathBuf {
@ -333,7 +571,9 @@ mod tests {
use tempfile::tempdir;
use super::{ReadOutputFormat, TableCellLayout, load_config_in};
use super::{
ReadOutputFormat, TableCellLayout, graph_resource_id_for_selection, load_config_in,
};
#[test]
fn load_config_reads_yaml_defaults_from_current_dir() {
@ -397,6 +637,114 @@ policy: {}
assert!(config.graphs.is_empty());
}
// The resource id is the graph name when one is selected, and the URI
// itself when the selection is anonymous.
#[test]
fn graph_resource_id_for_selection_uses_name_or_anonymous_uri() {
// Named selection: the name wins over the URI.
assert_eq!(
graph_resource_id_for_selection(Some("local"), "/tmp/graph.omni"),
"local"
);
// Anonymous selection: the URI is used verbatim.
assert_eq!(
graph_resource_id_for_selection(None, "/tmp/graph.omni"),
"/tmp/graph.omni"
);
}
// Exercises both gates of `resolve_graph_selection`: the name must be a key
// of `graphs:`, and a named selection must not coexist with a populated
// top-level block. Two configs are used: a coherent one, then an
// incoherent one with a top-level `policy.file`.
#[test]
fn resolve_graph_selection_validates_membership_and_coherence() {
let temp = tempdir().unwrap();
fs::write(
temp.path().join("omnigraph.yaml"),
"graphs:\n local:\n uri: ./demo.omni\n",
)
.unwrap();
let config = load_config_in(temp.path(), None).unwrap();
// A known graph passes through unchanged.
assert_eq!(config.resolve_graph_selection(Some("local")).unwrap(), Some("local"));
// An anonymous selection stays anonymous (→ top-level registry downstream).
assert_eq!(config.resolve_graph_selection(None).unwrap(), None);
// An unknown name errors, naming the graph (matching resolve_target_uri).
let err = config.resolve_graph_selection(Some("ghost")).unwrap_err().to_string();
assert!(
err.contains("ghost") && err.contains("not found"),
"unknown graph must error naming it: {err}"
);
// Coherence: a named graph plus a populated top-level block is the
// config server boot refuses, so the gate rejects it too (shared rule
// via ensure_top_level_blocks_honored). An anonymous selection still
// passes — top-level is honored when no graph is named.
let temp2 = tempdir().unwrap();
fs::write(
temp2.path().join("omnigraph.yaml"),
"graphs:\n local:\n uri: ./demo.omni\npolicy:\n file: ./top.yaml\n",
)
.unwrap();
let incoherent = load_config_in(temp2.path(), None).unwrap();
let err = incoherent
.resolve_graph_selection(Some("local"))
.unwrap_err()
.to_string();
assert!(
err.contains("local") && err.contains("policy.file"),
"named graph + populated top-level block must be rejected, naming both: {err}"
);
assert_eq!(
incoherent.resolve_graph_selection(None).unwrap(),
None,
"anonymous selection still honors top-level"
);
}
// Four scenarios for `resolve_policy_tooling_graph_selection`, each with a
// fresh temp dir and config file.
#[test]
fn policy_tooling_graph_selection_prefers_cli_then_server_and_validates() {
// 1. Both `cli.graph` and `server.graph` are set: the CLI value wins.
let temp = tempdir().unwrap();
fs::write(
temp.path().join("omnigraph.yaml"),
"graphs:\n local:\n uri: ./local.omni\n prod:\n uri: ./prod.omni\n\
server:\n graph: local\ncli:\n graph: prod\n",
)
.unwrap();
let config = load_config_in(temp.path(), None).unwrap();
assert_eq!(
config.resolve_policy_tooling_graph_selection().unwrap(),
Some("prod")
);
// 2. Only `server.graph` is set: it is used as the fallback.
let temp = tempdir().unwrap();
fs::write(
temp.path().join("omnigraph.yaml"),
"graphs:\n local:\n uri: ./local.omni\nserver:\n graph: local\n",
)
.unwrap();
let config = load_config_in(temp.path(), None).unwrap();
assert_eq!(
config.resolve_policy_tooling_graph_selection().unwrap(),
Some("local")
);
// 3. Neither is set: the selection is anonymous (`None`), not an error.
let temp = tempdir().unwrap();
fs::write(temp.path().join("omnigraph.yaml"), "policy: {}\n").unwrap();
let config = load_config_in(temp.path(), None).unwrap();
assert_eq!(config.resolve_policy_tooling_graph_selection().unwrap(), None);
// 4. `server.graph` names a graph missing from `graphs:`: the shared
// graph-selection validation rejects it.
let temp = tempdir().unwrap();
fs::write(
temp.path().join("omnigraph.yaml"),
"graphs:\n local:\n uri: ./local.omni\nserver:\n graph: ghost\n",
)
.unwrap();
let config = load_config_in(temp.path(), None).unwrap();
let err = config
.resolve_policy_tooling_graph_selection()
.unwrap_err()
.to_string();
assert!(
err.contains("ghost") && err.contains("not found"),
"unknown server.graph must use graph-selection validation: {err}"
);
}
#[test]
fn resolve_query_path_searches_config_roots() {
let temp = tempdir().unwrap();
@ -435,6 +783,118 @@ policy: {}
assert_eq!(resolved, config_dir.join("local.gq"));
}
// Loads a config with both a per-graph `queries:` block (under
// `graphs.prod`) and a top-level one, then checks parsing, MCP defaults,
// the shared selector, and path resolution.
#[test]
fn queries_block_round_trips_inline_and_per_graph() {
let temp = tempdir().unwrap();
fs::write(
temp.path().join("omnigraph.yaml"),
r#"
graphs:
prod:
uri: s3://bucket/prod
queries:
find_user:
file: ./queries/find_user.gq
mcp:
expose: true
tool_name: lookup_user
internal_audit:
file: ./queries/audit.gq
queries:
single_mode_q:
file: ./q.gq
"#,
)
.unwrap();
let config = load_config_in(temp.path(), None).unwrap();
// Per-graph registry (multi-graph mode).
let prod = config.target_query_entries("prod").unwrap();
assert_eq!(prod.len(), 2);
let find_user = &prod["find_user"];
assert_eq!(find_user.file, "./queries/find_user.gq");
assert!(find_user.mcp.expose);
assert_eq!(find_user.mcp.tool_name.as_deref(), Some("lookup_user"));
// Default exposure is true (the manifest entry is the opt-in); tool_name absent.
let audit = &prod["internal_audit"];
assert!(audit.mcp.expose);
assert!(audit.mcp.tool_name.is_none());
// Top-level registry (single-graph mode).
assert_eq!(config.query_entries().len(), 1);
// The shared selector resolves the same blocks the server boot
// and the CLI use: a known graph → its per-graph block; no
// selection or an unknown name → the top-level block (the latter
// pins the behavior of the CLI's now-deleted fallback arm).
assert_eq!(config.query_entries_for(Some("prod")).len(), 2);
assert_eq!(config.query_entries_for(None).len(), 1);
assert_eq!(config.query_entries_for(Some("nonexistent")).len(), 1);
// Path resolution joins against base_dir, like policy files.
assert_eq!(
config.resolve_query_file(&find_user.file),
temp.path().join("./queries/find_user.gq")
);
}
// Config under test: a top-level `policy.file`, a graph `prod` with its own
// `policy.file`, and a graph `bare` with none. Covers every branch of
// `resolve_policy_file_for`.
#[test]
fn resolve_policy_file_for_follows_identity() {
let temp = tempdir().unwrap();
fs::write(
temp.path().join("omnigraph.yaml"),
"policy:\n file: ./top.yaml\ngraphs:\n prod:\n uri: s3://b/prod\n \
policy:\n file: ./prod.yaml\n bare:\n uri: s3://b/bare\n",
)
.unwrap();
let config = load_config_in(temp.path(), None).unwrap();
// Named graph with its own policy → per-graph (not top-level).
assert!(
config
.resolve_policy_file_for(Some("prod"))
.unwrap()
.ends_with("prod.yaml")
);
// Named graph with NO per-graph policy → None (no top-level fallback;
// load-bearing for the boot coherence check).
assert!(config.resolve_policy_file_for(Some("bare")).is_none());
// Anonymous (bare URI) or an unknown name → top-level.
assert!(
config
.resolve_policy_file_for(None)
.unwrap()
.ends_with("top.yaml")
);
assert!(
config
.resolve_policy_file_for(Some("nope"))
.unwrap()
.ends_with("top.yaml")
);
}
// A config that declares a graph but no `queries:` block at either level
// must load cleanly and expose empty registries.
#[test]
fn queries_block_absent_yields_empty_registry() {
let temp = tempdir().unwrap();
fs::write(
temp.path().join("omnigraph.yaml"),
"graphs:\n local:\n uri: ./demo.omni\n",
)
.unwrap();
let config = load_config_in(temp.path(), None).unwrap();
// Additive: no `queries:` anywhere → empty registries everywhere.
assert!(config.query_entries().is_empty());
// The per-graph lookup still succeeds for a declared graph; it is just empty.
assert!(
config
.target_query_entries("local")
.unwrap()
.is_empty()
);
}
#[test]
fn policy_block_accepts_non_empty_mapping() {
let temp = tempdir().unwrap();

View file

@ -0,0 +1,254 @@
//! `GraphId` — registry-level identity for a graph in multi-graph mode (MR-668).
//!
//! Validation lives in `GraphId::try_from(String)`; nothing else can construct a
//! `GraphId`. The newtype prevents `graph_id` strings from escaping the storage
//! root via path traversal or colliding with engine-reserved filenames.
//!
//! Regex: `^[a-zA-Z0-9-]{1,64}$`
//!
//! The engine reserves every filename starting with `_` at the graph root
//! (`_schema.pg`, `_schema.ir.json`, `__schema_state.json`, `__manifest/`,
//! `__recovery/`, etc.). The regex disallows underscores entirely (not only
//! in leading position), so a `graph_id` can never collide with
//! engine-managed files. Path traversal (`..`, `/`) is unrepresentable.
//!
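//! `policies`, `healthz`, `openapi`, and `graphs` satisfy the regex but are
//! additionally reserved because they would collide with top-level route
//! prefixes or endpoint names; `policies` in particular is a future-proofing
//! measure for a potential `/graphs/policies/...` cluster route.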
use std::fmt;
use std::sync::OnceLock;
use color_eyre::eyre::{Result, bail};
use regex::Regex;
use serde::{Deserialize, Serialize};
/// Maximum length of a `GraphId` value.
///
/// Mirrors the `{1,64}` upper bound in the validation regex; `validate`
/// checks it separately so over-long input gets a length-specific error.
pub const GRAPH_ID_MAX_LEN: usize = 64;
/// Validated registry-level identity for a graph.
///
/// Constructed only via `GraphId::try_from(String)` or
/// `GraphId::try_from(&str)`. The inner `String` is private to enforce the
/// validation contract.
///
/// `Deserialize` is deliberately not derived: this module provides a
/// hand-written impl that re-runs validation. `Serialize` is transparent,
/// so a `GraphId` is emitted as a bare string.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)]
#[serde(transparent)]
pub struct GraphId(String);
impl GraphId {
    /// Borrow the validated identifier as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl fmt::Display for GraphId {
    /// Writes the identifier verbatim; width/padding flags on the formatter
    /// are not applied because the text goes straight to `write_str`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = &self.0;
        f.write_str(text)
    }
}
impl AsRef<str> for GraphId {
    /// Lets a `GraphId` be passed wherever an `AsRef<str>` is accepted.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
impl TryFrom<String> for GraphId {
    type Error = color_eyre::eyre::Error;

    /// Validate `value` and, on success, wrap it without reallocating.
    fn try_from(value: String) -> Result<Self> {
        match validate(&value) {
            Ok(()) => Ok(Self(value)),
            Err(reason) => Err(reason),
        }
    }
}
impl TryFrom<&str> for GraphId {
    type Error = color_eyre::eyre::Error;

    /// Validate `value` and, on success, store an owned copy of it.
    fn try_from(value: &str) -> Result<Self> {
        match validate(value) {
            Ok(()) => Ok(Self(String::from(value))),
            Err(reason) => Err(reason),
        }
    }
}
// Custom Deserialize that re-runs validation. Otherwise a serde-derived impl
// would accept any String, defeating the newtype's guarantee.
impl<'de> Deserialize<'de> for GraphId {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Self::try_from(s).map_err(serde::de::Error::custom)
}
}
/// Check that `value` is an acceptable `graph_id`.
///
/// Rules are applied in order and the first violation is reported:
/// 1. the value is non-empty;
/// 2. it has at most `GRAPH_ID_MAX_LEN` characters;
/// 3. it matches `^[a-zA-Z0-9-]{1,64}$`;
/// 4. it is not a reserved name (see `is_reserved`).
///
/// # Errors
/// Returns an error whose message names the offending value and the rule
/// it broke.
fn validate(value: &str) -> Result<()> {
    if value.is_empty() {
        bail!("graph_id must not be empty");
    }
    // Count characters rather than bytes: the message reports "chars", and
    // `str::len` would over-count multi-byte UTF-8 input. Non-ASCII input
    // within the limit still fails below, with the more accurate regex
    // message.
    let char_count = value.chars().count();
    if char_count > GRAPH_ID_MAX_LEN {
        bail!(
            "graph_id '{}' is {} chars; max {}",
            value,
            char_count,
            GRAPH_ID_MAX_LEN
        );
    }
    if !regex().is_match(value) {
        bail!(
            "graph_id '{}' must match ^[a-zA-Z0-9-]{{1,64}}$ — \
             no underscores (engine reserves them), no path separators, no unicode",
            value
        );
    }
    if is_reserved(value) {
        bail!(
            "graph_id '{}' is reserved (would collide with engine-managed names or \
             future cluster routes)",
            value
        );
    }
    Ok(())
}
/// Shared, lazily-compiled `graph_id` pattern.
///
/// The pattern is compiled on first use and cached in a `OnceLock` for
/// every later call.
fn regex() -> &'static Regex {
    static PATTERN: OnceLock<Regex> = OnceLock::new();
    let compile = || Regex::new(r"^[a-zA-Z0-9-]{1,64}$").expect("regex literal");
    PATTERN.get_or_init(compile)
}
/// Whether `value` is a reserved `graph_id` that the regex alone would let
/// through.
///
/// The regex `^[a-zA-Z0-9-]{1,64}$` (see `regex()`) already rejects every
/// name containing `_` or `.`, which covers engine-managed filenames
/// (`_schema.pg`, `__manifest`, etc.) and dotted names like
/// `openapi.json` — those never reach this check.
///
/// This list therefore only covers route-prefix collisions and top-level
/// endpoint names whose spellings DO satisfy the regex. Matching is exact
/// and case-sensitive.
fn is_reserved(value: &str) -> bool {
    const RESERVED: [&str; 4] = ["policies", "healthz", "openapi", "graphs"];
    RESERVED.iter().any(|name| *name == value)
}
// Unit tests for `GraphId`: accepted shapes, each rejection rule, and the
// trait impls (Display, serde, Hash/Eq).
#[cfg(test)]
mod tests {
use super::*;
// Letters, digits and hyphens in any position are accepted, including
// single-character ids.
#[test]
fn accepts_simple_alphanumeric_ids() {
for ok in ["alpha", "beta", "tenant-001", "A", "g", "X-9-z"] {
GraphId::try_from(ok).unwrap_or_else(|_| panic!("expected accept: {ok}"));
}
}
// Boundary: exactly `GRAPH_ID_MAX_LEN` characters is still valid.
#[test]
fn accepts_64_char_max() {
let max = "a".repeat(64);
GraphId::try_from(max.as_str()).unwrap();
}
#[test]
fn rejects_empty() {
assert!(GraphId::try_from("").is_err());
}
// Boundary: one character past the maximum is rejected.
#[test]
fn rejects_over_64_chars() {
let too_long = "a".repeat(65);
assert!(GraphId::try_from(too_long.as_str()).is_err());
}
#[test]
fn rejects_leading_underscore() {
// Engine reserves every `_*` filename at the graph root.
assert!(GraphId::try_from("_internal").is_err());
assert!(GraphId::try_from("__manifest").is_err());
}
#[test]
fn rejects_underscores_anywhere() {
// The regex doesn't allow `_` at all — keeps the disallow-leading-`_`
// rule cheap to enforce. If the rule changes later, we'd need to
// distinguish "starts with `_`" from "contains `_`".
assert!(GraphId::try_from("tenant_alpha").is_err());
}
// Forward slash, backslash and `..` must all be unrepresentable.
#[test]
fn rejects_path_separators() {
for bad in ["alpha/beta", "../etc", "..", "alpha\\beta"] {
assert!(GraphId::try_from(bad).is_err(), "expected reject: {bad}");
}
}
// Non-ASCII letters and symbols are outside the allowed character class.
#[test]
fn rejects_unicode() {
assert!(GraphId::try_from("αlpha").is_err());
assert!(GraphId::try_from("graph-✨").is_err());
}
// Leading, trailing and interior whitespace (space or tab) are rejected;
// input is not trimmed before validation.
#[test]
fn rejects_whitespace() {
assert!(GraphId::try_from(" alpha").is_err());
assert!(GraphId::try_from("alpha ").is_err());
assert!(GraphId::try_from("alpha beta").is_err());
assert!(GraphId::try_from("\talpha").is_err());
}
#[test]
fn rejects_dots() {
// Reserves the "extension"-shaped ids that look like filenames.
assert!(GraphId::try_from(".").is_err());
assert!(GraphId::try_from("alpha.beta").is_err());
assert!(GraphId::try_from("alpha.").is_err());
}
#[test]
fn rejects_reserved_route_names() {
// Names that satisfy the regex but are still reserved because
// they'd collide with top-level route prefixes / endpoint names.
// Dot-containing names (e.g. `openapi.json`) are rejected by the
// regex, not this list — `rejects_dots` above covers them.
for bad in ["policies", "healthz", "openapi", "graphs"] {
assert!(
GraphId::try_from(bad).is_err(),
"expected reject (reserved): {bad}"
);
}
}
// `Display` and `as_str` both expose the inner string unchanged.
#[test]
fn display_returns_inner_string() {
let id = GraphId::try_from("alpha").unwrap();
assert_eq!(format!("{id}"), "alpha");
assert_eq!(id.as_str(), "alpha");
}
// Serializes as a bare JSON string (transparent) and deserializes back to
// an equal value.
#[test]
fn serialize_round_trips_via_json() {
let id = GraphId::try_from("tenant-007").unwrap();
let json = serde_json::to_string(&id).unwrap();
assert_eq!(json, "\"tenant-007\"");
let back: GraphId = serde_json::from_str(&json).unwrap();
assert_eq!(back, id);
}
#[test]
fn deserialize_runs_validation() {
// Hostile payload must not produce a GraphId.
let bad = serde_json::from_str::<GraphId>("\"_evil\"");
assert!(bad.is_err());
let bad = serde_json::from_str::<GraphId>("\"../../etc\"");
assert!(bad.is_err());
}
// Two independently constructed ids with the same text must hash and
// compare equal so they address the same map slot.
#[test]
fn hash_equality_works_for_use_as_map_key() {
use std::collections::HashMap;
let a = GraphId::try_from("alpha").unwrap();
let b = GraphId::try_from("alpha").unwrap();
let mut m = HashMap::new();
m.insert(a, 1u32);
assert_eq!(m.get(&b), Some(&1));
}
}

View file

@ -0,0 +1,308 @@
//! Identity types for the multi-graph server (MR-668) + forward-compatible
//! shapes for Cloud mode (RFC 0003) and OAuth provider (RFC 0004).
//!
//! Per decision 13 in the implementation plan: ship the type shapes that
//! Cloud mode will consume, without committing to any trait shape
//! (`TokenVerifier` stays draft in RFC 0001). Every Cluster-mode call site
//! constructs these types with their Cluster-mode-specific values:
//!
//! - `tenant_id: None` (Cloud will set `Some(...)` from the OAuth `org_id` claim)
//! - `scopes: vec![Scope::Full]` (Cloud will populate from the OAuth `scope` claim)
//! - `source: AuthSource::Static` (Cloud / OIDC will set `AuthSource::Oidc`)
//!
//! The enums use `#[non_exhaustive]` so RFC 0001 step 1 / RFC 0004 can
//! add variants without breaking exhaustive matches in callers.
use std::fmt;
use std::sync::Arc;
use std::sync::OnceLock;
use color_eyre::eyre::{Result, bail};
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::graph_id::GraphId;
/// Maximum length of a `TenantId` value.
///
/// Mirrors the `{1,64}` upper bound in the tenant-id regex;
/// `validate_tenant_id` checks it separately so over-long input gets a
/// length-specific error.
pub const TENANT_ID_MAX_LEN: usize = 64;
/// Cloud-mode tenant identifier. Validated with the same regex as
/// `GraphId` so the two interchange syntactically.
///
/// `None` in Cluster mode; Cloud mode (RFC 0003) sets `Some(...)` from
/// the OAuth `org_id` claim. Constructed only via `try_from` so callers
/// cannot bypass validation.
///
/// Unlike `GraphId`, `validate_tenant_id` applies no reserved-name list:
/// any value matching the regex is accepted. `Deserialize` is hand-written
/// in this module so that deserialized values are validated too.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)]
#[serde(transparent)]
pub struct TenantId(String);
impl TenantId {
    /// Borrow the validated tenant identifier as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl fmt::Display for TenantId {
    /// Writes the identifier verbatim via `write_str` (formatter
    /// width/padding flags are not applied).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = &self.0;
        f.write_str(text)
    }
}
impl AsRef<str> for TenantId {
    /// Lets a `TenantId` be passed wherever an `AsRef<str>` is accepted.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
impl TryFrom<String> for TenantId {
    type Error = color_eyre::eyre::Error;

    /// Validate `value` and, on success, wrap it without reallocating.
    fn try_from(value: String) -> Result<Self> {
        match validate_tenant_id(&value) {
            Ok(()) => Ok(Self(value)),
            Err(reason) => Err(reason),
        }
    }
}
impl TryFrom<&str> for TenantId {
    type Error = color_eyre::eyre::Error;

    /// Validate `value` and, on success, store an owned copy of it.
    fn try_from(value: &str) -> Result<Self> {
        match validate_tenant_id(value) {
            Ok(()) => Ok(Self(String::from(value))),
            Err(reason) => Err(reason),
        }
    }
}
impl<'de> Deserialize<'de> for TenantId {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Self::try_from(s).map_err(serde::de::Error::custom)
}
}
/// Check that `value` is an acceptable `tenant_id`.
///
/// Rules are applied in order and the first violation is reported:
/// 1. the value is non-empty;
/// 2. it has at most `TENANT_ID_MAX_LEN` characters;
/// 3. it matches `^[a-zA-Z0-9-]{1,64}$`.
///
/// # Errors
/// Returns an error whose message names the offending value and the rule
/// it broke.
fn validate_tenant_id(value: &str) -> Result<()> {
    if value.is_empty() {
        bail!("tenant_id must not be empty");
    }
    // Count characters rather than bytes: the message reports "chars", and
    // `str::len` would over-count multi-byte UTF-8 input. Non-ASCII input
    // within the limit is still rejected by the regex check below.
    let char_count = value.chars().count();
    if char_count > TENANT_ID_MAX_LEN {
        bail!(
            "tenant_id '{}' is {} chars; max {}",
            value,
            char_count,
            TENANT_ID_MAX_LEN
        );
    }
    if !tenant_id_regex().is_match(value) {
        bail!("tenant_id '{}' must match ^[a-zA-Z0-9-]{{1,64}}$", value);
    }
    Ok(())
}
/// Shared, lazily-compiled `tenant_id` pattern, compiled on first use and
/// cached in a `OnceLock`.
fn tenant_id_regex() -> &'static Regex {
    static PATTERN: OnceLock<Regex> = OnceLock::new();
    let compile = || Regex::new(r"^[a-zA-Z0-9-]{1,64}$").expect("regex literal");
    PATTERN.get_or_init(compile)
}
/// Registry HashMap key. Cluster mode populates `tenant_id: None`;
/// Cloud mode (RFC 0003) populates `tenant_id: Some(...)`.
///
/// The `Option<TenantId>` field is the **single forward-compatibility seam**
/// between Cluster and Cloud modes. Every handler reaches the engine via
/// `state.registry.get(&key)` — the key shape stays stable, so handlers
/// don't get re-touched when Cloud mode lands.
///
/// Equality and hashing are derived over both fields, so the same
/// `graph_id` under different tenants yields distinct keys.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct GraphKey {
// `None` in Cluster mode, `Some(..)` in Cloud mode.
pub tenant_id: Option<TenantId>,
// Validated graph identifier.
pub graph_id: GraphId,
}
impl GraphKey {
/// Cluster-mode constructor (`tenant_id: None`).
pub fn cluster(graph_id: GraphId) -> Self {
Self {
tenant_id: None,
graph_id,
}
}
/// Cloud-mode constructor — reserved for RFC 0003; included here so
/// the seam is visible even though no Cluster-mode code path calls it.
pub fn cloud(tenant_id: TenantId, graph_id: GraphId) -> Self {
Self {
tenant_id: Some(tenant_id),
graph_id,
}
}
}
impl fmt::Display for GraphKey {
    /// Renders `tenant/graph` when a tenant is present, otherwise just the
    /// graph id.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(tenant) = &self.tenant_id {
            write!(f, "{}/{}", tenant, self.graph_id)
        } else {
            write!(f, "{}", self.graph_id)
        }
    }
}
/// Authorization scope. Cluster mode: every authenticated actor gets
/// `Scope::Full`. Cloud mode (RFC 0004) adds OAuth-style scopes via the
/// dashboard-configured `graph:read`, `graph:write`, `graph:admin`,
/// `graph:*` set; those become additional variants here.
///
/// `#[non_exhaustive]` so RFC 0004 can extend without breaking matches.
/// Code in other crates that matches on `Scope` therefore needs a wildcard
/// arm.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum Scope {
/// Full access. The Cluster-mode default — every authenticated actor
/// has unrestricted access subject to Cedar policy.
Full,
}
/// How the actor was authenticated. Cluster mode: every actor authenticates
/// via the existing SHA-256 hash compare against a static token set, so
/// `AuthSource::Static`. RFC 0001 step 1 adds `AuthSource::Oidc` when the
/// `OidcJwtVerifier` ships.
///
/// `#[non_exhaustive]` so RFC 0001 can extend without breaking matches.
/// Code in other crates that matches on `AuthSource` therefore needs a
/// wildcard arm.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum AuthSource {
/// Authenticated via the static bearer-token hash table.
Static,
}
/// Server-resolved actor identity. Replaces the previous
/// `AuthenticatedActor(Arc<str>)` from `lib.rs`.
///
/// The fields are populated by `authenticate_bearer_token` after a successful
/// constant-time hash match. **Clients cannot set any of these fields directly**
/// — this is the MR-731 invariant. See `authorize_request` in `lib.rs` for the
/// chokepoint that overwrites any client-supplied actor identity.
///
/// Cluster mode constructs this with `tenant_id: None`, `scopes: vec![Scope::Full]`,
/// `source: AuthSource::Static` via the convenience constructor below.
#[derive(Debug, Clone)]
pub struct ResolvedActor {
// Actor identifier; exposed as `&str` through `actor_id_str`.
pub actor_id: Arc<str>,
// Tenant the actor belongs to; `None` in Cluster mode.
pub tenant_id: Option<TenantId>,
// Scopes granted to the actor; `[Scope::Full]` in Cluster mode.
pub scopes: Vec<Scope>,
// Which authentication mechanism produced this identity.
pub source: AuthSource,
}
impl ResolvedActor {
/// Cluster-mode constructor — Static auth, no tenant, Full scope.
/// Used by `authenticate_bearer_token` after a successful hash match.
pub fn cluster_static(actor_id: Arc<str>) -> Self {
Self {
actor_id,
tenant_id: None,
scopes: vec![Scope::Full],
source: AuthSource::Static,
}
}
/// View the actor identifier as `&str`. Stable across the Cluster/Cloud
/// boundary — Cedar always sees this value as the principal.
pub fn actor_id_str(&self) -> &str {
&self.actor_id
}
}
// Unit tests for the identity types: `TenantId` validation, `GraphKey`
// construction/display/hashing, and `ResolvedActor` Cluster-mode defaults.
#[cfg(test)]
mod tests {
use super::*;
// Letters, digits and hyphens are accepted, including a 26-character
// all-uppercase/digit value.
#[test]
fn tenant_id_accepts_simple_values() {
for ok in ["alpha", "tenant-001", "X", "01HZWA0KT0H0V0V0V0V0V0V0V0"] {
TenantId::try_from(ok).unwrap_or_else(|_| panic!("expected accept: {ok}"));
}
}
// Both length boundaries: empty and one past `TENANT_ID_MAX_LEN`.
#[test]
fn tenant_id_rejects_empty_and_over_max() {
assert!(TenantId::try_from("").is_err());
let too_long = "a".repeat(65);
assert!(TenantId::try_from(too_long.as_str()).is_err());
}
#[test]
fn tenant_id_rejects_path_traversal() {
assert!(TenantId::try_from("../etc").is_err());
assert!(TenantId::try_from("alpha/beta").is_err());
}
// Deserialization must go through the same validation as `try_from`.
#[test]
fn tenant_id_deserialize_runs_validation() {
let bad: Result<TenantId, _> = serde_json::from_str("\"../evil\"");
assert!(bad.is_err());
}
#[test]
fn graph_key_cluster_constructor_sets_no_tenant() {
let id = GraphId::try_from("alpha").unwrap();
let key = GraphKey::cluster(id.clone());
assert!(key.tenant_id.is_none());
assert_eq!(key.graph_id, id);
}
#[test]
fn graph_key_cloud_constructor_sets_tenant() {
let tenant = TenantId::try_from("acme").unwrap();
let id = GraphId::try_from("alpha").unwrap();
let key = GraphKey::cloud(tenant.clone(), id.clone());
assert_eq!(key.tenant_id.as_ref(), Some(&tenant));
assert_eq!(key.graph_id, id);
}
// Display is `graph` without a tenant and `tenant/graph` with one.
#[test]
fn graph_key_displays_with_or_without_tenant() {
let id = GraphId::try_from("alpha").unwrap();
let cluster_key = GraphKey::cluster(id.clone());
assert_eq!(format!("{cluster_key}"), "alpha");
let tenant = TenantId::try_from("acme").unwrap();
let cloud_key = GraphKey::cloud(tenant, id);
assert_eq!(format!("{cloud_key}"), "acme/alpha");
}
// Two independently built but equal keys must address the same map slot.
#[test]
fn graph_key_is_hashable_for_map_use() {
use std::collections::HashMap;
let a = GraphKey::cluster(GraphId::try_from("alpha").unwrap());
let b = GraphKey::cluster(GraphId::try_from("alpha").unwrap());
let mut m: HashMap<GraphKey, u32> = HashMap::new();
m.insert(a, 1);
assert_eq!(m.get(&b), Some(&1));
}
// The same graph id under two tenants must produce unequal keys.
#[test]
fn graph_key_distinguishes_tenants() {
let id = GraphId::try_from("alpha").unwrap();
let t1 = TenantId::try_from("acme").unwrap();
let t2 = TenantId::try_from("globex").unwrap();
let k1 = GraphKey::cloud(t1, id.clone());
let k2 = GraphKey::cloud(t2, id);
assert_ne!(k1, k2);
}
// `cluster_static` must yield: no tenant, `[Scope::Full]`, static auth.
#[test]
fn resolved_actor_cluster_defaults() {
let actor = ResolvedActor::cluster_static(Arc::<str>::from("act-alice"));
assert_eq!(actor.actor_id_str(), "act-alice");
assert!(actor.tenant_id.is_none());
assert_eq!(actor.scopes, vec![Scope::Full]);
assert_eq!(actor.source, AuthSource::Static);
}
#[test]
fn scope_and_auth_source_are_non_exhaustive() {
// Regression: keep the `#[non_exhaustive]` annotation. If someone
// removes it, this test still passes (matches are still legal); it's
// the cross-crate compile that catches it. Document the contract here.
let _scope = Scope::Full;
let _src = AuthSource::Static;
}
}

File diff suppressed because it is too large Load diff

View file

@ -8,7 +8,7 @@ use omnigraph_server::{ServerConfig, init_tracing, load_server_settings, serve};
#[command(name = "omnigraph-server")]
#[command(about = "HTTP server for the Omnigraph graph database")]
struct Cli {
/// Repo URI
/// Graph URI
uri: Option<String>,
#[arg(long)]
target: Option<String>,

View file

@ -0,0 +1,688 @@
//! Stored-query registry.
//!
//! A server-side registry of named, parameter-typed `.gq` queries that
//! operators declare in `omnigraph.yaml` (per-graph, or top-level in
//! single mode) and the server loads at startup. Each entry is parsed
//! and its identity asserted here (`load`); type-checking against the
//! live schema happens separately (a `check` pass) so the loader stays
//! callable without an open engine (the CLI's offline `queries check`).
//!
//! Identity is the query **name**: the manifest key must equal the
//! `query <name>` symbol declared in the referenced `.gq` file. The two
//! are asserted equal at load — one name, two places that must agree.
//! Renaming either is a breaking change to callers, by design.
use std::collections::BTreeMap;
use std::fs;
use std::sync::Arc;
use omnigraph_compiler::catalog::Catalog;
use omnigraph_compiler::query::ast::QueryDecl;
use omnigraph_compiler::query::parser::parse_query;
use omnigraph_compiler::query::typecheck::typecheck_query_decl;
use omnigraph_compiler::types::{PropType, ScalarType};
use crate::config::{OmnigraphConfig, QueryEntry};
/// One loaded stored query. `source` is the full `.gq` file text — the
/// invocation handler hands it to `run_query` / `run_mutate` verbatim,
/// which reuse the same parse/IR/exec path as the inline routes (no
/// parallel implementation).
///
/// Instances are built by `QueryRegistry::from_specs`, which only stores a
/// query once a declaration named `name` has been found in `source`.
#[derive(Debug, Clone)]
pub struct StoredQuery {
/// Identity: manifest key == `query <name>` symbol.
pub name: String,
/// Full `.gq` source text the query was selected from.
/// Held as `Arc<str>` (cloning the struct does not copy the text).
pub source: Arc<str>,
/// Parsed declaration (params, mutations, description, …).
pub decl: QueryDecl,
/// Whether this query is listed in the MCP tool catalog (`GET /queries`).
/// Default `true` (the manifest entry is the opt-in); `expose: false`
/// keeps it HTTP/service-callable but hidden from the agent tool list.
/// Catalog membership only — not an authorization gate.
pub expose: bool,
/// Optional MCP tool-name override; defaults to `name`.
/// Read through `effective_tool_name`, which applies that default.
pub tool_name: Option<String>,
}
impl StoredQuery {
    /// Whether the selected declaration carries any insert/update/delete
    /// statements. Drives read-vs-mutate routing at invocation time.
    pub fn is_mutation(&self) -> bool {
        let mutations = &self.decl.mutations;
        !mutations.is_empty()
    }

    /// The MCP tool name this query is catalogued under: the explicit
    /// `tool_name` override when present, otherwise the query `name`.
    /// This is the catalog key, enforced unique across exposed queries at
    /// load. Server-side consumers (the uniqueness check, the future
    /// catalog projection) read this; the CLI `queries list` resolves the
    /// same rule on its own DTO.
    pub fn effective_tool_name(&self) -> &str {
        match &self.tool_name {
            Some(alias) => alias.as_str(),
            None => self.name.as_str(),
        }
    }
}
/// A loaded, identity-checked stored-query registry for one graph.
///
/// `Default` yields an empty registry. Iteration (`iter`) follows the
/// `BTreeMap` key order, i.e. sorted by query name.
#[derive(Debug, Clone, Default)]
pub struct QueryRegistry {
// Stored queries keyed by their registry name.
by_name: BTreeMap<String, StoredQuery>,
}
/// In-memory registry entry before file I/O. Used by [`QueryRegistry::load`]
/// (after reading each `.gq` from disk) and directly by tests.
#[derive(Debug, Clone)]
pub struct RegistrySpec {
// Registry key; must equal a `query <name>` symbol declared in `source`.
pub name: String,
// Full `.gq` source text to parse.
pub source: String,
// Copied onto `StoredQuery::expose`.
pub expose: bool,
// Copied onto `StoredQuery::tool_name`.
pub tool_name: Option<String>,
}
/// A single registry load failure. Collected (not fail-fast) so a bad
/// `omnigraph.yaml` surfaces every broken entry at once, matching the
/// bad-policy-YAML posture.
#[derive(Debug, Clone)]
pub struct LoadError {
/// The offending query name, when the failure is entry-scoped.
pub query: Option<String>,
// Failure description; the `Display` impl prints it after a prefix that
// names the query (or the registry when `query` is `None`).
pub message: String,
}
impl std::fmt::Display for LoadError {
    /// Prefixes the message with the offending query name when the error is
    /// entry-scoped, and with a generic registry label otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(name) = &self.query {
            write!(f, "stored query '{name}': {}", self.message)
        } else {
            write!(f, "stored query registry: {}", self.message)
        }
    }
}
impl QueryRegistry {
/// Build a registry from in-memory specs: parse each source, select
/// the declaration whose symbol equals the manifest key, and assert
/// they agree. Collects every failure. No schema type-checking here
/// — that is [`check`].
pub fn from_specs(specs: Vec<RegistrySpec>) -> Result<Self, Vec<LoadError>> {
let mut by_name = BTreeMap::new();
let mut errors = Vec::new();
for spec in specs {
match parse_query(&spec.source) {
Ok(file) => {
match file.queries.into_iter().find(|q| q.name == spec.name) {
Some(decl) => {
by_name.insert(
spec.name.clone(),
StoredQuery {
name: spec.name,
source: Arc::from(spec.source),
decl,
expose: spec.expose,
tool_name: spec.tool_name,
},
);
}
None => errors.push(LoadError {
query: Some(spec.name.clone()),
message: format!(
"no `query {}` declaration found in its `.gq` file \
(the registry key must match the query symbol)",
spec.name
),
}),
}
}
Err(err) => errors.push(LoadError {
query: Some(spec.name),
message: format!("parse error: {err}"),
}),
}
}
// Exposed queries are catalogued under their effective tool name;
// two claiming one name is an MCP-namespace collision. Refuse it at
// load (collected, not fail-fast), naming the loser and the winner.
// Iterating the `BTreeMap` makes the winner deterministic (the
// lexicographically-first query name; config is a map, so YAML
// declaration order isn't preserved anyway) and the error order
// stable. Scoped to a block so these borrows of `by_name` end
// before it is moved into `Self`.
{
let mut claimed: BTreeMap<&str, &str> = BTreeMap::new();
for query in by_name.values().filter(|q| q.expose) {
let tool = query.effective_tool_name();
if let Some(winner) = claimed.insert(tool, &query.name) {
errors.push(LoadError {
query: Some(query.name.clone()),
message: format!(
"MCP tool name '{tool}' already claimed by exposed query '{winner}'"
),
});
}
}
}
if errors.is_empty() {
Ok(Self { by_name })
} else {
Err(errors)
}
}
/// Build the registry by reading every entry's `.gq` source from disk.
///
/// `entries` is either the top-level `queries` map (single mode) or a
/// graph's `queries` map (multi mode). Each entry's `file:` value is
/// turned into a path by `config.resolve_query_file` before it is read.
///
/// Failures are collected instead of aborting at the first one: unreadable
/// files are reported first (in `entries` key order), followed by whatever
/// `from_specs` rejects among the readable sources. Any collected error
/// fails the whole load.
pub fn load(
    config: &OmnigraphConfig,
    entries: &BTreeMap<String, QueryEntry>,
) -> Result<Self, Vec<LoadError>> {
    let mut failures = Vec::new();
    let mut readable = Vec::with_capacity(entries.len());
    for (name, entry) in entries {
        let path = config.resolve_query_file(&entry.file);
        let source = match fs::read_to_string(&path) {
            Ok(text) => text,
            Err(err) => {
                failures.push(LoadError {
                    query: Some(name.clone()),
                    message: format!("cannot read '{}': {err}", path.display()),
                });
                continue;
            }
        };
        readable.push(RegistrySpec {
            name: name.clone(),
            source,
            expose: entry.mcp.expose,
            tool_name: entry.mcp.tool_name.clone(),
        });
    }
    // The readable specs are still validated when some reads failed, so an
    // operator sees I/O, parse, identity and tool-name problems in one pass
    // rather than one per restart.
    match Self::from_specs(readable) {
        Err(spec_errors) => {
            failures.extend(spec_errors);
            Err(failures)
        }
        // The specs were fine, but at least one file could not be read.
        Ok(_) if !failures.is_empty() => Err(failures),
        Ok(registry) => Ok(registry),
    }
}
/// Return the stored query registered under `name`, or `None` when the
/// registry has no entry with that name.
pub fn lookup(&self, name: &str) -> Option<&StoredQuery> {
self.by_name.get(name)
}
/// Iterate over every stored query, in the iteration order of the
/// `by_name` map's values.
pub fn iter(&self) -> impl Iterator<Item = &StoredQuery> {
self.by_name.values()
}
/// `true` when the registry holds no stored queries.
pub fn is_empty(&self) -> bool {
self.by_name.is_empty()
}
/// Number of stored queries in the registry.
pub fn len(&self) -> usize {
self.by_name.len()
}
}
/// A stored query that fails to type-check against the live schema —
/// e.g. it references a node/edge type or property that was renamed or
/// removed by a migration. Breakages **block server boot** (same posture
/// as bad policy YAML), surfacing schema drift at the deploy boundary
/// rather than silently at invocation time.
#[derive(Debug, Clone)]
pub struct Breakage {
/// Name of the stored query that failed the type check.
pub query: String,
/// The type-check error rendered as text.
pub message: String,
}
/// A non-blocking advisory found during validation. Logged at boot;
/// never blocks startup. Currently: an MCP-exposed query that declares a
/// parameter an agent cannot realistically supply.
#[derive(Debug, Clone)]
pub struct Warning {
/// Name of the stored query the advisory is about.
pub query: String,
/// Human-readable description of the advisory.
pub message: String,
}
/// Outcome of validating a registry against a schema. Breakages are
/// fatal (boot refuses); warnings are advisory.
#[derive(Debug, Clone, Default)]
pub struct CheckReport {
/// Queries that failed the type check, in the order they were checked.
pub breakages: Vec<Breakage>,
/// Advisory findings, in the order they were produced.
pub warnings: Vec<Warning>,
}
impl CheckReport {
    /// `true` when at least one breakage was recorded.
    pub fn has_breakages(&self) -> bool {
        !self.breakages.is_empty()
    }

    /// `true` when the report carries neither breakages nor warnings.
    pub fn is_clean(&self) -> bool {
        !self.has_breakages() && self.warnings.is_empty()
    }
}
/// Validate a loaded registry against the live schema.
///
/// Depends only on `(registry, catalog)`: it takes an already-parsed
/// registry and a catalog, so it can run at server boot (with the engine's
/// `catalog()`) and offline from the CLI (`omnigraph queries check`)
/// without needing server config or an open engine connection.
///
/// Each query goes through `typecheck_query_decl`, the same check the
/// engine runs for inline queries. Failures are **collected, not
/// fail-fast**, so an operator sees every broken query in one pass.
///
/// Advisory lint (warn, never block): an `mcp.expose: true` query that
/// declares a `Vector(N)` parameter. An LLM cannot supply a raw embedding
/// vector; such a query should take a `String` parameter and let the
/// engine embed it server-side at query time. Service-to-service callers
/// may legitimately pass vectors, so this warns rather than rejects.
pub fn check(registry: &QueryRegistry, catalog: &Catalog) -> CheckReport {
    let mut report = CheckReport::default();
    for query in registry.iter() {
        match typecheck_query_decl(catalog, &query.decl) {
            Ok(_) => {}
            Err(err) => report.breakages.push(Breakage {
                query: query.name.clone(),
                message: err.to_string(),
            }),
        }
        // The vector-parameter advisory only applies to MCP-exposed queries.
        if !query.expose {
            continue;
        }
        let advisories = query
            .decl
            .params
            .iter()
            .filter(|param| {
                // Ask the compiler's own resolver for the structured type
                // instead of string-matching `Vector(`, so this lint cannot
                // drift from how the type system spells the type.
                PropType::from_param_type_name(&param.type_name, param.nullable)
                    .is_some_and(|pt| matches!(pt.scalar, ScalarType::Vector(_)))
            })
            .map(|param| Warning {
                query: query.name.clone(),
                message: format!(
                    "MCP-exposed query declares a `{}` parameter `${}` that agents \
                     cannot supply; use a `String` parameter for server-side embedding",
                    param.type_name, param.name
                ),
            });
        report.warnings.extend(advisories);
    }
    report
}
/// Render every breakage in `report` as one multi-line, operator-facing
/// message: a header naming the graph `label` and the breakage count,
/// followed by one line per offending query.
pub fn format_check_breakages(label: &str, report: &CheckReport) -> String {
    let count = report.breakages.len();
    // "query" pluralises irregularly: 1 query, N queries.
    let suffix = if count == 1 { "y" } else { "ies" };
    let mut lines = Vec::with_capacity(count);
    for breakage in &report.breakages {
        lines.push(format!("query '{}': {}", breakage.query, breakage.message));
    }
    let joined = lines.join("\n ");
    format!(
        "graph '{label}': {} stored quer{} failed the schema check:\n {joined}",
        count, suffix
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a spec that has no `tool_name` override.
    fn spec(name: &str, source: &str, expose: bool) -> RegistrySpec {
        RegistrySpec {
            name: name.to_string(),
            source: source.to_string(),
            expose,
            tool_name: None,
        }
    }

    /// Build a spec with an explicit MCP `tool_name` override.
    fn spec_tool(name: &str, source: &str, expose: bool, tool_name: &str) -> RegistrySpec {
        RegistrySpec {
            name: name.to_string(),
            source: source.to_string(),
            expose,
            tool_name: Some(tool_name.to_string()),
        }
    }

    #[test]
    fn key_equal_symbol_loads() {
        let reg = QueryRegistry::from_specs(vec![spec(
            "find_user",
            "query find_user($id: String) { match { $u: User } return { $u.name } }",
            true,
        )])
        .unwrap();
        let q = reg.lookup("find_user").unwrap();
        assert_eq!(q.name, "find_user");
        assert!(q.expose);
        assert_eq!(q.decl.params.len(), 1);
        assert!(!q.is_mutation());
        // No override → the effective tool name is the query name.
        assert_eq!(q.effective_tool_name(), "find_user");
        // An explicit override is what the catalog keys on.
        let with_tool = QueryRegistry::from_specs(vec![spec_tool(
            "find_user",
            "query find_user($id: String) { match { $u: User } return { $u.name } }",
            true,
            "lookup_user",
        )])
        .unwrap();
        assert_eq!(
            with_tool.lookup("find_user").unwrap().effective_tool_name(),
            "lookup_user"
        );
    }

    #[test]
    fn key_mismatch_is_an_identity_error() {
        let errors = QueryRegistry::from_specs(vec![spec(
            "find_user",
            // symbol is `lookup`, key is `find_user` — must be rejected.
            "query lookup($id: String) { match { $u: User } return { $u.name } }",
            false,
        )])
        .unwrap_err();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].query.as_deref(), Some("find_user"));
        assert!(errors[0].message.contains("must match the query symbol"));
    }

    #[test]
    fn multi_query_file_selects_the_matching_symbol() {
        let source = "query a($x: I64) { match { $u: User } return { $u.name } }\n\
                      query b($y: String) { match { $u: User } return { $u.name } }";
        let reg = QueryRegistry::from_specs(vec![spec("b", source, false)]).unwrap();
        let q = reg.lookup("b").unwrap();
        assert_eq!(q.name, "b");
        assert_eq!(q.decl.params[0].name, "y");
        assert!(reg.lookup("a").is_none(), "only the selected symbol is registered");
    }

    #[test]
    fn duplicate_exposed_tool_name_is_a_load_error() {
        // Two MCP-exposed queries claiming one tool name is an ambiguity in
        // the catalog key space — refused at load, naming both queries and
        // the contested tool.
        let errors = QueryRegistry::from_specs(vec![
            spec_tool("a", "query a() { match { $u: User } return { $u.name } }", true, "dup"),
            spec_tool("b", "query b() { match { $u: User } return { $u.name } }", true, "dup"),
        ])
        .unwrap_err();
        assert_eq!(errors.len(), 1);
        let msg = errors[0].to_string();
        assert!(msg.contains("'dup'"), "names the contested tool: {msg}");
        assert!(msg.contains("'a'"), "names the winning query: {msg}");
        assert!(msg.contains("'b'"), "names the losing query: {msg}");
    }

    #[test]
    fn duplicate_tool_name_among_unexposed_is_allowed() {
        // Unexposed queries have no MCP tool, so a shared effective tool
        // name is inert — must not error (pins the exposed-only scope).
        let reg = QueryRegistry::from_specs(vec![
            spec_tool("a", "query a() { match { $u: User } return { $u.name } }", false, "dup"),
            spec_tool("b", "query b() { match { $u: User } return { $u.name } }", false, "dup"),
        ])
        .unwrap();
        assert_eq!(reg.len(), 2);
    }

    #[test]
    fn parse_error_surfaces_per_entry() {
        let errors =
            QueryRegistry::from_specs(vec![spec("broken", "query broken( {{ not valid", false)])
                .unwrap_err();
        assert_eq!(errors[0].query.as_deref(), Some("broken"));
        assert!(errors[0].message.contains("parse error"));
    }

    #[test]
    fn errors_collect_rather_than_fail_fast() {
        let errors = QueryRegistry::from_specs(vec![
            spec("good", "query good() { match { $u: User } return { $u.name } }", false),
            spec("mismatch", "query other() { match { $u: User } return { $u.name } }", false),
            spec("broken", "query broken(", false),
        ])
        .unwrap_err();
        // `good` loads cleanly; only the mismatch and the parse error are
        // reported, and both surface in one pass (not fail-fast).
        assert_eq!(errors.len(), 2);
    }

    #[test]
    fn mutation_body_classifies_as_mutation() {
        let reg = QueryRegistry::from_specs(vec![spec(
            "add_user",
            "query add_user($name: String) { insert User { name: $name } }",
            false,
        )])
        .unwrap();
        assert!(reg.lookup("add_user").unwrap().is_mutation());
    }

    // --- check(registry, catalog) ---
    use omnigraph_compiler::catalog::build_catalog;
    use omnigraph_compiler::schema::parser::parse_schema;

    /// Catalog with a single `User` node type, shared by the `check` tests.
    /// The raw-string schema text is kept exactly as it was.
    fn test_catalog() -> Catalog {
        let schema = parse_schema(
            r#"
node User {
name: String
age: I32?
embedding: Vector(4)
}
"#,
        )
        .unwrap();
        build_catalog(&schema).unwrap()
    }

    #[test]
    fn check_passes_for_valid_query() {
        let reg = QueryRegistry::from_specs(vec![spec(
            "find_user",
            "query find_user($name: String) { match { $u: User { name: $name } } return { $u.age } }",
            false,
        )])
        .unwrap();
        let report = check(&reg, &test_catalog());
        assert!(report.is_clean(), "unexpected: {:?}", report);
    }

    #[test]
    fn check_reports_unknown_type_as_breakage() {
        let reg = QueryRegistry::from_specs(vec![spec(
            "ghost",
            // `Widget` is not in the schema.
            "query ghost() { match { $w: Widget } return { $w.name } }",
            false,
        )])
        .unwrap();
        let report = check(&reg, &test_catalog());
        assert!(report.has_breakages());
        assert_eq!(report.breakages[0].query, "ghost");
    }

    #[test]
    fn check_reports_unknown_property_as_breakage() {
        let reg = QueryRegistry::from_specs(vec![spec(
            "bad_prop",
            // `User` exists but has no `nickname`.
            "query bad_prop() { match { $u: User } return { $u.nickname } }",
            false,
        )])
        .unwrap();
        let report = check(&reg, &test_catalog());
        assert!(report.has_breakages());
        assert_eq!(report.breakages[0].query, "bad_prop");
    }

    #[test]
    fn check_collects_every_breakage_not_fail_fast() {
        let reg = QueryRegistry::from_specs(vec![
            spec("a", "query a() { match { $w: Widget } return { $w.x } }", false),
            spec("b", "query b() { match { $g: Gadget } return { $g.y } }", false),
            spec(
                "ok",
                "query ok() { match { $u: User } return { $u.name } }",
                false,
            ),
        ])
        .unwrap();
        let report = check(&reg, &test_catalog());
        assert_eq!(report.breakages.len(), 2, "both bad queries reported: {:?}", report);
    }

    #[test]
    fn vector_param_on_exposed_query_warns() {
        let reg = QueryRegistry::from_specs(vec![spec(
            "vec_search",
            "query vec_search($q: Vector(4)) { match { $u: User } return { $u.name } \
             order { nearest($u.embedding, $q) } limit 3 }",
            true, // mcp.expose
        )])
        .unwrap();
        let report = check(&reg, &test_catalog());
        assert!(!report.has_breakages(), "valid query: {:?}", report);
        assert_eq!(report.warnings.len(), 1);
        assert_eq!(report.warnings[0].query, "vec_search");
    }

    #[test]
    fn vector_param_on_unexposed_query_is_silent() {
        let reg = QueryRegistry::from_specs(vec![spec(
            "vec_search",
            "query vec_search($q: Vector(4)) { match { $u: User } return { $u.name } \
             order { nearest($u.embedding, $q) } limit 3 }",
            false, // not exposed — vector param is fine for service-to-service callers
        )])
        .unwrap();
        let report = check(&reg, &test_catalog());
        assert!(report.is_clean(), "unexpected: {:?}", report);
    }

    #[test]
    fn non_vector_param_on_exposed_query_does_not_warn() {
        // The recommended `String` alternative on an exposed query does not
        // resolve to a Vector, so the embedding advisory stays silent. Guards
        // the structured type check against a false positive (and pins that
        // only `Vector(_)` triggers the warning).
        let reg = QueryRegistry::from_specs(vec![spec(
            "search",
            "query search($name: String) { match { $u: User { name: $name } } return { $u.name } }",
            true,
        )])
        .unwrap();
        let report = check(&reg, &test_catalog());
        assert!(report.is_clean(), "no breakage or warning expected: {:?}", report);
    }

    // --- catalog projection (api::query_catalog_entry) ---
    #[test]
    fn catalog_entry_projects_every_param_kind() {
        use crate::api::{self, ParamKind};
        let reg = QueryRegistry::from_specs(vec![spec_tool(
            "all_types",
            "query all_types($s: String, $i: I32, $big: I64, $u: U64, $f: F64, $b: Bool, \
             $d: Date, $dt: DateTime, $blob: Blob, $opt: String?, $list: [I32], $vec: Vector(4)) \
             { match { $x: User } return { $x.name } }",
            true,
            "all",
        )])
        .unwrap();
        let entry = api::query_catalog_entry(reg.lookup("all_types").unwrap());
        assert_eq!(entry.name, "all_types");
        assert_eq!(entry.tool_name, "all");
        assert!(!entry.mutation);
        let by: std::collections::HashMap<_, _> =
            entry.params.iter().map(|p| (p.name.as_str(), p)).collect();
        assert_eq!(by["s"].kind, ParamKind::String);
        assert_eq!(by["i"].kind, ParamKind::Int);
        assert_eq!(by["big"].kind, ParamKind::BigInt, "I64 → bigint (string on the wire)");
        assert_eq!(by["u"].kind, ParamKind::BigInt, "U64 → bigint");
        assert_eq!(by["f"].kind, ParamKind::Float);
        assert_eq!(by["b"].kind, ParamKind::Bool);
        assert_eq!(by["d"].kind, ParamKind::Date);
        assert_eq!(by["dt"].kind, ParamKind::DateTime);
        assert_eq!(by["blob"].kind, ParamKind::Blob);
        assert!(!by["s"].nullable);
        assert!(by["opt"].nullable, "String? → nullable");
        assert_eq!(by["list"].kind, ParamKind::List);
        assert_eq!(by["list"].item_kind, Some(ParamKind::Int), "[I32] → list of int");
        assert_eq!(by["vec"].kind, ParamKind::Vector);
        assert_eq!(by["vec"].vector_dim, Some(4));
    }

    #[test]
    fn catalog_entry_flags_mutation_and_empty_params() {
        use crate::api;
        let reg = QueryRegistry::from_specs(vec![spec(
            "add_user",
            "query add_user($name: String) { insert User { name: $name } }",
            true,
        )])
        .unwrap();
        let entry = api::query_catalog_entry(reg.lookup("add_user").unwrap());
        assert!(entry.mutation, "insert body → mutation flag");
        let reg2 = QueryRegistry::from_specs(vec![spec(
            "no_params",
            "query no_params() { match { $u: User } return { $u.name } }",
            true,
        )])
        .unwrap();
        let entry2 = api::query_catalog_entry(reg2.lookup("no_params").unwrap());
        assert!(entry2.params.is_empty(), "no declared params → empty list");
    }

    // --- load() error collection (file I/O + parse in one pass) ---
    #[test]
    fn load_collects_io_and_parse_errors_in_one_pass() {
        use crate::config::load_config;
        let temp = tempfile::tempdir().unwrap();
        std::fs::write(
            temp.path().join("good.gq"),
            "query good() { match { $u: User } return { $u.name } }",
        )
        .unwrap();
        std::fs::write(temp.path().join("broken.gq"), "query broken( {{ not valid").unwrap();
        // `missing.gq` is deliberately not written (an I/O failure).
        //
        // YAML nesting is carried by indentation: each query name sits two
        // spaces under `queries:` and its `file:` key four spaces in. With
        // every line at the same indent, `file:` would be a sibling of the
        // query names (and a duplicate key) rather than a field of each
        // entry. `concat!` keeps one YAML line per source line so the
        // indentation stays visible and is not eaten by a `\` continuation.
        std::fs::write(
            temp.path().join("omnigraph.yaml"),
            concat!(
                "queries:\n",
                "  good:\n",
                "    file: ./good.gq\n",
                "  missing:\n",
                "    file: ./missing.gq\n",
                "  broken:\n",
                "    file: ./broken.gq\n",
            ),
        )
        .unwrap();
        let config = load_config(Some(&temp.path().join("omnigraph.yaml"))).unwrap();
        let errors = QueryRegistry::load(&config, config.query_entries()).unwrap_err();
        let joined = errors.iter().map(|e| e.to_string()).collect::<Vec<_>>().join("\n");
        // Both the missing file AND the parse error surface in one pass —
        // the I/O failure must not mask the parse failure.
        assert!(joined.contains("missing"), "I/O error must surface: {joined}");
        assert!(
            joined.contains("broken") && joined.contains("parse error"),
            "the parse error in a readable file must surface in the same pass: {joined}"
        );
        assert!(!joined.contains("'good'"), "the valid entry is not an error: {joined}");
    }
}

View file

@ -0,0 +1,570 @@
//! `GraphRegistry` — the multi-graph routing substrate (MR-668).
//!
//! Holds the open `Arc<GraphHandle>` for every graph the server is currently
//! serving. Lock-free reads via `ArcSwap<RegistrySnapshot>`; mutations
//! serialize through `mutate: Mutex<()>` for read-modify-write atomicity.
//!
//! **Deletion is deferred** in v0.6.0 (MR-668 scope cut). The registry has
//! no `tombstones` field, no `RegistryLookup::Tombstoned` variant, and no
//! `tombstone()` / `clear_tombstone()` methods. When `DELETE /graphs/{id}`
//! lands in a follow-up release, those can be reintroduced without breaking
//! caller signatures; until then `Gone` is the closest semantic (the graph
//! is no longer in the registry).
//!
//! Engine instance survival across registry mutations:
//! a request that grabbed `Arc<GraphHandle>` before a registry swap keeps
//! the engine alive via its own `Arc` clone (see `server_export` at
//! `lib.rs:1019-1033` for the spawn-and-clone pattern). The engine drops
//! when the last `Arc<Omnigraph>` clone drops, regardless of the
//! registry's current state.
use std::collections::HashMap;
use std::sync::Arc;
use arc_swap::ArcSwap;
use omnigraph::db::Omnigraph;
use omnigraph::storage::normalize_root_uri;
#[cfg(test)]
use tokio::sync::Mutex;
use crate::identity::GraphKey;
use crate::policy::PolicyEngine;
use crate::queries::QueryRegistry;
/// Open handle for a single graph in the registry. The struct itself does
/// not implement `Clone`; it is shared as `Arc<GraphHandle>`, and its
/// engine, policy and query registry are each `Arc`-wrapped. Cluster-mode
/// handlers extract this via `Extension<Arc<GraphHandle>>` injected by the
/// routing middleware.
pub struct GraphHandle {
/// Registry key. In Cluster mode `key.tenant_id` is always `None`.
pub key: GraphKey,
/// The URI the engine was opened from (`s3://...` or local path).
/// Stable for the engine's lifetime; surfaced in responses like
/// `BranchCreateOutput.uri`.
pub uri: String,
/// Engine. Reads/writes go directly through `&self` methods on
/// `Omnigraph` (no `RwLock` — MR-686 preserved).
pub engine: Arc<Omnigraph>,
/// Per-graph Cedar policy. `None` means "no policy gate on engine-layer
/// `_as` writers"; the HTTP-layer `require_bearer_auth` middleware still
/// runs regardless.
pub policy: Option<Arc<PolicyEngine>>,
/// Per-graph stored-query registry, loaded and validated at
/// startup. `None` means the operator declared no stored queries for
/// this graph — `POST /queries/{name}` then 404s. Mirrors the
/// optional `policy` shape.
pub queries: Option<Arc<QueryRegistry>>,
}
/// Immutable snapshot of the registry's current state. Replaced atomically
/// via `ArcSwap`; readers see a consistent view of all graphs without locking.
///
/// Derived state (`any_per_graph_policy`) is computed at snapshot
/// construction so request-time middleware doesn't have to walk the
/// graph map every call. Construct only via [`RegistrySnapshot::new`]
/// (or `Default`) so the field stays in sync with `graphs`. Both fields
/// are `pub`, so this is a convention rather than something the compiler
/// enforces.
pub struct RegistrySnapshot {
/// Every registered graph, keyed by its `GraphKey`.
pub graphs: HashMap<GraphKey, Arc<GraphHandle>>,
/// `true` iff any registered graph has a per-graph policy installed.
/// Used by `AppState::requires_bearer_auth` to decide whether the
/// auth middleware should challenge a request — a per-graph policy
/// implies bearer auth is required even when no server-level tokens
/// or policy are configured.
pub any_per_graph_policy: bool,
}
impl RegistrySnapshot {
    /// Wrap a graph map in a snapshot, computing the cached
    /// `any_per_graph_policy` flag from it. Building snapshots anywhere
    /// else with a struct literal would let that flag drift from `graphs`.
    pub fn new(graphs: HashMap<GraphKey, Arc<GraphHandle>>) -> Self {
        Self {
            // Evaluated before `graphs` is moved into the struct below.
            any_per_graph_policy: graphs.values().any(|handle| handle.policy.is_some()),
            graphs,
        }
    }
}
impl Default for RegistrySnapshot {
fn default() -> Self {
Self::new(HashMap::new())
}
}
/// Result of a registry lookup, as returned by `GraphRegistry::get`.
/// Two-valued — `Tombstoned` deferred with DELETE.
pub enum RegistryLookup {
/// Graph is open and ready to serve.
Ready(Arc<GraphHandle>),
/// Graph is not in the registry (never existed, or was unregistered in a
/// future release). Handlers respond with 404.
Gone,
}
/// Why an `insert` was rejected. Also returned by
/// `GraphRegistry::from_handles` when the startup handle list is invalid.
#[derive(Debug, thiserror::Error)]
pub enum InsertError {
/// Another handle already exists for this `GraphKey`. Maps to HTTP 409.
#[error("graph '{0}' is already registered")]
DuplicateKey(GraphKey),
/// Another handle is open against this URI. Two graphs sharing a URI
/// would commit through the same Lance manifest and corrupt each other.
/// Maps to HTTP 409.
#[error("URI '{0}' is already registered as another graph")]
DuplicateUri(String),
/// A handle carried an invalid graph URI. Maps to startup failure.
#[error("URI '{uri}' is invalid: {message}")]
InvalidUri { uri: String, message: String },
}
/// Registry of the graphs the server is serving. Reads load the current
/// `RegistrySnapshot` from the `ArcSwap`; the test-only `insert` publishes
/// a new snapshot while holding `mutate`.
pub struct GraphRegistry {
/// Current snapshot; replaced wholesale when the registry changes.
snapshot: ArcSwap<RegistrySnapshot>,
/// Serializes runtime mutations through [`GraphRegistry::insert`].
/// Gated with `insert` because they share a single contract — if
/// the consumer goes away, so does the lock. Re-introducing one
/// requires re-introducing the other.
#[cfg(test)]
mutate: Mutex<()>,
}
impl GraphRegistry {
    /// Empty registry. Used as a placeholder before startup populates it.
    pub fn new() -> Self {
        Self {
            snapshot: ArcSwap::from_pointee(RegistrySnapshot::default()),
            #[cfg(test)]
            mutate: Mutex::new(()),
        }
    }

    /// Build a registry from a startup-time list of open handles.
    ///
    /// Each handle's URI is canonicalized first. The list is rejected with
    /// `DuplicateKey` if two handles share a `GraphKey`, with
    /// `DuplicateUri` if two handles share a canonical URI, and with
    /// `InvalidUri` if a URI cannot be normalized. The first problem found
    /// (in list order) is the one reported.
    pub fn from_handles(handles: Vec<Arc<GraphHandle>>) -> Result<Self, InsertError> {
        let mut graphs: HashMap<GraphKey, Arc<GraphHandle>> =
            HashMap::with_capacity(handles.len());
        // Only membership matters for URIs, so a set is enough; nothing
        // ever needs to know which key owned a previously seen URI.
        let mut seen_uris: std::collections::HashSet<String> =
            std::collections::HashSet::with_capacity(handles.len());
        for handle in handles {
            let (canonical_uri, handle) = canonicalize_handle_uri(handle)?;
            if graphs.contains_key(&handle.key) {
                return Err(InsertError::DuplicateKey(handle.key.clone()));
            }
            // `insert` returns `false` when the URI was already present.
            if !seen_uris.insert(canonical_uri) {
                return Err(InsertError::DuplicateUri(handle.uri.clone()));
            }
            graphs.insert(handle.key.clone(), handle);
        }
        Ok(Self {
            snapshot: ArcSwap::from_pointee(RegistrySnapshot::new(graphs)),
            #[cfg(test)]
            mutate: Mutex::new(()),
        })
    }

    /// Lock-free snapshot read. Callers that need derived state cached
    /// on the snapshot (e.g. `any_per_graph_policy`) go through here;
    /// callers that only need values of `graphs` should use
    /// [`Self::list`] or [`Self::get`].
    pub fn snapshot_ref(&self) -> arc_swap::Guard<Arc<RegistrySnapshot>> {
        self.snapshot.load()
    }

    /// Lock-free read. Returns `Ready` if the graph is in the current snapshot,
    /// `Gone` otherwise.
    pub fn get(&self, key: &GraphKey) -> RegistryLookup {
        let snapshot = self.snapshot.load();
        match snapshot.graphs.get(key) {
            Some(handle) => RegistryLookup::Ready(Arc::clone(handle)),
            None => RegistryLookup::Gone,
        }
    }

    /// Snapshot the full set of currently-registered handles. Ordering
    /// matches the underlying `HashMap` iteration (intentionally
    /// non-deterministic — callers that need a stable order sort by
    /// `handle.key.graph_id`).
    pub fn list(&self) -> Vec<Arc<GraphHandle>> {
        let snapshot = self.snapshot.load();
        snapshot.graphs.values().cloned().collect()
    }

    /// Number of registered graphs (excluding any future tombstones).
    pub fn len(&self) -> usize {
        self.snapshot.load().graphs.len()
    }

    /// `true` when no graph is registered.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Add a new handle. Async because the mutex is `tokio::sync::Mutex`
    /// (a future managed-catalog flow may hold it across `.await` points
    /// during atomic registry mutations). Rejects duplicate `GraphKey`
    /// and duplicate `uri`.
    ///
    /// **Test-only surface.** No production code reaches this — startup
    /// uses `from_handles`, and runtime add/remove is deferred. The
    /// race-contract tests below pin the mutex linearization point so
    /// that when a real consumer ships (managed cluster catalog), the
    /// concurrency contract is already proven. Ungate by removing
    /// `#[cfg(test)]` once that consumer is in scope.
    ///
    /// Race semantics (pinned by `concurrent_insert_same_key_exactly_one_succeeds`):
    /// under N concurrent calls with the same key, exactly one returns
    /// `Ok(())` and the rest return `Err(InsertError::DuplicateKey(_))`.
    #[cfg(test)]
    pub async fn insert(&self, handle: Arc<GraphHandle>) -> Result<(), InsertError> {
        // Held for the whole read-check-store sequence so concurrent
        // inserts cannot both pass the duplicate checks.
        let _guard = self.mutate.lock().await;
        let current = self.snapshot.load();
        let (canonical_uri, handle) = canonicalize_handle_uri(handle)?;
        if current.graphs.contains_key(&handle.key) {
            return Err(InsertError::DuplicateKey(handle.key.clone()));
        }
        for existing in current.graphs.values() {
            let existing_uri =
                normalize_root_uri(&existing.uri).map_err(|err| InsertError::InvalidUri {
                    uri: existing.uri.clone(),
                    message: err.to_string(),
                })?;
            if existing_uri == canonical_uri {
                return Err(InsertError::DuplicateUri(handle.uri.clone()));
            }
        }
        // Copy-and-replace: build the next map, then publish it as a
        // fresh snapshot so the derived fields are recomputed.
        let mut new_graphs = current.graphs.clone();
        new_graphs.insert(handle.key.clone(), handle);
        self.snapshot
            .store(Arc::new(RegistrySnapshot::new(new_graphs)));
        Ok(())
    }
}
/// Normalize a handle's URI with `normalize_root_uri`.
///
/// Returns the canonical URI together with a handle that carries it. When
/// the handle's URI is already canonical the same `Arc` is returned;
/// otherwise a new `GraphHandle` is built that shares the original's
/// engine, policy and queries. A URI that cannot be normalized yields
/// `InsertError::InvalidUri`.
fn canonicalize_handle_uri(
    handle: Arc<GraphHandle>,
) -> Result<(String, Arc<GraphHandle>), InsertError> {
    let canonical_uri = match normalize_root_uri(&handle.uri) {
        Ok(uri) => uri,
        Err(err) => {
            return Err(InsertError::InvalidUri {
                uri: handle.uri.clone(),
                message: err.to_string(),
            });
        }
    };
    let resolved = if canonical_uri == handle.uri {
        // Already canonical: hand back the caller's own `Arc`.
        handle
    } else {
        Arc::new(GraphHandle {
            key: handle.key.clone(),
            uri: canonical_uri.clone(),
            engine: Arc::clone(&handle.engine),
            policy: handle.policy.clone(),
            queries: handle.queries.clone(),
        })
    };
    Ok((canonical_uri, resolved))
}
impl Default for GraphRegistry {
fn default() -> Self {
Self::new()
}
}
// Tests for `GraphRegistry`: basic insert/get/list behaviour, duplicate
// rejection on both construction paths, and the concurrency contract of
// the test-only `insert`.
#[cfg(test)]
mod tests {
use std::path::Path;
use tempfile::TempDir;
use super::*;
use crate::graph_id::GraphId;
// Minimal schema used to initialise an engine for each test graph.
const TEST_SCHEMA: &str = "node Person { name: String @key }\n";
// Initialise an engine under `dir/<graph_id>` and wrap it in a handle keyed
// by `graph_id`, with no policy and no stored queries.
async fn build_handle(graph_id: &str, dir: &Path) -> Arc<GraphHandle> {
let graph_uri = dir.join(graph_id).to_str().unwrap().to_string();
let engine = Omnigraph::init(&graph_uri, TEST_SCHEMA)
.await
.expect("init engine for registry test");
Arc::new(GraphHandle {
key: GraphKey::cluster(GraphId::try_from(graph_id).unwrap()),
uri: graph_uri,
engine: Arc::new(engine),
policy: None,
queries: None,
})
}
#[tokio::test]
async fn new_registry_is_empty() {
let registry = GraphRegistry::new();
assert!(registry.is_empty());
assert_eq!(registry.len(), 0);
assert!(registry.list().is_empty());
}
#[tokio::test]
async fn insert_then_get_returns_ready() {
let dir = TempDir::new().unwrap();
let registry = GraphRegistry::new();
let handle = build_handle("alpha", dir.path()).await;
registry.insert(Arc::clone(&handle)).await.unwrap();
match registry.get(&handle.key) {
RegistryLookup::Ready(found) => {
// Pointer equality: the registry must hand back the very `Arc` that
// was inserted, not a rebuilt handle.
assert!(Arc::ptr_eq(&found, &handle));
}
RegistryLookup::Gone => panic!("expected Ready, got Gone"),
}
}
#[tokio::test]
async fn get_nonexistent_returns_gone() {
let registry = GraphRegistry::new();
let key = GraphKey::cluster(GraphId::try_from("ghost").unwrap());
match registry.get(&key) {
RegistryLookup::Gone => {}
RegistryLookup::Ready(_) => panic!("expected Gone"),
}
}
#[tokio::test]
async fn insert_duplicate_key_returns_error() {
let dir = TempDir::new().unwrap();
let registry = GraphRegistry::new();
let h1 = build_handle("alpha", dir.path()).await;
// Same key, different URI sub-path (build_handle uses graph_id as subdir).
let dir2 = TempDir::new().unwrap();
let h2 = build_handle("alpha", dir2.path()).await;
registry.insert(h1).await.unwrap();
match registry.insert(h2).await {
Err(InsertError::DuplicateKey(_)) => {}
other => panic!("expected DuplicateKey, got {other:?}"),
}
}
#[tokio::test]
async fn insert_duplicate_uri_returns_error() {
let dir = TempDir::new().unwrap();
// Two handles with the same URI but different keys.
let shared_uri = dir.path().join("shared").to_str().unwrap().to_string();
let engine = Omnigraph::init(&shared_uri, TEST_SCHEMA).await.unwrap();
let engine = Arc::new(engine);
let h1 = Arc::new(GraphHandle {
key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
uri: shared_uri.clone(),
engine: Arc::clone(&engine),
policy: None,
queries: None,
});
let h2 = Arc::new(GraphHandle {
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
uri: shared_uri,
engine,
policy: None,
queries: None,
});
let registry = GraphRegistry::new();
registry.insert(h1).await.unwrap();
match registry.insert(h2).await {
Err(InsertError::DuplicateUri(_)) => {}
other => panic!("expected DuplicateUri, got {other:?}"),
}
}
#[tokio::test]
async fn list_returns_all_inserted_handles() {
let dir = TempDir::new().unwrap();
let registry = GraphRegistry::new();
for name in ["alpha", "beta", "gamma"] {
let h = build_handle(name, dir.path()).await;
registry.insert(h).await.unwrap();
}
assert_eq!(registry.len(), 3);
let mut ids: Vec<_> = registry
.list()
.into_iter()
.map(|h| h.key.graph_id.as_str().to_string())
.collect();
// `list()` order follows `HashMap` iteration, so sort before comparing.
ids.sort();
assert_eq!(ids, vec!["alpha", "beta", "gamma"]);
}
#[tokio::test]
async fn from_handles_bulk_init_succeeds() {
let dir = TempDir::new().unwrap();
let handles = vec![
build_handle("alpha", dir.path()).await,
build_handle("beta", dir.path()).await,
];
let registry = GraphRegistry::from_handles(handles).unwrap();
assert_eq!(registry.len(), 2);
}
#[tokio::test]
async fn from_handles_rejects_duplicate_keys() {
let dir1 = TempDir::new().unwrap();
let dir2 = TempDir::new().unwrap();
let h1 = build_handle("alpha", dir1.path()).await;
let h2 = build_handle("alpha", dir2.path()).await;
let err = match GraphRegistry::from_handles(vec![h1, h2]) {
Ok(_) => panic!("expected DuplicateKey, got Ok"),
Err(err) => err,
};
assert!(
matches!(err, InsertError::DuplicateKey(_)),
"expected DuplicateKey, got {err}",
);
}
#[tokio::test]
async fn from_handles_rejects_duplicate_uris() {
let dir = TempDir::new().unwrap();
let shared_uri = dir.path().join("shared").to_str().unwrap().to_string();
let engine = Arc::new(Omnigraph::init(&shared_uri, TEST_SCHEMA).await.unwrap());
let h1 = Arc::new(GraphHandle {
key: GraphKey::cluster(GraphId::try_from("alpha").unwrap()),
uri: shared_uri.clone(),
engine: Arc::clone(&engine),
policy: None,
queries: None,
});
let h2 = Arc::new(GraphHandle {
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
uri: shared_uri,
engine,
policy: None,
queries: None,
});
let err = match GraphRegistry::from_handles(vec![h1, h2]) {
Ok(_) => panic!("expected DuplicateUri, got Ok"),
Err(err) => err,
};
assert!(
matches!(err, InsertError::DuplicateUri(_)),
"expected DuplicateUri, got {err}",
);
}
/// Race test modeled on `actor_admission_race_does_not_exceed_cap`
/// at `tests/server.rs:3596+`. Spawn N concurrent inserts with the
/// same `GraphKey` (each constructing its own `GraphHandle` against
/// its own tempdir). Exactly one must succeed; the others must
/// return `DuplicateKey`. No `unwrap` panic: the `Mutex<()>` +
/// in-mutex re-check is the linearization point.
#[tokio::test(flavor = "multi_thread")]
async fn concurrent_insert_same_key_exactly_one_succeeds() {
const N: usize = 8;
let registry = Arc::new(GraphRegistry::new());
// Pre-create N handles (each in its own tempdir; same key).
let mut handles = Vec::with_capacity(N);
let mut dirs = Vec::with_capacity(N);
for _ in 0..N {
let d = TempDir::new().unwrap();
handles.push(build_handle("contested", d.path()).await);
dirs.push(d);
}
// The barrier releases all N tasks together so the inserts contend.
let barrier = Arc::new(tokio::sync::Barrier::new(N));
let mut tasks = Vec::with_capacity(N);
for handle in handles {
let registry = Arc::clone(&registry);
let barrier = Arc::clone(&barrier);
tasks.push(tokio::spawn(async move {
barrier.wait().await;
registry.insert(handle).await
}));
}
let mut ok_count = 0usize;
let mut dup_count = 0usize;
for t in tasks {
match t.await.unwrap() {
Ok(()) => ok_count += 1,
Err(InsertError::DuplicateKey(_)) => dup_count += 1,
Err(other) => panic!("unexpected error: {other:?}"),
}
}
assert_eq!(ok_count, 1, "exactly one insert must succeed");
assert_eq!(dup_count, N - 1, "the rest must return DuplicateKey");
assert_eq!(registry.len(), 1);
// Drop the dirs at the end (preserves engines until tasks finish).
drop(dirs);
}
/// Concurrent inserts with **distinct** keys all succeed.
/// Linearizability over the mutex still serializes them.
#[tokio::test(flavor = "multi_thread")]
async fn concurrent_insert_distinct_keys_all_succeed() {
const N: usize = 8;
let registry = Arc::new(GraphRegistry::new());
// Pre-create N handles with distinct ids, each in its own tempdir.
let mut handles = Vec::with_capacity(N);
let mut dirs = Vec::with_capacity(N);
for i in 0..N {
let d = TempDir::new().unwrap();
handles.push(build_handle(&format!("graph-{i}"), d.path()).await);
dirs.push(d);
}
let barrier = Arc::new(tokio::sync::Barrier::new(N));
let mut tasks = Vec::with_capacity(N);
for handle in handles {
let registry = Arc::clone(&registry);
let barrier = Arc::clone(&barrier);
tasks.push(tokio::spawn(async move {
barrier.wait().await;
registry.insert(handle).await
}));
}
// First `unwrap` is the task join result, second is the insert result.
for t in tasks {
t.await.unwrap().unwrap();
}
assert_eq!(registry.len(), N);
drop(dirs);
}
/// Concurrent reads during a write must always see a consistent
/// snapshot (no torn state). With `ArcSwap`, the read either sees
/// the old snapshot or the new one — never both, never neither.
#[tokio::test(flavor = "multi_thread")]
async fn concurrent_reads_during_inserts_see_consistent_snapshots() {
let dir = TempDir::new().unwrap();
let registry = Arc::new(GraphRegistry::new());
// Spawn a writer that inserts graph-0..graph-9 sequentially.
const N_WRITES: usize = 10;
let writer_registry = Arc::clone(&registry);
let writer_dir = dir.path().to_path_buf();
let writer = tokio::spawn(async move {
for i in 0..N_WRITES {
let h = build_handle(&format!("graph-{i}"), &writer_dir).await;
writer_registry.insert(h).await.unwrap();
}
});
// Reader loop: take 200 snapshots of the registry while the writer
// runs. Every snapshot's len must be in [0, N_WRITES], and
// for every key g in the snapshot, get(g) must return Ready.
let reader_registry = Arc::clone(&registry);
let reader = tokio::spawn(async move {
for _ in 0..200 {
let snap = reader_registry.list();
assert!(snap.len() <= N_WRITES);
for handle in &snap {
match reader_registry.get(&handle.key) {
RegistryLookup::Ready(found) => {
assert!(Arc::ptr_eq(&found, handle));
}
RegistryLookup::Gone => panic!(
"snapshot listed key {} but get() returned Gone",
handle.key.graph_id
),
}
}
tokio::task::yield_now().await;
}
});
writer.await.unwrap();
reader.await.unwrap();
assert_eq!(registry.len(), N_WRITES);
}
}

View file

@ -270,12 +270,13 @@ mod tests {
let err = controller
.try_admit(&actor, 100)
.expect_err("third should reject on count");
assert!(matches!(err, RejectReason::InFlightCountExceeded { cap: 2 }));
assert!(matches!(
err,
RejectReason::InFlightCountExceeded { cap: 2 }
));
drop(g1);
// After drop, a new admit succeeds again.
let _g3 = controller
.try_admit(&actor, 100)
.expect("admit after drop");
let _g3 = controller.try_admit(&actor, 100).expect("admit after drop");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
@ -356,7 +357,9 @@ mod tests {
let bob: Arc<str> = "bob".into();
let _ga = controller.try_admit(&alice, 100).expect("alice ok");
// Alice over count cap, Bob unaffected.
let err = controller.try_admit(&alice, 100).expect_err("alice rejected");
let err = controller
.try_admit(&alice, 100)
.expect_err("alice rejected");
assert!(matches!(err, RejectReason::InFlightCountExceeded { .. }));
let _gb = controller.try_admit(&bob, 100).expect("bob ok");
}

View file

@ -19,42 +19,42 @@ fn fixture(name: &str) -> PathBuf {
.join(name)
}
fn repo_path(root: &Path) -> PathBuf {
fn graph_path(root: &Path) -> PathBuf {
root.join("openapi_test.omni")
}
async fn init_loaded_repo() -> tempfile::TempDir {
async fn init_loaded_graph() -> tempfile::TempDir {
let temp = tempfile::tempdir().unwrap();
let repo = repo_path(temp.path());
fs::create_dir_all(&repo).unwrap();
let graph = graph_path(temp.path());
fs::create_dir_all(&graph).unwrap();
let schema = fs::read_to_string(fixture("test.pg")).unwrap();
let data = fs::read_to_string(fixture("test.jsonl")).unwrap();
Omnigraph::init(repo.to_str().unwrap(), &schema)
Omnigraph::init(graph.to_str().unwrap(), &schema)
.await
.unwrap();
let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
load_jsonl(&mut db, &data, LoadMode::Overwrite)
.await
.unwrap();
temp
}
async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) {
let temp = init_loaded_repo().await;
let repo = repo_path(temp.path());
let state = AppState::open(repo.to_string_lossy().to_string())
async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) {
let temp = init_loaded_graph().await;
let graph = graph_path(temp.path());
let state = AppState::open(graph.to_string_lossy().to_string())
.await
.unwrap();
let app = build_app(state);
(temp, app)
}
async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) {
let temp = init_loaded_repo().await;
let repo = repo_path(temp.path());
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) {
let temp = init_loaded_graph().await;
let graph = graph_path(temp.path());
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
let state = AppState::new_with_bearer_token(
repo.to_string_lossy().to_string(),
graph.to_string_lossy().to_string(),
db,
Some(token.to_string()),
);
@ -84,7 +84,7 @@ fn openapi_json() -> Value {
#[tokio::test]
async fn openapi_endpoint_returns_200_with_valid_json() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -97,7 +97,7 @@ async fn openapi_endpoint_returns_200_with_valid_json() {
#[tokio::test]
async fn openapi_endpoint_returns_openapi_31_version() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -113,11 +113,11 @@ async fn openapi_endpoint_returns_openapi_31_version() {
#[tokio::test]
async fn openapi_endpoint_does_not_require_auth() {
let temp = init_loaded_repo().await;
let repo = repo_path(temp.path());
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
let temp = init_loaded_graph().await;
let graph = graph_path(temp.path());
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
let state = AppState::new_with_bearer_token(
repo.to_string_lossy().to_string(),
graph.to_string_lossy().to_string(),
db,
Some("secret-token".to_string()),
);
@ -129,7 +129,11 @@ async fn openapi_endpoint_does_not_require_auth() {
.body(Body::empty())
.unwrap();
let (status, _) = json_response(&app, request).await;
assert_eq!(status, StatusCode::OK, "/openapi.json should not require auth");
assert_eq!(
status,
StatusCode::OK,
"/openapi.json should not require auth"
);
}
// ---------------------------------------------------------------------------
@ -157,10 +161,15 @@ fn openapi_info_contains_version() {
const EXPECTED_PATHS: &[&str] = &[
"/healthz",
"/graphs",
"/snapshot",
"/read",
"/query",
"/export",
"/change",
"/mutate",
"/queries",
"/queries/{name}",
"/schema",
"/schema/apply",
"/ingest",
@ -227,6 +236,64 @@ fn openapi_change_is_post() {
assert!(doc["paths"]["/change"]["post"].is_object());
}
#[test]
fn openapi_mutate_is_post() {
    // `/mutate` must be documented with a POST operation object.
    let spec = openapi_json();
    let operation = &spec["paths"]["/mutate"]["post"];
    assert!(operation.is_object());
}
// Deprecation flagging — `/read` and `/change` are kept indefinitely for
// back-compat but are flagged so OpenAPI codegens (typescript-fetch,
// openapi-generator, oapi-codegen, etc.) emit @deprecated on the generated
// SDK methods. The canonical successors `/query` and `/mutate` are not
// flagged. See `deprecation_headers` in `omnigraph-server/src/lib.rs` for
// the matching runtime signal (RFC 9745 + RFC 8288 headers).
#[test]
fn openapi_read_is_deprecated() {
    // The legacy `/read` POST operation must carry `deprecated: true`.
    let spec = openapi_json();
    let flag = &spec["paths"]["/read"]["post"]["deprecated"];
    assert_eq!(
        *flag,
        serde_json::Value::Bool(true),
        "/read must be flagged deprecated in OpenAPI; use /query instead"
    );
}
#[test]
fn openapi_change_is_deprecated() {
    // The legacy `/change` POST operation must carry `deprecated: true`.
    let spec = openapi_json();
    let flag = &spec["paths"]["/change"]["post"]["deprecated"];
    assert_eq!(
        *flag,
        serde_json::Value::Bool(true),
        "/change must be flagged deprecated in OpenAPI; use /mutate instead"
    );
}
#[test]
fn openapi_query_is_not_deprecated() {
    let spec = openapi_json();
    let operation = &spec["paths"]["/query"]["post"];
    // An absent (or non-boolean) `deprecated` key counts as "not deprecated".
    let is_deprecated =
        operation.get("deprecated").and_then(serde_json::Value::as_bool) == Some(true);
    assert!(
        !is_deprecated,
        "/query is the canonical read endpoint and must not be deprecated"
    );
}
#[test]
fn openapi_mutate_is_not_deprecated() {
    let spec = openapi_json();
    let operation = &spec["paths"]["/mutate"]["post"];
    // An absent (or non-boolean) `deprecated` key counts as "not deprecated".
    let is_deprecated =
        operation.get("deprecated").and_then(serde_json::Value::as_bool) == Some(true);
    assert!(
        !is_deprecated,
        "/mutate is the canonical mutation endpoint and must not be deprecated"
    );
}
#[test]
fn openapi_ingest_is_post() {
let doc = openapi_json();
@ -278,6 +345,7 @@ const EXPECTED_SCHEMAS: &[&str] = &[
"BranchMergeRequest",
"ChangeOutput",
"ChangeRequest",
"QueryRequest",
"CommitListOutput",
"CommitOutput",
"ErrorCode",
@ -368,13 +436,65 @@ fn read_output_schema_has_expected_fields() {
#[test]
fn change_request_schema_has_expected_fields() {
// Canonical field names on the wire are now `query` and `name`. The
// schema descriptions document `query_source` and `query_name` as
// legacy deserialization aliases for backward compatibility.
let doc = openapi_json();
let schema = &doc["components"]["schemas"]["ChangeRequest"];
let props = schema["properties"].as_object().unwrap();
assert!(props.contains_key("query_source"));
assert!(props.contains_key("query_name"));
assert!(props.contains_key("query"));
assert!(props.contains_key("name"));
assert!(props.contains_key("params"));
assert!(props.contains_key("branch"));
let query_desc = schema["properties"]["query"]["description"]
.as_str()
.unwrap_or_default();
assert!(
query_desc.contains("query_source"),
"expected `query` description to mention the legacy `query_source` alias, got: {query_desc}"
);
}
#[test]
fn query_request_schema_has_expected_fields() {
    // The `QueryRequest` component schema must expose these five properties.
    let spec = openapi_json();
    let properties = spec["components"]["schemas"]["QueryRequest"]["properties"]
        .as_object()
        .unwrap();
    assert!(properties.contains_key("query"));
    assert!(properties.contains_key("name"));
    assert!(properties.contains_key("params"));
    assert!(properties.contains_key("branch"));
    assert!(properties.contains_key("snapshot"));
}
#[test]
fn query_request_query_is_required() {
    // `query` must be listed in the schema's `required` array.
    let spec = openapi_json();
    let required_entries = spec["components"]["schemas"]["QueryRequest"]["required"]
        .as_array()
        .unwrap();
    let mut required_names: Vec<&str> = Vec::with_capacity(required_entries.len());
    for entry in required_entries {
        // Every entry of `required` is expected to be a string.
        required_names.push(entry.as_str().unwrap());
    }
    assert!(required_names.contains(&"query"));
}
#[test]
fn openapi_query_is_post() {
    // `/query` must be documented with a POST operation object.
    let spec = openapi_json();
    let operation = &spec["paths"]["/query"]["post"];
    assert!(operation.is_object());
}
#[test]
fn query_endpoint_documents_mutation_400() {
    // The 400 response on POST /query must explain that mutations are
    // rejected (either by the word "mutations" or by pointing at
    // `POST /mutate`). A missing description is treated as empty.
    let spec = openapi_json();
    let description = spec["paths"]["/query"]["post"]["responses"]["400"]["description"]
        .as_str()
        .unwrap_or_default();
    let mentions_rejection = ["mutations", "POST /mutate"]
        .iter()
        .any(|needle| description.contains(needle));
    assert!(
        mentions_rejection,
        "expected /query 400 response to mention mutation rejection, got: {description}"
    );
}
#[test]
@ -583,6 +703,8 @@ fn protected_endpoints_reference_bearer_token_security() {
("/read", "post"),
("/change", "post"),
("/schema/apply", "post"),
("/queries", "get"),
("/queries/{name}", "post"),
("/ingest", "post"),
("/export", "post"),
("/snapshot", "get"),
@ -626,10 +748,13 @@ fn branch_delete_has_branch_path_parameter() {
let params = doc["paths"]["/branches/{branch}"]["delete"]["parameters"]
.as_array()
.unwrap();
let has_branch = params.iter().any(|p| {
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path")
});
assert!(has_branch, "DELETE /branches/{{branch}} must have 'branch' path parameter");
let has_branch = params
.iter()
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path"));
assert!(
has_branch,
"DELETE /branches/{{branch}} must have 'branch' path parameter"
);
}
#[test]
@ -638,10 +763,13 @@ fn commit_show_has_commit_id_path_parameter() {
let params = doc["paths"]["/commits/{commit_id}"]["get"]["parameters"]
.as_array()
.unwrap();
let has_commit_id = params.iter().any(|p| {
p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path")
});
assert!(has_commit_id, "GET /commits/{{commit_id}} must have 'commit_id' path parameter");
let has_commit_id = params
.iter()
.any(|p| p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path"));
assert!(
has_commit_id,
"GET /commits/{{commit_id}} must have 'commit_id' path parameter"
);
}
#[test]
@ -650,10 +778,13 @@ fn snapshot_has_branch_query_parameter() {
let params = doc["paths"]["/snapshot"]["get"]["parameters"]
.as_array()
.unwrap();
let has_branch = params.iter().any(|p| {
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
});
assert!(has_branch, "GET /snapshot must have 'branch' query parameter");
let has_branch = params
.iter()
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
assert!(
has_branch,
"GET /snapshot must have 'branch' query parameter"
);
}
#[test]
@ -662,10 +793,13 @@ fn commits_has_branch_query_parameter() {
let params = doc["paths"]["/commits"]["get"]["parameters"]
.as_array()
.unwrap();
let has_branch = params.iter().any(|p| {
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
});
assert!(has_branch, "GET /commits must have 'branch' query parameter");
let has_branch = params
.iter()
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
assert!(
has_branch,
"GET /commits must have 'branch' query parameter"
);
}
// ---------------------------------------------------------------------------
@ -741,8 +875,7 @@ fn error_responses_reference_error_output_schema() {
];
for (path, method, status) in paths_with_errors {
let content =
&doc["paths"][path][method]["responses"][status]["content"];
let content = &doc["paths"][path][method]["responses"][status]["content"];
let schema = &content["application/json"]["schema"];
let ref_path = schema["$ref"].as_str().unwrap();
assert!(
@ -784,6 +917,34 @@ fn post_endpoints_have_request_body() {
}
}
#[test]
fn invoke_stored_query_request_body_is_optional() {
    // POST /queries/{name} must document a request body, mark it optional,
    // and point its JSON schema at `InvokeStoredQueryRequest`.
    let doc = openapi_json();
    let request_body = &doc["paths"]["/queries/{name}"]["post"]["requestBody"];
    assert!(
        request_body.is_object(),
        "POST /queries/{{name}} should document its optional request body"
    );

    // An absent `required` key is treated the same as `required: false`.
    let body_required = request_body["required"].as_bool().unwrap_or(false);
    assert!(
        !body_required,
        "stored-query invocation body should be optional"
    );

    // The schema may reference the type directly or wrap it in a `oneOf`;
    // accept the first `$ref` found in either place.
    let schema = &request_body["content"]["application/json"]["schema"];
    let ref_path = schema["$ref"]
        .as_str()
        .or_else(|| {
            schema["oneOf"].as_array().and_then(|candidates| {
                candidates
                    .iter()
                    .find_map(|candidate| candidate["$ref"].as_str())
            })
        })
        .expect("POST /queries/{name} requestBody schema must carry a $ref, directly or inside oneOf");
    assert!(
        ref_path.contains("InvokeStoredQueryRequest"),
        "POST /queries/{{name}} requestBody should reference InvokeStoredQueryRequest, got {ref_path}"
    );
}
// ---------------------------------------------------------------------------
// Serialization round-trip test
// ---------------------------------------------------------------------------
@ -804,7 +965,7 @@ fn openapi_spec_round_trips_through_json() {
#[tokio::test]
async fn open_mode_spec_has_no_security_schemes() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -820,7 +981,7 @@ async fn open_mode_spec_has_no_security_schemes() {
#[tokio::test]
async fn open_mode_spec_has_no_operation_security() {
let (_temp, app) = app_for_loaded_repo().await;
let (_temp, app) = app_for_loaded_graph().await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -841,7 +1002,7 @@ async fn open_mode_spec_has_no_operation_security() {
#[tokio::test]
async fn auth_mode_spec_includes_bearer_token_security_scheme() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -855,7 +1016,7 @@ async fn auth_mode_spec_includes_bearer_token_security_scheme() {
#[tokio::test]
async fn auth_mode_spec_has_security_on_protected_operations() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -886,7 +1047,7 @@ async fn auth_mode_spec_has_security_on_protected_operations() {
#[tokio::test]
async fn auth_mode_spec_matches_static_generation() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -902,7 +1063,7 @@ async fn auth_mode_spec_matches_static_generation() {
#[tokio::test]
async fn auth_mode_healthz_still_has_no_security() {
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
@ -918,8 +1079,7 @@ async fn auth_mode_healthz_still_has_no_security() {
#[test]
fn openapi_spec_is_up_to_date() {
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("../../openapi.json");
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../openapi.json");
let generated = serde_json::to_string_pretty(&openapi_doc()).unwrap() + "\n";
@ -943,3 +1103,290 @@ fn openapi_spec_is_up_to_date() {
"openapi.json is out of date. Run: OMNIGRAPH_UPDATE_OPENAPI=1 cargo test -p omnigraph-server --test openapi openapi_spec_is_up_to_date"
);
}
// ---------------------------------------------------------------------------
// MR-668 — multi-mode OpenAPI cluster filter
// ---------------------------------------------------------------------------
//
// In multi-graph mode, `/openapi.json` reports cluster routes
// (`/graphs/{graph_id}/...`) instead of the legacy flat routes. The
// only flat paths that survive are `/healthz` and the management
// `/graphs` route. Cluster operation IDs gain a `cluster_` prefix so
// SDK generators have stable, unique ids.
//
// These tests exercise the request-time `server_openapi` handler via
// `oneshot`, not the static `ApiDoc::openapi()` — the rewrite happens
// only on the served document.
/// Cluster-scoped (`/graphs/{graph_id}/...`) paths that the multi-mode tests
/// require in the served spec, and that the single-mode regression test
/// requires to be absent.
// NOTE(review): there are no cluster counterparts here for `/query`,
// `/mutate`, `/queries` or `/queries/{name}`, which `EXPECTED_PATHS` lists
// for single mode — confirm whether that omission is intentional.
const EXPECTED_CLUSTER_PATHS: &[&str] = &[
"/graphs/{graph_id}/snapshot",
"/graphs/{graph_id}/read",
"/graphs/{graph_id}/export",
"/graphs/{graph_id}/change",
"/graphs/{graph_id}/schema",
"/graphs/{graph_id}/schema/apply",
"/graphs/{graph_id}/ingest",
"/graphs/{graph_id}/branches",
"/graphs/{graph_id}/branches/{branch}",
"/graphs/{graph_id}/branches/merge",
"/graphs/{graph_id}/commits",
"/graphs/{graph_id}/commits/{commit_id}",
];
/// Build a multi-graph `Router` with one freshly initialised graph per id in
/// `graph_ids`. Each graph lives in its own tempdir; the returned `TempDir`s
/// must be kept alive by the caller for as long as the app is used.
async fn app_for_multi_mode(graph_ids: &[&str]) -> (Vec<tempfile::TempDir>, Router) {
    use std::sync::Arc;
    use omnigraph_server::{GraphHandle, GraphId, GraphKey};

    let graph_count = graph_ids.len();
    let mut dirs = Vec::with_capacity(graph_count);
    let mut handles = Vec::with_capacity(graph_count);
    for &id in graph_ids {
        let dir = tempfile::tempdir().unwrap();
        let graph_path = dir.path().join(id);
        let graph_uri = graph_path.to_str().unwrap().to_string();
        let schema = fs::read_to_string(fixture("test.pg")).unwrap();
        let engine = Omnigraph::init(&graph_uri, &schema).await.unwrap();

        // No policy and no stored queries are attached to these handles.
        let handle = GraphHandle {
            key: GraphKey::cluster(GraphId::try_from(id).unwrap()),
            uri: graph_uri,
            engine: Arc::new(engine),
            policy: None,
            queries: None,
        };
        handles.push(Arc::new(handle));
        dirs.push(dir);
    }

    let workload = omnigraph_server::workload::WorkloadController::from_env();
    let state = AppState::new_multi(handles, Vec::new(), None, workload, None).unwrap();
    (dirs, build_app(state))
}
#[tokio::test]
async fn multi_mode_openapi_lists_cluster_paths() {
    // The served multi-mode spec must contain every expected cluster path.
    let (_dirs, app) = app_for_multi_mode(&["alpha"]).await;
    let request = Request::builder()
        .method(Method::GET)
        .uri("/openapi.json")
        .body(Body::empty())
        .unwrap();
    let (status, spec) = json_response(&app, request).await;
    assert_eq!(status, StatusCode::OK);

    let path_keys: HashSet<&str> = spec["paths"]
        .as_object()
        .expect("paths must be an object")
        .keys()
        .map(String::as_str)
        .collect();
    for expected in EXPECTED_CLUSTER_PATHS.iter() {
        let present = path_keys.contains(expected);
        assert!(
            present,
            "missing cluster path in multi-mode spec: {expected}. \
             Found: {path_keys:?}"
        );
    }
}
#[tokio::test]
async fn multi_mode_openapi_drops_flat_protected_paths() {
    // Legacy flat protected paths that must be absent from a multi-mode spec.
    const FLAT_PROTECTED: [&str; 12] = [
        "/snapshot",
        "/read",
        "/export",
        "/change",
        "/schema",
        "/schema/apply",
        "/ingest",
        "/branches",
        "/branches/{branch}",
        "/branches/merge",
        "/commits",
        "/commits/{commit_id}",
    ];

    let (_dirs, app) = app_for_multi_mode(&["alpha"]).await;
    let request = Request::builder()
        .method(Method::GET)
        .uri("/openapi.json")
        .body(Body::empty())
        .unwrap();
    let (_, spec) = json_response(&app, request).await;
    let paths = spec["paths"].as_object().unwrap();

    for flat in FLAT_PROTECTED {
        let leaked = paths.contains_key(flat);
        assert!(
            !leaked,
            "flat path {flat} must not appear in multi-mode spec; \
             cluster routes are the only protected surface"
        );
    }
}
#[tokio::test]
async fn multi_mode_openapi_keeps_management_paths_flat() {
    // `/healthz` and `/graphs` stay at the root in multi mode and must not
    // also show up under the `/graphs/{graph_id}` prefix.
    const MANAGEMENT_PATHS: [&str; 2] = ["/healthz", "/graphs"];

    let (_dirs, app) = app_for_multi_mode(&["alpha"]).await;
    let request = Request::builder()
        .method(Method::GET)
        .uri("/openapi.json")
        .body(Body::empty())
        .unwrap();
    let (_, spec) = json_response(&app, request).await;
    let paths = spec["paths"].as_object().unwrap();

    for flat in MANAGEMENT_PATHS {
        let kept_flat = paths.contains_key(flat);
        assert!(kept_flat, "{flat} must remain flat in multi mode");

        let nested = format!("/graphs/{{graph_id}}{flat}");
        let also_nested = paths.contains_key(&nested);
        assert!(
            !also_nested,
            "{flat} must NOT be cluster-prefixed to {nested}"
        );
    }
}
#[tokio::test]
async fn multi_mode_openapi_prefixes_operation_ids_with_cluster() {
    const HTTP_METHODS: [&str; 5] = ["get", "post", "put", "delete", "patch"];

    let (_dirs, app) = app_for_multi_mode(&["alpha"]).await;
    let request = Request::builder()
        .method(Method::GET)
        .uri("/openapi.json")
        .body(Body::empty())
        .unwrap();
    let (_, spec) = json_response(&app, request).await;
    let paths = spec["paths"].as_object().unwrap();

    // Flat-mounted paths (healthz, management /graphs) are not per-graph and
    // keep their original operation ids, so they are excluded here.
    let cluster_items = paths
        .iter()
        .filter(|(path, _)| !matches!(path.as_str(), "/healthz" | "/graphs"));

    // Every operation id found on a cluster path must start with `cluster_`.
    // Operations without an `operationId` are skipped, not failed.
    let mut checked = 0;
    for (path, item) in cluster_items {
        for method in HTTP_METHODS {
            let Some(op) = item.get(method).filter(|v| v.is_object()) else {
                continue;
            };
            let Some(id) = op["operationId"].as_str() else {
                continue;
            };
            assert!(
                id.starts_with("cluster_"),
                "operation_id at {path}.{method} must start with `cluster_`, got `{id}`"
            );
            checked += 1;
        }
    }

    // Guard against the loop silently checking nothing.
    let minimum = EXPECTED_CLUSTER_PATHS.len();
    assert!(
        checked >= minimum,
        "expected at least {} cluster operation_ids; checked {checked}",
        EXPECTED_CLUSTER_PATHS.len()
    );
}
#[tokio::test]
async fn multi_mode_openapi_declares_graph_id_path_parameter() {
let (_dirs, app) = app_for_multi_mode(&["alpha"]).await;
let request = Request::builder()
.method(Method::GET)
.uri("/openapi.json")
.body(Body::empty())
.unwrap();
let (_, json) = json_response(&app, request).await;
let paths = json["paths"].as_object().unwrap();
for expected_path in EXPECTED_CLUSTER_PATHS {
let item = paths
.get(*expected_path)
.unwrap_or_else(|| panic!("missing cluster path {expected_path}"));
for method in ["get", "post", "put", "delete", "patch"] {
let Some(operation) = item.get(method).filter(|value| value.is_object()) else {
continue;
};
let parameters = operation["parameters"]
.as_array()
.unwrap_or_else(|| panic!("{expected_path}.{method} missing parameters"));
let graph_id = parameters
.iter()
.find(|param| param["name"] == "graph_id" && param["in"] == "path")
.unwrap_or_else(|| {
panic!("{expected_path}.{method} missing graph_id path parameter")
});
assert_eq!(
graph_id["required"].as_bool(),
Some(true),
"{expected_path}.{method} graph_id parameter must be required"
);
assert_eq!(
graph_id["schema"]["type"].as_str(),
Some("string"),
"{expected_path}.{method} graph_id parameter must be string typed"
);
}
}
for flat in ["/healthz", "/graphs"] {
let item = paths.get(flat).unwrap();
for method in ["get", "post", "put", "delete", "patch"] {
if let Some(operation) = item.get(method).filter(|value| value.is_object()) {
let has_graph_id = operation["parameters"]
.as_array()
.map(|params| {
params
.iter()
.any(|param| param["name"] == "graph_id" && param["in"] == "path")
})
.unwrap_or(false);
assert!(
!has_graph_id,
"{flat}.{method} must not declare graph_id; it remains flat"
);
}
}
}
}
#[tokio::test]
async fn multi_mode_operation_ids_are_unique() {
    // Contract: operation ids are unique across the whole served spec.
    // Walk every operation in the multi-mode document and fail on the first
    // id seen twice. Operations without an `operationId` are ignored.
    const HTTP_METHODS: [&str; 5] = ["get", "post", "put", "delete", "patch"];

    let (_dirs, app) = app_for_multi_mode(&["alpha"]).await;
    let request = Request::builder()
        .method(Method::GET)
        .uri("/openapi.json")
        .body(Body::empty())
        .unwrap();
    let (_, spec) = json_response(&app, request).await;
    let paths = spec["paths"].as_object().unwrap();

    let mut seen_ids: HashSet<String> = HashSet::new();
    for item in paths.values() {
        for method in HTTP_METHODS {
            let Some(op) = item.get(method).filter(|v| v.is_object()) else {
                continue;
            };
            let Some(id) = op["operationId"].as_str() else {
                continue;
            };
            // `insert` returns false when the id was already present.
            let first_time = seen_ids.insert(id.to_string());
            assert!(
                first_time,
                "duplicate operation_id `{id}` in multi-mode spec"
            );
        }
    }
}
#[tokio::test]
async fn single_mode_openapi_unchanged_by_cluster_filter() {
    // Regression guard: a single-graph server keeps serving the legacy flat
    // surface and none of the cluster-prefixed paths.
    let (_temp, app) = app_for_loaded_graph().await;
    let request = Request::builder()
        .method(Method::GET)
        .uri("/openapi.json")
        .body(Body::empty())
        .unwrap();
    let (_, spec) = json_response(&app, request).await;
    let path_keys: HashSet<&str> = spec["paths"]
        .as_object()
        .unwrap()
        .keys()
        .map(String::as_str)
        .collect();

    for expected in EXPECTED_PATHS.iter() {
        let present = path_keys.contains(expected);
        assert!(present, "single mode must still emit flat path: {expected}");
    }
    for cluster in EXPECTED_CLUSTER_PATHS.iter() {
        let present = path_keys.contains(cluster);
        assert!(
            !present,
            "single mode must NOT emit cluster path: {cluster}"
        );
    }
}

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[package]
name = "omnigraph-engine"
version = "0.4.2"
version = "0.6.1"
edition = "2024"
description = "Runtime engine for the Omnigraph graph database."
license = "MIT"
@ -16,8 +16,8 @@ default = []
failpoints = ["dep:fail", "fail/failpoints"]
[dependencies]
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
lance = { workspace = true }
lance-datafusion = { workspace = true }
datafusion = { workspace = true }
@ -51,7 +51,7 @@ chrono = { workspace = true }
arc-swap = { workspace = true }
[dev-dependencies]
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
tokio = { workspace = true }
lance-namespace-impls = { workspace = true }
serial_test = "3"

View file

@ -239,7 +239,9 @@ async fn main() {
let jsonl = generate_jsonl(n, avg_deg, 42);
let t = Instant::now();
load_jsonl(&mut db, &jsonl, LoadMode::Overwrite).await.unwrap();
load_jsonl(&mut db, &jsonl, LoadMode::Overwrite)
.await
.unwrap();
let load_elapsed = t.elapsed();
println!(

View file

@ -169,6 +169,37 @@ impl CommitGraph {
self.refresh().await
}
/// Idempotently drop the commit-graph branch `name`, tolerating an
/// already-absent branch (see [`TableStore::force_delete_branch`] for the
/// same semantics). Used by the best-effort reclaim in `branch_delete` and
/// the `cleanup` orphan reconciler. `RefConflict` (referencing descendants)
/// is still surfaced.
pub async fn force_delete_branch(&mut self, name: &str) -> Result<()> {
let mut ds = Dataset::open(&graph_commits_uri(&self.root_uri))
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
match ds.force_delete_branch(name).await {
Ok(()) => {}
Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => {}
Err(e) => return Err(OmniError::Lance(e.to_string())),
}
self.refresh().await
}
/// Return the names of the branches that exist on the commit-graph dataset.
///
/// The dataset is opened fresh from `root_uri` on every call. The `cleanup`
/// reconciler compares this list with the manifest's branch set to find
/// orphaned commit-graph branches to reclaim. Lance failures are mapped to
/// `OmniError::Lance`.
pub async fn list_branches(&self) -> Result<Vec<String>> {
    let commits_uri = graph_commits_uri(&self.root_uri);
    let dataset = match Dataset::open(&commits_uri).await {
        Ok(dataset) => dataset,
        Err(e) => return Err(OmniError::Lance(e.to_string())),
    };
    let by_name = match dataset.list_branches().await {
        Ok(by_name) => by_name,
        Err(e) => return Err(OmniError::Lance(e.to_string())),
    };
    // Only the branch names are needed; the per-branch values are dropped.
    let names = by_name.into_keys().collect();
    Ok(names)
}
pub async fn append_commit(
&mut self,
manifest_branch: Option<&str>,
@ -345,7 +376,7 @@ impl CommitGraph {
}
}
fn graph_commits_uri(root_uri: &str) -> String {
pub(crate) fn graph_commits_uri(root_uri: &str) -> String {
format!("{}/{}", root_uri.trim_end_matches('/'), GRAPH_COMMITS_DIR)
}

View file

@ -211,14 +211,47 @@ impl GraphCoordinator {
let branch = normalize_branch_name(name)?
.ok_or_else(|| OmniError::manifest("cannot create branch 'main'".to_string()))?;
self.ensure_commit_graph_initialized().await?;
// Manifest authority flip first.
self.manifest.create_branch(&branch).await?;
failpoints::maybe_fail("branch_create.after_manifest_branch_create")?;
if let Some(commit_graph) = &mut self.commit_graph {
commit_graph.create_branch(&branch).await?;
// Derived commit-graph branch. If anything after the authority flip
// fails, roll back the manifest branch so the branch never half-exists
// (a manifest branch with no commit-graph branch breaks the next write).
if let Err(err) = self.create_commit_graph_branch(&branch).await {
if let Err(rollback_err) = self.manifest.delete_branch(&branch).await {
tracing::warn!(
target: "omnigraph::branch_create",
branch = %branch,
error = %rollback_err,
"rollback of manifest branch failed after commit-graph create failure",
);
}
return Err(err);
}
Ok(())
}
/// Create the commit-graph branch that mirrors manifest branch `branch`.
///
/// If a commit-graph branch of that name already exists it is a leftover
/// ("zombie") from an incomplete earlier delete — the manifest branch was
/// only just created — so it is force-dropped first and then recreated.
/// Does nothing when no commit graph is loaded. The failpoint fires before
/// any commit-graph work, whether or not a commit graph is present.
async fn create_commit_graph_branch(&mut self, branch: &str) -> Result<()> {
    failpoints::maybe_fail("branch_create.after_manifest_branch_create")?;

    match &mut self.commit_graph {
        None => Ok(()),
        Some(commit_graph) => {
            let existing_branches = commit_graph.list_branches().await?;
            let has_zombie = existing_branches.iter().any(|existing| existing == branch);
            if has_zombie {
                commit_graph.force_delete_branch(branch).await?;
            }
            commit_graph.create_branch(branch).await
        }
    }
}
pub async fn branch_delete(&mut self, name: &str) -> Result<()> {
let branch = normalize_branch_name(name)?
.ok_or_else(|| OmniError::manifest("cannot delete branch 'main'".to_string()))?;
@ -229,20 +262,43 @@ impl GraphCoordinator {
)));
}
// Manifest authority flip — the single atomic op that makes the branch
// cease to exist. Must succeed; everything after is derived state
// reclaimed best-effort.
self.manifest.delete_branch(&branch).await?;
// Commit-graph branch is derived state. Reclaim best-effort with the
// idempotent force variant: a failure here (or a missing dataset) is
// reconciled by `cleanup` and must not fail the delete after the
// authority already flipped.
if let Err(err) = self.reclaim_commit_graph_branch(&branch).await {
tracing::warn!(
target: "omnigraph::branch_delete::cleanup",
branch = %branch,
error = %err,
"best-effort commit-graph branch reclaim failed; cleanup will reconcile",
);
}
Ok(())
}
/// Best-effort, idempotent reclaim of the commit-graph branch `branch`.
/// Tolerates an absent commit-graph dataset (a graph that never committed).
async fn reclaim_commit_graph_branch(&mut self, branch: &str) -> Result<()> {
failpoints::maybe_fail("branch_delete.before_commit_graph_reclaim")?;
if let Some(commit_graph) = &mut self.commit_graph {
commit_graph.delete_branch(&branch).await?;
commit_graph.force_delete_branch(branch).await
} else if self
.storage
.exists(&graph_commits_uri(self.root_uri()))
.await?
{
let mut commit_graph = CommitGraph::open(self.root_uri()).await?;
commit_graph.delete_branch(&branch).await?;
commit_graph.force_delete_branch(branch).await
} else {
Ok(())
}
Ok(())
}
pub async fn snapshot_at_version(&self, version: u64) -> Result<Snapshot> {

View file

@ -6,6 +6,8 @@ use lance::Dataset;
use lance_namespace::models::CreateTableVersionRequest;
use omnigraph_compiler::catalog::Catalog;
#[path = "manifest/graph.rs"]
mod graph;
#[path = "manifest/layout.rs"]
mod layout;
#[path = "manifest/metadata.rs"]
@ -18,11 +20,10 @@ mod namespace;
mod publisher;
#[path = "manifest/recovery.rs"]
mod recovery;
#[path = "manifest/repo.rs"]
mod repo;
#[path = "manifest/state.rs"]
mod state;
use graph::{init_manifest_graph, open_manifest_graph, snapshot_state_at};
use layout::{manifest_uri, open_manifest_dataset, type_name_hash};
pub(crate) use metadata::TableVersionMetadata;
#[cfg(test)]
@ -33,11 +34,10 @@ pub(crate) use namespace::open_table_head_for_write;
use namespace::{branch_manifest_namespace, staged_table_namespace};
use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
pub(crate) use recovery::{
delete_sidecar, has_schema_apply_sidecar, new_sidecar, recover_manifest_drift, write_sidecar,
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
SidecarTableRegistration, SidecarTombstone,
SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
new_sidecar, recover_manifest_drift, write_sidecar,
};
use repo::{init_manifest_repo, open_manifest_repo, snapshot_state_at};
pub use state::SubTableEntry;
#[cfg(test)]
use state::string_column;
@ -215,12 +215,12 @@ impl ManifestCoordinator {
self
}
/// Create a new repo at `root_uri` from a catalog.
/// Create a new graph at `root_uri` from a catalog.
///
/// Creates per-type Lance datasets and the namespace `__manifest` table.
pub async fn init(root_uri: &str, catalog: &Catalog) -> Result<Self> {
let root = root_uri.trim_end_matches('/');
let (dataset, known_state) = init_manifest_repo(root, catalog).await?;
let (dataset, known_state) = init_manifest_graph(root, catalog).await?;
Ok(Self::from_parts_with_default_publisher(
root,
@ -230,10 +230,10 @@ impl ManifestCoordinator {
))
}
/// Open an existing repo's manifest.
/// Open an existing graph's manifest.
pub async fn open(root_uri: &str) -> Result<Self> {
let root = root_uri.trim_end_matches('/');
let (dataset, known_state) = open_manifest_repo(root, None).await?;
let (dataset, known_state) = open_manifest_graph(root, None).await?;
Ok(Self::from_parts_with_default_publisher(
root,
dataset,
@ -242,14 +242,14 @@ impl ManifestCoordinator {
))
}
/// Open an existing repo's manifest at a specific branch.
/// Open an existing graph's manifest at a specific branch.
pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result<Self> {
if branch == "main" {
return Self::open(root_uri).await;
}
let root = root_uri.trim_end_matches('/');
let (dataset, known_state) = open_manifest_repo(root, Some(branch)).await?;
let (dataset, known_state) = open_manifest_graph(root, Some(branch)).await?;
Ok(Self::from_parts_with_default_publisher(
root,
dataset,
@ -410,7 +410,7 @@ impl ManifestCoordinator {
Ok(descendants)
}
/// Root URI of the repo.
/// Root URI of the graph.
pub fn root_uri(&self) -> &str {
&self.root_uri
}

View file

@ -17,7 +17,7 @@ use super::state::{
ManifestState, SubTableEntry, entries_to_batch, manifest_schema, read_manifest_state,
};
pub(super) async fn init_manifest_repo(
pub(super) async fn init_manifest_graph(
root_uri: &str,
catalog: &Catalog,
) -> Result<(Dataset, ManifestState)> {
@ -47,7 +47,7 @@ pub(super) async fn init_manifest_repo(
Ok((dataset, known_state))
}
pub(super) async fn open_manifest_repo(
pub(super) async fn open_manifest_graph(
root_uri: &str,
branch: Option<&str>,
) -> Result<(Dataset, ManifestState)> {

View file

@ -24,8 +24,8 @@
//! Only on open-for-write paths (the publisher's `load_publish_state`).
//! Reads are side-effect-free by contract; an old-shape `__manifest` reads
//! fine, it just lacks the protections introduced by later versions.
//! `init_manifest_repo` stamps the current version at creation, so newly
//! initialized repos never need migration.
//! `init_manifest_graph` stamps the current version at creation, so newly
//! initialized graphs never need migration.
//!
//! ## Forward-version protection
//!
@ -78,7 +78,7 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
if current > INTERNAL_MANIFEST_SCHEMA_VERSION {
return Err(OmniError::manifest(format!(
"__manifest is stamped at internal schema v{} but this binary expects v{} \
upgrade omnigraph before opening this repo for writes",
upgrade omnigraph before opening this graph for writes",
current, INTERNAL_MANIFEST_SCHEMA_VERSION,
)));
}
@ -112,7 +112,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
dataset
.update_field_metadata()
.update("object_id", [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())])
.update(
"object_id",
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
)
.map_err(|e| OmniError::Lance(e.to_string()))?
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
@ -121,10 +124,7 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> {
dataset
.update_schema_metadata([(
INTERNAL_SCHEMA_VERSION_KEY.to_string(),
version.to_string(),
)])
.update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())])
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
Ok(())

View file

@ -2,7 +2,7 @@
//!
//! This module implements the building blocks of the per-sidecar recovery
//! sweep that closes the documented Phase B → Phase C residual (see
//! `docs/dev/runs.md` "Open-time recovery sweep"). The high-level shape:
//! `docs/dev/writes.md` "Open-time recovery sweep"). The high-level shape:
//!
//! 1. Each writer that performs a multi-table commit writes a small JSON
//! sidecar at `__recovery/{ulid}.json` BEFORE its per-table
@ -58,7 +58,7 @@ use super::{ManifestChange, SubTableUpdate, TableRegistration, TableTombstone};
/// into the audit row's `recovery_for_actor` field.
pub(crate) const RECOVERY_ACTOR: &str = "omnigraph:recovery";
/// Subdirectory under the repo root holding sidecar files.
/// Subdirectory under the graph root holding sidecar files.
pub(crate) const RECOVERY_DIR_NAME: &str = "__recovery";
/// Current sidecar JSON shape version. Bumping this is a breaking change:
@ -142,7 +142,7 @@ pub(crate) struct SidecarTablePin {
pub(crate) struct SidecarTableRegistration {
/// Stable identifier (`node:Tag`, `edge:WorksAt`, etc.).
pub table_key: String,
/// Repo-relative path the manifest will register
/// Graph-relative path the manifest will register
/// (e.g. `nodes/{fnv1a64-hex}`); recovery joins this with `root_uri`
/// to open the dataset Lance HEAD when constructing the
/// accompanying `Update`.
@ -295,7 +295,7 @@ pub(crate) enum SidecarDecision {
Abort,
}
/// Build the `__recovery/` directory URI under a repo root.
/// Build the `__recovery/` directory URI under a graph root.
pub(crate) fn recovery_dir_uri(root_uri: &str) -> String {
let trimmed = root_uri.trim_end_matches('/');
format!("{}/{}", trimmed, RECOVERY_DIR_NAME)
@ -1122,7 +1122,7 @@ async fn record_audit(
/// the rename so the recovery sweep's roll-forward step sees the new
/// catalog. Without this, the disambiguation logic deletes the staging
/// files (since manifest still pins the old table set) and leaves the
/// repo with new-schema data on disk but the old `_schema.pg` live —
/// graph with new-schema data on disk but the old `_schema.pg` live —
/// real corruption.
pub(crate) async fn has_schema_apply_sidecar(
root_uri: &str,

View file

@ -1393,7 +1393,10 @@ async fn test_concurrent_publish_with_overlapping_expected_versions_one_succeeds
// version (no duplicate version rows).
let mc = ManifestCoordinator::open(uri).await.unwrap();
let entry = mc.snapshot().entry("node:Person").unwrap().clone();
assert!(entry.table_version > 1, "Person should have advanced past v=1");
assert!(
entry.table_version > 1,
"Person should have advanced past v=1"
);
}
#[tokio::test]
@ -1418,7 +1421,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
let catalog = build_test_catalog();
let mc = ManifestCoordinator::init(uri, &catalog).await.unwrap();
// Simulate a v1 (pre-stamp) repo by removing the schema-level stamp on disk.
// Simulate a v1 (pre-stamp) graph by removing the schema-level stamp on disk.
{
let mut ds = open_manifest_dataset(uri, None).await.unwrap();
ds.update_schema_metadata([(
@ -1449,7 +1452,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
assert_eq!(
super::migrations::read_stamp(&post),
super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
"publish on a v1 repo should leave the manifest stamped at the current version",
"publish on a v1 graph should leave the manifest stamped at the current version",
);
// Manifest should still serve correctly post-migration.

View file

@ -10,11 +10,11 @@ pub(crate) mod write_queue;
pub use commit_graph::GraphCommit;
pub use graph_coordinator::{GraphCoordinator, ReadTarget, ResolvedTarget, SnapshotId};
pub use manifest::{Snapshot, SubTableEntry, SubTableUpdate};
pub use omnigraph::{
CleanupPolicyOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyOptions,
SchemaApplyResult, TableCleanupStats, TableOptimizeStats,
};
pub(crate) use omnigraph::ensure_public_branch_ref;
pub use omnigraph::{
CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyOptions,
SchemaApplyResult, SkipReason, TableCleanupStats, TableOptimizeStats,
};
pub(crate) use run_registry::is_internal_run_branch;
pub(crate) const SCHEMA_APPLY_LOCK_BRANCH: &str = "__schema_apply_lock__";
@ -59,9 +59,7 @@ impl MutationOpKind {
pub(crate) fn strict_pre_stage_version_check(self) -> bool {
match self {
MutationOpKind::Insert | MutationOpKind::Merge => false,
MutationOpKind::Update
| MutationOpKind::Delete
| MutationOpKind::SchemaRewrite => true,
MutationOpKind::Update | MutationOpKind::Delete | MutationOpKind::SchemaRewrite => true,
}
}
}

View file

@ -33,7 +33,7 @@ mod optimize;
mod schema_apply;
mod table_ops;
pub use optimize::{CleanupPolicyOptions, TableCleanupStats, TableOptimizeStats};
pub use optimize::{CleanupPolicyOptions, SkipReason, TableCleanupStats, TableOptimizeStats};
pub use schema_apply::SchemaApplyOptions;
use super::commit_graph::GraphCommit;
@ -67,6 +67,12 @@ pub struct SchemaApplyResult {
pub steps: Vec<SchemaMigrationStep>,
}
/// Value returned by [`Omnigraph::preview_schema_apply_with_options`]:
/// a migration plan paired with a catalog.
#[derive(Debug, Clone)]
pub struct SchemaApplyPreview {
/// Migration plan computed for the previewed schema.
pub plan: SchemaMigrationPlan,
/// Catalog accompanying the plan.
/// NOTE(review): presumably the catalog built from the desired schema
/// rather than the currently live one — confirm in
/// `schema_apply::preview_schema_apply`.
pub catalog: Catalog,
}
/// Top-level handle to an Omnigraph database.
///
/// An Omnigraph is a Lance-native graph database with git-style branching.
@ -165,31 +171,137 @@ pub enum OpenMode {
ReadOnly,
}
/// Options for [`Omnigraph::init_with_options`].
///
/// `force` controls the safety preflight that prevents an
/// accidental re-init from overwriting an existing graph's schema
/// metadata. Default behavior (`force: false`) fails fast with
/// [`OmniError::AlreadyInitialized`] if any of `_schema.pg`,
/// `_schema.ir.json`, or `__schema_state.json` already exists at
/// the target URI. With `force: true` the preflight is skipped —
/// existing schema files are overwritten in place. Force does NOT
/// purge old Lance datasets or `__manifest/`; reclaiming those
/// still requires deleting the graph directory by hand (or via a
/// future `DELETE /graphs/{id}`).
#[derive(Debug, Clone, Copy, Default)]
pub struct InitOptions {
/// Skip the existing-graph preflight. Operators set this when
/// they actually mean to overwrite — e.g. `omnigraph init --force`.
pub force: bool,
}
impl Omnigraph {
/// Create a new repo at `uri` from schema source.
/// Create a new graph at `uri` from schema source.
///
/// Creates `_schema.pg`, per-type Lance datasets, and `__manifest`.
/// Strict mode: errors with [`OmniError::AlreadyInitialized`] if
/// `uri` already holds any of the three schema artifacts. To
/// overwrite an existing graph deliberately, call
/// [`Self::init_with_options`] with `InitOptions { force: true }`.
pub async fn init(uri: &str, schema_source: &str) -> Result<Self> {
Self::init_with_storage(uri, schema_source, storage_for_uri(uri)?).await
Self::init_with_options(uri, schema_source, InitOptions::default()).await
}
/// Initialize a graph at `uri` from `schema_source`, honoring `options`.
///
/// The storage adapter is resolved from `uri` and the work is handed to
/// `init_with_storage`. With `InitOptions::default()` this is exactly
/// what [`Self::init`] does; see [`InitOptions`] for what `force` changes.
pub async fn init_with_options(
    uri: &str,
    schema_source: &str,
    options: InitOptions,
) -> Result<Self> {
    // Resolve the adapter first so an unsupported URI fails before any init work.
    let storage = storage_for_uri(uri)?;
    Self::init_with_storage(uri, schema_source, storage, options).await
}
pub(crate) async fn init_with_storage(
uri: &str,
schema_source: &str,
storage: Arc<dyn StorageAdapter>,
options: InitOptions,
) -> Result<Self> {
let root = normalize_root_uri(uri)?;
// Preflight: refuse to clobber an existing graph unless the
// operator passed `force`. This runs BEFORE any parse or
// write so a misdirected `init` against an existing graph
// URI cannot reach a code path that overwrites or, on a
// later cleanup, deletes the schema files.
//
// Closes the "init is destructive against existing state"
// class: there is no longer a code path where strict-mode
// `init` can mutate a populated graph root.
if !options.force {
for candidate in [
schema_source_uri(&root),
schema_ir_uri(&root),
schema_state_uri(&root),
] {
if storage.exists(&candidate).await? {
return Err(OmniError::AlreadyInitialized { uri: root.clone() });
}
}
}
let schema_ir = read_schema_ir_from_source(schema_source)?;
let mut catalog = build_catalog_from_ir(&schema_ir)?;
fixup_blob_schemas(&mut catalog);
// Write _schema.pg
let schema_path = join_uri(&root, SCHEMA_SOURCE_FILENAME);
storage.write_text(&schema_path, schema_source).await?;
write_schema_contract(&root, storage.as_ref(), &schema_ir).await?;
// Establish an atomic ownership claim on `_schema.pg` before
// writing the remaining init artifacts. A check-then-write preflight
// is not enough under concurrent `init` calls: two callers can both
// observe an empty root, one can successfully initialize, and the
// loser can then fail in Lance `WriteMode::Create`. Only the caller
// that atomically created `_schema.pg` may clean up schema artifacts
// on later failure.
let schema_pg_claimed = if options.force {
false
} else {
let schema_path = join_uri(&root, SCHEMA_SOURCE_FILENAME);
if !storage
.write_text_if_absent(&schema_path, schema_source)
.await?
{
return Err(OmniError::AlreadyInitialized { uri: root.clone() });
}
if let Err(err) = crate::failpoints::maybe_fail("init.after_schema_pg_written") {
best_effort_cleanup_init_artifacts(&root, storage.as_ref()).await;
return Err(err);
}
true
};
// Create manifest + per-type datasets
let coordinator = GraphCoordinator::init(&root, &catalog, Arc::clone(&storage)).await?;
// Run the I/O phase. On any error, best-effort-clean schema
// artifacts only when this invocation owns them: strict mode owns
// them after the atomic `_schema.pg` claim above; force mode owns
// destructive overwrite semantics by explicit operator request.
//
// Coverage gap: Lance per-type datasets and `__manifest/`
// directory created by `GraphCoordinator::init` are NOT cleaned
// up here — fully recursive directory deletion requires a
// `StorageAdapter::delete_prefix` primitive that's deferred
// along with `DELETE /graphs/{id}` (PR 2b in the MR-668 plan
// is currently deferred). If `init` fails after coordinator
// init succeeds, operators may need to remove the graph
// directory manually before retrying `init` on the same URI.
// Documented in the PR 2a commit message and `init` rustdoc.
let coordinator = match init_storage_phase(
&root,
schema_source,
&schema_ir,
&catalog,
&storage,
!schema_pg_claimed,
)
.await
{
Ok(coordinator) => coordinator,
Err(err) => {
if schema_pg_claimed || options.force {
best_effort_cleanup_init_artifacts(&root, storage.as_ref()).await;
}
return Err(err);
}
};
Ok(Self {
root_uri: root.clone(),
@ -205,7 +317,7 @@ impl Omnigraph {
})
}
/// Open an existing repo (read-write).
/// Open an existing graph (read-write).
///
/// Reads `_schema.pg`, parses it, builds the catalog, and opens `__manifest`.
/// Runs the open-time recovery sweep before returning — see [`OpenMode`].
@ -213,7 +325,7 @@ impl Omnigraph {
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadWrite).await
}
/// Open an existing repo for read-only consumers (NDJSON export,
/// Open an existing graph for read-only consumers (NDJSON export,
/// `commit list`, etc.). Skips the recovery sweep — see [`OpenMode`].
pub async fn open_read_only(uri: &str) -> Result<Self> {
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadOnly).await
@ -387,6 +499,14 @@ impl Omnigraph {
schema_apply::plan_schema(self, desired_schema_source, options).await
}
/// Build a [`SchemaApplyPreview`] for `desired_schema_source` under the
/// given `options`.
///
/// Thin wrapper: forwards `self`, the schema source and the options to
/// `schema_apply::preview_schema_apply` and returns its result unchanged.
/// NOTE(review): the name and `&self` receiver suggest this does not
/// modify the graph — confirm against `schema_apply::preview_schema_apply`.
pub async fn preview_schema_apply_with_options(
&self,
desired_schema_source: &str,
options: SchemaApplyOptions,
) -> Result<SchemaApplyPreview> {
schema_apply::preview_schema_apply(self, desired_schema_source, options).await
}
pub async fn apply_schema(&self, desired_schema_source: &str) -> Result<SchemaApplyResult> {
self.apply_schema_as(desired_schema_source, SchemaApplyOptions::default(), None)
.await
@ -397,7 +517,8 @@ impl Omnigraph {
desired_schema_source: &str,
options: SchemaApplyOptions,
) -> Result<SchemaApplyResult> {
self.apply_schema_as(desired_schema_source, options, None).await
self.apply_schema_as(desired_schema_source, options, None)
.await
}
/// Apply a schema migration with an explicit actor for engine-layer
@ -416,7 +537,28 @@ impl Omnigraph {
options: SchemaApplyOptions,
actor: Option<&str>,
) -> Result<SchemaApplyResult> {
schema_apply::apply_schema(self, desired_schema_source, options, actor).await
self.apply_schema_as_with_catalog_check(desired_schema_source, options, actor, |_| Ok(()))
.await
}
/// Apply a schema migration with an explicit `actor` and a caller-supplied
/// catalog validation callback.
///
/// All arguments, including `validate_catalog`, are forwarded unchanged to
/// `schema_apply::apply_schema`, whose result is returned as-is.
///
/// `validate_catalog` is a one-shot callback that receives a `&Catalog` and
/// may return an error.
/// NOTE(review): presumably it is run against the catalog derived from
/// `desired_schema_source` before the migration is applied, and an `Err`
/// aborts the apply — the call site lives in `schema_apply::apply_schema`;
/// confirm there.
pub async fn apply_schema_as_with_catalog_check<F>(
&self,
desired_schema_source: &str,
options: SchemaApplyOptions,
actor: Option<&str>,
validate_catalog: F,
) -> Result<SchemaApplyResult>
where
F: FnOnce(&Catalog) -> Result<()>,
{
schema_apply::apply_schema(
self,
desired_schema_source,
options,
actor,
validate_catalog,
)
.await
}
pub(crate) async fn ensure_schema_apply_idle(&self, operation: &str) -> Result<()> {
@ -470,7 +612,7 @@ impl Omnigraph {
Arc::clone(&self.merge_exclusive)
}
/// Engine-level access to the repo's normalized root URI. Used by
/// Engine-level access to the graph's normalized root URI. Used by
/// the recovery sidecar protocol to compute `__recovery/` paths.
pub(crate) fn root_uri(&self) -> &str {
&self.root_uri
@ -510,9 +652,10 @@ impl Omnigraph {
let normalized = normalize_branch_name(branch.unwrap_or("main"))?;
let coord = self.coordinator.read().await;
if normalized.as_deref() == coord.current_branch() {
let snapshot_id = coord.head_commit_id().await?.unwrap_or_else(|| {
SnapshotId::synthetic(coord.current_branch(), coord.version())
});
let snapshot_id = coord
.head_commit_id()
.await?
.unwrap_or_else(|| SnapshotId::synthetic(coord.current_branch(), coord.version()));
return Ok(ResolvedTarget {
requested,
branch: coord.current_branch().map(str::to_string),
@ -587,7 +730,7 @@ impl Omnigraph {
/// exist. Required BEFORE manifest-drift recovery so a
/// SchemaApply roll-forward doesn't publish the manifest while
/// the staging files remain unrenamed (which would corrupt the
/// repo: data on new schema, catalog on old).
/// graph: data on new schema, catalog on old).
/// 3. `recover_manifest_drift(... RollForwardOnly)` — close the
/// finalize→publisher residual via roll-forward; defer rollback
/// work to next ReadWrite open.
@ -668,7 +811,11 @@ impl Omnigraph {
pub async fn resolve_snapshot(&self, branch: &str) -> Result<SnapshotId> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await.resolve_snapshot_id(branch).await
self.coordinator
.read()
.await
.resolve_snapshot_id(branch)
.await
}
pub(crate) async fn resolved_target(
@ -676,7 +823,11 @@ impl Omnigraph {
target: impl Into<ReadTarget>,
) -> Result<ResolvedTarget> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await.resolve_target(&target.into()).await
self.coordinator
.read()
.await
.resolve_target(&target.into())
.await
}
// ─── Change detection ────────────────────────────────────────────────
@ -708,7 +859,9 @@ impl Omnigraph {
filter: &crate::changes::ChangeFilter,
) -> Result<crate::changes::ChangeSet> {
let coord = self.coordinator.read().await;
let from_commit = coord.resolve_commit(&SnapshotId::new(from_commit_id)).await?;
let from_commit = coord
.resolve_commit(&SnapshotId::new(from_commit_id))
.await?;
let to_commit = coord.resolve_commit(&SnapshotId::new(to_commit_id)).await?;
let from_snap = coord
.resolve_target(&ReadTarget::Snapshot(SnapshotId::new(
@ -753,7 +906,11 @@ impl Omnigraph {
/// Create a Snapshot at any historical manifest version.
pub async fn snapshot_at_version(&self, version: u64) -> Result<Snapshot> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await.snapshot_at_version(version).await
self.coordinator
.read()
.await
.snapshot_at_version(version)
.await
}
pub async fn export_jsonl(
@ -894,11 +1051,20 @@ impl Omnigraph {
}
pub(crate) async fn active_branch(&self) -> Option<String> {
self.coordinator.read().await.current_branch().map(str::to_string)
self.coordinator
.read()
.await
.current_branch()
.map(str::to_string)
}
async fn ensure_branch_delete_safe(&self, branch: &str, branches: &[String]) -> Result<()> {
let descendants = self.coordinator.read().await.branch_descendants(branch).await?;
let descendants = self
.coordinator
.read()
.await
.branch_descendants(branch)
.await?;
if let Some(descendant) = descendants.first() {
return Err(OmniError::manifest_conflict(format!(
"cannot delete branch '{}' because descendant branch '{}' still depends on it",
@ -927,11 +1093,14 @@ impl Omnigraph {
Ok(())
}
async fn cleanup_deleted_branch_tables(
&self,
branch: &str,
owned_tables: &[(String, String)],
) -> Result<()> {
/// Best-effort reclaim of the per-table Lance forks a just-deleted branch
/// owned. Runs AFTER the manifest authority flip, so the branch is already
/// gone and these forks are unreachable orphans. A failure here (transient
/// object-store error, the `branch_delete.before_table_cleanup` failpoint)
/// is logged and swallowed: the `cleanup` reconciler is the guaranteed
/// backstop that converges any leftover orphan. Uses `force_delete_branch`
/// so a partially-reclaimed retry is idempotent.
async fn cleanup_deleted_branch_tables(&self, branch: &str, owned_tables: &[(String, String)]) {
let mut seen_paths = HashSet::new();
let mut cleanup_targets = owned_tables
.iter()
@ -942,19 +1111,30 @@ impl Omnigraph {
for (table_key, table_path) in cleanup_targets {
let dataset_uri = self.table_store.dataset_uri(&table_path);
if let Err(err) = self.table_store.delete_branch(&dataset_uri, branch).await {
return Err(OmniError::manifest_internal(format!(
"branch '{}' was deleted but cleanup failed for {}: {}",
branch, table_key, err
)));
let outcome = match crate::failpoints::maybe_fail("branch_delete.before_table_cleanup")
{
Ok(()) => self.table_store.force_delete_branch(&dataset_uri, branch).await,
Err(injected) => Err(injected),
};
if let Err(err) = outcome {
tracing::warn!(
target: "omnigraph::branch_delete::cleanup",
branch = %branch,
table = %table_key,
error = %err,
"best-effort fork reclaim failed; cleanup will reconcile the orphan",
);
}
}
Ok(())
}
async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> {
let active = self.coordinator.read().await.current_branch().map(str::to_string);
let active = self
.coordinator
.read()
.await
.current_branch()
.map(str::to_string);
if active.as_deref() == Some(branch) {
return Err(OmniError::manifest_conflict(format!(
"cannot delete currently active branch '{}'",
@ -969,9 +1149,12 @@ impl Omnigraph {
.map(|entry| (entry.table_key.clone(), entry.table_path.clone()))
.collect::<Vec<_>>();
// Authority flip (+ best-effort commit-graph reclaim) — must succeed.
self.coordinator.write().await.branch_delete(branch).await?;
// Best-effort per-table fork reclaim; cleanup reconciles any leftover.
self.cleanup_deleted_branch_tables(branch, &owned_tables)
.await
.await;
Ok(())
}
pub(crate) fn normalize_branch_name(branch: &str) -> Result<Option<String>> {
@ -1013,11 +1196,7 @@ impl Omnigraph {
self.coordinator.write().await.branch_create(name).await
}
pub async fn branch_create_from(
&self,
from: impl Into<ReadTarget>,
name: &str,
) -> Result<()> {
pub async fn branch_create_from(&self, from: impl Into<ReadTarget>, name: &str) -> Result<()> {
self.branch_create_from_as(from, name, None).await
}
@ -1134,7 +1313,9 @@ impl Omnigraph {
pub async fn get_commit(&self, commit_id: &str) -> Result<GraphCommit> {
self.ensure_schema_state_valid().await?;
self.coordinator.read().await
self.coordinator
.read()
.await
.resolve_commit(&SnapshotId::new(commit_id))
.await
}
@ -1449,6 +1630,71 @@ fn read_schema_ir_from_source(schema_source: &str) -> Result<SchemaIR> {
build_schema_ir(&schema_ast).map_err(|err| OmniError::manifest(err.to_string()))
}
/// Storage-writing half of `Omnigraph::init_with_storage`, factored out so
/// the caller can inspect the `Result` and run cleanup before propagating
/// the original error.
///
/// Steps run strictly in this order, each followed by a failpoint:
/// 1. `init.after_schema_pg_written` — only when `write_schema_pg` is set:
///    `_schema.pg` has just been (over)written here. When the flag is
///    clear the caller has already written the file and fired this
///    failpoint itself.
/// 2. `init.after_schema_contract_written` — `write_schema_contract` has
///    completed (the `_schema.ir.json` / `__schema_state.json` artifacts).
/// 3. `init.after_coordinator_init` — `GraphCoordinator::init` has
///    completed (Lance per-type datasets and `__manifest/`). The caller's
///    cleanup can only remove the schema files; removing Lance directories
///    needs a `delete_prefix` primitive, deferred along with
///    `DELETE /graphs/{id}`.
///
/// Returns the freshly initialized coordinator, or the first error hit.
async fn init_storage_phase(
    root: &str,
    schema_source: &str,
    schema_ir: &SchemaIR,
    catalog: &Catalog,
    storage: &Arc<dyn StorageAdapter>,
    write_schema_pg: bool,
) -> Result<GraphCoordinator> {
    // Step 1: unconditional overwrite of `_schema.pg`, when requested.
    if write_schema_pg {
        storage
            .write_text(&join_uri(root, SCHEMA_SOURCE_FILENAME), schema_source)
            .await?;
        crate::failpoints::maybe_fail("init.after_schema_pg_written")?;
    }

    // Step 2: remaining schema artifacts.
    write_schema_contract(root, storage.as_ref(), schema_ir).await?;
    crate::failpoints::maybe_fail("init.after_schema_contract_written")?;

    // Step 3: manifest + per-type datasets.
    let graph = GraphCoordinator::init(root, catalog, Arc::clone(storage)).await?;
    crate::failpoints::maybe_fail("init.after_coordinator_init")?;

    Ok(graph)
}
/// Try to remove the schema files a failed `init` may have left behind.
///
/// Exactly three URIs under `root` are targeted: `_schema.pg`,
/// `_schema.ir.json` and `__schema_state.json`. Nothing else is deleted —
/// Lance datasets and `__manifest/` stay, because recursive removal needs
/// a `StorageAdapter::delete_prefix` primitive that is deferred along with
/// `DELETE /graphs/{id}` (MR-668 PR 2b).
///
/// This function never fails: a delete error is reported through
/// `tracing::warn` and the loop moves on to the next file, so the init
/// error the caller is about to return is never masked.
async fn best_effort_cleanup_init_artifacts(root: &str, storage: &dyn StorageAdapter) {
    let artifacts = [
        schema_source_uri(root),
        schema_ir_uri(root),
        schema_state_uri(root),
    ];

    for uri in &artifacts {
        match storage.delete(uri).await {
            Ok(()) => {}
            Err(err) => {
                tracing::warn!(
                    target: "omnigraph::init::cleanup",
                    uri = %uri,
                    error = %err,
                    "init failed; best-effort cleanup could not delete artifact",
                );
            }
        }
    }
}
fn schema_table_key(type_kind: SchemaTypeKind, name: &str) -> String {
match type_kind {
SchemaTypeKind::Node => format!("node:{}", name),
@ -1658,7 +1904,7 @@ mod tests {
use crate::db::manifest::ManifestCoordinator;
use async_trait::async_trait;
use serde_json::Value;
use std::sync::Mutex;
use std::sync::{Arc, Mutex};
use crate::storage::{LocalStorageAdapter, StorageAdapter, join_uri};
@ -1712,6 +1958,11 @@ edge WorksAt: Person -> Company
self.inner.write_text(uri, contents).await
}
// Records `uri` in the `writes` log, then delegates to the inner adapter
// and returns its result unchanged.
async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result<bool> {
// The lock guard is a temporary of this statement, so it is released
// before the `.await` on the next line.
self.writes.lock().unwrap().push(uri.to_string());
self.inner.write_text_if_absent(uri, contents).await
}
async fn exists(&self, uri: &str) -> Result<bool> {
self.exists_checks.lock().unwrap().push(uri.to_string());
self.inner.exists(uri).await
@ -1735,13 +1986,96 @@ edge WorksAt: Person -> Company
}
}
/// Test-only `StorageAdapter` used to provoke a race between two `init`
/// calls. Every operation is forwarded to a `LocalStorageAdapter`; the only
/// added behavior is in `exists`, which waits on a shared barrier after
/// probing the graph's schema-state URI.
#[derive(Debug)]
struct InitRaceStorageAdapter {
// Adapter that performs the actual I/O.
inner: LocalStorageAdapter,
// Graph root used to compute the schema-state URI that triggers the barrier.
root: String,
// Rendezvous point shared by the racing callers.
barrier: Arc<tokio::sync::Barrier>,
}
#[async_trait]
impl StorageAdapter for InitRaceStorageAdapter {
// Pass-through.
async fn read_text(&self, uri: &str) -> Result<String> {
self.inner.read_text(uri).await
}
// Pass-through.
async fn write_text(&self, uri: &str, contents: &str) -> Result<()> {
self.inner.write_text(uri, contents).await
}
// Pass-through.
async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result<bool> {
self.inner.write_text_if_absent(uri, contents).await
}
// Performs the real existence check first, then — only for the
// schema-state URI — blocks until every barrier participant has arrived.
// The answer returned is the one observed BEFORE waiting, so callers that
// rendezvous here all act on their pre-barrier view of storage.
async fn exists(&self, uri: &str) -> Result<bool> {
let exists = self.inner.exists(uri).await?;
if uri == schema_state_uri(&self.root) {
self.barrier.wait().await;
}
Ok(exists)
}
// Pass-through.
async fn rename_text(&self, from_uri: &str, to_uri: &str) -> Result<()> {
self.inner.rename_text(from_uri, to_uri).await
}
// Pass-through.
async fn delete(&self, uri: &str) -> Result<()> {
self.inner.delete(uri).await
}
// Pass-through.
async fn list_dir(&self, dir_uri: &str) -> Result<Vec<String>> {
self.inner.list_dir(dir_uri).await
}
}
// Regression test for concurrent strict-mode `init` on the same empty root:
// exactly one call may succeed, and the failing call must not remove the
// schema files that the successful call wrote.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn concurrent_strict_init_does_not_delete_winning_schema_files() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap().to_string();
let root = normalize_root_uri(&uri).unwrap();
// One adapter shared by both callers; its two-party barrier makes each
// caller wait in `exists(schema_state_uri)` until the other has reached
// the same point.
let storage: Arc<dyn StorageAdapter> = Arc::new(InitRaceStorageAdapter {
inner: LocalStorageAdapter,
root,
barrier: Arc::new(tokio::sync::Barrier::new(2)),
});
// Two strict-mode (default options, i.e. no `force`) inits on the same URI.
let left = Omnigraph::init_with_storage(
&uri,
TEST_SCHEMA,
Arc::clone(&storage),
InitOptions::default(),
);
let right = Omnigraph::init_with_storage(
&uri,
TEST_SCHEMA,
Arc::clone(&storage),
InitOptions::default(),
);
// Drive both futures concurrently so they can meet at the barrier.
let (left, right) = tokio::join!(left, right);
let ok_count = usize::from(left.is_ok()) + usize::from(right.is_ok());
assert_eq!(ok_count, 1, "exactly one concurrent init should win");
// All three schema artifacts must still be on disk after the loser returned.
assert!(
dir.path().join("_schema.pg").exists(),
"winning init must leave _schema.pg in place"
);
assert!(
dir.path().join("_schema.ir.json").exists(),
"winning init must leave _schema.ir.json in place"
);
assert!(
dir.path().join("__schema_state.json").exists(),
"winning init must leave __schema_state.json in place"
);
}
#[tokio::test]
async fn test_init_and_open_route_graph_metadata_through_storage_adapter() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let adapter = Arc::new(RecordingStorageAdapter::default());
Omnigraph::init_with_storage(uri, TEST_SCHEMA, adapter.clone())
Omnigraph::init_with_storage(uri, TEST_SCHEMA, adapter.clone(), InitOptions::default())
.await
.unwrap();
assert!(adapter.writes().contains(&join_uri(uri, "_schema.pg")));

View file

@ -16,7 +16,12 @@ pub(super) async fn entity_at(
id: &str,
version: u64,
) -> Result<Option<serde_json::Value>> {
let snap = db.coordinator.read().await.snapshot_at_version(version).await?;
let snap = db
.coordinator
.read()
.await
.snapshot_at_version(version)
.await?;
entity_from_snapshot(db, &snap, table_key, id).await
}

View file

@ -1,7 +1,7 @@
//! Lance compaction + version cleanup exposed at the graph level.
//!
//! Lance accumulates many small `.lance` fragment files per table over the
//! life of a repo: each `write`, `load`, and `change` op appends one or more
//! life of a graph: each `write`, `load`, and `change` op appends one or more
//! fragments and a new manifest. Over long timescales this hurts open times
//! and S3 object counts without improving anything.
//!
@ -40,6 +40,20 @@ fn maint_concurrency() -> usize {
.unwrap_or(DEFAULT_MAINT_CONCURRENCY)
}
/// Whether the installed Lance can compact a dataset that contains blob
/// columns. `false` today: Lance `compact_files` forces
/// `BlobHandling::AllBinary` on the read side, and the blob-v2 struct decoder
/// mis-counts columns ("there were more fields in the schema than provided
/// column indices"), failing even a pristine uniform-V2_2 multi-fragment blob
/// table. Reads are unaffected (queries use descriptor handling).
///
/// This is a compile-time switch, not a runtime probe of the Lance version:
/// changing the behavior requires editing this constant.
///
/// While `false`, [`optimize_all_tables`] skips blob-bearing tables and reports
/// [`SkipReason::BlobColumnsUnsupportedByLance`] instead of aborting the whole
/// sweep; tables without blob columns are still compacted. Flip to `true` once
/// the upstream Lance fix ships — the
/// `lance_surface_guards.rs::compact_files_still_fails_on_blob_columns` guard
/// turns red on that bump and forces this flip. Tracked in `docs/dev/lance.md`.
const LANCE_SUPPORTS_BLOB_COMPACTION: bool = false;
/// Retention knobs for [`cleanup_all_tables`]. At least one must be set or
/// nothing is cleaned. If both are set, Lance applies them as AND (a manifest
/// is kept if it satisfies either — i.e. only manifests older than BOTH the
@ -52,8 +66,45 @@ pub struct CleanupPolicyOptions {
pub older_than: Option<Duration>,
}
/// Per-table outcome of `optimize_all_tables`.
/// Reason a table was left uncompacted by `optimize`.
///
/// Modelled as an enum so callers can match on the cause instead of parsing
/// message text. There is a single variant at present, and it is only produced
/// while [`LANCE_SUPPORTS_BLOB_COMPACTION`] is `false`. The type is
/// `#[non_exhaustive]`, so downstream matches need a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum SkipReason {
    /// The table carries at least one `Blob` column. Lance `compact_files`
    /// forces `BlobHandling::AllBinary`, which mis-decodes blob-v2 columns; see
    /// [`LANCE_SUPPORTS_BLOB_COMPACTION`] and `docs/dev/lance.md`.
    BlobColumnsUnsupportedByLance,
}

impl SkipReason {
    /// Machine-readable identifier used in serialized output (e.g. CLI
    /// `--json`). Treat the returned token as part of the output contract:
    /// once shipped it must not change.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::BlobColumnsUnsupportedByLance => "blob_columns_unsupported_by_lance",
        }
    }
}

impl std::fmt::Display for SkipReason {
    /// Writes the human-facing explanation used in CLI and log output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match *self {
            Self::BlobColumnsUnsupportedByLance => "blob columns — Lance compaction unsupported",
        })
    }
}
/// Per-table outcome of `optimize_all_tables`. This is a returned result type,
/// not built by callers, so it is `#[non_exhaustive]`: future fields stay
/// non-breaking and downstream code reads fields rather than constructing it.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct TableOptimizeStats {
pub table_key: String,
/// Number of source fragments that were rewritten by Lance.
@ -62,14 +113,44 @@ pub struct TableOptimizeStats {
pub fragments_added: usize,
/// Did this table get a new Lance manifest version from the compaction?
pub committed: bool,
/// `Some(reason)` if this table was deliberately not compacted. When set,
/// `fragments_removed == 0`, `fragments_added == 0`, and `!committed`.
pub skipped: Option<SkipReason>,
}
/// Per-table outcome of `cleanup_all_tables`.
impl TableOptimizeStats {
    /// Builds the row for a table on which Lance compaction ran.
    ///
    /// The fragment counts are copied from `metrics`; `committed` is supplied
    /// by the caller. `skipped` is always `None` for this constructor.
    fn compacted(table_key: String, metrics: &CompactionMetrics, committed: bool) -> Self {
        let fragments_removed = metrics.fragments_removed;
        let fragments_added = metrics.fragments_added;
        Self {
            table_key,
            fragments_removed,
            fragments_added,
            committed,
            skipped: None,
        }
    }

    /// Builds the row for a table where compaction was intentionally not
    /// attempted: both fragment counts are zero, `committed` is `false`, and
    /// `skipped` carries `reason`.
    fn skipped(table_key: String, reason: SkipReason) -> Self {
        let skipped = Some(reason);
        Self {
            table_key,
            fragments_removed: 0,
            fragments_added: 0,
            committed: false,
            skipped,
        }
    }
}
/// Per-table outcome of `cleanup_all_tables`. `error` is `Some` when this
/// table's version GC failed; cleanup is fault-isolated per table, so a single
/// table's failure is recorded here rather than aborting the whole sweep.
#[derive(Debug, Clone)]
pub struct TableCleanupStats {
pub table_key: String,
pub bytes_removed: u64,
pub old_versions_removed: u64,
pub error: Option<String>,
}
/// Run Lance `compact_files` on every node + edge table on `main`.
@ -81,14 +162,21 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStat
let resolved = db.resolved_branch_target(None).await?;
let snapshot = resolved.snapshot;
let table_tasks: Vec<_> = all_table_keys(&db.catalog())
.into_iter()
.filter_map(|table_key| {
let entry = snapshot.entry(&table_key)?;
// Compute per-table state (path + whether it has blob columns) up front, in
// a scope that drops the catalog handle before the async stream starts.
let table_tasks: Vec<(String, String, bool)> = {
let catalog = db.catalog();
let mut tasks = Vec::new();
for table_key in all_table_keys(&catalog) {
let Some(entry) = snapshot.entry(&table_key) else {
continue;
};
let full_path = format!("{}/{}", db.root_uri, entry.table_path);
Some((table_key, full_path))
})
.collect();
let has_blob = !blob_properties_for_table_key(&catalog, &table_key)?.is_empty();
tasks.push((table_key, full_path, has_blob));
}
tasks
};
if table_tasks.is_empty() {
return Ok(Vec::new());
@ -98,7 +186,24 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStat
let table_store = &db.table_store;
let stats: Vec<Result<TableOptimizeStats>> = futures::stream::iter(table_tasks.into_iter())
.map(|(table_key, full_path)| async move {
.map(|(table_key, full_path, has_blob)| async move {
// Lance `compact_files` mis-decodes blob-v2 columns under the forced
// `BlobHandling::AllBinary` read (see LANCE_SUPPORTS_BLOB_COMPACTION).
// Skip blob-bearing tables and report it rather than aborting the
// whole sweep — the other tables still compact.
if has_blob && !LANCE_SUPPORTS_BLOB_COMPACTION {
tracing::warn!(
target: "omnigraph::optimize",
table = %table_key,
"skipping compaction: table has blob columns the current Lance \
cannot rewrite (blob-v2 AllBinary decode bug); other tables \
unaffected rerun after the Lance fix",
);
return Ok(TableOptimizeStats::skipped(
table_key,
SkipReason::BlobColumnsUnsupportedByLance,
));
}
let mut ds = table_store
.open_dataset_head_for_write(&table_key, &full_path, None)
.await?;
@ -108,12 +213,11 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStat
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
let version_after = ds.version().version;
Ok(TableOptimizeStats {
Ok(TableOptimizeStats::compacted(
table_key,
fragments_removed: metrics.fragments_removed,
fragments_added: metrics.fragments_added,
committed: version_after != version_before,
})
&metrics,
version_after != version_before,
))
})
.buffer_unordered(concurrency)
.collect()
@ -138,6 +242,26 @@ pub async fn cleanup_all_tables(
db.ensure_schema_state_valid().await?;
db.ensure_schema_apply_idle("cleanup").await?;
// Reclaim orphaned branch forks (from an incomplete prior `branch_delete`)
// before version GC. Authority-derived and idempotent; the eager
// best-effort reclaim in `branch_delete` covers the common case, this is
// the guaranteed backstop. Logged for observability.
let reconciled = reconcile_orphaned_branches(db).await?;
if !reconciled.reclaimed.is_empty() {
tracing::info!(
count = reconciled.reclaimed.len(),
reclaimed = ?reconciled.reclaimed,
"cleanup reconciled orphaned branch forks"
);
}
if !reconciled.failures.is_empty() {
tracing::warn!(
count = reconciled.failures.len(),
failures = ?reconciled.failures,
"cleanup could not reconcile some orphaned forks; will retry next cleanup"
);
}
let before_timestamp = options.older_than.map(|d| Utc::now() - d);
let keep_versions = options.keep_versions;
@ -160,37 +284,205 @@ pub async fn cleanup_all_tables(
let concurrency = maint_concurrency().min(table_tasks.len()).max(1);
let table_store = &db.table_store;
let results: Vec<Result<TableCleanupStats>> = futures::stream::iter(table_tasks.into_iter())
// Fault-isolated per table: a single table's GC failure is recorded on its
// stats row (`error: Some`) and logged, never aborting the healthy tables.
// cleanup is the convergence backstop, so it must do as much as it can and
// converge on re-run rather than fail wholesale (invariant 13).
let results: Vec<TableCleanupStats> = futures::stream::iter(table_tasks.into_iter())
.map(|(table_key, full_path)| async move {
let ds = table_store
.open_dataset_head_for_write(&table_key, &full_path, None)
.await?;
let before_version = keep_versions
.map(|n| ds.version().version.saturating_sub(n as u64))
.filter(|v| *v > 0);
let policy = CleanupPolicy {
before_timestamp,
before_version,
delete_unverified: false,
error_if_tagged_old_versions: false,
clean_referenced_branches: false,
delete_rate_limit: None,
};
let removed: RemovalStats =
let outcome: Result<RemovalStats> = async {
crate::failpoints::maybe_fail("cleanup.table_gc")?;
let ds = table_store
.open_dataset_head_for_write(&table_key, &full_path, None)
.await?;
let before_version = keep_versions
.map(|n| ds.version().version.saturating_sub(n as u64))
.filter(|v| *v > 0);
let policy = CleanupPolicy {
before_timestamp,
before_version,
delete_unverified: false,
error_if_tagged_old_versions: false,
clean_referenced_branches: false,
delete_rate_limit: None,
};
lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
Ok(TableCleanupStats {
table_key,
bytes_removed: removed.bytes_removed,
old_versions_removed: removed.old_versions,
})
.map_err(|e| OmniError::Lance(e.to_string()))
}
.await;
match outcome {
Ok(removed) => TableCleanupStats {
table_key,
bytes_removed: removed.bytes_removed,
old_versions_removed: removed.old_versions,
error: None,
},
Err(err) => {
tracing::warn!(
target: "omnigraph::cleanup",
table = %table_key,
error = %err,
"version GC failed for table; other tables unaffected",
);
TableCleanupStats {
table_key,
bytes_removed: 0,
old_versions_removed: 0,
error: Some(err.to_string()),
}
}
}
})
.buffer_unordered(concurrency)
.collect()
.await;
results.into_iter().collect()
Ok(results)
}
/// Outcome of [`reconcile_orphaned_branches`]: the `(owner, branch)` pairs
/// reclaimed and the `(owner, error)` pairs that failed, where `owner` is a
/// table key (e.g. `node:Person`) or `"_graph_commits"`. Per-owner failures are
/// isolated and recorded here, not propagated — the next reconcile converges.
#[derive(Debug, Clone, Default)]
pub struct BranchReconcileStats {
    /// One `(owner, branch)` entry per orphaned Lance branch that was
    /// force-deleted successfully.
    pub reclaimed: Vec<(String, String)>,
    /// One `(owner, error)` entry per failure, where `error` is the error's
    /// `to_string()` text. Entries are recorded for a table whose branch
    /// listing failed, for each branch delete that failed, and for a failure
    /// of the commit-graph pass as a whole. The branch name is not part of the
    /// entry; it is only emitted in the accompanying warning log.
    pub failures: Vec<(String, String)>,
}
/// Delete every per-table and commit-graph Lance branch that the manifest no
/// longer lists.
///
/// Such orphaned forks appear when `branch_delete` has already flipped the
/// manifest authority (atomic) but the follow-up best-effort reclaim did not
/// finish. No manifest entry can name them, so no snapshot reaches them, yet
/// they still pin their `tree/{branch}/` storage and can block reuse of the
/// branch name. This function is the convergence backstop: it is derived only
/// from the manifest branch list, is idempotent, and finds nothing to do once
/// everything has been reconciled.
///
/// The keep-set is the complete, unfiltered manifest branch list, so forks of
/// system branches are never reclaimed; `main`/default is not a named Lance
/// branch and therefore never a candidate. Candidates are processed longest
/// name first (see [`orphan_branches`]) so that referencing children go before
/// their parents, which Lance refuses to delete while referenced.
///
/// Errors: only reading the manifest branch list or resolving the default
/// branch target is propagated. Every per-table and commit-graph failure is
/// logged, appended to `failures`, and left for the next run.
///
/// NOTE(review): the keep-set is read once, before any table is listed. A
/// branch created and forked while this sweep is in flight would not be in the
/// keep-set — confirm callers exclude concurrent branch creation.
pub async fn reconcile_orphaned_branches(db: &Omnigraph) -> Result<BranchReconcileStats> {
    use std::collections::HashSet;

    // Manifest authority: anything not in this set is an orphan.
    let manifest_branches = db.coordinator.read().await.all_branches().await?;
    let keep: HashSet<String> = manifest_branches.into_iter().collect();

    let snapshot = db.resolved_branch_target(None).await?.snapshot;

    // Resolve (table key, dataset path) pairs up front; tables without a
    // snapshot entry are not visited.
    let table_keys = all_table_keys(&db.catalog());
    let mut table_targets: Vec<(String, String)> = Vec::new();
    for table_key in table_keys {
        if let Some(entry) = snapshot.entry(&table_key) {
            let full_path = format!("{}/{}", db.root_uri, entry.table_path);
            table_targets.push((table_key, full_path));
        }
    }

    let mut stats = BranchReconcileStats::default();

    // Each table is handled independently: a failure is logged and recorded,
    // then the sweep moves on.
    for (table_key, full_path) in table_targets {
        let present = match db.table_store.list_branches(&full_path).await {
            Err(err) => {
                tracing::warn!(
                    target: "omnigraph::cleanup",
                    table = %table_key,
                    error = %err,
                    "listing branches failed during reconcile; skipping table",
                );
                stats.failures.push((table_key, err.to_string()));
                continue;
            }
            Ok(names) => names,
        };

        for branch in orphan_branches(present, &keep) {
            // The failpoint stands in for the delete when it fires, so an
            // injected error takes the same path as a real delete failure.
            let outcome =
                if let Err(injected) = crate::failpoints::maybe_fail("cleanup.reconcile_fork") {
                    Err(injected)
                } else {
                    db.table_store.force_delete_branch(&full_path, &branch).await
                };
            match outcome {
                Err(err) => {
                    tracing::warn!(
                        target: "omnigraph::cleanup",
                        table = %table_key,
                        branch = %branch,
                        error = %err,
                        "reclaiming orphaned fork failed; will retry next cleanup",
                    );
                    stats.failures.push((table_key.clone(), err.to_string()));
                }
                Ok(()) => stats.reclaimed.push((table_key.clone(), branch)),
            }
        }
    }

    // Commit-graph pass. Best-effort: the dataset may be absent on a graph
    // that has never committed, and any error is recorded rather than raised.
    let commit_graph_outcome = reconcile_commit_graph_orphans(db, &keep, &mut stats).await;
    if let Err(err) = commit_graph_outcome {
        tracing::warn!(
            target: "omnigraph::cleanup",
            error = %err,
            "commit-graph orphan reconcile failed; will retry next cleanup",
        );
        stats.failures.push(("_graph_commits".to_string(), err.to_string()));
    }

    Ok(stats)
}
/// Commit-graph counterpart of the per-table sweep in
/// [`reconcile_orphaned_branches`], kept separate so the caller can isolate
/// its errors.
///
/// Returns `Ok(())` without doing anything when the commit-graph dataset does
/// not exist. Otherwise every commit-graph branch missing from `keep` is
/// force-deleted; each success is appended to `stats.reclaimed` and each
/// failed delete to `stats.failures`, both under the owner `"_graph_commits"`.
/// Errors from the existence check, from opening the commit graph, or from
/// listing its branches are returned to the caller.
async fn reconcile_commit_graph_orphans(
    db: &Omnigraph,
    keep: &std::collections::HashSet<String>,
    stats: &mut BranchReconcileStats,
) -> Result<()> {
    const OWNER: &str = "_graph_commits";

    let commits_uri = crate::db::commit_graph::graph_commits_uri(db.root_uri());
    let dataset_present = db.storage_adapter().exists(&commits_uri).await?;
    if !dataset_present {
        return Ok(());
    }

    let mut commit_graph = crate::db::commit_graph::CommitGraph::open(db.root_uri()).await?;
    let present = commit_graph.list_branches().await?;
    for branch in orphan_branches(present, keep) {
        if let Err(err) = commit_graph.force_delete_branch(&branch).await {
            tracing::warn!(
                target: "omnigraph::cleanup",
                branch = %branch,
                error = %err,
                "reclaiming orphaned commit-graph branch failed; will retry next cleanup",
            );
            stats.failures.push((OWNER.to_string(), err.to_string()));
            continue;
        }
        stats.reclaimed.push((OWNER.to_string(), branch));
    }
    Ok(())
}
/// Returns the branches in `present` that are not in the manifest `keep` set.
///
/// The result is ordered by descending name length, with equal-length names
/// in ascending lexicographic order. Longest-first is how children are placed
/// ahead of their parents, so Lance's referenced-parent `RefConflict` cannot
/// block reclamation.
fn orphan_branches(present: Vec<String>, keep: &std::collections::HashSet<String>) -> Vec<String> {
    let mut orphans: Vec<String> = Vec::new();
    for branch in present {
        if keep.contains(&branch) {
            continue;
        }
        orphans.push(branch);
    }
    orphans.sort_by(|lhs, rhs| match rhs.len().cmp(&lhs.len()) {
        std::cmp::Ordering::Equal => lhs.cmp(rhs),
        by_length => by_length,
    });
    orphans
}
fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String> {
@ -198,12 +490,7 @@ fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String>
.node_types
.keys()
.map(|n| format!("node:{}", n))
.chain(
catalog
.edge_types
.keys()
.map(|n| format!("edge:{}", n)),
)
.chain(catalog.edge_types.keys().map(|n| format!("edge:{}", n)))
.collect();
keys.sort();
keys

View file

@ -48,12 +48,80 @@ pub(super) async fn plan_schema(
Ok(plan)
}
pub(super) async fn apply_schema(
/// Output of [`plan_schema_for_apply`]: everything computed during the
/// planning phase of a schema apply, bundled so that both the preview path and
/// the locked apply path can consume the same result.
struct PlannedSchemaApply {
    /// Migration plan from the accepted schema IR to the desired one, after
    /// `promote_drops_to_hard` has been applied. Always a supported plan:
    /// unsupported plans are rejected before this struct is built.
    plan: SchemaMigrationPlan,
    /// Schema IR parsed from the desired schema source.
    desired_ir: SchemaIR,
    /// Catalog built from `desired_ir`, with `fixup_blob_schemas` applied.
    desired_catalog: Catalog,
}
/// Runs the read-only planning phase of a schema apply.
///
/// Steps, in order: validate the persisted schema state; reject the request
/// if any branch other than `main` or an internal system branch exists; read
/// the accepted schema IR and parse the desired one; compute the migration
/// plan and promote drops according to `options.allow_data_loss`; reject
/// unsupported plans; build the desired catalog and fix up its blob schemas.
///
/// Errors: a manifest conflict when blocking branches exist, and a manifest
/// error when planning fails or the plan is unsupported. Errors from the
/// helpers called along the way are propagated unchanged.
async fn plan_schema_for_apply(
    db: &Omnigraph,
    desired_schema_source: &str,
    options: SchemaApplyOptions,
) -> Result<PlannedSchemaApply> {
    db.ensure_schema_state_valid().await?;

    // `main` and internal system branches never block. The schema-apply lock
    // branch is internal because it is the cluster-wide schema-apply
    // serializer. `__run__*` branches are no longer created; filtering them is
    // defense-in-depth for legacy graphs that still carry leftover staging
    // branches, and can go once a production sweep has removed those.
    let all_branches = db.coordinator.read().await.all_branches().await?;
    let mut blocking_branches = Vec::new();
    for branch in all_branches {
        if branch == "main" || is_internal_system_branch(&branch) {
            continue;
        }
        blocking_branches.push(branch);
    }
    if !blocking_branches.is_empty() {
        let listed = blocking_branches.join(", ");
        return Err(OmniError::manifest_conflict(format!(
            "schema apply requires a graph with only main; found non-main branches: {}",
            listed
        )));
    }

    let accepted_ir = read_accepted_schema_ir(db.uri(), Arc::clone(&db.storage)).await?;
    let desired_ir = read_schema_ir_from_source(desired_schema_source)?;

    let mut plan = match plan_schema_migration(&accepted_ir, &desired_ir) {
        Ok(plan) => plan,
        Err(err) => return Err(OmniError::manifest(err.to_string())),
    };
    promote_drops_to_hard(&mut plan, options.allow_data_loss);

    if !plan.supported {
        // Report the first step that explains why it is unsupported; fall
        // back to a generic message when no step offers one.
        let reason = plan
            .steps
            .iter()
            .find_map(|step| step.unsupported_error_message());
        let message = match reason {
            Some(text) => text,
            None => "unsupported schema migration plan".to_string(),
        };
        return Err(OmniError::manifest(message));
    }

    let desired_catalog = {
        let mut catalog = build_catalog_from_ir(&desired_ir)?;
        fixup_blob_schemas(&mut catalog);
        catalog
    };

    Ok(PlannedSchemaApply {
        plan,
        desired_ir,
        desired_catalog,
    })
}
pub(super) async fn preview_schema_apply(
db: &Omnigraph,
desired_schema_source: &str,
options: SchemaApplyOptions,
) -> Result<SchemaApplyPreview> {
let planned = plan_schema_for_apply(db, desired_schema_source, options).await?;
Ok(SchemaApplyPreview {
plan: planned.plan,
catalog: planned.desired_catalog,
})
}
pub(super) async fn apply_schema<F>(
db: &Omnigraph,
desired_schema_source: &str,
options: SchemaApplyOptions,
actor: Option<&str>,
) -> Result<SchemaApplyResult> {
validate_catalog: F,
) -> Result<SchemaApplyResult>
where
F: FnOnce(&Catalog) -> Result<()>,
{
// Engine-layer policy gate (MR-722 chassis core).
//
// Fires BEFORE acquiring the schema-apply lock or doing any other
@ -77,7 +145,7 @@ pub(super) async fn apply_schema(
)?;
acquire_schema_apply_lock(db).await?;
let result = apply_schema_with_lock(db, desired_schema_source, options).await;
let result = apply_schema_with_lock(db, desired_schema_source, options, validate_catalog).await;
let release_result = release_schema_apply_lock(db).await;
match (result, release_result) {
(Ok(result), Ok(())) => Ok(result),
@ -87,42 +155,22 @@ pub(super) async fn apply_schema(
}
}
pub(super) async fn apply_schema_with_lock(
pub(super) async fn apply_schema_with_lock<F>(
db: &Omnigraph,
desired_schema_source: &str,
options: SchemaApplyOptions,
) -> Result<SchemaApplyResult> {
db.ensure_schema_state_valid().await?;
let branches = db.coordinator.read().await.all_branches().await?;
// Skip `main` and internal system branches. The schema-apply lock branch
// is excluded because it is the cluster-wide schema-apply serializer.
// `__run__*` branches are no longer created; the filter remains as
// defense-in-depth for legacy repos with leftover staging branches.
// A future production sweep will let this guard go.
let blocking_branches = branches
.into_iter()
.filter(|branch| branch != "main" && !is_internal_system_branch(branch))
.collect::<Vec<_>>();
if !blocking_branches.is_empty() {
return Err(OmniError::manifest_conflict(format!(
"schema apply requires a repo with only main; found non-main branches: {}",
blocking_branches.join(", ")
)));
}
let accepted_ir = read_accepted_schema_ir(db.uri(), Arc::clone(&db.storage)).await?;
let desired_ir = read_schema_ir_from_source(desired_schema_source)?;
let mut plan = plan_schema_migration(&accepted_ir, &desired_ir)
.map_err(|err| OmniError::manifest(err.to_string()))?;
promote_drops_to_hard(&mut plan, options.allow_data_loss);
if !plan.supported {
let message = plan
.steps
.iter()
.find_map(|step| step.unsupported_error_message())
.unwrap_or_else(|| "unsupported schema migration plan".to_string());
return Err(OmniError::manifest(message));
}
validate_catalog: F,
) -> Result<SchemaApplyResult>
where
F: FnOnce(&Catalog) -> Result<()>,
{
let planned = plan_schema_for_apply(db, desired_schema_source, options).await?;
validate_catalog(&planned.desired_catalog)?;
let PlannedSchemaApply {
plan,
desired_ir,
desired_catalog,
} = planned;
if plan.steps.is_empty() {
return Ok(SchemaApplyResult {
supported: true,
@ -132,9 +180,6 @@ pub(super) async fn apply_schema_with_lock(
});
}
let mut desired_catalog = build_catalog_from_ir(&desired_ir)?;
fixup_blob_schemas(&mut desired_catalog);
let snapshot = db.snapshot().await;
let base_manifest_version = snapshot.version();
let mut added_tables = BTreeSet::new();
@ -780,7 +825,7 @@ pub(super) async fn acquire_schema_apply_lock(db: &Omnigraph) -> Result<()> {
if !blocking_branches.is_empty() {
let _ = release_schema_apply_lock(db).await;
return Err(OmniError::manifest_conflict(format!(
"schema apply requires a repo with only main; found non-main branches: {}",
"schema apply requires a graph with only main; found non-main branches: {}",
blocking_branches.join(", ")
)));
}

View file

@ -22,7 +22,12 @@ pub(super) async fn graph_index_for_resolved(
}
pub(super) async fn ensure_indices(db: &Omnigraph) -> Result<()> {
let current_branch = db.coordinator.read().await.current_branch().map(str::to_string);
let current_branch = db
.coordinator
.read()
.await
.current_branch()
.map(str::to_string);
ensure_indices_for_branch(db, current_branch.as_deref()).await
}
@ -68,10 +73,7 @@ pub(super) async fn failpoint_publish_table_head_without_index_rebuild_for_test(
.await
}
pub(super) async fn ensure_indices_for_branch(
db: &Omnigraph,
branch: Option<&str>,
) -> Result<()> {
pub(super) async fn ensure_indices_for_branch(db: &Omnigraph, branch: Option<&str>) -> Result<()> {
db.ensure_schema_state_valid().await?;
db.ensure_schema_apply_idle("ensure_indices").await?;
let resolved = db.resolved_branch_target(branch).await?;
@ -403,7 +405,12 @@ pub(super) async fn open_for_mutation(
table_key: &str,
op_kind: crate::db::MutationOpKind,
) -> Result<(Dataset, String, Option<String>)> {
let current_branch = db.coordinator.read().await.current_branch().map(str::to_string);
let current_branch = db
.coordinator
.read()
.await
.current_branch()
.map(str::to_string);
open_for_mutation_on_branch(db, current_branch.as_deref(), table_key, op_kind).await
}
@ -476,6 +483,22 @@ pub(super) async fn open_owned_dataset_for_branch_write(
Ok((ds, Some(active_branch.to_string())))
}
source_branch => {
crate::failpoints::maybe_fail("fork.before_classify")?;
// Authority check before forking: re-read the live manifest. If this
// table is already forked on active_branch, a concurrent first-write
// won the race and our snapshot is stale — that is a retryable
// conflict, not an orphan. (A zombie fork is never in the manifest,
// so this only fires for a live concurrent fork.)
let live = db.snapshot_for_branch(Some(active_branch)).await?;
if let Some(entry) = live.entry(table_key) {
if entry.table_branch.as_deref() == Some(active_branch) {
return Err(OmniError::manifest_expected_version_mismatch(
table_key,
entry_version,
entry.table_version,
));
}
}
fork_dataset_from_entry_state(
db,
table_key,
@ -807,7 +830,12 @@ pub(super) async fn commit_prepared_updates_on_branch(
updates: &[crate::db::SubTableUpdate],
actor_id: Option<&str>,
) -> Result<u64> {
let current_branch = db.coordinator.read().await.current_branch().map(str::to_string);
let current_branch = db
.coordinator
.read()
.await
.current_branch()
.map(str::to_string);
let requested_branch = branch.map(str::to_string);
if requested_branch == current_branch {
return commit_prepared_updates(db, updates, actor_id).await;
@ -835,7 +863,12 @@ pub(super) async fn commit_prepared_updates_on_branch_with_expected(
expected_table_versions: &std::collections::HashMap<String, u64>,
actor_id: Option<&str>,
) -> Result<u64> {
let current_branch = db.coordinator.read().await.current_branch().map(str::to_string);
let current_branch = db
.coordinator
.read()
.await
.current_branch()
.map(str::to_string);
let requested_branch = branch.map(str::to_string);
if requested_branch == current_branch {
return commit_prepared_updates_with_expected(
@ -870,7 +903,12 @@ pub(super) async fn commit_updates(
updates: &[crate::db::SubTableUpdate],
) -> Result<u64> {
db.ensure_schema_apply_not_locked("write commit").await?;
let current_branch = db.coordinator.read().await.current_branch().map(str::to_string);
let current_branch = db
.coordinator
.read()
.await
.current_branch()
.map(str::to_string);
let prepared = prepare_updates_for_commit(db, current_branch.as_deref(), updates).await?;
commit_prepared_updates(db, &prepared, None).await
}
@ -879,7 +917,11 @@ pub(super) async fn commit_manifest_updates(
db: &Omnigraph,
updates: &[crate::db::SubTableUpdate],
) -> Result<u64> {
db.coordinator.write().await.commit_manifest_updates(updates).await
db.coordinator
.write()
.await
.commit_manifest_updates(updates)
.await
}
pub(super) async fn record_merge_commit(
@ -889,7 +931,9 @@ pub(super) async fn record_merge_commit(
merged_parent_commit_id: &str,
actor_id: Option<&str>,
) -> Result<String> {
db.coordinator.write().await
db.coordinator
.write()
.await
.record_merge_commit(
manifest_version,
parent_commit_id,
@ -923,7 +967,11 @@ pub(super) async fn commit_updates_on_branch_with_expected(
}
pub(super) async fn ensure_commit_graph_initialized(db: &Omnigraph) -> Result<()> {
db.coordinator.write().await.ensure_commit_graph_initialized().await
db.coordinator
.write()
.await
.ensure_commit_graph_initialized()
.await
}
pub(super) async fn invalidate_graph_index(db: &Omnigraph) {

View file

@ -93,7 +93,7 @@ pub(crate) struct RecoveryAudit {
}
impl RecoveryAudit {
/// Open the recovery-audit dataset for the repo, or return a handle
/// Open the recovery-audit dataset for the graph, or return a handle
/// with no dataset yet (created on first append). Mirrors the
/// optional-dataset pattern from `_graph_commit_actors.lance`.
pub(crate) async fn open(root_uri: &str) -> Result<Self> {
@ -205,9 +205,7 @@ fn recovery_record_to_batch(record: &RecoveryAuditRecord) -> Result<RecordBatch>
vec![
Arc::new(StringArray::from(vec![record.graph_commit_id.clone()])),
Arc::new(StringArray::from(vec![record.recovery_kind.as_str()])),
Arc::new(StringArray::from(vec![record
.recovery_for_actor
.clone()])),
Arc::new(StringArray::from(vec![record.recovery_for_actor.clone()])),
Arc::new(StringArray::from(vec![record.operation_id.clone()])),
Arc::new(StringArray::from(vec![record.sidecar_writer_kind.clone()])),
Arc::new(StringArray::from(vec![outcomes_json])),
@ -221,10 +219,14 @@ fn decode_row(batch: &RecordBatch, row: usize) -> Result<RecoveryAuditRecord> {
let str_col = |name: &str| -> Result<&StringArray> {
batch
.column_by_name(name)
.ok_or_else(|| OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)))?
.ok_or_else(|| {
OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name))
})?
.as_any()
.downcast_ref::<StringArray>()
.ok_or_else(|| OmniError::manifest_internal(format!("column '{}' has wrong type", name)))
.ok_or_else(|| {
OmniError::manifest_internal(format!("column '{}' has wrong type", name))
})
};
let ts_col = batch
.column_by_name("created_at")
@ -269,9 +271,7 @@ pub(crate) fn now_micros() -> Result<i64> {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_micros() as i64)
.map_err(|e| {
OmniError::manifest_internal(format!("system clock before unix epoch: {}", e))
})
.map_err(|e| OmniError::manifest_internal(format!("system clock before unix epoch: {}", e)))
}
#[cfg(test)]
@ -307,7 +307,7 @@ mod tests {
let root = dir.path().to_str().unwrap();
let mut audit = RecoveryAudit::open(root).await.unwrap();
// Empty repo: list returns empty.
// Empty graph: list returns empty.
assert!(audit.list().await.unwrap().is_empty());
// Append + list.

View file

@ -61,7 +61,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
.collect::<Vec<_>>();
if !public_non_main.is_empty() {
return Err(schema_lock_conflict(format!(
"repo is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
"graph is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
public_non_main.join(", ")
)));
}
@ -70,7 +70,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
Ok((current_source_ir.clone(), state))
}
SchemaContractRead::PartialMissing => Err(schema_lock_conflict(
"repo schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
"graph schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
)),
}
}
@ -84,7 +84,7 @@ pub(crate) async fn validate_schema_contract(
SchemaContractRead::Present { ir, state } => (ir, state),
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
return Err(schema_lock_conflict(
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
));
}
};
@ -163,7 +163,7 @@ pub(crate) async fn read_accepted_schema_ir(
}
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
Err(schema_lock_conflict(
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
))
}
}
@ -221,7 +221,7 @@ async fn read_schema_contract(
})?;
let state = serde_json::from_str::<SchemaState>(&state_json).map_err(|err| {
schema_lock_conflict(format!(
"repo schema state in {} is invalid: {}",
"graph schema state in {} is invalid: {}",
SCHEMA_STATE_FILENAME, err
))
})?;
@ -234,7 +234,7 @@ async fn read_schema_contract(
fn validate_persisted_schema_contract(ir: &SchemaIR, state: &SchemaState) -> Result<()> {
if state.format_version != SCHEMA_STATE_FORMAT_VERSION {
return Err(schema_lock_conflict(format!(
"repo schema state format {} is unsupported",
"graph schema state format {} is unsupported",
state.format_version
)));
}
@ -344,7 +344,7 @@ pub(crate) async fn recover_schema_state_files(
// to the new Lance HEADs; we MUST also rename the staging files
// forward so the catalog matches. Without this, the disambiguation
// logic below sees actual_keys == live_keys (manifest didn't move)
// and deletes the staging files, leaving the repo with new-schema
// and deletes the staging files, leaving the graph with new-schema
// data on disk but the old `_schema.pg` live — corruption.
if crate::db::manifest::has_schema_apply_sidecar(root_uri, storage.as_ref()).await? {
warn!(

View file

@ -91,10 +91,7 @@ impl WriteQueueManager {
/// Empty input returns an empty Vec without touching the map.
/// Duplicates in `keys` are deduped before acquisition (the same
/// key acquired twice would deadlock against itself).
pub(crate) async fn acquire_many(
&self,
keys: &[TableQueueKey],
) -> Vec<OwnedMutexGuard<()>> {
pub(crate) async fn acquire_many(&self, keys: &[TableQueueKey]) -> Vec<OwnedMutexGuard<()>> {
if keys.is_empty() {
return Vec::new();
}
@ -167,7 +164,10 @@ mod tests {
qm2.acquire_many(&[z_clone, a_clone]).await
})
.await;
assert!(result.is_err(), "acquire_many should block on `a`, the lex-first key");
assert!(
result.is_err(),
"acquire_many should block on `a`, the lex-first key"
);
}
#[tokio::test]
@ -180,9 +180,10 @@ mod tests {
// Second acquire on same key should NOT complete within 200ms.
let qm2 = Arc::clone(&qm);
let k2 = k.clone();
let blocked = timeout(Duration::from_millis(200), async move {
qm2.acquire(&k2).await
})
let blocked = timeout(
Duration::from_millis(200),
async move { qm2.acquire(&k2).await },
)
.await;
assert!(blocked.is_err(), "second acquire on same key must block");

View file

@ -92,6 +92,14 @@ pub enum OmniError {
/// callers can match on this variant directly.
#[error("policy: {0}")]
Policy(String),
/// `Omnigraph::init` was called against a URI that already holds
/// schema artifacts from a previous init. Strict mode (the default)
/// fails fast with this error before touching disk so an existing
/// graph's metadata cannot be overwritten or destroyed. Operators
/// who actually want to overwrite pass `InitOptions { force: true }`
/// (CLI: `omnigraph init --force`).
#[error("graph already initialized at '{uri}'; pass --force to overwrite")]
AlreadyInitialized { uri: String },
}
impl OmniError {

View file

@ -794,11 +794,8 @@ impl Omnigraph {
// post_commit_pin) and tidies up. Failing the user
// here would return an error for a write that
// already landed.
if let Err(err) = crate::db::manifest::delete_sidecar(
&handle,
self.storage_adapter(),
)
.await
if let Err(err) =
crate::db::manifest::delete_sidecar(&handle, self.storage_adapter()).await
{
tracing::warn!(
error = %err,
@ -852,15 +849,8 @@ impl Omnigraph {
assignments,
predicate,
} => {
self.execute_update(
type_name,
assignments,
predicate,
params,
branch,
staging,
)
.await?
self.execute_update(type_name, assignments, predicate, params, branch, staging)
.await?
}
MutationOpIR::Delete {
type_name,
@ -981,14 +971,8 @@ impl Omnigraph {
// + iterate pending edges in-memory for the `src` column,
// group-by-src. The pending side already includes the row
// we just appended (above).
validate_edge_cardinality_with_pending(
self,
&ds,
staging,
&table_key,
edge_type,
)
.await?;
validate_edge_cardinality_with_pending(self, &ds, staging, &table_key, edge_type)
.await?;
self.invalidate_graph_index().await;
@ -1379,14 +1363,8 @@ async fn validate_edge_cardinality_with_pending(
if edge_type.cardinality.is_default() {
return Ok(());
}
let counts = super::staging::count_src_per_edge(
db,
committed_ds,
table_key,
staging,
None,
)
.await?;
let counts =
super::staging::count_src_per_edge(db, committed_ds, table_key, staging, None).await?;
super::staging::enforce_cardinality_bounds(edge_type, &counts)
}

View file

@ -345,10 +345,7 @@ fn evaluate_projection(
IRExpr::PropAccess { variable, property } => {
let col_name = format!("{}.{}", variable, property);
let col = wide_batch.column_by_name(&col_name).ok_or_else(|| {
OmniError::manifest(format!(
"column '{}' not found in wide batch",
col_name
))
OmniError::manifest(format!("column '{}' not found in wide batch", col_name))
})?;
Ok((col_name, col.clone()))
}
@ -516,12 +513,10 @@ fn aggregate_return(
}
let num_groups = group_indices.len();
let mut result_columns: Vec<(usize, String, ArrayRef)> =
Vec::with_capacity(projections.len());
let mut result_columns: Vec<(usize, String, ArrayRef)> = Vec::with_capacity(projections.len());
for gk in &group_keys {
let first_row_indices: Vec<u32> =
group_indices.iter().map(|rows| rows[0] as u32).collect();
let first_row_indices: Vec<u32> = group_indices.iter().map(|rows| rows[0] as u32).collect();
let take_idx = UInt32Array::from(first_row_indices);
let col = arrow_select::take::take(gk.column.as_ref(), &take_idx, None)
.map_err(|e| OmniError::Lance(e.to_string()))?;
@ -584,11 +579,19 @@ fn compute_aggregate(
}
}
fn compute_sum(arg: &ArrayRef, group_indices: &[Vec<usize>], num_groups: usize) -> Result<ArrayRef> {
fn compute_sum(
arg: &ArrayRef,
group_indices: &[Vec<usize>],
num_groups: usize,
) -> Result<ArrayRef> {
macro_rules! sum_numeric {
($arr_type:ty, $arg:expr, $dt:expr) => {{
let arr = $arg.as_any().downcast_ref::<$arr_type>().ok_or_else(|| {
OmniError::manifest(format!("sum: expected {:?}, got {:?}", $dt, $arg.data_type()))
OmniError::manifest(format!(
"sum: expected {:?}, got {:?}",
$dt,
$arg.data_type()
))
})?;
let mut builder = Float64Builder::with_capacity(num_groups);
for group in group_indices {
@ -613,24 +616,42 @@ fn compute_sum(arg: &ArrayRef, group_indices: &[Vec<usize>], num_groups: usize)
dt @ DataType::UInt64 => sum_numeric!(UInt64Array, arg, dt),
dt @ DataType::Float32 => sum_numeric!(Float32Array, arg, dt),
dt @ DataType::Float64 => sum_numeric!(Float64Array, arg, dt),
dt => Err(OmniError::manifest(format!("sum: unsupported type {:?}", dt))),
dt => Err(OmniError::manifest(format!(
"sum: unsupported type {:?}",
dt
))),
}
}
fn compute_avg(arg: &ArrayRef, group_indices: &[Vec<usize>], num_groups: usize) -> Result<ArrayRef> {
fn compute_avg(
arg: &ArrayRef,
group_indices: &[Vec<usize>],
num_groups: usize,
) -> Result<ArrayRef> {
macro_rules! avg_typed {
($arr_type:ty, $arg:expr) => {{
let arr = $arg.as_any().downcast_ref::<$arr_type>().ok_or_else(|| {
OmniError::manifest(format!("avg: expected {:?}, got {:?}", stringify!($arr_type), $arg.data_type()))
OmniError::manifest(format!(
"avg: expected {:?}, got {:?}",
stringify!($arr_type),
$arg.data_type()
))
})?;
let mut builder = Float64Builder::with_capacity(num_groups);
for group in group_indices {
let mut sum = 0.0f64;
let mut count = 0usize;
for &i in group {
if !arr.is_null(i) { sum += arr.value(i) as f64; count += 1; }
if !arr.is_null(i) {
sum += arr.value(i) as f64;
count += 1;
}
}
if count > 0 {
builder.append_value(sum / count as f64);
} else {
builder.append_null();
}
if count > 0 { builder.append_value(sum / count as f64); } else { builder.append_null(); }
}
Ok(Arc::new(builder.finish()) as ArrayRef)
}};
@ -642,15 +663,27 @@ fn compute_avg(arg: &ArrayRef, group_indices: &[Vec<usize>], num_groups: usize)
DataType::UInt64 => avg_typed!(UInt64Array, arg),
DataType::Float32 => avg_typed!(Float32Array, arg),
DataType::Float64 => avg_typed!(Float64Array, arg),
dt => Err(OmniError::manifest(format!("avg: unsupported type {:?}", dt))),
dt => Err(OmniError::manifest(format!(
"avg: unsupported type {:?}",
dt
))),
}
}
fn compute_min_max(arg: &ArrayRef, group_indices: &[Vec<usize>], num_groups: usize, is_min: bool) -> Result<ArrayRef> {
fn compute_min_max(
arg: &ArrayRef,
group_indices: &[Vec<usize>],
num_groups: usize,
is_min: bool,
) -> Result<ArrayRef> {
macro_rules! minmax_typed {
($arr_type:ty, $builder_type:ty, $arg:expr, $is_min:expr) => {{
let arr = $arg.as_any().downcast_ref::<$arr_type>().ok_or_else(|| {
OmniError::manifest(format!("min/max: expected {:?}, got {:?}", stringify!($arr_type), $arg.data_type()))
OmniError::manifest(format!(
"min/max: expected {:?}, got {:?}",
stringify!($arr_type),
$arg.data_type()
))
})?;
let mut builder = <$builder_type>::with_capacity(num_groups);
for group in group_indices {
@ -660,11 +693,20 @@ fn compute_min_max(arg: &ArrayRef, group_indices: &[Vec<usize>], num_groups: usi
let v = arr.value(i);
result = Some(match result {
None => v,
Some(cur) => if $is_min { if v < cur { v } else { cur } } else { if v > cur { v } else { cur } },
Some(cur) => {
if $is_min {
if v < cur { v } else { cur }
} else {
if v > cur { v } else { cur }
}
}
});
}
}
match result { Some(v) => builder.append_value(v), None => builder.append_null() }
match result {
Some(v) => builder.append_value(v),
None => builder.append_null(),
}
}
Ok(Arc::new(builder.finish()) as ArrayRef)
}};
@ -688,15 +730,27 @@ fn compute_min_max(arg: &ArrayRef, group_indices: &[Vec<usize>], num_groups: usi
let v = arr.value(i);
result = Some(match result {
None => v,
Some(cur) => if is_min { if v < cur { v } else { cur } } else { if v > cur { v } else { cur } },
Some(cur) => {
if is_min {
if v < cur { v } else { cur }
} else {
if v > cur { v } else { cur }
}
}
});
}
}
match result { Some(v) => builder.append_value(v), None => builder.append_null() }
match result {
Some(v) => builder.append_value(v),
None => builder.append_null(),
}
}
Ok(Arc::new(builder.finish()) as ArrayRef)
}
dt => Err(OmniError::manifest(format!("min/max: unsupported type {:?}", dt))),
dt => Err(OmniError::manifest(format!(
"min/max: unsupported type {:?}",
dt
))),
}
}
@ -715,7 +769,8 @@ fn build_empty_aggregate_result(projections: &[IRProjection]) -> Result<RecordBa
}
_ => {
fields.push(Field::new(name, DataType::Float64, true));
columns.push(Arc::new(Float64Array::from(vec![None as Option<f64>])) as ArrayRef);
columns
.push(Arc::new(Float64Array::from(vec![None as Option<f64>])) as ArrayRef);
}
},
_ => {

View file

@ -75,14 +75,7 @@ impl Omnigraph {
None
};
execute_query(
&ir,
params,
&snapshot,
graph_index.as_deref(),
&catalog,
)
.await
execute_query(&ir, params, &snapshot, graph_index.as_deref(), &catalog).await
}
}
@ -360,11 +353,23 @@ pub async fn execute_query(
}
let mut wide: Option<RecordBatch> = None;
execute_pipeline(&ir.pipeline, params, snapshot, graph_index, catalog, &mut wide, &search_mode).await?;
execute_pipeline(
&ir.pipeline,
params,
snapshot,
graph_index,
catalog,
&mut wide,
&search_mode,
)
.await?;
let wide_batch = wide.unwrap_or_else(|| RecordBatch::new_empty(Arc::new(Schema::empty())));
// Project return expressions
let has_aggregates = ir.return_exprs.iter().any(|p| matches!(&p.expr, IRExpr::Aggregate { .. }));
let has_aggregates = ir
.return_exprs
.iter()
.any(|p| matches!(&p.expr, IRExpr::Aggregate { .. }));
let mut result_batch = project_return(&wide_batch, &ir.return_exprs, params)?;
// Apply ordering (skip if search mode already ordered the results)
@ -516,9 +521,9 @@ async fn execute_rrf_query(
}
fn extract_id_column_by_name(batch: &RecordBatch, col_name: &str) -> Result<Vec<String>> {
let col = batch
.column_by_name(col_name)
.ok_or_else(|| OmniError::manifest(format!("batch missing '{}' column for RRF", col_name)))?;
let col = batch.column_by_name(col_name).ok_or_else(|| {
OmniError::manifest(format!("batch missing '{}' column for RRF", col_name))
})?;
let ids = col
.as_any()
.downcast_ref::<StringArray>()
@ -653,8 +658,19 @@ fn execute_pipeline<'a>(
})?;
if let Some(batch) = wide.as_mut() {
execute_expand(
batch, gi, snapshot, catalog, src_var, dst_var, edge_type, *direction,
dst_type, *min_hops, *max_hops, dst_filters, params,
batch,
gi,
snapshot,
catalog,
src_var,
dst_var,
edge_type,
*direction,
dst_type,
*min_hops,
*max_hops,
dst_filters,
params,
)
.await?;
}
@ -691,7 +707,9 @@ async fn execute_expand(
let src_id_col_name = format!("{}.id", src_var);
let src_ids = wide
.column_by_name(&src_id_col_name)
.ok_or_else(|| OmniError::manifest(format!("wide batch missing '{}' column", src_id_col_name)))?
.ok_or_else(|| {
OmniError::manifest(format!("wide batch missing '{}' column", src_id_col_name))
})?
.as_any()
.downcast_ref::<StringArray>()
.ok_or_else(|| OmniError::manifest(format!("'{}' column is not Utf8", src_id_col_name)))?
@ -1421,22 +1439,39 @@ fn literal_to_expr(lit: &Literal) -> Option<datafusion::prelude::Expr> {
}
fn prefix_batch(batch: &RecordBatch, variable: &str) -> Result<RecordBatch> {
let fields: Vec<Field> = batch.schema().fields().iter().map(|f| {
Field::new(format!("{}.{}", variable, f.name()), f.data_type().clone(), f.is_nullable())
}).collect();
let fields: Vec<Field> = batch
.schema()
.fields()
.iter()
.map(|f| {
Field::new(
format!("{}.{}", variable, f.name()),
f.data_type().clone(),
f.is_nullable(),
)
})
.collect();
let schema = Arc::new(Schema::new(fields));
RecordBatch::try_new(schema, batch.columns().to_vec()).map_err(|e| OmniError::Lance(e.to_string()))
RecordBatch::try_new(schema, batch.columns().to_vec())
.map_err(|e| OmniError::Lance(e.to_string()))
}
fn cross_join_batches(left: &RecordBatch, right: &RecordBatch) -> Result<RecordBatch> {
let n = left.num_rows();
let m = right.num_rows();
if n == 0 || m == 0 {
let mut fields: Vec<Field> = left.schema().fields().iter().map(|f| f.as_ref().clone()).collect();
let mut fields: Vec<Field> = left
.schema()
.fields()
.iter()
.map(|f| f.as_ref().clone())
.collect();
fields.extend(right.schema().fields().iter().map(|f| f.as_ref().clone()));
return Ok(RecordBatch::new_empty(Arc::new(Schema::new(fields))));
}
let left_indices: Vec<u32> = (0..n as u32).flat_map(|i| std::iter::repeat(i).take(m)).collect();
let left_indices: Vec<u32> = (0..n as u32)
.flat_map(|i| std::iter::repeat(i).take(m))
.collect();
let right_indices: Vec<u32> = (0..n).flat_map(|_| 0..m as u32).collect();
let left_expanded = take_batch(left, &UInt32Array::from(left_indices))?;
let right_expanded = take_batch(right, &UInt32Array::from(right_indices))?;
@ -1444,23 +1479,39 @@ fn cross_join_batches(left: &RecordBatch, right: &RecordBatch) -> Result<RecordB
}
fn hconcat_batches(left: &RecordBatch, right: &RecordBatch) -> Result<RecordBatch> {
let mut fields: Vec<Field> = left.schema().fields().iter().map(|f| f.as_ref().clone()).collect();
let mut fields: Vec<Field> = left
.schema()
.fields()
.iter()
.map(|f| f.as_ref().clone())
.collect();
if cfg!(debug_assertions) {
let left_schema = left.schema();
let left_names: HashSet<&str> = left_schema.fields().iter().map(|f| f.name().as_str()).collect();
let left_names: HashSet<&str> = left_schema
.fields()
.iter()
.map(|f| f.name().as_str())
.collect();
let right_schema = right.schema();
for f in right_schema.fields() {
debug_assert!(!left_names.contains(f.name().as_str()), "hconcat_batches: duplicate column '{}'", f.name());
debug_assert!(
!left_names.contains(f.name().as_str()),
"hconcat_batches: duplicate column '{}'",
f.name()
);
}
}
fields.extend(right.schema().fields().iter().map(|f| f.as_ref().clone()));
let mut columns: Vec<ArrayRef> = left.columns().to_vec();
columns.extend(right.columns().to_vec());
RecordBatch::try_new(Arc::new(Schema::new(fields)), columns).map_err(|e| OmniError::Lance(e.to_string()))
RecordBatch::try_new(Arc::new(Schema::new(fields)), columns)
.map_err(|e| OmniError::Lance(e.to_string()))
}
fn take_batch(batch: &RecordBatch, indices: &UInt32Array) -> Result<RecordBatch> {
let columns: Vec<ArrayRef> = batch.columns().iter()
let columns: Vec<ArrayRef> = batch
.columns()
.iter()
.map(|col| arrow_select::take::take(col.as_ref(), indices, None))
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(|e| OmniError::Lance(e.to_string()))?;

View file

@ -26,10 +26,10 @@ use arrow_schema::SchemaRef;
use lance::Dataset;
use omnigraph_compiler::catalog::EdgeType;
use crate::db::{MutationOpKind, SubTableUpdate};
use crate::db::manifest::{
new_sidecar, write_sidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
RecoverySidecarHandle, SidecarKind, SidecarTablePin, new_sidecar, write_sidecar,
};
use crate::db::{MutationOpKind, SubTableUpdate};
use crate::error::{OmniError, Result};
/// Whether the per-table accumulator should commit via `stage_append`
@ -119,10 +119,12 @@ impl MutationStaging {
expected_version: u64,
op_kind: MutationOpKind,
) {
self.paths.entry(table_key.to_string()).or_insert(StagedTablePath {
full_path,
table_branch,
});
self.paths
.entry(table_key.to_string())
.or_insert(StagedTablePath {
full_path,
table_branch,
});
self.expected_versions
.entry(table_key.to_string())
.or_insert(expected_version);
@ -202,7 +204,8 @@ impl MutationStaging {
/// Record a delete that already inline-committed at the Lance layer.
pub(crate) fn record_inline(&mut self, update: SubTableUpdate) {
self.inline_committed.insert(update.table_key.clone(), update);
self.inline_committed
.insert(update.table_key.clone(), update);
}
/// Read-your-writes accessor: the accumulated pending batches for
@ -308,18 +311,13 @@ impl MutationStaging {
// mode is exempt because no-key node and edge inserts use
// ULID-generated ids that are unique within a query.
let combined = match table.mode {
PendingMode::Merge => {
dedupe_merge_batches_by_id(&table.schema, table.batches)?
}
PendingMode::Merge => dedupe_merge_batches_by_id(&table.schema, table.batches)?,
PendingMode::Append => {
if table.batches.len() == 1 {
table.batches.into_iter().next().unwrap()
} else {
arrow_select::concat::concat_batches(
&table.schema,
&table.batches,
)
.map_err(|e| OmniError::Lance(e.to_string()))?
arrow_select::concat::concat_batches(&table.schema, &table.batches)
.map_err(|e| OmniError::Lance(e.to_string()))?
}
}
};
@ -327,9 +325,7 @@ impl MutationStaging {
// Stage produces uncommitted fragments + transaction. No
// Lance HEAD advance until `commit_all` runs `commit_staged`.
let staged = match table.mode {
PendingMode::Append => {
db.table_store().stage_append(&ds, combined, &[]).await?
}
PendingMode::Append => db.table_store().stage_append(&ds, combined, &[]).await?,
PendingMode::Merge => {
db.table_store()
.stage_merge_insert(
@ -420,7 +416,7 @@ impl StagedMutation {
///
/// Revalidation: between `stage_all` and `commit_all`, another
/// writer (in the same process or another process sharing the
/// repo) may have committed to one of our touched tables, advancing
/// graph) may have committed to one of our touched tables, advancing
/// the manifest pin past our `expected_version`. We revalidate
/// under the queue and fail-fast with `manifest_conflict` before
/// any `commit_staged` so the orphaned uncommitted fragments stay
@ -462,9 +458,8 @@ impl StagedMutation {
// from interleaving between our delete and our publish, which
// would otherwise leave a Lance-HEAD-ahead residual the
// delete-only sidecar (added below) would have to recover.
let mut queue_keys: Vec<(String, Option<String>)> = Vec::with_capacity(
staged.len() + inline_committed.len(),
);
let mut queue_keys: Vec<(String, Option<String>)> =
Vec::with_capacity(staged.len() + inline_committed.len());
for entry in &staged {
queue_keys.push((entry.table_key.clone(), entry.path.table_branch.clone()));
}
@ -565,9 +560,8 @@ impl StagedMutation {
// Finding 3 hazard: delete-only mutations would otherwise skip
// the sidecar, leaving any commit→publish residual unreachable
// by recovery.
let mut pins: Vec<SidecarTablePin> = Vec::with_capacity(
staged.len() + inline_committed.len(),
);
let mut pins: Vec<SidecarTablePin> =
Vec::with_capacity(staged.len() + inline_committed.len());
for entry in &staged {
pins.push(SidecarTablePin {
table_key: entry.table_key.clone(),
@ -899,10 +893,7 @@ pub(crate) async fn count_src_per_edge(
/// Count pending edges per `src` with NO dedup. Correct when caller
/// guarantees pending rows have unique primary keys (engine inserts via
/// fresh ULID; loader Append mode).
fn count_pending_src_naive(
pending_batches: &[RecordBatch],
counts: &mut HashMap<String, u32>,
) {
fn count_pending_src_naive(pending_batches: &[RecordBatch], counts: &mut HashMap<String, u32>) {
for batch in pending_batches {
let Some(col) = batch.column_by_name("src") else {
continue;
@ -947,12 +938,15 @@ fn count_pending_src_with_dedupe(
dedupe_key_column
)));
};
let key_arr = key_col.as_any().downcast_ref::<StringArray>().ok_or_else(|| {
OmniError::Lance(format!(
"count_src_per_edge: pending '{}' column is not Utf8",
dedupe_key_column
))
})?;
let key_arr = key_col
.as_any()
.downcast_ref::<StringArray>()
.ok_or_else(|| {
OmniError::Lance(format!(
"count_src_per_edge: pending '{}' column is not Utf8",
dedupe_key_column
))
})?;
let src_arr = batch
.column_by_name("src")
.and_then(|c| c.as_any().downcast_ref::<StringArray>());

View file

@ -212,12 +212,7 @@ impl Omnigraph {
.await
}
pub async fn load_file(
&self,
branch: &str,
path: &str,
mode: LoadMode,
) -> Result<LoadResult> {
pub async fn load_file(&self, branch: &str, path: &str, mode: LoadMode) -> Result<LoadResult> {
self.load_file_as(branch, path, mode, None).await
}
@ -457,13 +452,7 @@ async fn load_jsonl_reader<R: BufRead>(
for (edge_name, rows) in &edge_rows {
let edge_type = &catalog.edge_types[edge_name];
let from_ids = if use_staging {
collect_node_ids_with_pending(
db,
branch,
&edge_type.from_type,
&staging,
)
.await?
collect_node_ids_with_pending(db, branch, &edge_type.from_type, &staging).await?
} else {
collect_node_ids(
db,
@ -476,13 +465,7 @@ async fn load_jsonl_reader<R: BufRead>(
.await?
};
let to_ids = if use_staging {
collect_node_ids_with_pending(
db,
branch,
&edge_type.to_type,
&staging,
)
.await?
collect_node_ids_with_pending(db, branch, &edge_type.to_type, &staging).await?
} else {
collect_node_ids(
db,
@ -581,12 +564,7 @@ async fn load_jsonl_reader<R: BufRead>(
let table_key = format!("edge:{}", edge_name);
if use_staging {
validate_edge_cardinality_with_pending_loader(
db,
branch,
edge_type,
&table_key,
&staging,
mode,
db, branch, edge_type, &table_key, &staging, mode,
)
.await?;
} else if let Some(update) = overwrite_updates.iter().find(|u| u.table_key == table_key) {
@ -635,7 +613,7 @@ async fn load_jsonl_reader<R: BufRead>(
} else {
// LoadMode::Overwrite keeps the legacy inline-commit path —
// truncate-then-append doesn't fit the staged shape (see
// `docs/runs.md` "LoadMode::Overwrite residual"). The recovery
// `docs/dev/writes.md` "LoadMode::Overwrite residual"). The recovery
// sidecar is not applicable here because the writer doesn't go
// through MutationStaging; per-table inline commits + a final
// manifest publish handle their own residual via the documented
@ -1699,8 +1677,7 @@ async fn validate_edge_cardinality_with_pending_loader(
LoadMode::Append | LoadMode::Overwrite => None,
};
let counts =
crate::exec::staging::count_src_per_edge(db, &ds, table_key, staging, dedupe_key)
.await?;
crate::exec::staging::count_src_per_edge(db, &ds, table_key, staging, dedupe_key).await?;
crate::exec::staging::enforce_cardinality_bounds(edge_type, &counts)
}

View file

@ -7,7 +7,8 @@ use async_trait::async_trait;
use futures::TryStreamExt;
use object_store::aws::AmazonS3Builder;
use object_store::path::Path as ObjectPath;
use object_store::{DynObjectStore, ObjectStore, PutPayload};
use object_store::{DynObjectStore, ObjectStore, PutMode, PutPayload};
use tokio::io::AsyncWriteExt;
use url::Url;
use crate::error::{OmniError, Result};
@ -19,6 +20,13 @@ const S3_SCHEME_PREFIX: &str = "s3://";
pub trait StorageAdapter: Debug + Send + Sync {
async fn read_text(&self, uri: &str) -> Result<String>;
async fn write_text(&self, uri: &str, contents: &str) -> Result<()>;
/// Write a text object only if no object exists at `uri`.
///
/// Returns `Ok(true)` when this call created the object, `Ok(false)`
/// when the object already existed, and propagates every other storage
/// error. Callers use this to establish ownership before running
/// best-effort cleanup on partial failure.
async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result<bool>;
async fn exists(&self, uri: &str) -> Result<bool>;
/// Move a file from `from_uri` to `to_uri`, replacing any existing file at
/// `to_uri`. Atomic on local POSIX; on S3 implemented as copy + delete
@ -66,7 +74,7 @@ impl StorageAdapter for LocalStorageAdapter {
// Ensure parent directory exists. S3 has no equivalent (PutObject
// is path-agnostic). For local fs, callers like the recovery
// sidecar protocol expect transparent directory creation under
// the repo root (the `__recovery/` directory doesn't pre-exist;
// the graph root (the `__recovery/` directory doesn't pre-exist;
// first sidecar write creates it).
if let Some(parent) = path.parent() {
if !parent.as_os_str().is_empty() {
@ -77,6 +85,30 @@ impl StorageAdapter for LocalStorageAdapter {
Ok(())
}
/// Create the file at `uri` holding `contents`, but only if nothing exists
/// at that path yet.
///
/// Returns `Ok(true)` when this call created the file and `Ok(false)` when
/// the path already existed (`create_new` reports `AlreadyExists`). Missing
/// parent directories are created first. Every other I/O error is
/// propagated; if writing or flushing the contents fails, the newly
/// created file is removed on a best-effort basis before the error is
/// returned.
async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result<bool> {
    let path = local_path_from_uri(uri)?;
    if let Some(parent) = path.parent() {
        if !parent.as_os_str().is_empty() {
            tokio::fs::create_dir_all(parent).await?;
        }
    }
    // `create_new(true)` makes "create only if absent" a single atomic
    // open, so two racing callers cannot both observe success.
    let mut file = match tokio::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(&path)
        .await
    {
        Ok(file) => file,
        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => return Ok(false),
        Err(err) => return Err(err.into()),
    };
    // `tokio::fs::File` hands writes to a background blocking task, so a
    // successful `write_all` does not mean the bytes reached the file.
    // Flush before the handle is dropped so the write has completed (and
    // any late write error is surfaced) by the time we report success.
    let written = match file.write_all(contents.as_bytes()).await {
        Ok(()) => file.flush().await,
        Err(err) => Err(err),
    };
    if let Err(err) = written {
        // Best-effort cleanup: do not leave a partially written file
        // behind that later callers would mistake for a valid claim.
        let _ = tokio::fs::remove_file(&path).await;
        return Err(err.into());
    }
    Ok(true)
}
async fn exists(&self, uri: &str) -> Result<bool> {
Ok(local_path_from_uri(uri)?.exists())
}
@ -146,6 +178,24 @@ impl StorageAdapter for S3StorageAdapter {
Ok(())
}
async fn write_text_if_absent(&self, uri: &str, contents: &str) -> Result<bool> {
let location = self.object_path(uri)?;
match self
.store
.put_opts(
&location,
PutPayload::from(contents.as_bytes().to_vec()),
PutMode::Create.into(),
)
.await
{
Ok(_) => Ok(true),
Err(object_store::Error::AlreadyExists { .. })
| Err(object_store::Error::Precondition { .. }) => Ok(false),
Err(err) => Err(storage_backend_error("write_if_absent", uri, err)),
}
}
async fn exists(&self, uri: &str) -> Result<bool> {
let location = self.object_path(uri)?;
match self.store.head(&location).await {
@ -398,10 +448,13 @@ mod tests {
#[test]
fn storage_backend_selection_is_scheme_aware() {
assert_eq!(storage_kind_for_uri("/tmp/repo"), StorageKind::Local);
assert_eq!(storage_kind_for_uri("file:///tmp/repo"), StorageKind::Local);
assert_eq!(storage_kind_for_uri("/tmp/graph"), StorageKind::Local);
assert_eq!(
storage_kind_for_uri("s3://omnigraph-preview/repo"),
storage_kind_for_uri("file:///tmp/graph"),
StorageKind::Local
);
assert_eq!(
storage_kind_for_uri("s3://omnigraph-preview/graph"),
StorageKind::S3
);
}
@ -440,8 +493,20 @@ mod tests {
#[test]
fn parse_s3_uri_splits_bucket_and_key() {
let location = parse_s3_uri("s3://bucket/repo/_schema.pg").unwrap();
let location = parse_s3_uri("s3://bucket/graph/_schema.pg").unwrap();
assert_eq!(location.bucket, "bucket");
assert_eq!(location.key, "repo/_schema.pg");
assert_eq!(location.key, "graph/_schema.pg");
}
#[tokio::test]
async fn local_write_text_if_absent_creates_once_without_overwrite() {
    let dir = tempfile::tempdir().unwrap();
    let claim_path = dir.path().join("claim.txt");
    let claim_uri = claim_path.to_str().unwrap();
    let adapter = LocalStorageAdapter;

    // The first call creates the file and reports that it did so.
    let created = adapter
        .write_text_if_absent(claim_uri, "first")
        .await
        .unwrap();
    assert!(created);

    // The second call must report "already exists" and leave the
    // original contents untouched.
    let created_again = adapter
        .write_text_if_absent(claim_uri, "second")
        .await
        .unwrap();
    assert!(!created_again);
    assert_eq!(adapter.read_text(claim_uri).await.unwrap(), "first");
}
}

View file

@ -94,7 +94,9 @@ impl SnapshotHandle {
/// Construct from a Lance dataset. `pub(crate)` — only
/// `TableStore` should produce these.
pub(crate) fn new(ds: Dataset) -> Self {
Self { inner: Arc::new(ds) }
Self {
inner: Arc::new(ds),
}
}
/// Borrow the underlying Lance dataset. `pub(crate)` so only the
@ -242,16 +244,10 @@ pub trait TableStorage: sealed::Sealed + Send + Sync + Debug {
async fn scan_batches(&self, snapshot: &SnapshotHandle) -> Result<Vec<RecordBatch>>;
async fn scan_batches_for_rewrite(
&self,
snapshot: &SnapshotHandle,
) -> Result<Vec<RecordBatch>>;
async fn scan_batches_for_rewrite(&self, snapshot: &SnapshotHandle)
-> Result<Vec<RecordBatch>>;
async fn count_rows(
&self,
snapshot: &SnapshotHandle,
filter: Option<String>,
) -> Result<usize>;
async fn count_rows(&self, snapshot: &SnapshotHandle, filter: Option<String>) -> Result<usize>;
async fn count_rows_with_staged(
&self,
@ -284,11 +280,8 @@ pub trait TableStorage: sealed::Sealed + Send + Sync + Debug {
filter: &str,
) -> Result<Option<u64>>;
async fn table_state(
&self,
dataset_uri: &str,
snapshot: &SnapshotHandle,
) -> Result<TableState>;
async fn table_state(&self, dataset_uri: &str, snapshot: &SnapshotHandle)
-> Result<TableState>;
// ── Staged writes (no HEAD advance) ────────────────────────────────
@ -565,11 +558,7 @@ impl TableStorage for TableStore {
TableStore::scan_batches_for_rewrite(self, snapshot.dataset()).await
}
async fn count_rows(
&self,
snapshot: &SnapshotHandle,
filter: Option<String>,
) -> Result<usize> {
async fn count_rows(&self, snapshot: &SnapshotHandle, filter: Option<String>) -> Result<usize> {
TableStore::count_rows(self, snapshot.dataset(), filter).await
}
@ -591,14 +580,8 @@ impl TableStorage for TableStore {
filter: Option<&str>,
) -> Result<Vec<RecordBatch>> {
let staged_writes = staged_handles_as_writes(staged);
TableStore::scan_with_staged(
self,
snapshot.dataset(),
&staged_writes,
projection,
filter,
)
.await
TableStore::scan_with_staged(self, snapshot.dataset(), &staged_writes, projection, filter)
.await
}
async fn scan_with_pending(
@ -658,18 +641,10 @@ impl TableStorage for TableStore {
when_matched: WhenMatched,
when_not_matched: WhenNotMatched,
) -> Result<StagedHandle> {
let ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
TableStore::stage_merge_insert(
self,
ds,
batch,
key_columns,
when_matched,
when_not_matched,
)
.await
.map(StagedHandle::new)
let ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
TableStore::stage_merge_insert(self, ds, batch, key_columns, when_matched, when_not_matched)
.await
.map(StagedHandle::new)
}
async fn commit_staged(
@ -720,8 +695,7 @@ impl TableStorage for TableStore {
snapshot: SnapshotHandle,
batch: RecordBatch,
) -> Result<(SnapshotHandle, TableState)> {
let mut ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
let state = TableStore::append_batch(self, dataset_uri, &mut ds, batch).await?;
Ok((SnapshotHandle::new(ds), state))
}
@ -735,8 +709,7 @@ impl TableStorage for TableStore {
when_matched: WhenMatched,
when_not_matched: WhenNotMatched,
) -> Result<TableState> {
let ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
let ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
TableStore::merge_insert_batches(
self,
dataset_uri,
@ -755,8 +728,7 @@ impl TableStorage for TableStore {
snapshot: SnapshotHandle,
batch: RecordBatch,
) -> Result<(SnapshotHandle, TableState)> {
let mut ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
let state = TableStore::overwrite_batch(self, dataset_uri, &mut ds, batch).await?;
Ok((SnapshotHandle::new(ds), state))
}
@ -767,8 +739,7 @@ impl TableStorage for TableStore {
snapshot: SnapshotHandle,
filter: &str,
) -> Result<(SnapshotHandle, DeleteState)> {
let mut ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
let state = TableStore::delete_where(self, dataset_uri, &mut ds, filter).await?;
Ok((SnapshotHandle::new(ds), state))
}
@ -790,8 +761,7 @@ impl TableStorage for TableStore {
snapshot: SnapshotHandle,
columns: &[&str],
) -> Result<SnapshotHandle> {
let mut ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
TableStore::create_btree_index(self, &mut ds, columns).await?;
Ok(SnapshotHandle::new(ds))
}
@ -801,8 +771,7 @@ impl TableStorage for TableStore {
snapshot: SnapshotHandle,
column: &str,
) -> Result<SnapshotHandle> {
let mut ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
TableStore::create_inverted_index(self, &mut ds, column).await?;
Ok(SnapshotHandle::new(ds))
}
@ -812,8 +781,7 @@ impl TableStorage for TableStore {
snapshot: SnapshotHandle,
column: &str,
) -> Result<SnapshotHandle> {
let mut ds = Arc::try_unwrap(snapshot.into_arc())
.unwrap_or_else(|arc| (*arc).clone());
let mut ds = Arc::try_unwrap(snapshot.into_arc()).unwrap_or_else(|arc| (*arc).clone());
TableStore::create_vector_index(self, &mut ds, column).await?;
Ok(SnapshotHandle::new(ds))
}
@ -837,6 +805,13 @@ impl TableStorage for TableStore {
// Note: existing TableStore::scan_stream is an associated fn that
// takes &Dataset, so we delegate via the dataset reference held by
// the snapshot.
TableStore::scan_stream(snapshot.dataset(), projection, filter, order_by, with_row_id).await
TableStore::scan_stream(
snapshot.dataset(),
projection,
filter,
order_by,
with_row_id,
)
.await
}
}

View file

@ -49,7 +49,7 @@ pub struct DeleteState {
/// `exec/mutation.rs`) and the bulk loader (`loader/mod.rs`). The
/// intent: defer Lance commits to end-of-query so a mid-query failure
/// leaves the touched table at the pre-mutation HEAD instead of
/// drifting ahead. See `docs/runs.md` for the publisher-CAS contract
/// drifting ahead. See `docs/dev/writes.md` for the publisher-CAS contract
/// this builds on.
///
/// `transaction` is opaque from our side — Lance owns its semantics. We
@ -177,6 +177,45 @@ impl TableStore {
.map_err(|e| OmniError::Lance(e.to_string()))
}
/// Return the names of all named Lance branches on the dataset at
/// `dataset_uri`.
///
/// The `cleanup` orphan reconciler compares this list with the manifest
/// branch set to detect orphaned per-table forks. `main`/default is not a
/// named branch, so it never shows up in the result.
///
/// Any failure to open the dataset or to list its branches is reported as
/// `OmniError::Lance` carrying the underlying error text.
pub async fn list_branches(&self, dataset_uri: &str) -> Result<Vec<String>> {
    let dataset = Dataset::open(dataset_uri)
        .await
        .map_err(|err| OmniError::Lance(err.to_string()))?;
    dataset
        .list_branches()
        .await
        // Only the branch names are needed; the per-branch contents are dropped.
        .map(|by_name| by_name.into_keys().collect::<Vec<String>>())
        .map_err(|err| OmniError::Lance(err.to_string()))
}
/// Idempotently drop `branch` from the dataset at `dataset_uri`.
///
/// Unlike [`delete_branch`](Self::delete_branch), this tolerates an
/// already-absent branch — both a missing contents ref (Lance's
/// `force_delete_branch` handles that) and a missing `tree/{branch}/`
/// directory (the local-store `NotFound` quirk pinned by
/// `lance_surface_guards::force_delete_branch_semantics`). Safe to call on a
/// possibly-orphaned or already-reclaimed fork.
///
/// A branch that still has referencing descendants (`RefConflict`) is NOT
/// tolerated: that is a real ordering error and surfaces as `OmniError::Lance`.
/// Used by the eager best-effort reclaim in `cleanup_deleted_branch_tables`
/// and the `cleanup` orphan reconciler.
pub async fn force_delete_branch(&self, dataset_uri: &str, branch: &str) -> Result<()> {
let mut ds = Dataset::open(dataset_uri)
.await
.map_err(|e| OmniError::Lance(e.to_string()))?;
match ds.force_delete_branch(branch).await {
Ok(()) => Ok(()),
Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => Ok(()),
Err(e) => Err(OmniError::Lance(e.to_string())),
}
}
pub async fn open_dataset_at_state(
&self,
table_path: &str,
@ -243,21 +282,24 @@ impl TableStore {
.map_err(|e| OmniError::Lance(e.to_string()))?;
self.ensure_expected_version(&source_ds, table_key, source_version)?;
match source_ds
if source_ds
.create_branch(target_branch, source_version, None)
.await
.is_err()
{
Ok(_) => {}
Err(create_err) => match self
.open_dataset_head(dataset_uri, Some(target_branch))
.await
{
Ok(ds) => {
self.ensure_expected_version(&ds, table_key, source_version)?;
return Ok(ds);
}
Err(_) => return Err(OmniError::Lance(create_err.to_string())),
},
// The target branch ref already exists. The caller
// (`open_owned_dataset_for_branch_write`) re-reads the live manifest
// before forking and returns a retryable error when a concurrent
// writer legitimately holds the fork, so reaching here means the
// manifest does NOT reference this fork: it is an orphan from an
// incomplete prior `branch_delete`. Surface the actionable cleanup
// error rather than guessing from Lance branch versions.
return Err(OmniError::manifest_conflict(format!(
"branch '{}' has orphaned table state for '{}' from an incomplete \
prior delete; run `omnigraph cleanup` to reclaim it before reusing \
this branch name",
target_branch, table_key
)));
}
let ds = self
@ -901,7 +943,7 @@ impl TableStore {
/// Lift path: either a Lance API extension that lets
/// `MergeInsertBuilder` accept additional staged fragments, or an
/// in-memory pre-merge here that folds prior staged batches into the
/// input stream. See `docs/runs.md`.
/// input stream. See `docs/dev/writes.md`.
pub async fn stage_merge_insert(
&self,
ds: Dataset,
@ -1793,25 +1835,24 @@ mod tests {
#[test]
fn check_batch_unique_by_keys_errors_on_duplicate_id() {
let batch = batch_with_ids(&["a", "b", "a"]);
let err =
check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap_err();
let err = check_batch_unique_by_keys(&batch, &["id".to_string()], "test").unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("duplicate source row for key 'a'"),
"unexpected error: {msg}"
);
assert!(msg.contains("MR-957"), "error should reference MR-957: {msg}");
assert!(
msg.contains("MR-957"),
"error should reference MR-957: {msg}"
);
}
#[test]
fn check_batch_unique_by_keys_rejects_multi_column_keys() {
let batch = batch_with_ids(&["a"]);
let err = check_batch_unique_by_keys(
&batch,
&["id".to_string(), "other".to_string()],
"test",
)
.unwrap_err();
let err =
check_batch_unique_by_keys(&batch, &["id".to_string(), "other".to_string()], "test")
.unwrap_err();
assert!(err.to_string().contains("single-column keys only"));
}
}

View file

@ -56,7 +56,7 @@ async fn composite_flow_canonical_lifecycle() {
let uri = dir.path().to_str().unwrap();
// ─────────────────────────────────────────────────────────────────
// Step 1: init a fresh repo with the standard test schema.
// Step 1: init a fresh graph with the standard test schema.
// ─────────────────────────────────────────────────────────────────
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
let v_init = version_branch(&db, "main").await.unwrap();
@ -70,7 +70,9 @@ async fn composite_flow_canonical_lifecycle() {
// Step 2: load JSONL seed data (Person + Company nodes,
// Knows + WorksAt edges).
// ─────────────────────────────────────────────────────────────────
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
.await
.unwrap();
let v_after_load = version_branch(&db, "main").await.unwrap();
assert!(
v_after_load > v_init,
@ -119,19 +121,13 @@ async fn composite_flow_canonical_lifecycle() {
"feature",
MUTATION_QUERIES,
"insert_person_and_friend",
&mixed_params(
&[("$name", "Frank"), ("$friend", "Eve")],
&[("$age", 33)],
),
&mixed_params(&[("$name", "Frank"), ("$friend", "Eve")], &[("$age", 33)]),
)
.await
.expect("multi-statement insert+edge on feature");
// After: feature has 4 + Eve + Frank = 6 Persons.
let snap = db
.snapshot_of(ReadTarget::branch("feature"))
.await
.unwrap();
let snap = db.snapshot_of(ReadTarget::branch("feature")).await.unwrap();
let person_ds = snap.open("node:Person").await.unwrap();
assert_eq!(
person_ds.count_rows(None).await.unwrap(),
@ -321,14 +317,10 @@ async fn composite_flow_canonical_lifecycle() {
);
// Re-run a query to verify post-optimize correctness.
let post_optimize_total = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
let post_optimize_total =
query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert!(
!post_optimize_total.batches().is_empty(),
"queries must still work after optimize"
@ -385,14 +377,9 @@ async fn composite_flow_canonical_lifecycle() {
// post-cleanup. Post-cleanup mutation is omitted here pending
// resolution of the optimize-vs-manifest-pin interaction documented
// in Step 10.
let final_total = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert!(!final_total.batches().is_empty());
}
@ -431,10 +418,12 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
// Step 1: init + load on handle A.
let mut db_a = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append).await.unwrap();
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append)
.await
.unwrap();
assert_eq!(count_rows(&db_a, "node:Person").await, 4);
// Step 2: open handle B on the same repo. B's in-memory schema_source
// Step 2: open handle B on the same graph. B's in-memory schema_source
// cache is now a snapshot of `_schema.pg` at open time.
let db_b = Omnigraph::open(uri).await.unwrap();
@ -444,7 +433,7 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
// to disk.
const TEST_SCHEMA_V2: &str = "node Person {\n name: String @key\n age: I32?\n nickname: String?\n}\n\nnode Company {\n name: String @key\n}\n\nedge Knows: Person -> Person {\n since: Date?\n}\n\nedge WorksAt: Person -> Company\n";
let plan = db_a.apply_schema(TEST_SCHEMA_V2).await.unwrap();
assert!(plan.applied, "apply_schema must succeed on a clean repo");
assert!(plan.applied, "apply_schema must succeed on a clean graph");
assert!(
!plan.steps.is_empty(),
"apply_schema must record the AddProperty step"
@ -561,7 +550,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
// edges from test.jsonl).
// ─────────────────────────────────────────────────────────────────
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
.await
.unwrap();
assert_eq!(count_rows(&db, "node:Person").await, 4);
assert_eq!(count_rows(&db, "edge:Knows").await, 3);
@ -687,10 +678,7 @@ async fn composite_flow_multi_branch_sequential_merges() {
"feat-a",
MUTATION_QUERIES,
"insert_person_and_friend",
&mixed_params(
&[("$name", "Grace"), ("$friend", "Eve")],
&[("$age", 28)],
),
&mixed_params(&[("$name", "Grace"), ("$friend", "Eve")], &[("$age", 28)]),
)
.await
.expect("insert Grace + Knows(Grace → Eve) on feat-a");
@ -821,15 +809,14 @@ async fn composite_flow_multi_branch_sequential_merges() {
// `total_people` returns count(Person) = 10. Catches regressions in
// group-by/count execution against a multi-fragment table whose
// current shape was produced by two sequential merges.
let total_post_merges = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
assert_total(&total_post_merges, 10, "post both merges, main must total 10 Persons");
let total_post_merges = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert_total(
&total_post_merges,
10,
"post both merges, main must total 10 Persons",
);
// ─────────────────────────────────────────────────────────────────
// Step 14: time-travel to pre-merge-a-version. Reads must return
@ -1021,14 +1008,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
// correctly to disk but the reopened catalog can't bind them.
// ─────────────────────────────────────────────────────────────────
let mut db = db;
let post_reopen_total = query_main(
&mut db,
TEST_QUERIES,
"total_people",
&ParamMap::default(),
)
.await
.unwrap();
let post_reopen_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
.await
.unwrap();
assert_total(
&post_reopen_total,
10,

View file

@ -292,13 +292,11 @@ node Thing {
.await
.expect("first merge after ensure_indices must succeed");
db.ensure_indices().await.unwrap();
load_jsonl(&mut db, &delta, LoadMode::Merge)
.await
.expect(
"second merge after ensure_indices must succeed \
load_jsonl(&mut db, &delta, LoadMode::Merge).await.expect(
"second merge after ensure_indices must succeed \
(Window 2 canary: drop the FirstSeen setter in table_store.rs \
only when this stays green WITHOUT it)",
);
);
assert_eq!(count_rows(&db, "node:Thing").await, 55);
}
@ -346,7 +344,7 @@ async fn explicit_target_query_sees_other_writer_commits_without_refresh() {
let uri = dir.path().to_str().unwrap();
// Two independent handles to the same repo
// Two independent handles to the same graph
let mut db1 = Omnigraph::open(uri).await.unwrap();
let mut db2 = Omnigraph::open(uri).await.unwrap();

View file

@ -1910,9 +1910,14 @@ query docs_with_tag($tag: String) {
return { $d.slug }
}
"#;
let result = query_main(&mut db, queries, "docs_with_tag", &params(&[("$tag", "red")]))
.await
.unwrap();
let result = query_main(
&mut db,
queries,
"docs_with_tag",
&params(&[("$tag", "red")]),
)
.await
.unwrap();
let batch = result.concat_batches().unwrap();
let slugs = batch

View file

@ -41,6 +41,452 @@ async fn branch_create_failpoint_triggers() {
);
}
// Branch delete flips the manifest authority first, then reclaims the per-table
// forks best-effort. A failure during that reclaim (here, the
// `branch_delete.before_table_cleanup` failpoint, standing in for a transient
// object-store error) must NOT fail the call: the branch is already gone, and
// `cleanup` reconciles the stranded fork. The branch name is reusable after.
//
// Test flow:
//   1. create `feature` and write one Person row on it (forks the Person table),
//   2. delete the branch with the table-cleanup failpoint armed,
//   3. check `branch_list` no longer has `feature` while the Lance `feature`
//      branch on the Person table is still present,
//   4. run `cleanup` and check the Lance branch is gone,
//   5. recreate `feature` and write to it again.
#[tokio::test]
async fn branch_delete_partial_failure_converges_via_cleanup() {
let _scenario = FailScenario::setup();
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap().to_string();
let mut main = helpers::init_and_load(&dir).await;
main.branch_create("feature").await.unwrap();
// A second handle performs the write on `feature`.
let mut feature = Omnigraph::open(&uri).await.unwrap();
helpers::mutate_branch(
&mut feature,
"feature",
MUTATION_QUERIES,
"insert_person",
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
)
.await
.unwrap();
// Drop the writer handle; everything below goes through `main` or opens the
// Lance dataset directly.
drop(feature);
let person_uri = node_table_uri(&uri, "Person");
{
// Inspect the Person table's Lance branches directly, bypassing Omnigraph.
let ds = lance::Dataset::open(&person_uri).await.unwrap();
assert!(
ds.list_branches().await.unwrap().contains_key("feature"),
"precondition: the owned table fork exists before delete"
);
}
// Inject a failure during per-table cleanup, AFTER the manifest authority
// flip. branch_delete must still succeed (best-effort reclaim).
{
// The failpoint is scoped to this block so it is disarmed before `cleanup`.
let _fp = ScopedFailPoint::new("branch_delete.before_table_cleanup", "return");
main.branch_delete("feature").await.expect(
"branch_delete is best-effort after the manifest flip: a cleanup-step \
failure must not fail the call",
);
}
// Authority flipped: the branch is gone.
assert_eq!(main.branch_list().await.unwrap(), vec!["main".to_string()]);
// The eager reclaim failed, so the orphan is stranded until cleanup.
{
let ds = lance::Dataset::open(&person_uri).await.unwrap();
assert!(
ds.list_branches().await.unwrap().contains_key("feature"),
"failed eager reclaim should leave the orphan for cleanup to reconcile"
);
}
// cleanup converges: the orphan is reclaimed.
main.cleanup(omnigraph::db::CleanupPolicyOptions {
keep_versions: Some(1),
older_than: None,
})
.await
.unwrap();
{
let ds = lance::Dataset::open(&person_uri).await.unwrap();
assert!(
!ds.list_branches().await.unwrap().contains_key("feature"),
"cleanup should reconcile the orphaned fork away"
);
}
// The name is reusable after cleanup reclaims the orphan.
main.branch_create("feature").await.unwrap();
// Writing to the recreated branch must succeed now that the orphan is gone.
let mut feature2 = Omnigraph::open(&uri).await.unwrap();
helpers::mutate_branch(
&mut feature2,
"feature",
MUTATION_QUERIES,
"insert_person",
&mixed_params(&[("$name", "Frank")], &[("$age", 41)]),
)
.await
.unwrap();
}
// Reusing a branch name whose delete left an orphaned fork (before `cleanup`
// reconciles it) must fail with a clear, actionable error pointing at
// `cleanup`, not the opaque `ExpectedVersionMismatch` that leaks from the fork
// path. The recreate itself succeeds; the first write to the previously-forked
// table is where the stale orphan collides.
//
// Test flow:
//   1. create `feature` and write one Person row on it (forks the Person table),
//   2. delete the branch with the table-cleanup failpoint armed, leaving the
//      Person fork behind,
//   3. recreate `feature` and write to Person again WITHOUT running `cleanup`,
//   4. assert the write fails and the message mentions `cleanup` and the orphan,
//      and does not contain the version-mismatch wording.
#[tokio::test]
async fn recreate_over_orphaned_fork_before_cleanup_is_actionable() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_str().unwrap().to_string();

    // This handle only calls `branch_create` / `branch_delete`, which other
    // tests in this file invoke on non-`mut` handles, so it is not declared
    // `mut` (avoids an `unused_mut` warning).
    let main = helpers::init_and_load(&dir).await;
    main.branch_create("feature").await.unwrap();

    let mut feature = Omnigraph::open(&uri).await.unwrap();
    helpers::mutate_branch(
        &mut feature,
        "feature",
        MUTATION_QUERIES,
        "insert_person",
        &mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
    )
    .await
    .unwrap();
    drop(feature);

    // Partial delete: leaves the Person fork orphaned (cleanup not yet run).
    {
        let _fp = ScopedFailPoint::new("branch_delete.before_table_cleanup", "return");
        main.branch_delete("feature").await.unwrap();
    }

    // Recreate the name and write to the previously-forked table WITHOUT a
    // cleanup in between.
    main.branch_create("feature").await.unwrap();
    let mut feature2 = Omnigraph::open(&uri).await.unwrap();
    let err = helpers::mutate_branch(
        &mut feature2,
        "feature",
        MUTATION_QUERIES,
        "insert_person",
        &mixed_params(&[("$name", "Frank")], &[("$age", 41)]),
    )
    .await
    .expect_err("write should collide with the stale orphaned fork");

    let msg = err.to_string();
    // The error must tell the operator what to do (`cleanup`) and why (orphan).
    assert!(
        msg.contains("cleanup")
            && (msg.contains("orphan") || msg.contains("incomplete prior delete")),
        "expected an actionable orphaned-fork error pointing at cleanup, got: {msg}"
    );
    // ...and must not be the raw version-mismatch text from the fork path.
    assert!(
        !msg.contains("expected manifest table version"),
        "should not surface the opaque ExpectedVersionMismatch, got: {msg}"
    );
}
// `cleanup` is the convergence backstop, so a transient failure on one table
// must not abort the whole sweep. This test arms a one-shot version-GC failure
// and checks three things: `cleanup` still returns Ok, exactly one per-table
// stats entry carries an error, and the independent reconcile pass still
// reclaimed an orphaned fork.
#[tokio::test]
async fn cleanup_isolates_single_table_failure() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_str().unwrap().to_string();
    let mut db = helpers::init_and_load(&dir).await;

    // Plant a `ghost` Lance branch on the Person table so the reconcile pass
    // has an orphaned fork to reclaim.
    let person_uri = node_table_uri(&uri, "Person");
    {
        let mut person = lance::Dataset::open(&person_uri).await.unwrap();
        let head = person.version().version;
        person.create_branch("ghost", head, None).await.unwrap();
    }

    // `1*return` fires once: a single table's version GC fails, the rest run.
    let _fp = ScopedFailPoint::new("cleanup.table_gc", "1*return");
    let policy = omnigraph::db::CleanupPolicyOptions {
        keep_versions: Some(1),
        older_than: None,
    };
    let stats = db
        .cleanup(policy)
        .await
        .expect("a single table's GC failure must not abort cleanup");

    let mut failed_tables = 0usize;
    for entry in stats.iter() {
        if entry.error.is_some() {
            failed_tables += 1;
        }
    }
    assert_eq!(
        failed_tables, 1,
        "exactly one table's GC failure should be surfaced in stats, got {}",
        failed_tables
    );
    assert!(
        stats.len() >= 4,
        "every node+edge table should still appear in the stats"
    );

    // The reconcile pass does not depend on version GC, so the orphan is gone.
    let remaining = lance::Dataset::open(&person_uri)
        .await
        .unwrap()
        .list_branches()
        .await
        .unwrap();
    assert!(
        !remaining.contains_key("ghost"),
        "reconcile should reclaim the orphan despite the GC failure"
    );
}
// Counterpart of the version-GC isolation test for the OTHER cleanup loop:
// when the force-delete of one orphaned fork fails during reconciliation, the
// failure must be isolated (logged, not propagated) so the sweep continues,
// and a later cleanup converges. This is the loop the Devin finding was about.
#[tokio::test]
async fn cleanup_isolates_reconcile_failure() {
    /// True when the Lance dataset at `table_uri` still lists a `ghost` branch.
    async fn ghost_exists(table_uri: &str) -> bool {
        let dataset = lance::Dataset::open(table_uri).await.unwrap();
        dataset.list_branches().await.unwrap().contains_key("ghost")
    }

    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_str().unwrap().to_string();
    let mut db = helpers::init_and_load(&dir).await;

    // Same policy for both sweeps.
    let policy = || omnigraph::db::CleanupPolicyOptions {
        keep_versions: Some(1),
        older_than: None,
    };

    // Plant an orphaned `ghost` fork for the reconcile pass to find.
    let person_uri = node_table_uri(&uri, "Person");
    {
        let mut person = lance::Dataset::open(&person_uri).await.unwrap();
        let head = person.version().version;
        person.create_branch("ghost", head, None).await.unwrap();
    }

    // First sweep: the reconcile force-delete fails once. The sweep as a whole
    // must still return Ok.
    {
        let _fp = ScopedFailPoint::new("cleanup.reconcile_fork", "1*return");
        db.cleanup(policy())
            .await
            .expect("a reconcile force-delete failure must not abort cleanup");
    }

    // The failure was isolated rather than retried, so the orphan is still there.
    assert!(
        ghost_exists(&person_uri).await,
        "the orphan whose reclaim was injected-to-fail should remain"
    );

    // Second sweep, no failpoint armed: converges.
    db.cleanup(policy()).await.unwrap();
    assert!(
        !ghost_exists(&person_uri).await,
        "the second cleanup should reconcile the orphan"
    );
}
// The cleanup reconciler covers orphaned commit-graph branches as well as
// per-table forks. When a delete's best-effort commit-graph reclaim fails, a
// commit-graph orphan is left behind; the next cleanup must drop it.
#[tokio::test]
async fn cleanup_reclaims_orphaned_commit_graph_branch() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_str().unwrap().to_string();
    let mut db = helpers::init_and_load(&dir).await;
    db.branch_create("feature").await.unwrap();

    // Delete with the commit-graph reclaim failing: the manifest branch goes
    // away, the commit-graph `feature` branch stays behind as an orphan.
    {
        let _fp = ScopedFailPoint::new("branch_delete.before_commit_graph_reclaim", "return");
        db.branch_delete("feature").await.unwrap();
    }

    let graph_root = uri.trim_end_matches('/');
    let commits_uri = format!("{graph_root}/_graph_commits.lance");

    let before = lance::Dataset::open(&commits_uri)
        .await
        .unwrap()
        .list_branches()
        .await
        .unwrap();
    assert!(
        before.contains_key("feature"),
        "precondition: the commit-graph branch should be orphaned after the failed reclaim"
    );

    let policy = omnigraph::db::CleanupPolicyOptions {
        keep_versions: Some(1),
        older_than: None,
    };
    db.cleanup(policy).await.unwrap();

    let after = lance::Dataset::open(&commits_uri)
        .await
        .unwrap()
        .list_branches()
        .await
        .unwrap();
    assert!(
        !after.contains_key("feature"),
        "cleanup should reclaim the orphaned commit-graph branch"
    );
}
// When a branch_delete's best-effort commit-graph reclaim fails, the
// commit-graph keeps a "zombie" branch of that name. Recreating the name must
// heal the zombie and succeed (branch_create force-deletes a stale
// commit-graph ref since the manifest branch is created fresh) rather than
// failing on the leftover ref.
#[tokio::test]
async fn branch_create_recreates_over_commit_graph_zombie() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let graph_uri = dir.path().to_str().unwrap();
    let db = Omnigraph::init(graph_uri, helpers::TEST_SCHEMA)
        .await
        .unwrap();
    db.branch_create("feature").await.unwrap();

    {
        // With the reclaim failing, the commit-graph `feature` zombie survives
        // the delete while the manifest authority still flips.
        let _fail_reclaim =
            ScopedFailPoint::new("branch_delete.before_commit_graph_reclaim", "return");
        db.branch_delete("feature").await.unwrap();
    }

    let after_delete = db.branch_list().await.unwrap();
    assert_eq!(after_delete, vec![String::from("main")]);

    db.branch_create("feature")
        .await
        .expect("branch_create should heal the zombie commit-graph branch and succeed");

    let after_recreate = db.branch_list().await.unwrap();
    assert!(after_recreate.contains(&"feature".to_string()));
}
// branch_create works authority-then-derived: the manifest branch (the
// authority) is created first, the commit-graph branch is derived from it. If
// the derived step cannot complete, the manifest branch must be rolled back so
// the branch does not half-exist. The failpoint used here fires right after
// the manifest create and stands in for any post-authority failure.
#[tokio::test]
async fn branch_create_rolls_back_manifest_on_commit_graph_failure() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let graph_uri = dir.path().to_str().unwrap();
    let db = Omnigraph::init(graph_uri, helpers::TEST_SCHEMA)
        .await
        .unwrap();

    // Arm the failpoint only for the duration of the create call.
    let failpoint = ScopedFailPoint::new("branch_create.after_manifest_branch_create", "return");
    let outcome = db.branch_create("feature").await;
    drop(failpoint);
    let err = outcome.unwrap_err();

    let branches = db.branch_list().await.unwrap();
    assert!(
        !branches.contains(&"feature".to_string()),
        "branch_create must roll back the manifest branch when the derived \
         commit-graph branch fails, got error: {err}"
    );
}
// A fork collision must be classified by the manifest authority, not by Lance
// branch versions. When a concurrent first-write legitimately wins the fork
// race, the loser sees a version mismatch — but that is a stale snapshot, not
// an orphan, so it must be a retryable "refresh and retry", never a misleading
// "run cleanup".
//
// Ordering is made deterministic (no sleeps) via a callback at the fork point:
// `compare_exchange` lets only the FIRST arrival (writer A) record readiness and
// block until released; later arrivals (writer B) fall through. The test waits
// on the readiness flag, lets B win and commit the fork, then releases A.
//
// The two flags below are process-wide statics shared between the test body and
// the failpoint callback; the test resets both to `false` before use.
// Set by the first caller to reach the `fork.before_classify` callback.
static FORK_A_AT_POINT: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
// Set by the test body to let the parked first caller continue.
static FORK_RELEASE_A: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
#[tokio::test(flavor = "multi_thread")]
async fn fork_collision_with_live_concurrent_fork_is_retryable() {
use std::sync::atomic::Ordering::SeqCst;
let _scenario = FailScenario::setup();
// Reset the shared flags so the test starts from a known state.
FORK_A_AT_POINT.store(false, SeqCst);
FORK_RELEASE_A.store(false, SeqCst);
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap().to_string();
let main = helpers::init_and_load(&dir).await;
main.branch_create("feature").await.unwrap();
// First arrival (A) records readiness and blocks until released; the rest
// (B) fall through immediately. Bounded spin so a mistake can't hang forever.
fail::cfg_callback("fork.before_classify", || {
// `compare_exchange(false, true)` succeeds for exactly one caller.
if FORK_A_AT_POINT
.compare_exchange(false, true, SeqCst, SeqCst)
.is_ok()
{
// Up to 2000 iterations of 5 ms each (about 10 s) before giving up.
for _ in 0..2000 {
if FORK_RELEASE_A.load(SeqCst) {
break;
}
// Blocking sleep: this parks the calling thread, not just the task.
std::thread::sleep(std::time::Duration::from_millis(5));
}
}
})
.unwrap();
let uri_a = uri.clone();
// Writer A runs as a separate task with its own handle; its Result is
// returned through the JoinHandle and inspected below.
let writer_a = tokio::spawn(async move {
let mut a = Omnigraph::open(&uri_a).await.unwrap();
helpers::mutate_branch(
&mut a,
"feature",
MUTATION_QUERIES,
"insert_person",
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
)
.await
});
// Wait (bounded) until A is parked at the fork point.
// Up to 600 polls of 5 ms each (about 3 s).
for _ in 0..600 {
if FORK_A_AT_POINT.load(SeqCst) {
break;
}
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
}
assert!(
FORK_A_AT_POINT.load(SeqCst),
"writer A never reached the fork point"
);
// B wins the fork and commits it.
let mut b = Omnigraph::open(&uri).await.unwrap();
helpers::mutate_branch(
&mut b,
"feature",
MUTATION_QUERIES,
"insert_person",
&mixed_params(&[("$name", "Frank")], &[("$age", 41)]),
)
.await
.unwrap();
// Release A; it resumes, re-reads the manifest, and sees the fork is live.
FORK_RELEASE_A.store(true, SeqCst);
// The outer `unwrap` is the task join result; the inner Result is A's write.
let err = writer_a
.await
.unwrap()
.expect_err("A's stale-snapshot fork should be a retryable conflict");
// Unregister the callback now that both writers are done.
fail::remove("fork.before_classify");
let msg = err.to_string();
assert!(
!msg.contains("cleanup"),
"a live concurrent fork must not be misclassified as an orphan, got: {msg}"
);
// Either wording is accepted as the retryable stale-view signal.
assert!(
msg.contains("refresh and retry") || msg.contains("expected manifest table version"),
"expected a retryable stale-view error, got: {msg}"
);
}
#[tokio::test(flavor = "multi_thread")]
async fn graph_publish_failpoint_triggers_before_commit_append() {
let _scenario = FailScenario::setup();
@ -66,7 +512,7 @@ async fn graph_publish_failpoint_triggers_before_commit_append() {
// Atomic schema apply: schema apply writes staging files first, then commits
// the manifest, then renames staging → final. Tests below inject crashes at
// the two boundaries and assert that reopening the repo yields a consistent
// the two boundaries and assert that reopening the graph yields a consistent
// state.
#[tokio::test]
@ -303,14 +749,10 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
let person_uri = node_table_uri(&uri, "Person");
{
let _pause_delete = ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
let _pause_delete =
ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
let delete_params = helpers::params(&[("$name", "Alice")]);
let delete = db.mutate(
"main",
MUTATION_QUERIES,
"remove_person",
&delete_params,
);
let delete = db.mutate("main", MUTATION_QUERIES, "remove_person", &delete_params);
tokio::pin!(delete);
let mut concurrent_update_succeeded = false;
@ -325,15 +767,18 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
"set_age",
&mixed_params(&[("$name", "Bob")], &[("$age", 26)]),
)
.await
.is_ok()
.await
.is_ok()
{
concurrent_update_succeeded = true;
break;
}
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
}
assert!(concurrent_update_succeeded, "concurrent update must land while delete is paused");
assert!(
concurrent_update_succeeded,
"concurrent update must land while delete is paused"
);
fail::remove("mutation.delete_node_pre_primary_delete");
let err = delete.await.unwrap_err();
@ -925,13 +1370,13 @@ async fn ensure_indices_stage_btree_failure_leaves_existing_tables_writable() {
.expect("Person mutation must succeed after the failed schema apply — existing tables are not drifted");
}
fn assert_no_staging_files(repo: &std::path::Path) {
fn assert_no_staging_files(graph: &std::path::Path) {
for name in [
"_schema.pg.staging",
"_schema.ir.json.staging",
"__schema_state.json.staging",
] {
let path = repo.join(name);
let path = graph.join(name);
assert!(
!path.exists(),
"staging file {} still exists after recovery",
@ -1164,7 +1609,7 @@ edge WorksAt: Person -> Company
// NEW schema (city column on Person, Tag node type) — not the old.
// Without the schema-staging coordination, the schema-state
// recovery would have deleted the staging files (because manifest
// hadn't advanced when it ran), leaving a corrupt repo with new-
// hadn't advanced when it ran), leaving a corrupt graph with new-
// schema data on disk but old-schema catalog.
let live_schema = std::fs::read_to_string(dir.path().join("_schema.pg")).unwrap();
assert!(
@ -1667,3 +2112,143 @@ async fn ensure_indices_phase_b_failure_does_not_leak_sidecar_when_no_work_neede
"_graph_commit_recoveries.lance must NOT exist when no sidecar was processed"
);
}
// ─── MR-668 PR 2a: Omnigraph::init cleanup on partial failure ──────────────
//
// `init_with_storage` writes three schema artifacts before invoking
// `GraphCoordinator::init`. Without cleanup, a failure between any of those
// steps left orphan files behind, making the URI unusable for a retry of
// `init` (it would refuse because `_schema.pg` already exists). The tests
// below pin: on failpoint trigger at each of the three phase boundaries,
// the three schema files are removed before the error is returned.
//
// Coverage note: the third boundary (`init.after_coordinator_init`) only
// asserts cleanup of the schema files. Lance per-type directories and
// `__manifest/` are NOT cleaned up — that requires a recursive
// `StorageAdapter::delete_prefix` primitive deferred along with
// `DELETE /graphs/{id}` (MR-668 PR 2b). The orphan Lance directories
// after a coordinator-init-phase failure are documented as a known
// limitation.
/// A failure injected right after `_schema.pg` is written must surface the
/// failpoint error and leave no `_schema.pg` behind.
#[tokio::test]
async fn init_failpoint_after_schema_pg_written_cleans_up_schema_file() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let graph_root = dir.path();
    let uri = graph_root.to_str().unwrap();

    let _failpoint = ScopedFailPoint::new("init.after_schema_pg_written", "return");
    let Err(err) = Omnigraph::init(uri, helpers::TEST_SCHEMA).await else {
        panic!("expected Omnigraph::init to fail at the configured failpoint");
    };

    let rendered = err.to_string();
    assert!(
        rendered.contains("injected failpoint triggered: init.after_schema_pg_written"),
        "got: {err}"
    );

    // Only `_schema.pg` was written at this phase boundary, but the
    // cleanup attempts all three — `delete` treats not-found as Ok,
    // so the other two deletes are no-ops.
    let schema_pg = graph_root.join("_schema.pg");
    assert!(
        !schema_pg.exists(),
        "_schema.pg must be cleaned up after init failure"
    );
}
/// A failure injected after the schema contract is written must surface the
/// failpoint error and leave none of the three schema artifacts behind.
#[tokio::test]
async fn init_failpoint_after_schema_contract_written_cleans_up_all_schema_files() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let graph_root = dir.path();
    let uri = graph_root.to_str().unwrap();

    let _failpoint = ScopedFailPoint::new("init.after_schema_contract_written", "return");
    let Err(err) = Omnigraph::init(uri, helpers::TEST_SCHEMA).await else {
        panic!("expected Omnigraph::init to fail at the configured failpoint");
    };

    let rendered = err.to_string();
    assert!(
        rendered.contains("injected failpoint triggered: init.after_schema_contract_written"),
        "got: {err}"
    );

    // Checked in the same order as before: source, IR, then state file.
    for name in ["_schema.pg", "_schema.ir.json", "__schema_state.json"] {
        assert!(
            !graph_root.join(name).exists(),
            "{name} must be cleaned up"
        );
    }
}
/// A failure injected after the coordinator-init phase must surface the
/// failpoint error and leave none of the three schema artifacts behind.
/// Lance directories created by that phase are deliberately not checked.
#[tokio::test]
async fn init_failpoint_after_coordinator_init_cleans_up_schema_files() {
    let _scenario = FailScenario::setup();
    let dir = tempfile::tempdir().unwrap();
    let graph_root = dir.path();
    let uri = graph_root.to_str().unwrap();

    let _failpoint = ScopedFailPoint::new("init.after_coordinator_init", "return");
    let Err(err) = Omnigraph::init(uri, helpers::TEST_SCHEMA).await else {
        panic!("expected Omnigraph::init to fail at the configured failpoint");
    };

    let rendered = err.to_string();
    assert!(
        rendered.contains("injected failpoint triggered: init.after_coordinator_init"),
        "got: {err}"
    );

    // Schema files are cleaned up by `best_effort_cleanup_init_artifacts`.
    for name in ["_schema.pg", "_schema.ir.json", "__schema_state.json"] {
        assert!(
            !graph_root.join(name).exists(),
            "{name} must be cleaned up after late-phase init failure"
        );
    }

    // Documented limitation: Lance per-type datasets and `__manifest/`
    // created by `GraphCoordinator::init` are NOT cleaned up — recursive
    // deletion requires the deferred `delete_prefix` primitive. Nothing here
    // asserts their absence; orphan directories are simply not validated.
    // When PR 2b lands, this test can be tightened to assert the graph
    // root is fully empty.
}
/// Smoke test for the error-precedence contract: the error returned from a
/// failed `init` must name the failpoint, not a cleanup failure.
#[tokio::test]
async fn init_failpoint_returns_original_error_not_cleanup_error() {
    // Cleanup is best-effort. If `storage.delete` fails (e.g. a transient
    // network blip on S3), the original init error must still surface rather
    // than be masked by the cleanup failure. The cleanup currently logs via
    // `tracing::warn`; delete failures cannot easily be fault-injected
    // without another seam, so this only triggers the failpoint and checks
    // that the returned error references it.
    let _scenario = FailScenario::setup();
    let graph_dir = tempfile::tempdir().unwrap();
    let graph_uri = graph_dir.path().to_str().unwrap();
    let _failpoint = ScopedFailPoint::new("init.after_schema_pg_written", "return");

    let Err(init_err) = Omnigraph::init(graph_uri, helpers::TEST_SCHEMA).await else {
        panic!("expected Omnigraph::init to fail at the configured failpoint");
    };

    // The failpoint cause must be visible in the rendered error.
    let msg = init_err.to_string();
    let names_failpoint = msg.contains("init.after_schema_pg_written");
    assert!(
        names_failpoint,
        "init error must surface the failpoint cause, got: {msg}"
    );
}

View file

@ -95,11 +95,11 @@ const FORBIDDEN_PATTERNS: &[&str] = &[
/// provide the staged primitives or to maintain the system tables
/// (commit graph, manifest).
const ALLOW_LIST_FILES: &[&str] = &[
"table_store.rs", // The storage layer itself.
"storage_layer.rs", // The trait module.
"commit_graph.rs", // Maintains `_graph_commits.lance` system table.
"graph_coordinator.rs", // Drives the manifest publisher / branch coordinator.
"recovery_audit.rs", // Maintains `_graph_commit_recoveries.lance` (recovery audit trail).
"table_store.rs", // The storage layer itself.
"storage_layer.rs", // The trait module.
"commit_graph.rs", // Maintains `_graph_commits.lance` system table.
"graph_coordinator.rs", // Drives the manifest publisher / branch coordinator.
"recovery_audit.rs", // Maintains `_graph_commit_recoveries.lance` (recovery audit trail).
];
/// Directories exempt from the guard. Files under these paths may use
@ -168,10 +168,7 @@ fn engine_code_does_not_call_forbidden_lance_apis() {
// comments are documentation, not code use. The trait
// surface (sealed + trait-only) is the actual enforcement;
// this test only catches code use.
if trimmed.starts_with("//")
|| trimmed.starts_with("/*")
|| trimmed.starts_with("*")
{
if trimmed.starts_with("//") || trimmed.starts_with("/*") || trimmed.starts_with("*") {
continue;
}
// Allow lines marked with the sentinel on the SAME line or

View file

@ -44,7 +44,7 @@ query insert_person_and_friend($name: String, $age: I32, $friend: String) {
}
"#;
/// Init a repo and load the standard test data.
/// Init a graph and load the standard test data.
pub async fn init_and_load(dir: &tempfile::TempDir) -> Omnigraph {
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -249,7 +249,7 @@ pub fn vector_and_string_params(
map
}
pub fn s3_test_repo_uri(suite: &str) -> Option<String> {
pub fn s3_test_graph_uri(suite: &str) -> Option<String> {
let bucket = std::env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
let prefix = std::env::var("OMNIGRAPH_S3_TEST_PREFIX")
.ok()

View file

@ -110,8 +110,8 @@ impl FollowUpMutation {
}
}
pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
let ids = sidecar_operation_ids(repo_root);
pub fn single_sidecar_operation_id(graph_root: &Path) -> String {
let ids = sidecar_operation_ids(graph_root);
assert_eq!(
ids.len(),
1,
@ -121,8 +121,8 @@ pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
ids.into_iter().next().unwrap()
}
pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
let dir = repo_root.join("__recovery");
pub fn sidecar_operation_ids(graph_root: &Path) -> Vec<String> {
let dir = graph_root.join("__recovery");
if !dir.exists() {
return Vec::new();
}
@ -143,10 +143,10 @@ pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
ids
}
pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<String> {
pub async fn branch_head_commit_id(graph_root: &Path, branch: &str) -> Result<String> {
let graph = match branch {
"main" => CommitGraph::open(&repo_uri(repo_root)).await?,
branch => CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?,
"main" => CommitGraph::open(&graph_uri(graph_root)).await?,
branch => CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?,
};
graph.head_commit_id().await?.ok_or_else(|| {
OmniError::manifest_internal(format!("commit graph for branch {branch} has no head"))
@ -154,52 +154,52 @@ pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<Str
}
pub async fn assert_post_recovery_invariants(
repo_root: &Path,
graph_root: &Path,
operation_id: &str,
expectation: RecoveryExpectation,
) -> Result<()> {
match expectation {
RecoveryExpectation::RolledForward { tables } => {
assert_sidecar_absent(repo_root, operation_id);
let audit = read_audit_row(repo_root, operation_id).await?;
assert_sidecar_absent(graph_root, operation_id);
let audit = read_audit_row(graph_root, operation_id).await?;
assert_eq!(
audit.recovery_kind, "RolledForward",
"audit row for {operation_id} recorded the wrong recovery_kind",
);
assert_manifest_pins_match_lance_heads(repo_root, &tables).await?;
assert_audit_to_versions_match_lance_heads(repo_root, &audit, &tables).await?;
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(repo_root, &tables).await?;
assert_idempotent_reopen(repo_root, operation_id).await?;
run_follow_up_mutations(repo_root, tables).await?;
assert_manifest_pins_match_lance_heads(graph_root, &tables).await?;
assert_audit_to_versions_match_lance_heads(graph_root, &audit, &tables).await?;
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(graph_root, &tables).await?;
assert_idempotent_reopen(graph_root, operation_id).await?;
run_follow_up_mutations(graph_root, tables).await?;
}
RecoveryExpectation::RolledBack { tables } => {
assert_sidecar_absent(repo_root, operation_id);
let audit = read_audit_row(repo_root, operation_id).await?;
assert_sidecar_absent(graph_root, operation_id);
let audit = read_audit_row(graph_root, operation_id).await?;
assert_eq!(
audit.recovery_kind, "RolledBack",
"audit row for {operation_id} recorded the wrong recovery_kind",
);
assert_rollback_outcomes_record_drift(&audit);
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(repo_root, &tables).await?;
assert_idempotent_reopen(repo_root, operation_id).await?;
run_follow_up_mutations(repo_root, tables).await?;
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
assert_non_main_did_not_move_main(graph_root, &tables).await?;
assert_idempotent_reopen(graph_root, operation_id).await?;
run_follow_up_mutations(graph_root, tables).await?;
}
RecoveryExpectation::Deferred => {
assert!(
sidecar_path(repo_root, operation_id).exists(),
sidecar_path(graph_root, operation_id).exists(),
"deferred recovery must leave sidecar {operation_id} on disk",
);
assert!(
read_audit_row(repo_root, operation_id).await.is_err(),
read_audit_row(graph_root, operation_id).await.is_err(),
"deferred recovery must not record an audit row for {operation_id}",
);
}
RecoveryExpectation::NoOp => {
assert_sidecar_absent(repo_root, operation_id);
assert_sidecar_absent(graph_root, operation_id);
assert!(
read_audit_row(repo_root, operation_id).await.is_err(),
read_audit_row(graph_root, operation_id).await.is_err(),
"no-op recovery must not record an audit row for {operation_id}",
);
}
@ -216,24 +216,24 @@ fn branch_context(tables: &[TableExpectation]) -> Option<String> {
.map(str::to_string)
}
fn sidecar_path(repo_root: &Path, operation_id: &str) -> PathBuf {
repo_root
fn sidecar_path(graph_root: &Path, operation_id: &str) -> PathBuf {
graph_root
.join("__recovery")
.join(format!("{operation_id}.json"))
}
fn assert_sidecar_absent(repo_root: &Path, operation_id: &str) {
fn assert_sidecar_absent(graph_root: &Path, operation_id: &str) {
assert!(
!sidecar_path(repo_root, operation_id).exists(),
!sidecar_path(graph_root, operation_id).exists(),
"recovery sidecar {operation_id} must be deleted after successful recovery",
);
}
async fn assert_manifest_pins_match_lance_heads(
repo_root: &Path,
graph_root: &Path,
tables: &[TableExpectation],
) -> Result<()> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let db = Omnigraph::open(&uri).await?;
for table in tables {
let (entry, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
@ -254,11 +254,11 @@ async fn assert_manifest_pins_match_lance_heads(
}
async fn assert_audit_to_versions_match_lance_heads(
repo_root: &Path,
graph_root: &Path,
audit: &RecoveryAuditRow,
tables: &[TableExpectation],
) -> Result<()> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let db = Omnigraph::open(&uri).await?;
for table in tables {
let (_, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
@ -301,10 +301,10 @@ fn assert_rollback_outcomes_record_drift(audit: &RecoveryAuditRow) {
}
async fn assert_non_main_did_not_move_main(
repo_root: &Path,
graph_root: &Path,
tables: &[TableExpectation],
) -> Result<()> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let db = Omnigraph::open(&uri).await?;
let main = db.snapshot_of(ReadTarget::branch("main")).await?;
for table in tables {
@ -327,14 +327,14 @@ async fn assert_non_main_did_not_move_main(
}
async fn assert_recovery_commit_shape(
repo_root: &Path,
graph_root: &Path,
audit: &RecoveryAuditRow,
tables: &[TableExpectation],
) -> Result<()> {
let branch = branch_context(tables);
let expected_parent = expected_recovery_parent(tables)?;
let branch = branch.as_deref();
let commit = read_recovery_commit(repo_root, audit, branch).await?;
let commit = read_recovery_commit(graph_root, audit, branch).await?;
assert_eq!(
commit.actor_id.as_deref(),
@ -362,7 +362,7 @@ async fn assert_recovery_commit_shape(
);
if let Some(branch) = branch {
let graph = CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?;
let graph = CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?;
let commits = graph.load_commits().await?;
let parent = commit.parent_commit_id.as_deref().ok_or_else(|| {
OmniError::manifest_internal(format!(
@ -403,12 +403,12 @@ fn expected_recovery_parent(tables: &[TableExpectation]) -> Result<Option<String
Ok(expected)
}
async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Result<()> {
let before = matching_audit_rows(repo_root, operation_id).await?;
let uri = repo_uri(repo_root);
async fn assert_idempotent_reopen(graph_root: &Path, operation_id: &str) -> Result<()> {
let before = matching_audit_rows(graph_root, operation_id).await?;
let uri = graph_uri(graph_root);
let _db = Omnigraph::open(&uri).await?;
assert_sidecar_absent(repo_root, operation_id);
let after = matching_audit_rows(repo_root, operation_id).await?;
assert_sidecar_absent(graph_root, operation_id);
let after = matching_audit_rows(graph_root, operation_id).await?;
assert_eq!(
after.len(),
before.len(),
@ -417,14 +417,14 @@ async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Resul
Ok(())
}
async fn run_follow_up_mutations(repo_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
async fn run_follow_up_mutations(graph_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
let mut db: Option<Omnigraph> = None;
for table in tables {
let Some(mutation) = table.follow_up_mutation else {
continue;
};
if db.is_none() {
db = Some(Omnigraph::open(&repo_uri(repo_root)).await?);
db = Some(Omnigraph::open(&graph_uri(graph_root)).await?);
}
let db = db.as_mut().unwrap();
db.mutate(
@ -480,11 +480,11 @@ async fn lance_head_for_entry(root_uri: &str, entry: &SubTableEntry) -> Result<u
}
async fn read_recovery_commit(
repo_root: &Path,
graph_root: &Path,
audit: &RecoveryAuditRow,
branch: Option<&str>,
) -> Result<GraphCommit> {
let uri = repo_uri(repo_root);
let uri = graph_uri(graph_root);
let graph = match branch {
Some(branch) => CommitGraph::open_at_branch(&uri, branch).await?,
None => CommitGraph::open(&uri).await?,
@ -502,8 +502,8 @@ async fn read_recovery_commit(
})
}
async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
let mut rows = matching_audit_rows(repo_root, operation_id).await?;
async fn read_audit_row(graph_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
let mut rows = matching_audit_rows(graph_root, operation_id).await?;
if rows.len() != 1 {
return Err(OmniError::manifest_internal(format!(
"expected exactly one recovery audit row for {operation_id}, got {}",
@ -514,10 +514,10 @@ async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<Recovery
}
async fn matching_audit_rows(
repo_root: &Path,
graph_root: &Path,
operation_id: &str,
) -> Result<Vec<RecoveryAuditRow>> {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return Ok(Vec::new());
}
@ -575,6 +575,6 @@ fn string_column<'a>(batch: &'a RecordBatch, name: &str) -> Result<&'a StringArr
})
}
fn repo_uri(repo_root: &Path) -> String {
repo_root.to_str().unwrap().to_string()
fn graph_uri(graph_root: &Path) -> String {
graph_root.to_str().unwrap().to_string()
}

View file

@ -242,3 +242,136 @@ async fn _compile_delete_result_field_shape() -> lance::Result<()> {
let _num_deleted: u64 = result.num_deleted_rows;
Ok(())
}
// --- Guard 9: force_delete_branch semantics --------------------------------
//
// The branch-delete reconciler (`db/omnigraph/optimize.rs::reconcile_orphaned_branches`)
// and the eager best-effort reclaim in `cleanup_deleted_branch_tables` call
// `force_delete_branch` to drop orphaned branch refs. The single-authority
// design relies on three facts pinned here:
// 1. plain `delete_branch` errors on a missing ref (so the design uses the
// force variant instead);
// 2. `force_delete_branch` removes an existing (forked) branch — the orphan
// case, where a `tree/{branch}/` exists;
// 3. `force_delete_branch` on a *fully-absent* branch (no tree dir) still
// errors on the local store, because `remove_dir_all`'s NotFound is not
// caught for Lance's native error variant. `TableStore::force_delete_branch`
// wraps this to be fully idempotent. Pin the raw quirk so a future Lance
// fix (which would let us simplify the wrapper) is noticed.
#[tokio::test]
async fn force_delete_branch_semantics() {
    let scratch = tempfile::tempdir().unwrap();
    let dataset_path = scratch.path().join("guard9.lance");
    let dataset_uri = dataset_path.to_str().unwrap();
    let mut dataset = fresh_dataset(dataset_uri).await;

    // (1) Plain delete of a branch that was never created must error
    //     (RefNotFound).
    let plain_delete = dataset.delete_branch("nope").await;
    assert!(
        plain_delete.is_err(),
        "Dataset::delete_branch on a missing ref should error; if this is now \
         Ok, the reconciler could drop the force variant."
    );

    // (2) The force variant removes an existing (forked) branch.
    let base_version = dataset.version().version;
    dataset
        .create_branch("feature", base_version, None)
        .await
        .unwrap();
    dataset.force_delete_branch("feature").await.unwrap();
    let remaining = dataset.list_branches().await.unwrap();
    assert!(
        !remaining.contains_key("feature"),
        "force_delete_branch should remove an existing branch ref"
    );

    // (3) Quirk: the force variant still errors on a fully-absent branch on
    //     the local store (worked around by TableStore::force_delete_branch).
    let absent_delete = dataset.force_delete_branch("never").await;
    assert!(
        absent_delete.is_err(),
        "force_delete_branch on a fully-absent branch no longer errors — \
         TableStore::force_delete_branch's NotFound tolerance can be simplified."
    );
}
// --- Guard 10: blob-column compaction is still broken in this Lance --------
//
// `db/omnigraph/optimize.rs` skips tables with blob columns while
// `LANCE_SUPPORTS_BLOB_COMPACTION = false`: Lance `compact_files` forces
// `BlobHandling::AllBinary`, and the blob-v2 struct decoder mis-counts columns
// ("more fields in the schema than provided column indices"), failing even a
// pristine uniform-V2_2 multi-fragment blob table. Reads are unaffected (they
// use descriptor handling).
//
// WHEN THIS TEST TURNS RED (compact_files no longer errors), the Lance bug is
// fixed: flip `LANCE_SUPPORTS_BLOB_COMPACTION` to true in optimize.rs, drop the
// blob-skip branch + the `optimize_skips_blob_table_and_reports_skip`
// skip assertions in maintenance.rs, and re-pin docs/dev/lance.md.
#[tokio::test]
async fn compact_files_still_fails_on_blob_columns() {
use arrow_array::{LargeBinaryArray, StructArray};
// Builds an `n`-row batch: `id` values are `n{i}` for i in start..start+n,
// and `content` is a struct column built from `blob_field`'s child fields
// with inline bytes `blob{i}` and an all-null second child (`blob_uri`).
fn blob_batch(start: i32, n: i32) -> RecordBatch {
let ids: Vec<String> = (start..start + n).map(|i| format!("n{i}")).collect();
let data =
LargeBinaryArray::from_iter_values((start..start + n).map(|i| format!("blob{i}")));
let blob_uri = StringArray::from(vec![None::<&str>; n as usize]);
// Take the child fields from `blob_field` itself so the struct array
// matches the column declared in the schema below.
let DataType::Struct(fields) = lance::blob::blob_field("content", true).data_type().clone()
else {
unreachable!("blob_field is always a Struct");
};
let content = StructArray::new(
fields,
vec![Arc::new(data) as _, Arc::new(blob_uri) as _],
None,
);
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Utf8, false),
lance::blob::blob_field("content", true),
]));
RecordBatch::try_new(
schema,
vec![Arc::new(StringArray::from(ids)) as _, Arc::new(content) as _],
)
.unwrap()
}
// Writes `batch` to `uri` as one `Dataset::write` call in the given mode;
// panics (via `unwrap`) if the write itself fails.
async fn write(uri: &str, batch: RecordBatch, mode: WriteMode) {
let schema = batch.schema();
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
// Blob v2 requires file version >= 2.2; without the pin the *write*
// would fail with a different error, masking the guard's intent.
let params = WriteParams {
mode,
enable_stable_row_ids: true,
data_storage_version: Some(LanceFileVersion::V2_2),
..Default::default()
};
Dataset::write(reader, uri, Some(params)).await.unwrap();
}
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().join("guard10-blob.lance");
let uri = uri.to_str().unwrap();
// Uniform V2_2, two fragments → forces compaction to actually rewrite.
write(uri, blob_batch(0, 2), WriteMode::Create).await;
write(uri, blob_batch(100, 2), WriteMode::Append).await;
let mut ds = Dataset::open(uri).await.unwrap();
// Precondition: the create + append above must have produced at least two
// fragments, otherwise the compaction call below proves nothing.
assert!(
ds.get_fragments().len() >= 2,
"guard needs a multi-fragment table to trigger a real compaction rewrite"
);
// Run compaction with default options; this guard expects it to fail.
let result = compact_files(&mut ds, CompactionOptions::default(), None).await;
let err = result.expect_err(
"compact_files unexpectedly SUCCEEDED on a blob table — the Lance blob-v2 \
compaction bug is fixed. Flip LANCE_SUPPORTS_BLOB_COMPACTION to true in \
db/omnigraph/optimize.rs, remove the blob-skip branch, and re-pin docs/dev/lance.md.",
);
// Pin the specific error text so an unrelated failure is not mistaken for
// the known blob-compaction bug.
assert!(
err.to_string()
.contains("more fields in the schema than provided column indices"),
"blob compaction failed with an unexpected error (Lance internals may have \
shifted): {err}"
);
}

View file

@ -2,14 +2,14 @@ mod helpers;
use std::fs;
use omnigraph::db::{Omnigraph, ReadTarget};
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
use omnigraph::db::{InitOptions, Omnigraph, ReadTarget};
use omnigraph_compiler::schema::parser::parse_schema;
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
use helpers::*;
#[tokio::test]
async fn init_creates_repo() {
async fn init_creates_graph() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
@ -34,7 +34,7 @@ async fn init_creates_repo() {
}
#[tokio::test]
async fn open_reads_existing_repo() {
async fn open_reads_existing_graph() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
@ -49,7 +49,7 @@ async fn open_reads_existing_repo() {
}
#[tokio::test]
async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
async fn open_bootstraps_legacy_schema_state_for_main_only_graph() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -64,7 +64,7 @@ async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
}
#[tokio::test]
async fn open_rejects_legacy_repo_with_public_branch() {
async fn open_rejects_legacy_graph_with_public_branch() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -74,7 +74,7 @@ async fn open_rejects_legacy_repo_with_public_branch() {
fs::remove_file(dir.path().join("__schema_state.json")).unwrap();
let err = match Omnigraph::open(uri).await {
Ok(_) => panic!("expected legacy repo with public branch to fail schema bootstrap"),
Ok(_) => panic!("expected legacy graph with public branch to fail schema bootstrap"),
Err(err) => err,
};
assert!(
@ -185,3 +185,122 @@ async fn snapshot_version_is_pinned() {
assert_eq!(snap1.version(), v1);
}
/// Regression for the `Omnigraph::init` re-init footgun (MR-668 follow-up).
///
/// A second `init` against a URI that already holds a graph must NOT modify
/// or destroy that graph's schema artifacts. The behavior this guards
/// against was destructive either way: the `write_text(_schema.pg, ...)`
/// call at the top of `init_storage_phase` overwrote the existing file
/// before any preflight, and `best_effort_cleanup_init_artifacts` would
/// later delete all three files if the inner `GraphCoordinator::init`
/// failed. Both outcomes corrupt an existing graph.
///
/// Expected contract: strict-mode `init` (no `force` flag) errors out
/// before touching any file, and the schema artifacts match their
/// pre-attempt contents byte-for-byte.
#[tokio::test]
async fn init_on_existing_graph_uri_does_not_destroy_existing_schema() {
    const SCHEMA_ARTIFACTS: [&str; 3] = ["_schema.pg", "_schema.ir.json", "__schema_state.json"];

    let graph_dir = tempfile::tempdir().unwrap();
    let graph_uri = graph_dir.path().to_str().unwrap();

    // Create the first graph, then capture each schema artifact's contents.
    Omnigraph::init(graph_uri, TEST_SCHEMA).await.unwrap();
    let snapshots = SCHEMA_ARTIFACTS.map(|name| {
        let contents = fs::read_to_string(graph_dir.path().join(name)).unwrap();
        (name, contents)
    });

    // Re-init with a deliberately different schema so that any overwrite
    // shows up in the file contents.
    let replacement_schema = "node Other { id: String @key }\n";
    let reinit = Omnigraph::init(graph_uri, replacement_schema).await;

    // The second init must report the conflict rather than mutate silently.
    assert!(
        reinit.is_err(),
        "init against an existing graph URI must error, not silently overwrite"
    );

    // First pass: every artifact is still on disk.
    for (name, _) in &snapshots {
        assert!(
            graph_dir.path().join(*name).exists(),
            "{name} must not be deleted by a failed re-init"
        );
    }

    // Second pass: every artifact is byte-identical to its snapshot.
    for (name, before) in &snapshots {
        let current = fs::read_to_string(graph_dir.path().join(*name)).unwrap();
        assert_eq!(
            &current, before,
            "{name} contents must be preserved when re-init is rejected"
        );
    }
}
/// Happy-path counterpart to the strict re-init regression test:
/// `InitOptions { force: true }` must skip the schema-file preflight so an
/// operator can deliberately recover from orphan schema artifacts (e.g.
/// files left behind by a failed prior init).
///
/// Per the documented semantics of `InitOptions::force`, force skips the
/// preflight only. It does NOT purge existing Lance datasets or
/// `__manifest/` — that needs `StorageAdapter::delete_prefix`, which is
/// tracked separately. The realistic recovery scenario is "schema files
/// exist but Lance state doesn't", which is what this test reproduces.
///
/// This test exists so that a refactor inverting the `if !force` branch
/// cannot silently break the operator-facing escape hatch.
#[tokio::test]
async fn init_with_force_recovers_from_orphan_schema_files() {
    let graph_dir = tempfile::tempdir().unwrap();
    let graph_root = graph_dir.path();
    let graph_uri = graph_root.to_str().unwrap();

    // Simulate an orphan: drop `_schema.pg` on disk without running a full
    // init, so the strict-mode preflight sees it and bails.
    fs::write(graph_root.join("_schema.pg"), TEST_SCHEMA).unwrap();

    // Strict mode refuses because `_schema.pg` exists.
    let Err(strict_err) = Omnigraph::init(graph_uri, TEST_SCHEMA).await else {
        panic!("strict init must refuse when orphan _schema.pg exists");
    };
    let strict_msg = strict_err.to_string();
    assert!(
        strict_msg.contains("already initialized"),
        "strict init must surface AlreadyInitialized (sanity check); got: {strict_err}"
    );

    // Force init succeeds: it skips the preflight, overwrites the orphan
    // file, and goes on to initialize Lance state (which did not exist, so
    // `GraphCoordinator::init` is unblocked).
    let force_options = InitOptions { force: true };
    let db = Omnigraph::init_with_options(graph_uri, TEST_SCHEMA, force_options)
        .await
        .expect("force init must succeed when only orphan schema files block strict init");

    // A populated catalog proves the graph is functional after recovery,
    // not merely that the call returned Ok.
    let has_person = db.catalog().node_types.contains_key("Person");
    assert!(
        has_person,
        "force-recovered graph must have the new catalog installed"
    );
    assert!(
        graph_root.join("__schema_state.json").exists(),
        "force-recovered graph must have full schema state written"
    );
}

View file

@ -1,19 +1,32 @@
// Maintenance tests: `optimize` (Lance compact_files) and `cleanup`
// (Lance cleanup_old_versions) at the graph level. Covers no-op edges
// (empty repo, already-optimized repo), the policy-validation contract on
// (empty graph, already-optimized graph), the policy-validation contract on
// `cleanup`, and the keep-versions cap that protects head.
mod helpers;
use std::time::Duration;
use omnigraph::db::{CleanupPolicyOptions, Omnigraph};
use lance::Dataset;
use omnigraph::db::{CleanupPolicyOptions, Omnigraph, SkipReason};
use omnigraph::loader::{LoadMode, load_jsonl};
use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load};
/// Returns the filesystem URI of a node sub-table: `<root>/nodes/<hash>`,
/// where `<hash>` is the 64-bit FNV-1a digest of `type_name` rendered as 16
/// zero-padded lowercase hex digits and trailing `/` on `root` is trimmed.
///
/// Mirrors the engine's layout and matches the helper in `failpoints.rs`;
/// used to inspect/forge Lance branches directly in tests.
fn node_table_uri(root: &str, type_name: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x100_0000_01b3;

    // FNV-1a: xor each byte into the state, then multiply by the prime
    // (wrapping on overflow).
    let digest = type_name.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    let base = root.trim_end_matches('/');
    format!("{base}/nodes/{digest:016x}")
}
#[tokio::test]
async fn optimize_on_empty_repo_returns_stats_per_table_with_no_changes() {
async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
@ -37,7 +50,7 @@ async fn optimize_after_load_then_again_is_idempotent() {
// First pass may compact (load wrote real fragments).
let _first = db.optimize().await.unwrap();
// Second pass should be a no-op: already-compacted repo produces no
// Second pass should be a no-op: already-compacted graph produces no
// fragments_removed / fragments_added.
let second = db.optimize().await.unwrap();
for s in &second {
@ -59,6 +72,97 @@ async fn optimize_after_load_then_again_is_idempotent() {
}
}
// Regression: `optimize` must not crash on a graph that has a `Blob` table.
//
// Lance `compact_files` forces `BlobHandling::AllBinary`, which mis-decodes
// blob-v2 columns ("more fields in the schema than provided column indices"),
// failing even a pristine uniform-V2_2 multi-fragment blob table. `optimize`
// must skip blob-bearing tables (and report the skip) rather than aborting the
// whole sweep.
//
// Before the skip fix, `optimize()` returned that Lance error here and aborted
// the whole sweep; it now skips the blob table (`doc.skipped == Some(..)`)
// while the sibling non-blob `Tag` table still compacts. The skip is gated by
// `LANCE_SUPPORTS_BLOB_COMPACTION`; the surface guard
// `compact_files_still_fails_on_blob_columns` flags when the upstream Lance fix
// makes the skip (and this test's blob arm) removable.
#[tokio::test]
async fn optimize_skips_blob_table_and_reports_skip() {
    let graph_dir = tempfile::tempdir().unwrap();
    let graph_uri = graph_dir.path().to_str().unwrap();

    // One Blob node type (`Doc`) plus one plain node type (`Tag`), so the
    // same sweep covers a table that must be skipped and one that must not.
    let schema = "\
        node Doc {\n slug: String @key\n content: Blob\n}\n\
        node Tag {\n slug: String @key\n}\n";
    let mut db = Omnigraph::init(graph_uri, schema).await.unwrap();

    // Loads are applied strictly in the order listed.
    //
    // Doc: the Overwrite creates fragment 1 and each Merge of new keys
    // appends another. A >=2-fragment blob table is exactly what crashes
    // `compact_files` today (a single fragment would no-op and not crash).
    //
    // Tag: also multi-fragment, so the plain table has something to compact.
    let loads = [
        (
            "{\"type\":\"Doc\",\"data\":{\"slug\":\"d1\",\"content\":\"base64:aGVsbG8x\"}}\n{\"type\":\"Doc\",\"data\":{\"slug\":\"d2\",\"content\":\"base64:aGVsbG8y\"}}",
            LoadMode::Overwrite,
        ),
        (
            "{\"type\":\"Doc\",\"data\":{\"slug\":\"d3\",\"content\":\"base64:aGVsbG8z\"}}",
            LoadMode::Merge,
        ),
        (
            "{\"type\":\"Doc\",\"data\":{\"slug\":\"d4\",\"content\":\"base64:aGVsbG80\"}}",
            LoadMode::Merge,
        ),
        (
            "{\"type\":\"Tag\",\"data\":{\"slug\":\"t1\"}}\n{\"type\":\"Tag\",\"data\":{\"slug\":\"t2\"}}",
            LoadMode::Merge,
        ),
        (
            "{\"type\":\"Tag\",\"data\":{\"slug\":\"t3\"}}",
            LoadMode::Merge,
        ),
    ];
    for (payload, mode) in loads {
        load_jsonl(&mut db, payload, mode).await.unwrap();
    }

    let stats = db
        .optimize()
        .await
        .expect("optimize must not crash on a graph with a Blob table");

    let doc = stats
        .iter()
        .find(|stat| stat.table_key == "node:Doc")
        .expect("Doc stat present");
    let tag = stats
        .iter()
        .find(|stat| stat.table_key == "node:Tag")
        .expect("Tag stat present");

    // The blob table is skipped (and reported), not compacted.
    assert_eq!(
        doc.skipped,
        Some(SkipReason::BlobColumnsUnsupportedByLance),
        "blob table must be reported as skipped",
    );
    assert!(!doc.committed, "skipped blob table is not compacted");
    assert_eq!(doc.fragments_removed, 0);
    assert_eq!(doc.fragments_added, 0);

    // The plain (non-blob) table is unaffected by the skip.
    assert_eq!(tag.skipped, None, "non-blob table must not be skipped");
}
#[tokio::test]
async fn cleanup_without_any_policy_option_errors() {
let dir = tempfile::tempdir().unwrap();
@ -119,7 +223,9 @@ async fn cleanup_older_than_zero_preserves_head() {
// Smoke test: after aggressive cleanup, we can still read and write the
// graph — head wasn't pruned.
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
.await
.unwrap();
}
#[tokio::test]
@ -151,6 +257,64 @@ async fn cleanup_then_optimize_preserves_rows_and_table_remains_writable() {
assert_eq!(count_rows(&db, "node:Company").await, companies_before);
// Table is still writable after the cleanup+optimize sequence.
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
.await
.unwrap();
assert_eq!(count_rows(&db, "node:Person").await, people_before);
}
#[tokio::test]
async fn cleanup_reconciles_orphaned_branch_forks() {
    // Scenario: a prior `branch_delete` that did not finish can leave behind a
    // per-table Lance branch the manifest no longer references (a "zombie"
    // fork). No snapshot can reach it, yet it pins its `tree/{branch}/`
    // storage. `cleanup` is expected to drop every Lance branch that is absent
    // from the manifest authority while leaving `main` alone.
    let dir = tempfile::tempdir().unwrap();
    let uri = dir.path().to_str().unwrap().to_string();
    let mut db = init_and_load(&dir).await;

    // Both cleanup passes below use the same policy: keep one version, no age cutoff.
    let keep_one_version = || CleanupPolicyOptions {
        keep_versions: Some(1),
        older_than: None,
    };

    let baseline_people = count_rows(&db, "node:Person").await;
    assert!(baseline_people > 0, "fixture should seed Person rows");

    // Stage the orphan: create a branch directly on the Person dataset,
    // bypassing the manifest entirely.
    let person_uri = node_table_uri(&uri, "Person");
    {
        let mut person_ds = Dataset::open(&person_uri).await.unwrap();
        let fork_point = person_ds.version().version;
        person_ds
            .create_branch("ghost", fork_point, None)
            .await
            .unwrap();
        let staged_branches = person_ds.list_branches().await.unwrap();
        assert!(
            staged_branches.contains_key("ghost"),
            "precondition: orphaned fork staged"
        );
    }

    db.cleanup(keep_one_version()).await.unwrap();

    // The orphan must be gone after cleanup...
    {
        let person_ds = Dataset::open(&person_uri).await.unwrap();
        let remaining_branches = person_ds.list_branches().await.unwrap();
        assert!(
            !remaining_branches.contains_key("ghost"),
            "cleanup should reconcile the orphaned 'ghost' fork away"
        );
    }
    // ...and main must still hold the same rows.
    let people_after = count_rows(&db, "node:Person").await;
    assert_eq!(
        people_after, baseline_people,
        "cleanup must not disturb main while reconciling orphans"
    );

    // Running cleanup again with nothing left to reconcile must also succeed.
    db.cleanup(keep_one_version()).await.unwrap();
}

View file

@ -23,8 +23,8 @@ use std::path::Path;
use std::sync::Arc;
use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions};
use omnigraph::loader::LoadMode;
use omnigraph::error::OmniError;
use omnigraph::loader::LoadMode;
use omnigraph_policy::{PolicyChecker, PolicyEngine};
use helpers::*;
@ -58,13 +58,16 @@ rules:
"#;
fn additive_schema() -> String {
helpers::TEST_SCHEMA.replace(" age: I32?\n}", " age: I32?\n nickname: String?\n}")
helpers::TEST_SCHEMA.replace(
" age: I32?\n}",
" age: I32?\n nickname: String?\n}",
)
}
fn install_policy(db: Omnigraph, dir_path: &Path) -> (Omnigraph, Arc<PolicyEngine>) {
let policy_path = dir_path.join("policy.yaml");
fs::write(&policy_path, POLICY_YAML).unwrap();
let engine = PolicyEngine::load(&policy_path, dir_path.to_str().unwrap()).unwrap();
let engine = PolicyEngine::load_graph(&policy_path, dir_path.to_str().unwrap()).unwrap();
let engine = Arc::new(engine);
let db = db.with_policy(Arc::clone(&engine) as Arc<dyn PolicyChecker>);
(db, engine)
@ -238,7 +241,12 @@ async fn load_as_denies_when_policy_rejects_actor() {
let (db, _engine) = init_with_policy(&dir).await;
let result = db
.load_as("main", ONE_PERSON_JSONL, LoadMode::Merge, Some("act-denied"))
.load_as(
"main",
ONE_PERSON_JSONL,
LoadMode::Merge,
Some("act-denied"),
)
.await;
assert_denied(result, "load_as");
}

View file

@ -22,16 +22,16 @@ use helpers::recovery::{RecoveryExpectation, TableExpectation, assert_post_recov
const TEST_SCHEMA: &str = include_str!("fixtures/test.pg");
fn write_sidecar_file(repo_root: &Path, operation_id: &str, json: &str) {
let dir = repo_root.join("__recovery");
fn write_sidecar_file(graph_root: &Path, operation_id: &str, json: &str) {
let dir = graph_root.join("__recovery");
if !dir.exists() {
std::fs::create_dir(&dir).unwrap();
}
std::fs::write(dir.join(format!("{}.json", operation_id)), json).unwrap();
}
fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
let dir = repo_root.join("__recovery");
fn list_recovery_dir(graph_root: &Path) -> Vec<String> {
let dir = graph_root.join("__recovery");
if !dir.exists() {
return Vec::new();
}
@ -41,7 +41,7 @@ fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
.collect()
}
/// Full URI of a node-type Lance dataset under a fresh Omnigraph repo.
/// Full URI of a node-type Lance dataset under a fresh Omnigraph graph.
/// Mirrors the `nodes/{fnv1a64-hex(type_name)}` layout in `db/manifest/layout.rs`.
fn node_table_uri(root: &str, type_name: &str) -> String {
let h: u64 = fnv1a64(type_name.as_bytes());
@ -283,8 +283,8 @@ async fn recovery_rolls_back_synthetic_drift_on_open() {
// =====================================================================
/// Helper: count rows in `_graph_commit_recoveries.lance` at the given root.
async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
async fn count_recovery_audit_rows(graph_root: &Path) -> usize {
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return 0;
}
@ -306,9 +306,9 @@ async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
/// Helper: read the most recent recovery audit row's `recovery_kind`,
/// `recovery_for_actor`, and `operation_id`. Returns `None` if no rows.
async fn read_latest_recovery_audit(
repo_root: &Path,
graph_root: &Path,
) -> Option<(String, Option<String>, String, String)> {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return None;
}
@ -357,8 +357,8 @@ async fn read_latest_recovery_audit(
/// storage order (multiple batches concatenated). Used by the
/// multi-sidecar fresh-snapshot test as a diagnostic alongside the
/// post-recovery Lance HEAD assertion.
async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
async fn list_recovery_audit_kinds(graph_root: &Path) -> Vec<String> {
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
if !recoveries_dir.exists() {
return Vec::new();
}
@ -391,8 +391,8 @@ async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
}
/// Helper: count `_graph_commits.lance` rows tagged with the recovery actor.
async fn count_recovery_actor_commits(repo_root: &Path) -> usize {
let actors_dir = repo_root.join("_graph_commit_actors.lance");
async fn count_recovery_actor_commits(graph_root: &Path) -> usize {
let actors_dir = graph_root.join("_graph_commit_actors.lance");
if !actors_dir.exists() {
return 0;
}
@ -908,7 +908,7 @@ async fn recovery_ensure_indices_steady_state_no_sidecar() {
/// ran) and rolls back any sibling table's legitimate index work.
///
/// Integration verification: after a real init + ensure_indices on a
/// repo where every table is empty, the recovery sweep must complete
/// graph where every table is empty, the recovery sweep must complete
/// cleanly (no leftover sidecar) AND the next ensure_indices must also
/// leave no sidecar — proving the empty-table-scoping behavior lets
/// steady-state runs incur zero sidecar I/O. The
@ -930,7 +930,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
db.ensure_indices().await.unwrap();
assert!(
list_recovery_dir(dir.path()).is_empty(),
"ensure_indices on an all-empty repo must not leave a sidecar"
"ensure_indices on an all-empty graph must not leave a sidecar"
);
// Reopen + ensure_indices — still steady state, still no sidecar.
drop(db);
@ -938,7 +938,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
db.ensure_indices().await.unwrap();
assert!(
list_recovery_dir(dir.path()).is_empty(),
"second ensure_indices on an all-empty repo must also not leave a sidecar"
"second ensure_indices on an all-empty graph must also not leave a sidecar"
);
}

View file

@ -7,8 +7,8 @@ use omnigraph::loader::{LoadMode, load_jsonl};
use helpers::*;
#[tokio::test(flavor = "multi_thread")]
async fn s3_compatible_repo_lifecycle_works() {
let Some(uri) = s3_test_repo_uri("omnigraph-runtime") else {
async fn s3_compatible_graph_lifecycle_works() {
let Some(uri) = s3_test_graph_uri("omnigraph-runtime") else {
eprintln!("skipping s3 runtime test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};
@ -81,7 +81,7 @@ async fn s3_compatible_repo_lifecycle_works() {
#[tokio::test(flavor = "multi_thread")]
async fn s3_branch_change_merge_flow_works() {
let Some(uri) = s3_test_repo_uri("omnigraph-branching") else {
let Some(uri) = s3_test_graph_uri("omnigraph-branching") else {
eprintln!("skipping s3 branch test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};
@ -135,7 +135,7 @@ async fn s3_branch_change_merge_flow_works() {
#[tokio::test(flavor = "multi_thread")]
async fn s3_public_load_uses_hidden_run_and_publishes() {
let Some(uri) = s3_test_repo_uri("omnigraph-public-load") else {
let Some(uri) = s3_test_graph_uri("omnigraph-public-load") else {
eprintln!("skipping s3 public load test: OMNIGRAPH_S3_TEST_BUCKET is not set");
return;
};

View file

@ -74,7 +74,7 @@ async fn apply_schema_rejects_when_non_main_branch_exists() {
let err = db.apply_schema(&desired).await.unwrap_err();
assert!(
err.to_string()
.contains("schema apply requires a repo with only main")
.contains("schema apply requires a graph with only main")
);
}
@ -402,10 +402,7 @@ async fn apply_schema_rejects_adding_a_required_property_without_backfill() {
// Add `email: String` (required, non-nullable, no @rename_from). Existing
// rows have no value to fill in, so this is unsupported in v1.
let desired = TEST_SCHEMA.replace(
" age: I32?\n}",
" age: I32?\n email: String\n}",
);
let desired = TEST_SCHEMA.replace(" age: I32?\n}", " age: I32?\n email: String\n}");
let err = db.apply_schema(&desired).await.unwrap_err();
let msg = err.to_string();
assert!(
@ -437,7 +434,10 @@ async fn plan_schema_for_property_type_narrowing_is_not_supported() {
.unwrap();
let plan = db.plan_schema(TEST_SCHEMA).await.unwrap();
assert!(!plan.supported, "narrowing I64 -> I32 must not be supported");
assert!(
!plan.supported,
"narrowing I64 -> I32 must not be supported"
);
assert!(plan.steps.iter().any(|step| matches!(
step,
SchemaMigrationStep::UnsupportedChange { code, .. }

View file

@ -2,7 +2,7 @@
//! exercise `stage_append`, `stage_merge_insert`, `scan_with_staged`,
//! and `count_rows_with_staged` directly against a Lance dataset — no
//! Omnigraph engine involved. The engine-level use of these primitives
//! is exercised by `tests/runs.rs`.
//! is exercised by `tests/writes.rs`.
//!
//! Test surface here:
//! 1. `stage_append` + `scan_with_staged` shows committed + staged data
@ -132,7 +132,11 @@ async fn stage_merge_insert_dedupes_superseded_committed_fragment() {
.await
.unwrap();
let ids = collect_ids(&batches);
assert_eq!(ids, vec!["alice"], "merge_insert must not surface duplicates");
assert_eq!(
ids,
vec!["alice"],
"merge_insert must not surface duplicates"
);
// Confirm the visible row is the rewritten one.
let total: usize = batches.iter().map(|b| b.num_rows()).sum();
@ -382,12 +386,7 @@ async fn scan_with_staged_with_filter_silently_drops_staged_rows() {
// Actual: dave (staged, age=35) is dropped — only the committed matches
// come back.
let batches = store
.scan_with_staged(
&ds,
std::slice::from_ref(&staged),
None,
Some("age >= 30"),
)
.scan_with_staged(&ds, std::slice::from_ref(&staged), None, Some("age >= 30"))
.await
.unwrap();
assert_eq!(
@ -403,12 +402,7 @@ async fn scan_with_staged_with_filter_silently_drops_staged_rows() {
// Without filter, staged data IS visible — confirms the issue is
// specifically filter pushdown, not fragment scanning per se.
let unfiltered = store
.scan_with_staged(
&ds,
std::slice::from_ref(&staged),
None,
None,
)
.scan_with_staged(&ds, std::slice::from_ref(&staged), None, None)
.await
.unwrap();
assert_eq!(
@ -686,10 +680,7 @@ async fn stage_create_inverted_index_does_not_advance_head_until_commit() {
.unwrap();
let pre_version = ds.version().version;
let staged = store
.stage_create_inverted_index(&ds, "id")
.await
.unwrap();
let staged = store.stage_create_inverted_index(&ds, "id").await.unwrap();
assert_eq!(
ds.version().version,
pre_version,
@ -718,7 +709,7 @@ async fn stage_create_inverted_index_does_not_advance_head_until_commit() {
///
/// **When Lance #6658 lands**: this test will need to flip — replace
/// the assertion with a `stage_delete` + `commit_staged` round-trip
/// and remove the residual line in `docs/runs.md`.
/// and remove the residual line in `docs/dev/writes.md`.
#[tokio::test]
async fn delete_where_advances_head_inline_documents_residual() {
let dir = tempfile::tempdir().unwrap();
@ -781,13 +772,9 @@ async fn create_vector_index_advances_head_inline_documents_residual() {
let id_arr = StringArray::from(ids);
let flat: Vec<f32> = (0..(n_rows * dim)).map(|i| i as f32).collect();
let values = arrow_array::Float32Array::from(flat);
let vec_arr =
FixedSizeListArray::new(item_field, dim as i32, Arc::new(values), None);
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(id_arr), Arc::new(vec_arr)],
)
.unwrap();
let vec_arr = FixedSizeListArray::new(item_field, dim as i32, Arc::new(values), None);
let batch =
RecordBatch::try_new(schema.clone(), vec![Arc::new(id_arr), Arc::new(vec_arr)]).unwrap();
let mut ds = TableStore::write_dataset(&uri, batch).await.unwrap();
let pre_version = ds.version().version;

View file

@ -504,9 +504,21 @@ query fof_chain($name: String) {
let batch = result.concat_batches().unwrap();
assert_eq!(batch.num_rows(), 1);
let col0 = batch.column(0).as_any().downcast_ref::<StringArray>().unwrap();
let col1 = batch.column(1).as_any().downcast_ref::<StringArray>().unwrap();
let col2 = batch.column(2).as_any().downcast_ref::<StringArray>().unwrap();
let col0 = batch
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let col1 = batch
.column(1)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let col2 = batch
.column(2)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
assert_eq!(col0.value(0), "Alice");
assert_eq!(col1.value(0), "Bob");
assert_eq!(col2.value(0), "Diana");
@ -574,8 +586,16 @@ query at_acme_named() {
let batch = result.concat_batches().unwrap();
assert_eq!(batch.num_rows(), 1);
let person = batch.column(0).as_any().downcast_ref::<StringArray>().unwrap();
let company = batch.column(1).as_any().downcast_ref::<StringArray>().unwrap();
let person = batch
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let company = batch
.column(1)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
assert_eq!(person.value(0), "Alice");
assert_eq!(company.value(0), "Acme");
}
@ -608,8 +628,16 @@ query at_company($company: String) {
let batch = result.concat_batches().unwrap();
assert_eq!(batch.num_rows(), 1);
let person = batch.column(0).as_any().downcast_ref::<StringArray>().unwrap();
let company = batch.column(1).as_any().downcast_ref::<StringArray>().unwrap();
let person = batch
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let company = batch
.column(1)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
assert_eq!(person.value(0), "Bob");
assert_eq!(company.value(0), "Globex");
}
@ -633,19 +661,22 @@ query fan_out($name: String) {
"#;
// Alice knows Bob and Charlie, works at Acme.
// Each friend paired with her company → 2 rows.
let result = query_main(
&mut db,
queries,
"fan_out",
&params(&[("$name", "Alice")]),
)
.await
.unwrap();
let result = query_main(&mut db, queries, "fan_out", &params(&[("$name", "Alice")]))
.await
.unwrap();
let batch = result.concat_batches().unwrap();
assert_eq!(batch.num_rows(), 2);
let friends = batch.column(0).as_any().downcast_ref::<StringArray>().unwrap();
let companies = batch.column(1).as_any().downcast_ref::<StringArray>().unwrap();
let friends = batch
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let companies = batch
.column(1)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let mut pairs: Vec<(&str, &str)> = (0..batch.num_rows())
.map(|i| (friends.value(i), companies.value(i)))

View file

@ -76,7 +76,9 @@ async fn init_with(schema: &str, data: &str) -> (tempfile::TempDir, Omnigraph) {
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, schema).await.unwrap();
if !data.is_empty() {
load_jsonl(&mut db, data, LoadMode::Overwrite).await.unwrap();
load_jsonl(&mut db, data, LoadMode::Overwrite)
.await
.unwrap();
}
(dir, db)
}

View file

@ -1,7 +1,7 @@
//! Tests for the direct-to-target write path (Run state machine
//! removed). The Run/`__run__` staging branch / RunRecord state machine no
//! longer exists; mutations and loads write directly to target tables and
//! commit once via the publisher's `expected_table_versions` CAS.
//! Tests for the direct-publish write path: mutations and loads write
//! directly to target tables and commit once via the publisher's
//! `expected_table_versions` CAS. (History: this replaced the removed Run
//! state machine / `__run__` staging branches / RunRecord — MR-771.)
//!
//! What this file covers:
//! - No `__run__*` branches are created by load or mutate.
@ -127,10 +127,7 @@ async fn multi_statement_mutation_is_atomic_with_read_your_writes() {
"main",
MUTATION_QUERIES,
"insert_person_and_friend",
&mixed_params(
&[("$name", "Eve"), ("$friend", "Alice")],
&[("$age", 22)],
),
&mixed_params(&[("$name", "Eve"), ("$friend", "Alice")], &[("$age", 22)]),
)
.await
.unwrap();
@ -187,10 +184,7 @@ async fn partial_failure_leaves_target_queryable_and_unblocks_next_mutation() {
"main",
MUTATION_QUERIES,
"insert_person_and_friend",
&mixed_params(
&[("$name", "Eve"), ("$friend", "Missing")],
&[("$age", 22)],
),
&mixed_params(&[("$name", "Eve"), ("$friend", "Missing")], &[("$age", 22)]),
)
.await
.expect_err("op-2 must fail");
@ -543,10 +537,7 @@ async fn mutation_rejects_mixed_insert_and_delete_at_parse_time() {
"main",
STAGED_QUERIES,
"mixed_insert_and_delete",
&mixed_params(
&[("$name", "Eve"), ("$victim", "Alice")],
&[("$age", 22)],
),
&mixed_params(&[("$name", "Eve"), ("$victim", "Alice")], &[("$age", 22)]),
)
.await
.expect_err("D₂ must reject mixed insert+delete");
@ -559,7 +550,9 @@ async fn mutation_rejects_mixed_insert_and_delete_at_parse_time() {
manifest_err.message,
);
assert!(
manifest_err.message.contains("split into separate mutations"),
manifest_err
.message
.contains("split into separate mutations"),
"error message should direct user to split: {}",
manifest_err.message,
);
@ -668,11 +661,7 @@ async fn multiple_appends_to_same_edge_coalesce_to_one_append() {
"main",
STAGED_QUERIES,
"insert_two_friends",
&params(&[
("$from", "Alice"),
("$a", "Bob"),
("$b", "Eve"),
]),
&params(&[("$from", "Alice"), ("$a", "Bob"), ("$b", "Eve")]),
)
.await
.unwrap();
@ -782,8 +771,14 @@ async fn load_with_bad_edge_reference_unblocks_next_load() {
// No write made it to disk: counts unchanged.
let mid_persons = count_rows(&db, "node:Person").await;
let mid_edges = count_rows(&db, "edge:Knows").await;
assert_eq!(mid_persons, pre_persons, "failed load must not advance Person count");
assert_eq!(mid_edges, pre_edges, "failed load must not advance Knows count");
assert_eq!(
mid_persons, pre_persons,
"failed load must not advance Person count"
);
assert_eq!(
mid_edges, pre_edges,
"failed load must not advance Knows count"
);
// Second load against the same tables — succeeds (no HEAD drift).
let good = r#"{"type": "Person", "data": {"name": "Pat", "age": 55}}"#;
@ -824,7 +819,9 @@ edge WorksAt: Person -> Company @card(0..1)
{"type": "Company", "data": {"name": "Acme"}}
{"type": "Company", "data": {"name": "Bigco"}}
"#;
load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap();
load_jsonl(&mut db, seed, LoadMode::Overwrite)
.await
.unwrap();
let pre_works = count_rows(&db, "edge:WorksAt").await;
@ -1014,7 +1011,10 @@ query cascade_then_explicit($name: String, $other: String) {
// — Bob→Diana would survive. The exact-count check makes both ops
// independently observable.
let pre_knows = count_rows(&db, "edge:Knows").await;
assert_eq!(pre_knows, 3, "fixture invariant: TEST_DATA seeds 3 Knows edges");
assert_eq!(
pre_knows, 3,
"fixture invariant: TEST_DATA seeds 3 Knows edges"
);
db.mutate(
"main",
@ -1066,7 +1066,9 @@ query add_friend($from: String, $to: String) {
let seed = r#"{"type": "Person", "data": {"name": "Alice"}}
{"type": "Person", "data": {"name": "Bob"}}
"#;
load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap();
load_jsonl(&mut db, seed, LoadMode::Overwrite)
.await
.unwrap();
// Single insert: count=1 < min=2 → reject with clear message.
let err = db
@ -1082,8 +1084,7 @@ query add_friend($from: String, $to: String) {
panic!("expected Manifest error, got {err:?}");
};
assert!(
manifest_err.message.contains("@card violation")
&& manifest_err.message.contains("min 2"),
manifest_err.message.contains("@card violation") && manifest_err.message.contains("min 2"),
"unexpected error: {}",
manifest_err.message,
);
@ -1121,7 +1122,9 @@ edge WorksAt: Person -> Company @card(0..1)
{"type": "Company", "data": {"name": "Bigco"}}
{"edge": "WorksAt", "from": "Alice", "to": "Acme", "data": {"id": "w1"}}
"#;
load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap();
load_jsonl(&mut db, seed, LoadMode::Overwrite)
.await
.unwrap();
// Merge-update the same edge id w1 to point at Bigco. Counted naively
// as union, Alice has 2 WorksAt (committed Acme + pending Bigco) which
@ -1167,7 +1170,9 @@ edge WorksAt: Person -> Company @card(0..1)
{"type": "Company", "data": {"name": "Acme"}}
{"type": "Company", "data": {"name": "Bigco"}}
"#;
load_jsonl(&mut db, seed, LoadMode::Overwrite).await.unwrap();
load_jsonl(&mut db, seed, LoadMode::Overwrite)
.await
.unwrap();
// Merge load with the SAME edge id twice — the second row supersedes
// the first in the finalize-time dedupe. If pending-counting doesn't
@ -1364,7 +1369,11 @@ query insert_then_update_note(
)
.await
.unwrap();
assert_eq!(qr.num_rows(), 0, "letter must not be visible after early error");
assert_eq!(
qr.num_rows(),
0,
"letter must not be visible after early error"
);
}
/// MR-920 regression: two sequential `update T set {f:v} where x=y`
@ -1446,5 +1455,9 @@ async fn second_sequential_update_on_same_row_succeeds() {
}
}
}
assert_eq!(alice_age, Some(42), "Alice's age must reflect the second update");
assert_eq!(
alice_age,
Some(42),
"Alice's age must reflect the second update"
);
}

View file

@ -9,8 +9,14 @@ fi
bind="${OMNIGRAPH_BIND:-0.0.0.0:8080}"
# URI comes from the env var (the positional arg wins over any config
# `graphs` block in resolve_target_uri). OMNIGRAPH_CONFIG, when also set,
# is forwarded as --config purely to supply a policy file — the two
# compose. Without OMNIGRAPH_CONFIG the behavior is unchanged.
if [ -n "${OMNIGRAPH_TARGET_URI:-}" ]; then
exec "$SERVER_BIN" "${OMNIGRAPH_TARGET_URI}" --bind "${bind}"
exec "$SERVER_BIN" "${OMNIGRAPH_TARGET_URI}" \
${OMNIGRAPH_CONFIG:+--config "$OMNIGRAPH_CONFIG"} \
--bind "${bind}"
fi
if [ -n "${OMNIGRAPH_CONFIG:-}" ]; then
@ -28,5 +34,7 @@ omnigraph-server container startup requires one of:
Optional:
- OMNIGRAPH_BIND (default: 0.0.0.0:8080)
- OMNIGRAPH_TARGET (used with OMNIGRAPH_CONFIG)
- OMNIGRAPH_CONFIG (may also accompany OMNIGRAPH_TARGET_URI to add a
policy file; the URI still comes from OMNIGRAPH_TARGET_URI)
EOF
exit 64

65
docker/entrypoint_test.sh Executable file
View file

@ -0,0 +1,65 @@
#!/bin/sh
# Self-contained test for docker/entrypoint.sh argument composition.
# Runs the entrypoint against a stub server that echoes its args, and
# asserts the forwarded argv for each startup mode. No Docker required.
#
# sh docker/entrypoint_test.sh
#
# Exits 0 on success, 1 if any case mismatches (all cases are checked before exiting).
# Abort on any failing command or unset variable.
set -eu
# Resolve this script's directory so the test works from any cwd.
here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
entrypoint="$here/entrypoint.sh"
# Scratch directory for the stub binary and the patched entrypoint;
# removed on exit whether the test passes or fails.
work=$(mktemp -d)
trap 'rm -rf "$work"' EXIT
mkdir -p "$work/bin"
# Stub server: prints the argv it was given so each case can assert on it.
cat > "$work/bin/omnigraph-server" <<'EOF'
#!/bin/sh
echo "ARGS: $*"
EOF
chmod +x "$work/bin/omnigraph-server"
# Run the real entrypoint with SERVER_BIN pointed at the stub.
ep="$work/entrypoint.sh"
sed "s#SERVER_BIN=\"/usr/local/bin/omnigraph-server\"#SERVER_BIN=\"$work/bin/omnigraph-server\"#" \
"$entrypoint" > "$ep"
# Guard: sed exits 0 even when nothing matched. If the SERVER_BIN line in
# entrypoint.sh changes shape, the copy would still point at the real binary
# and the cases below would exec it (or fail with a misleading error).
if ! grep -qF "SERVER_BIN=\"$work/bin/omnigraph-server\"" "$ep"; then
    echo "entrypoint_test: could not redirect SERVER_BIN to the stub in $entrypoint" >&2
    exit 1
fi
fail=0
# check DESC WANT GOT
# Compares one case's actual output (GOT) with the expected output (WANT).
# Prints "ok: DESC" on a match; otherwise prints a FAIL report and raises the
# script-wide `fail` flag.
check() {
    desc=$1
    want=$2
    got=$3
    # Happy path first: nothing to record when the output matches.
    if [ "$got" = "$want" ]; then
        echo "ok: $desc"
        return 0
    fi
    echo "FAIL: $desc"
    echo " want: $want"
    echo " got: $got"
    fail=1
}
# Each case runs the patched entrypoint with a specific environment (or argv)
# and compares the stub's echoed argv with the expected string.
# NOTE: the script runs under `set -e`, so a non-zero exit from the entrypoint
# inside `got=$(...)` aborts the script before `check` is reached.

# Case: only the URI is set — expect the URI followed by --bind, no --config.
got=$(OMNIGRAPH_TARGET_URI="s3://b/g" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
check "TARGET_URI only (legacy)" \
"ARGS: s3://b/g --bind 0.0.0.0:8080" "$got"
# Case: URI plus config — expect --config between the URI and --bind.
got=$(OMNIGRAPH_TARGET_URI="s3://b/g" OMNIGRAPH_CONFIG="/etc/omnigraph/omnigraph.yaml" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
check "TARGET_URI + CONFIG composes (policy)" \
"ARGS: s3://b/g --config /etc/omnigraph/omnigraph.yaml --bind 0.0.0.0:8080" "$got"
# Case: config only — expect no positional URI.
got=$(OMNIGRAPH_CONFIG="/etc/omnigraph/omnigraph.yaml" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
check "CONFIG only" \
"ARGS: --config /etc/omnigraph/omnigraph.yaml --bind 0.0.0.0:8080" "$got"
# Case: config plus a named target — expect --target after --config.
got=$(OMNIGRAPH_CONFIG="/etc/omnigraph/omnigraph.yaml" OMNIGRAPH_TARGET="active" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
check "CONFIG + TARGET" \
"ARGS: --config /etc/omnigraph/omnigraph.yaml --target active --bind 0.0.0.0:8080" "$got"
# Case: explicit arguments and no OMNIGRAPH_* variables set by this test —
# expect the arguments to reach the server unchanged.
got=$(sh "$ep" some-uri --bind 1.2.3.4:9 --extra)
check "explicit args passthrough" \
"ARGS: some-uri --bind 1.2.3.4:9 --extra" "$got"
# Report once after the cases above have run; any mismatch recorded by
# `check` (fail=1) makes the whole run exit 1.
if [ "$fail" -ne 0 ]; then
echo "entrypoint_test: FAILED"
exit 1
fi
echo "entrypoint_test: all cases passed"

View file

@ -10,7 +10,7 @@ Three views, increasing zoom:
2. **Layer view** — the eight-layer stack inside one OmniGraph process.
3. **Component zoom-ins** — what's inside each layer.
For runtime flows (read query, mutation), see [`docs/dev/execution.md`](execution.md). For the on-disk layout of a repo, see [`docs/user/storage.md`](../user/storage.md).
For runtime flows (read query, mutation), see [`docs/dev/execution.md`](execution.md). For the on-disk layout of a graph, see [`docs/user/storage.md`](../user/storage.md).
L1 (orange in the diagrams) is what we inherit from Lance; L2 (blue) is what OmniGraph adds. The L1/L2 framing is also called out in prose at the bottom of this doc.
@ -63,7 +63,7 @@ flowchart TB
subgraph engine[omnigraph engine]
plan[exec query and mutation]:::l2
gi[graph index CSR/CSC<br/>RuntimeCache LRU 8]:::l2
coord[coordinator<br/>ManifestRepo · CommitGraph]:::l2
coord[coordinator<br/>ManifestCoordinator · CommitGraph]:::l2
end
subgraph storage[storage trait — wraps Lance]
@ -132,7 +132,7 @@ flowchart TB
subgraph state[graph state]
coord[GraphCoordinator]:::l2
mr[ManifestRepo<br/>db/manifest.rs]:::l2
mr[ManifestCoordinator<br/>db/manifest.rs]:::l2
cg[CommitGraph<br/>_graph_commits.lance]:::l2
stg[MutationStaging<br/>per-query in-memory accumulator<br/>exec/staging.rs]:::l2
end
@ -166,7 +166,7 @@ Code paths:
- Read entry: `Omnigraph::query` at `crates/omnigraph/src/exec/query.rs:7`
- Mutation entry: `Omnigraph::mutate` at `crates/omnigraph/src/exec/mutation.rs:511`
- Manifest commit: `ManifestRepo::commit` at `crates/omnigraph/src/db/manifest.rs:280`
- Manifest commit: `ManifestCoordinator::commit` at `crates/omnigraph/src/db/manifest.rs:280`
- Graph index: `crates/omnigraph/src/graph_index/`
- Loader: `Omnigraph::ingest` at `crates/omnigraph/src/loader/mod.rs:74`
@ -207,7 +207,7 @@ contracts:
This pattern realizes read-your-writes within a multi-statement mutation
and keeps failure scope bounded for inserts/updates by construction at
the writer layer. See [docs/dev/invariants.md](invariants.md) and
[docs/dev/runs.md](runs.md) for the publisher CAS contract this builds on.
[docs/dev/writes.md](writes.md) for the publisher CAS contract this builds on.
### Storage trait — today vs. roadmap
@ -278,7 +278,7 @@ flowchart LR
eng --> wq
```
The server applies Cedar policy at the HTTP boundary today. The roadmap, called out in [docs/dev/invariants.md](invariants.md) as a known gap, is to push policy into the planner as predicates. After Cedar, mutating handlers go through `WorkloadController` (per-actor admission cap + byte budget; PR 2 / MR-686) before reaching the engine. The engine itself holds an `Arc<WriteQueueManager>` so concurrent mutations on the same `(table, branch)` serialize at the queue, while disjoint keys run in parallel — see [docs/user/server.md](../user/server.md) "Per-actor admission control" and [docs/dev/runs.md](runs.md). The CLI bypasses the HTTP layer (and admission) and calls the engine API directly.
The server applies Cedar policy at the HTTP boundary today. The roadmap, called out in [docs/dev/invariants.md](invariants.md) as a known gap, is to push policy into the planner as predicates. After Cedar, mutating handlers go through `WorkloadController` (per-actor admission cap + byte budget; PR 2 / MR-686) before reaching the engine. The engine itself holds an `Arc<WriteQueueManager>` so concurrent mutations on the same `(table, branch)` serialize at the queue, while disjoint keys run in parallel — see [docs/user/server.md](../user/server.md) "Per-actor admission control" and [docs/dev/writes.md](writes.md). The CLI bypasses the HTTP layer (and admission) and calls the engine API directly.
Code paths:

View file

@ -8,7 +8,7 @@ This page explains what the policy says and how to change it.
| Setting | Value | Why |
|---|---|---|
| **Required status checks (strict)** | `Classify Changes`, `Check AGENTS.md Links`, `Test Workspace`, `Test omnigraph-server --features aws`, `CODEOWNERS / drift`, `CODEOWNERS / noedit` | Every PR must pass workspace tests, AGENTS.md link integrity, and the CODEOWNERS hygiene checks. `strict: true` requires the branch to be up-to-date with `main` before merge. |
| **Required status checks (strict)** | `Classify Changes`, `Check AGENTS.md Links`, `Test Workspace`, `Test omnigraph-server --features aws`, `CODEOWNERS matches source`, `CODEOWNERS not hand-edited` | Every PR must pass workspace tests, AGENTS.md link integrity, and the CODEOWNERS hygiene checks. The two CODEOWNERS contexts must equal the job `name:` values in `.github/workflows/codeowners.yml` **verbatim** — a context naming a job that never reports (the old `CODEOWNERS / drift` used the job *id*, and the job was path-filtered) leaves every PR permanently pending and forces admin overrides. `strict: true` requires the branch to be up-to-date with `main` before merge. |
| **Required approving reviews** | `1` | At least one reviewer. With a 2-person team, going higher would block all merges when one person is unavailable. |
| **Require code-owner reviews** | `true` | The reviewer must be a code owner per `.github/CODEOWNERS`. This is what makes the codeowners chassis enforced. |
| **Dismiss stale reviews on new commits** | `true` | A push after approval invalidates the prior review. Prevents the "approve, then sneak in unreviewed changes" pattern. |
@ -16,12 +16,12 @@ This page explains what the policy says and how to change it.
| **Disallow force pushes** | `true` | No history rewrites on `main`. |
| **Disallow branch deletions** | `true` | `main` cannot be deleted. |
| **Required conversation resolution** | `true` | All review comment threads must be resolved before merge. |
| **Enforce on admins** | `true` | Even repo admins go through the gates. The point is no bypasses. |
| **Enforce on admins** | `false` | Admins can override the gates (`enforce_admins: false` in the JSON). This is the intended escape hatch for the 2-person team; tightening to `true` is tracked under hardening below. |
| **Required signed commits** | not yet | Not enabled. Would lock out maintainers until everyone enrolls GPG/SSH commit signing. Tracked as a follow-up. |
## How to apply
Run from the repo root:
Run from the repository root:
```bash
./scripts/apply-branch-protection.sh
@ -29,7 +29,7 @@ Run from the repo root:
The script reads `.github/branch-protection.json`, strips the human-readable `_comment` field (the GitHub API rejects unknown keys), and PUTs to `repos/ModernRelay/omnigraph/branches/main/protection`.
Requires `gh` authenticated with a token that has admin permissions on the repo.
Requires `gh` authenticated with a token that has admin permissions on the repository.
To preview without applying:
@ -57,7 +57,7 @@ Outputs the live policy. Compare against `.github/branch-protection.json` to det
- **Audit trail**: `git log .github/branch-protection.json` shows every change with a reviewable diff and a merge commit.
- **Disaster recovery**: if branch protection is accidentally removed or weakened via the UI, the JSON is the canonical recovery point.
- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repo policy lives in the repo.
- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repository policy lives in the repository.
## What this gates
@ -69,7 +69,7 @@ After branch protection is applied, every PR targeting `main` must:
4. Have all review conversations resolved.
5. Be squash- or rebase-merged (no merge commits).
Even repo admins are subject to these rules.
Repository admins can currently override these rules (`enforce_admins: false`); every other contributor is subject to them.
## Subsequent hardening (not in this PR)

View file

@ -2,9 +2,10 @@
`.github/workflows/`:
- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repo PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`).
- **ci.yml**: skipped for text-only changes; otherwise runs `cargo test --workspace --locked` on ubuntu-latest with the protobuf compiler. Includes an OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repository PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`).
- **AWS feature build job**: `cargo build/test -p omnigraph-server --features aws` on ubuntu-latest.
- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_repo_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`.
- **release-edge.yml**: on every push to main, retags `edge`, builds Linux/macOS-Intel/macOS-arm64 archives + sha256, publishes a rolling prerelease.
- **release.yml**: on `v*` tags, builds the 3-platform matrix and updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`.
- **Windows binary build job**: `cargo build --release --locked -p omnigraph-cli -p omnigraph-server` on windows-latest with smoke checks for `omnigraph.exe version`, `omnigraph-server.exe --help`, and PowerShell installer syntax.
- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_graph_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`.
- **release-edge.yml**: on every push to main, retags `edge`, builds Linux x86_64 / macOS arm64 archives and Windows x86_64 zip + sha256, publishes a rolling prerelease, then smoke-tests the Windows PowerShell installer against `edge`.
- **release.yml**: on `v*` tags, builds the Linux x86_64 / macOS arm64 archives and Windows x86_64 zip release matrix, updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`, and smoke-tests the Windows PowerShell installer against the tag.
- **package.yml**: manual ECR image build; emits two image tags per commit (`<sha>`, `<sha>-aws`) via CodeBuild.

View file

@ -2,26 +2,47 @@
`.github/CODEOWNERS` is **generated** — not hand-edited. The source of truth is `.github/codeowners-roles.yml`, expanded by `.github/scripts/render-codeowners.py`. CI rejects drift between the two and rejects direct edits to `CODEOWNERS` that don't accompany a yml change.
This setup gives every role change a reviewable PR and a permanent in-repo audit trail (`git log .github/codeowners-roles.yml`).
This setup gives every role change a reviewable PR and a permanent in-repository audit trail (`git log .github/codeowners-roles.yml`).
## Current roles
## Who owns what
| Role | Members | Scope |
The tables below are **generated** from `.github/codeowners-roles.yml` by `.github/scripts/render-codeowners.py` (the same render that produces `.github/CODEOWNERS`). They are the always-current "who owns what at this commit" view — don't edit them by hand; edit the yml and re-render.
<!-- BEGIN GENERATED OWNERSHIP — edit codeowners-roles.yml + run render-codeowners.py -->
**Path → owners** (GitHub applies *last match wins*; the `*` catch-all is listed first and is overridden by the specific patterns below it):
| Path | Owners | Role(s) |
|---|---|---|
| `engineering` | `@aaltshuler` | All code under `crates/**`, repo infrastructure, default for unmapped paths |
| `docs` | `@aaltshuler`, `@ragnorc` | `docs/**`, README.md, AGENTS.md, CLAUDE.md, SECURITY.md |
| `*` | @ragnorc | engineering |
| `crates/**` | @ragnorc | engineering |
| `docs/**` | @ragnorc | docs |
| `README.md` | @ragnorc | docs |
| `AGENTS.md` | @ragnorc | docs |
| `CLAUDE.md` | @ragnorc | docs |
| `SECURITY.md` | @ragnorc | docs |
GitHub treats multiple owners in a CODEOWNERS line as **"any one of them satisfies the review requirement"**. For docs, either named member can approve. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured).
**Roles**:
| Role | Members | Description |
|---|---|---|
| `engineering` | @ragnorc | All production code under crates/**. Engine, CLI, server, compiler. |
| `docs` | @ragnorc | Documentation under docs/**, plus repo-level docs (README.md, AGENTS.md, CLAUDE.md symlink, SECURITY.md). |
<!-- END GENERATED OWNERSHIP -->
GitHub treats multiple owners on a CODEOWNERS line as **"any one of them satisfies the review requirement"**. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured).
## How to change role membership or path mappings
1. Edit `.github/codeowners-roles.yml`.
2. Run `python3 .github/scripts/render-codeowners.py` (requires PyYAML; `pip install pyyaml`).
3. Commit both files in the same PR.
2. Open a PR. **CI re-renders for you**: the `CODEOWNERS` workflow regenerates `.github/CODEOWNERS` and the ownership tables above and auto-commits them back to your PR branch on same-repository PRs — you don't have to run the script locally (though you can: `python3 .github/scripts/render-codeowners.py`, requires PyYAML).
On a fork (where CI can't push back), the workflow instead fails with the diff so you can run the script and commit it yourself.
CI fails the PR if:
- `CODEOWNERS` was edited without a corresponding yml change, or
- The yml was changed but the rendered `CODEOWNERS` doesn't match.
- a fork PR left a generated artifact out of sync, or
- `CODEOWNERS` was edited without a corresponding yml change (the `CODEOWNERS not hand-edited` check).
## How to add a new role
@ -34,4 +55,4 @@ CI fails the PR if:
- **Audit trail**: `git log .github/codeowners-roles.yml` is the canonical record of every role change. The rendered `CODEOWNERS` is a derived artifact.
- **Roles are first-class**: paths reference roles, not raw handles. Renaming a person or rotating a role updates one place, not every path.
- **Future extension**: scheduled rotation (weekly on-call, quarterly leads) plugs into the same yml without changing the path mappings. Not enabled today.
- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repo's code-owner policy follows the same "policy as reviewed code" pattern.
- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repository's code-owner policy follows the same "policy as reviewed code" pattern.

View file

@ -147,7 +147,7 @@ sequenceDiagram
- End-of-query Lance commit: `TableStore::stage_append`, `stage_merge_insert`, `commit_staged` at `crates/omnigraph/src/table_store.rs`
- Manifest commit primitive: `commit_updates_on_branch_with_expected` at `crates/omnigraph/src/db/omnigraph/table_ops.rs`
Atomicity guarantee for multi-statement mutations: a mid-query failure leaves Lance HEAD untouched on staged tables (no inline commit happened during op execution), so the next mutation proceeds normally with no `ExpectedVersionMismatch`. The publisher CAS at the very end either succeeds (manifest advances atomically across all touched sub-tables) or fails with a typed `ManifestConflictDetails::ExpectedVersionMismatch` (no partial publish). See [docs/dev/invariants.md](invariants.md) and [docs/dev/runs.md](runs.md).
Atomicity guarantee for multi-statement mutations: a mid-query failure leaves Lance HEAD untouched on staged tables (no inline commit happened during op execution), so the next mutation proceeds normally with no `ExpectedVersionMismatch`. The publisher CAS at the very end either succeeds (manifest advances atomically across all touched sub-tables) or fails with a typed `ManifestConflictDetails::ExpectedVersionMismatch` (no partial publish). See [docs/dev/invariants.md](invariants.md) and [docs/dev/writes.md](writes.md).
## Bulk loader (`loader/mod.rs`)

View file

@ -21,7 +21,7 @@ constraints. User-facing behavior should still be documented through
|---|---|
| System structure, L1/L2 framing, component diagrams | [architecture.md](architecture.md) |
| On-disk layout, manifest schema, URI behavior | [storage.md](../user/storage.md) |
| Direct-publish writes, D2, staged writes, recovery sidecars | [runs.md](runs.md) |
| Direct-publish writes, D2, staged writes, recovery sidecars | [writes.md](writes.md) |
| Query execution, mutation execution, loader flow | [execution.md](execution.md) |
| DataFusion: current state, passive wins, future improvements | [datafusion-future-improvements.md](datafusion-future-improvements.md) |
| Index lifecycle and graph topology indexes | [indexes.md](../user/indexes.md) |
@ -59,6 +59,9 @@ Working documents for in-flight feature work. Removed when the work lands.
| Area | Read |
|---|---|
| Schema-lint chassis v1 (MR-694) — `--allow-data-loss`, soft/hard drops | [schema-lint-v1-plan.md](schema-lint-v1-plan.md) |
| Inline + stored queries, request/response envelope, MCP (MR-656 / MR-976 / MR-969) | [rfc-001-queries-envelope-mcp.md](rfc-001-queries-envelope-mcp.md) |
| Config & CLI architecture — layered config, client targeting, file naming (MR-973 / MR-974 / MR-981) | [rfc-002-config-cli-architecture.md](rfc-002-config-cli-architecture.md) |
| MCP server surface — full tool parity, stored queries, modular auth (MR-969 / MR-956 / MR-974) | [rfc-003-mcp-server-surface.md](rfc-003-mcp-server-surface.md) |
## Boundary

View file

@ -38,7 +38,7 @@ Use it this way:
publishes one manifest update. Do not commit per statement. Delete-only
queries are the documented inline residual; the parse-time D2 rule prevents
mixing deletes with insert/update until Lance exposes two-phase delete.
Read [runs.md](runs.md) and [execution.md](execution.md).
Read [writes.md](writes.md) and [execution.md](execution.md).
5. **Recovery is part of the commit protocol.** Writers that can advance Lance
HEAD before manifest publish must write `__recovery/{ulid}.json` sidecars.
@ -56,7 +56,7 @@ Use it this way:
branch they read even when index coverage is partial. Expensive index work
should converge from manifest state instead of extending the critical write
path. Scalar staged index builds and vector inline residuals are documented
in [runs.md](runs.md) and [indexes.md](../user/indexes.md).
in [writes.md](writes.md) and [indexes.md](../user/indexes.md).
8. **Schema identity survives renames.** Accepted schema identity must remain
stable across type and property renames. Rename support belongs in migration
@ -96,17 +96,25 @@ Use it this way:
| Area | Current state | Source |
|---|---|---|
| Multi-table commit | Manifest CAS plus recovery sidecars; not a single Lance primitive | [runs.md](runs.md), [architecture.md](architecture.md) |
| Constructive mutations | In-memory `MutationStaging`, one end-of-query table commit per touched table, then one manifest publish | [runs.md](runs.md), [execution.md](execution.md) |
| Deletes | Inline-commit residual; delete-only queries allowed, mixed insert/update/delete rejected by D2 | [query-language.md](../user/query-language.md), [runs.md](runs.md) |
| Multi-table commit | Manifest CAS plus recovery sidecars; not a single Lance primitive | [writes.md](writes.md), [architecture.md](architecture.md) |
| Constructive mutations | In-memory `MutationStaging`, one end-of-query table commit per touched table, then one manifest publish | [writes.md](writes.md), [execution.md](execution.md) |
| Deletes | Inline-commit residual; delete-only queries allowed, mixed insert/update/delete rejected by D2 | [query-language.md](../user/query-language.md), [writes.md](writes.md) |
| Branch delete | Manifest is the single authority, flipped atomically first; per-table forks + commit-graph branch are derived state, reclaimed best-effort (`force_delete_branch`) with the `cleanup` reconciler as the guaranteed backstop. Reusing a name whose reclaim failed before `cleanup` surfaces an actionable error | [branches-commits.md](../user/branches-commits.md), [maintenance.md](../user/maintenance.md) |
| Schema validation | Type checks, required fields, defaults, edge endpoint checks, and edge cardinality are enforced on write paths | [schema-language.md](../user/schema-language.md), [execution.md](execution.md) |
| Unique constraints | Intra-batch and write-path checks exist; full cross-version uniqueness is still a gap | [schema-language.md](../user/schema-language.md) |
| Storage trait | `TableStorage` exists as the sealed staged-write surface; full call-site migration and capability/stat surfaces are incomplete | [runs.md](runs.md), [architecture.md](architecture.md) |
| Storage trait | `TableStorage` exists as the sealed staged-write surface; full call-site migration and capability/stat surfaces are incomplete | [writes.md](writes.md), [architecture.md](architecture.md) |
| Index lifecycle | `ensure_indices` is explicit today; reconciler-based convergence is roadmap | [indexes.md](../user/indexes.md), [maintenance.md](../user/maintenance.md) |
| Traversal IDs | Runtime still builds `TypeIndex`; Lance stable row-id based graph IDs are roadmap | [architecture.md](architecture.md), [query-language.md](../user/query-language.md) |
| Auth | Bearer token hashing and server-side actor resolution are implemented at the HTTP boundary | [server.md](../user/server.md), [policy.md](../user/policy.md) |
| Tests | Tempdir-backed Lance tests are the current substrate; there is no `MemStorage` test backend | [testing.md](testing.md) |
The branch-delete reconciler is authority-derived: it reclaims orphaned forks
today and degrades to a no-op if Lance ships an atomic multi-dataset branch
operation, so the design composes with that future rather than blocking it. This
is the same shape as invariant 7 (indexes are derived state); prefer it over a
recovery-sidecar-style approach for any new multi-dataset metadata operation,
since the sidecar would be scaffolding to remove once the substrate closes the gap.
## Known Gaps
Do not hide these behind invariant wording. Either move them forward or keep
@ -122,6 +130,15 @@ them explicit.
- **Deletes and vector indexes:** `delete_where` and vector index creation still
advance Lance HEAD inline because the required public Lance APIs are missing.
Keep D2 and recovery coverage in place until those residuals are removed.
- **Blob-column compaction:** Lance `compact_files` mis-decodes blob-v2 columns
under its forced `BlobHandling::AllBinary` read ("more fields in the schema
than provided column indices"), so `optimize` skips any table with a `Blob`
property — reporting `SkipReason::BlobColumnsUnsupportedByLance` (loud, not a
silent drop) behind the `LANCE_SUPPORTS_BLOB_COMPACTION` gate. Reads and writes
are unaffected; only space/fragment reclamation on blob tables is deferred.
Remove the skip when the upstream Lance fix lands — the
`lance_surface_guards.rs::compact_files_still_fails_on_blob_columns` guard
turns red on that bump to force it.
- **Planner capability/stat surfaces:** cost-aware planning, complete
capability advertisement, and explain-with-cost are roadmap. Do not describe
them as implemented.

Some files were not shown because too many files have changed in this diff Show more