mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-09 01:35:18 +02:00
Compare commits
25 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5eead8d29e | ||
|
|
c2a97f4559 | ||
|
|
ce150fb0ca | ||
|
|
e62d9166fb | ||
|
|
4a66d6e071 | ||
|
|
54842808db | ||
|
|
fd8e078a77 | ||
|
|
343f1f17ed | ||
|
|
c7365bf8ef | ||
|
|
96dbe9dec0 | ||
|
|
d54bccb940 | ||
|
|
3c2b1b8051 | ||
|
|
fab105bcce | ||
|
|
353c0c876a | ||
|
|
e94e7d124a | ||
|
|
2d5c4b1202 | ||
|
|
854ad0afcb | ||
|
|
8eba37cc60 | ||
|
|
24413844ae | ||
|
|
6c9afc7e9b | ||
|
|
d8451c3d33 | ||
|
|
c221e67e1b | ||
|
|
6e948de985 | ||
|
|
baeb4387df | ||
|
|
4e431d28b8 |
96 changed files with 8635 additions and 651 deletions
4
.github/CODEOWNERS
vendored
4
.github/CODEOWNERS
vendored
|
|
@ -8,9 +8,9 @@
|
||||||
# CI fails if this file drifts from its source, and rejects PRs that
|
# CI fails if this file drifts from its source, and rejects PRs that
|
||||||
# edit this file directly without also editing the yml.
|
# edit this file directly without also editing the yml.
|
||||||
|
|
||||||
* @ragnorc
|
* @ragnorc @aaltshuler
|
||||||
|
|
||||||
crates/** @ragnorc
|
crates/** @ragnorc @aaltshuler
|
||||||
docs/** @ragnorc
|
docs/** @ragnorc
|
||||||
README.md @ragnorc
|
README.md @ragnorc
|
||||||
AGENTS.md @ragnorc
|
AGENTS.md @ragnorc
|
||||||
|
|
|
||||||
34
.github/DISCUSSION_TEMPLATE/rfc.yml
vendored
Normal file
34
.github/DISCUSSION_TEMPLATE/rfc.yml
vendored
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
labels: ["rfc"]
|
||||||
|
body:
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
Use this to **incubate an RFC** — socialize a design and reach rough
|
||||||
|
consensus before writing the formal document. When it's ready, graduate
|
||||||
|
it into a pull request that adds `docs/rfcs/NNNN-title.md`
|
||||||
|
(see [docs/rfcs/README.md](../blob/main/docs/rfcs/README.md)); a
|
||||||
|
maintainer merging that PR is acceptance.
|
||||||
|
|
||||||
|
For a plain feature request or open-ended idea, use the **Ideas**
|
||||||
|
category instead. For bugs, open an [Issue](../../issues/new/choose).
|
||||||
|
- type: textarea
|
||||||
|
id: problem
|
||||||
|
attributes:
|
||||||
|
label: Problem / motivation
|
||||||
|
description: What needs solving, and why is it worth the long-run cost?
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: sketch
|
||||||
|
attributes:
|
||||||
|
label: Proposed direction (sketch)
|
||||||
|
description: A rough shape of the design. Detail comes later in the RFC document.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: invariants
|
||||||
|
attributes:
|
||||||
|
label: Invariants touched
|
||||||
|
description: Which items in docs/dev/invariants.md does this affect or risk? Any deny-list brush?
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
55
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
55
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
name: Bug report
|
||||||
|
description: Report a reproducible problem or wrong behavior in OmniGraph.
|
||||||
|
title: "bug: <short summary>"
|
||||||
|
labels: ["bug", "needs-triage"]
|
||||||
|
body:
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
Issues are for **reporting problems** — concrete, reproducible bugs.
|
||||||
|
For ideas, feature requests, or questions, please use
|
||||||
|
[Discussions](../../discussions) instead.
|
||||||
|
For a security vulnerability, follow [SECURITY.md](../../blob/main/SECURITY.md) — do **not** file it here.
|
||||||
|
|
||||||
|
A maintainer will triage this; once labelled **`accepted`** it's open for a pull request
|
||||||
|
(see [GOVERNANCE.md](../../blob/main/GOVERNANCE.md)).
|
||||||
|
- type: textarea
|
||||||
|
id: what-happened
|
||||||
|
attributes:
|
||||||
|
label: What happened
|
||||||
|
description: What went wrong, and what you expected instead.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: textarea
|
||||||
|
id: repro
|
||||||
|
attributes:
|
||||||
|
label: Steps to reproduce
|
||||||
|
description: Minimal steps, commands, schema/query, or a failing snippet.
|
||||||
|
placeholder: |
|
||||||
|
1. omnigraph init ...
|
||||||
|
2. omnigraph ...
|
||||||
|
3. observed: ... / expected: ...
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: input
|
||||||
|
id: version
|
||||||
|
attributes:
|
||||||
|
label: Version
|
||||||
|
description: Output of `omnigraph --version` (or the engine/crate version) and how you installed it.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
- type: input
|
||||||
|
id: environment
|
||||||
|
attributes:
|
||||||
|
label: Environment
|
||||||
|
description: OS, architecture, and storage backend (local FS / S3 / RustFS / MinIO).
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
- type: textarea
|
||||||
|
id: logs
|
||||||
|
attributes:
|
||||||
|
label: Logs / output
|
||||||
|
description: Relevant error text or logs. Will be rendered as code.
|
||||||
|
render: shell
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
13
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
13
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
# Issues are for problem reports only. Disable blank issues so everything is
|
||||||
|
# routed: bugs through the form, everything else to Discussions / SECURITY.md.
|
||||||
|
blank_issues_enabled: false
|
||||||
|
contact_links:
|
||||||
|
- name: 💡 Idea, feature request, or RFC
|
||||||
|
url: https://github.com/ModernRelay/omnigraph/discussions
|
||||||
|
about: Propose features and designs in Discussions. RFCs graduate from there into a docs/rfcs/ pull request.
|
||||||
|
- name: ❓ Question or help
|
||||||
|
url: https://github.com/ModernRelay/omnigraph/discussions
|
||||||
|
about: Ask in Discussions — questions are not tracked as Issues.
|
||||||
|
- name: 🔒 Security vulnerability
|
||||||
|
url: https://github.com/ModernRelay/omnigraph/blob/main/SECURITY.md
|
||||||
|
about: Report security issues privately per SECURITY.md — never as a public Issue.
|
||||||
29
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
29
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
<!--
|
||||||
|
Thanks for contributing! See CONTRIBUTING.md and GOVERNANCE.md.
|
||||||
|
A substantive PR needs a backing accepted issue or accepted RFC.
|
||||||
|
Maintainers: your internal process applies; the link requirement below
|
||||||
|
is for external contributions.
|
||||||
|
-->
|
||||||
|
|
||||||
|
## What & why
|
||||||
|
|
||||||
|
<!-- One or two sentences: what this changes and why. -->
|
||||||
|
|
||||||
|
## Backing issue / RFC
|
||||||
|
|
||||||
|
<!-- Pick one. A substantive change needs (1) or (2). -->
|
||||||
|
|
||||||
|
- [ ] Fixes an **accepted** issue: Closes #
|
||||||
|
- [ ] Implements / is an **accepted** RFC: <link to docs/rfcs/NNNN-*.md>
|
||||||
|
- [ ] **Trivial fast-lane** (typo / docs / dependency bump / comment / one-line CI) — no issue/RFC required
|
||||||
|
|
||||||
|
## Checklist
|
||||||
|
|
||||||
|
- [ ] Change is focused (one logical change)
|
||||||
|
- [ ] Tests added/updated for behavior changes (or N/A)
|
||||||
|
- [ ] Public docs updated if user-facing surface changed (or N/A)
|
||||||
|
- [ ] Reviewed against [docs/dev/invariants.md](../blob/main/docs/dev/invariants.md) — no Hard Invariant weakened, no deny-list item hit (or justified)
|
||||||
|
|
||||||
|
## Notes for reviewers
|
||||||
|
|
||||||
|
<!-- Anything that helps review: tradeoffs, follow-ups, areas of risk. -->
|
||||||
13
.github/branch-protection.json
vendored
13
.github/branch-protection.json
vendored
|
|
@ -1,5 +1,5 @@
|
||||||
{
|
{
|
||||||
"_comment": "Branch protection policy for main. Applied via scripts/apply-branch-protection.sh. See docs/branch-protection.md for rationale.",
|
"_comment": "Branch protection policy for main. Applied via scripts/apply-branch-protection.sh. See docs/branch-protection.md for rationale. NOTE: bypass_pull_request_allowances.users must mirror the engineering owners in .github/codeowners-roles.yml — code owners merge their own PRs without a second review; non-owners still need a code-owner approval. (render-codeowners.py does NOT generate this list; keep it in sync by hand.)",
|
||||||
"required_status_checks": {
|
"required_status_checks": {
|
||||||
"strict": true,
|
"strict": true,
|
||||||
"contexts": [
|
"contexts": [
|
||||||
|
|
@ -7,8 +7,8 @@
|
||||||
"Check AGENTS.md Links",
|
"Check AGENTS.md Links",
|
||||||
"Test Workspace",
|
"Test Workspace",
|
||||||
"Test omnigraph-server --features aws",
|
"Test omnigraph-server --features aws",
|
||||||
"CODEOWNERS / drift",
|
"CODEOWNERS matches source",
|
||||||
"CODEOWNERS / noedit"
|
"CODEOWNERS not hand-edited"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"enforce_admins": false,
|
"enforce_admins": false,
|
||||||
|
|
@ -17,7 +17,12 @@
|
||||||
"dismiss_stale_reviews": true,
|
"dismiss_stale_reviews": true,
|
||||||
"require_code_owner_reviews": true,
|
"require_code_owner_reviews": true,
|
||||||
"required_approving_review_count": 1,
|
"required_approving_review_count": 1,
|
||||||
"require_last_push_approval": false
|
"require_last_push_approval": false,
|
||||||
|
"bypass_pull_request_allowances": {
|
||||||
|
"users": ["ragnorc", "aaltshuler"],
|
||||||
|
"teams": [],
|
||||||
|
"apps": []
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"restrictions": null,
|
"restrictions": null,
|
||||||
"required_linear_history": true,
|
"required_linear_history": true,
|
||||||
|
|
|
||||||
1
.github/codeowners-roles.yml
vendored
1
.github/codeowners-roles.yml
vendored
|
|
@ -22,6 +22,7 @@ roles:
|
||||||
compiler.
|
compiler.
|
||||||
members:
|
members:
|
||||||
- ragnorc
|
- ragnorc
|
||||||
|
- aaltshuler
|
||||||
|
|
||||||
docs:
|
docs:
|
||||||
description: >
|
description: >
|
||||||
|
|
|
||||||
81
.github/scripts/render-codeowners.py
vendored
81
.github/scripts/render-codeowners.py
vendored
|
|
@ -1,10 +1,14 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Render .github/CODEOWNERS from .github/codeowners-roles.yml.
|
"""Render .github/CODEOWNERS and the ownership tables in
|
||||||
|
docs/dev/codeowners.md from .github/codeowners-roles.yml.
|
||||||
|
|
||||||
The yml is the source of truth — editing CODEOWNERS directly is
|
The yml is the source of truth. This script expands the role-based yml
|
||||||
rejected by CI (see .github/workflows/codeowners.yml). This script
|
into (1) the flat path→owners format GitHub expects in
|
||||||
expands the role-based yml into the flat path→owners format GitHub
|
`.github/CODEOWNERS`, and (2) the "who owns what" markdown tables spliced
|
||||||
expects.
|
between the generated-region markers in `docs/dev/codeowners.md`. Both are
|
||||||
|
derived artifacts; CI re-renders them on every PR (see
|
||||||
|
.github/workflows/codeowners.yml) and auto-commits the result on same-repo
|
||||||
|
PRs, so the source of truth and the human-readable view never drift.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python3 .github/scripts/render-codeowners.py
|
python3 .github/scripts/render-codeowners.py
|
||||||
|
|
@ -16,6 +20,7 @@ Exits non-zero on:
|
||||||
one owner; otherwise CODEOWNERS would assign nobody and GitHub
|
one owner; otherwise CODEOWNERS would assign nobody and GitHub
|
||||||
would silently fall back to "no required reviewer", which
|
would silently fall back to "no required reviewer", which
|
||||||
defeats the purpose).
|
defeats the purpose).
|
||||||
|
- Missing generated-region markers in docs/dev/codeowners.md.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -34,6 +39,13 @@ except ImportError:
|
||||||
REPO_ROOT = Path(__file__).resolve().parents[2]
|
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||||
SOURCE = REPO_ROOT / ".github" / "codeowners-roles.yml"
|
SOURCE = REPO_ROOT / ".github" / "codeowners-roles.yml"
|
||||||
OUTPUT = REPO_ROOT / ".github" / "CODEOWNERS"
|
OUTPUT = REPO_ROOT / ".github" / "CODEOWNERS"
|
||||||
|
DOCS = REPO_ROOT / "docs" / "dev" / "codeowners.md"
|
||||||
|
|
||||||
|
# The "who owns what" tables in docs/dev/codeowners.md are spliced between
|
||||||
|
# these markers so the human-readable view never drifts from the source of
|
||||||
|
# truth. Edit codeowners-roles.yml and re-render — never the table by hand.
|
||||||
|
DOCS_BEGIN = "<!-- BEGIN GENERATED OWNERSHIP — edit codeowners-roles.yml + run render-codeowners.py -->"
|
||||||
|
DOCS_END = "<!-- END GENERATED OWNERSHIP -->"
|
||||||
|
|
||||||
BANNER = """\
|
BANNER = """\
|
||||||
# AUTOGENERATED from .github/codeowners-roles.yml. Do not edit by hand.
|
# AUTOGENERATED from .github/codeowners-roles.yml. Do not edit by hand.
|
||||||
|
|
@ -75,6 +87,62 @@ def owners_for(role_names: list[str], roles: dict) -> list[str]:
|
||||||
return seen
|
return seen
|
||||||
|
|
||||||
|
|
||||||
|
def _oneline(text: str) -> str:
|
||||||
|
"""Collapse a folded/multi-line YAML description into one cell of text."""
|
||||||
|
return " ".join((text or "").split())
|
||||||
|
|
||||||
|
|
||||||
|
def ownership_tables(spec: dict, roles: dict) -> str:
|
||||||
|
"""Render the human-readable "who owns what" markdown — a path→owners
|
||||||
|
table (the operative view at PR time, in last-match-wins order with the
|
||||||
|
catch-all first) plus a role→members table. Spliced into the docs between
|
||||||
|
the markers so it is always current with the source of truth."""
|
||||||
|
out: list[str] = []
|
||||||
|
|
||||||
|
out.append("**Path → owners** (GitHub applies *last match wins*; the `*` "
|
||||||
|
"catch-all is listed first and is overridden by the specific "
|
||||||
|
"patterns below it):")
|
||||||
|
out.append("")
|
||||||
|
out.append("| Path | Owners | Role(s) |")
|
||||||
|
out.append("|---|---|---|")
|
||||||
|
if "default" in spec:
|
||||||
|
owners = " ".join(owners_for(spec["default"], roles))
|
||||||
|
out.append(f"| `*` | {owners} | {', '.join(spec['default'])} |")
|
||||||
|
for pattern, role_names in (spec.get("paths") or {}).items():
|
||||||
|
owners = " ".join(owners_for(role_names, roles))
|
||||||
|
out.append(f"| `{pattern}` | {owners} | {', '.join(role_names)} |")
|
||||||
|
out.append("")
|
||||||
|
|
||||||
|
out.append("**Roles**:")
|
||||||
|
out.append("")
|
||||||
|
out.append("| Role | Members | Description |")
|
||||||
|
out.append("|---|---|---|")
|
||||||
|
for name, role in roles.items():
|
||||||
|
members = " ".join(f"@{m}" for m in (role.get("members") or []))
|
||||||
|
out.append(f"| `{name}` | {members} | {_oneline(role.get('description', ''))} |")
|
||||||
|
out.append("")
|
||||||
|
|
||||||
|
return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
|
def splice_docs(table_md: str) -> None:
|
||||||
|
"""Replace the region between DOCS_BEGIN/DOCS_END in the docs file with the
|
||||||
|
freshly generated tables, leaving surrounding prose untouched."""
|
||||||
|
if not DOCS.exists():
|
||||||
|
sys.exit(f"error: docs file not found: {DOCS}")
|
||||||
|
text = DOCS.read_text()
|
||||||
|
if DOCS_BEGIN not in text or DOCS_END not in text:
|
||||||
|
sys.exit(
|
||||||
|
f"error: ownership markers not found in {DOCS.relative_to(REPO_ROOT)}. "
|
||||||
|
f"Add the lines:\n {DOCS_BEGIN}\n {DOCS_END}\n"
|
||||||
|
f"around the generated table region."
|
||||||
|
)
|
||||||
|
head, rest = text.split(DOCS_BEGIN, 1)
|
||||||
|
_, tail = rest.split(DOCS_END, 1)
|
||||||
|
new = f"{head}{DOCS_BEGIN}\n\n{table_md}\n{DOCS_END}{tail}"
|
||||||
|
DOCS.write_text(new)
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
if not SOURCE.exists():
|
if not SOURCE.exists():
|
||||||
sys.exit(f"error: source file not found: {SOURCE}")
|
sys.exit(f"error: source file not found: {SOURCE}")
|
||||||
|
|
@ -127,6 +195,9 @@ def main() -> int:
|
||||||
|
|
||||||
OUTPUT.write_text(rendered)
|
OUTPUT.write_text(rendered)
|
||||||
print(f"wrote {OUTPUT.relative_to(REPO_ROOT)}")
|
print(f"wrote {OUTPUT.relative_to(REPO_ROOT)}")
|
||||||
|
|
||||||
|
splice_docs(ownership_tables(spec, roles))
|
||||||
|
print(f"updated {DOCS.relative_to(REPO_ROOT)}")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
12
.github/workflows/ci.yml
vendored
12
.github/workflows/ci.yml
vendored
|
|
@ -111,6 +111,18 @@ jobs:
|
||||||
- name: Verify AGENTS.md ↔ docs/ cross-links
|
- name: Verify AGENTS.md ↔ docs/ cross-links
|
||||||
run: bash scripts/check-agents-md.sh
|
run: bash scripts/check-agents-md.sh
|
||||||
|
|
||||||
|
entrypoint_test:
|
||||||
|
name: Container Entrypoint
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
steps:
|
||||||
|
- name: Checkout source
|
||||||
|
uses: actions/checkout@v5.0.1
|
||||||
|
|
||||||
|
- name: Verify omnigraph-server entrypoint arg composition
|
||||||
|
run: sh docker/entrypoint_test.sh
|
||||||
|
|
||||||
test:
|
test:
|
||||||
name: Test Workspace
|
name: Test Workspace
|
||||||
needs: classify_changes
|
needs: classify_changes
|
||||||
|
|
|
||||||
72
.github/workflows/codeowners.yml
vendored
72
.github/workflows/codeowners.yml
vendored
|
|
@ -1,19 +1,24 @@
|
||||||
name: CODEOWNERS
|
name: CODEOWNERS
|
||||||
|
|
||||||
|
# Runs on EVERY pull request (no paths filter). The two jobs below are
|
||||||
|
# required status checks on `main`; a path-filtered required check never
|
||||||
|
# reports for PRs outside the filter and leaves them permanently "pending"
|
||||||
|
# (the trap that forced admin-override merges). Always-run + cheap
|
||||||
|
# short-circuit is what keeps them honest.
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
|
||||||
- '.github/codeowners-roles.yml'
|
|
||||||
- '.github/CODEOWNERS'
|
|
||||||
- '.github/scripts/render-codeowners.py'
|
|
||||||
- '.github/workflows/codeowners.yml'
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
# Read-only; we never push from this workflow.
|
# `drift` auto-commits the regenerated artifacts back to same-repo PR
|
||||||
|
# branches, so it needs write access.
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: write
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
# NOTE: the job `name:` values below ("CODEOWNERS matches source" /
|
||||||
|
# "CODEOWNERS not hand-edited") ARE the status-check contexts that
|
||||||
|
# .github/branch-protection.json must list verbatim. Renaming a job here
|
||||||
|
# is a branch-protection change — update the JSON and re-apply.
|
||||||
drift:
|
drift:
|
||||||
name: CODEOWNERS matches source
|
name: CODEOWNERS matches source
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
@ -28,19 +33,56 @@ jobs:
|
||||||
- name: Install PyYAML
|
- name: Install PyYAML
|
||||||
run: pip install pyyaml
|
run: pip install pyyaml
|
||||||
|
|
||||||
- name: Re-render CODEOWNERS
|
- name: Re-render CODEOWNERS + ownership docs
|
||||||
run: python3 .github/scripts/render-codeowners.py
|
run: python3 .github/scripts/render-codeowners.py
|
||||||
|
|
||||||
- name: Reject drift
|
# Same-repo PR: push the regenerated artifacts back so contributors
|
||||||
|
# never have to run the script locally. Mirrors the openapi.json
|
||||||
|
# auto-commit in ci.yml (separate shallow clone of the head branch so
|
||||||
|
# the pushed commit carries only the regenerated files).
|
||||||
|
- name: Commit regenerated artifacts to PR branch
|
||||||
|
if: |
|
||||||
|
github.event_name == 'pull_request' &&
|
||||||
|
github.event.pull_request.head.repo.full_name == github.repository
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
if ! git diff --quiet .github/CODEOWNERS; then
|
if git diff --quiet -- .github/CODEOWNERS docs/dev/codeowners.md; then
|
||||||
echo "::error::.github/CODEOWNERS is out of sync with .github/codeowners-roles.yml."
|
echo "CODEOWNERS and ownership docs already in sync."
|
||||||
echo "::error::Run \`python3 .github/scripts/render-codeowners.py\` locally and commit the result."
|
exit 0
|
||||||
|
fi
|
||||||
|
tmp=$(mktemp -d)
|
||||||
|
git clone --depth 1 --branch "${{ github.head_ref }}" \
|
||||||
|
"https://x-access-token:${GITHUB_TOKEN}@github.com/${{ github.repository }}.git" \
|
||||||
|
"$tmp"
|
||||||
|
cp .github/CODEOWNERS "$tmp/.github/CODEOWNERS"
|
||||||
|
cp docs/dev/codeowners.md "$tmp/docs/dev/codeowners.md"
|
||||||
|
cd "$tmp"
|
||||||
|
if git diff --quiet -- .github/CODEOWNERS docs/dev/codeowners.md; then
|
||||||
|
echo "Head branch already matches; nothing to push."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
git config user.name "github-actions[bot]"
|
||||||
|
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||||
|
git add .github/CODEOWNERS docs/dev/codeowners.md
|
||||||
|
git commit -m "chore: regenerate CODEOWNERS + ownership docs"
|
||||||
|
git push
|
||||||
|
|
||||||
|
# Fork PR / workflow_dispatch: cannot push back, so enforce drift
|
||||||
|
# strictly. The contributor runs the script and commits the result.
|
||||||
|
- name: Verify in sync (forks / manual runs)
|
||||||
|
if: |
|
||||||
|
!(github.event_name == 'pull_request' &&
|
||||||
|
github.event.pull_request.head.repo.full_name == github.repository)
|
||||||
|
run: |
|
||||||
|
if ! git diff --quiet -- .github/CODEOWNERS docs/dev/codeowners.md; then
|
||||||
|
echo "::error::Generated CODEOWNERS / ownership docs are out of sync with .github/codeowners-roles.yml."
|
||||||
|
echo "::error::Run \`python3 .github/scripts/render-codeowners.py\` and commit the result."
|
||||||
echo "--- diff ---"
|
echo "--- diff ---"
|
||||||
git --no-pager diff .github/CODEOWNERS
|
git --no-pager diff -- .github/CODEOWNERS docs/dev/codeowners.md
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "CODEOWNERS is in sync with its source."
|
echo "Generated artifacts are in sync with their source."
|
||||||
|
|
||||||
noedit:
|
noedit:
|
||||||
name: CODEOWNERS not hand-edited
|
name: CODEOWNERS not hand-edited
|
||||||
|
|
@ -52,6 +94,8 @@ jobs:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Reject hand-edits to generated file
|
- name: Reject hand-edits to generated file
|
||||||
|
# Only meaningful for PRs (needs a base to diff against).
|
||||||
|
if: github.event_name == 'pull_request'
|
||||||
run: |
|
run: |
|
||||||
base="origin/${{ github.base_ref }}"
|
base="origin/${{ github.base_ref }}"
|
||||||
git fetch origin "${{ github.base_ref }}" --quiet
|
git fetch origin "${{ github.base_ref }}" --quiet
|
||||||
|
|
|
||||||
46
.github/workflows/release-edge.yml
vendored
46
.github/workflows/release-edge.yml
vendored
|
|
@ -43,6 +43,8 @@ jobs:
|
||||||
asset_name: omnigraph-linux-x86_64
|
asset_name: omnigraph-linux-x86_64
|
||||||
- runner: macos-14
|
- runner: macos-14
|
||||||
asset_name: omnigraph-macos-arm64
|
asset_name: omnigraph-macos-arm64
|
||||||
|
- runner: windows-latest
|
||||||
|
asset_name: omnigraph-windows-x86_64
|
||||||
env:
|
env:
|
||||||
CARGO_TERM_COLOR: always
|
CARGO_TERM_COLOR: always
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -59,6 +61,10 @@ jobs:
|
||||||
if: runner.os == 'macOS'
|
if: runner.os == 'macOS'
|
||||||
run: brew install protobuf
|
run: brew install protobuf
|
||||||
|
|
||||||
|
- name: Install Windows dependencies
|
||||||
|
if: runner.os == 'Windows'
|
||||||
|
run: choco install protoc -y
|
||||||
|
|
||||||
- name: Install Rust stable
|
- name: Install Rust stable
|
||||||
uses: dtolnay/rust-toolchain@stable
|
uses: dtolnay/rust-toolchain@stable
|
||||||
with:
|
with:
|
||||||
|
|
@ -73,7 +79,8 @@ jobs:
|
||||||
- name: Build release binaries
|
- name: Build release binaries
|
||||||
run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server
|
run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server
|
||||||
|
|
||||||
- name: Package release archive
|
- name: Package Unix release archive
|
||||||
|
if: runner.os != 'Windows'
|
||||||
run: |
|
run: |
|
||||||
mkdir -p release
|
mkdir -p release
|
||||||
install -m 0755 target/release/omnigraph release/omnigraph
|
install -m 0755 target/release/omnigraph release/omnigraph
|
||||||
|
|
@ -81,6 +88,22 @@ jobs:
|
||||||
tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server
|
tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server
|
||||||
shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256"
|
shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256"
|
||||||
|
|
||||||
|
- name: Package Windows release archive
|
||||||
|
if: runner.os == 'Windows'
|
||||||
|
run: |
|
||||||
|
New-Item -ItemType Directory -Force -Path release | Out-Null
|
||||||
|
Copy-Item target/release/omnigraph.exe release/omnigraph.exe
|
||||||
|
Copy-Item target/release/omnigraph-server.exe release/omnigraph-server.exe
|
||||||
|
Compress-Archive -Path release/omnigraph.exe, release/omnigraph-server.exe -DestinationPath "${{ matrix.asset_name }}.zip" -Force
|
||||||
|
$hash = (Get-FileHash "${{ matrix.asset_name }}.zip" -Algorithm SHA256).Hash.ToLowerInvariant()
|
||||||
|
"$hash ${{ matrix.asset_name }}.zip" | Out-File -FilePath "${{ matrix.asset_name }}.sha256" -Encoding ascii
|
||||||
|
New-Item -ItemType Directory -Force -Path verify | Out-Null
|
||||||
|
Expand-Archive -Path "${{ matrix.asset_name }}.zip" -DestinationPath verify -Force
|
||||||
|
$items = Get-ChildItem -Path verify -File
|
||||||
|
if ($items.Count -ne 2 -or !(Test-Path verify/omnigraph.exe) -or !(Test-Path verify/omnigraph-server.exe)) {
|
||||||
|
throw "Windows release archive is missing expected binaries"
|
||||||
|
}
|
||||||
|
|
||||||
- name: Publish edge release assets
|
- name: Publish edge release assets
|
||||||
uses: softprops/action-gh-release@v2.5.0
|
uses: softprops/action-gh-release@v2.5.0
|
||||||
with:
|
with:
|
||||||
|
|
@ -91,5 +114,22 @@ jobs:
|
||||||
body: |
|
body: |
|
||||||
Rolling prerelease from `${{ github.sha }}`.
|
Rolling prerelease from `${{ github.sha }}`.
|
||||||
files: |
|
files: |
|
||||||
${{ matrix.asset_name }}.tar.gz
|
${{ matrix.asset_name }}.*
|
||||||
${{ matrix.asset_name }}.sha256
|
|
||||||
|
smoke_windows_installer:
|
||||||
|
name: Smoke Windows installer
|
||||||
|
needs: build_release
|
||||||
|
runs-on: windows-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
steps:
|
||||||
|
- name: Checkout source
|
||||||
|
uses: actions/checkout@v5.0.1
|
||||||
|
|
||||||
|
- name: Install from edge release
|
||||||
|
run: ./scripts/install.ps1 -ReleaseChannel edge -InstallDir "$env:RUNNER_TEMP/omnigraph-bin"
|
||||||
|
|
||||||
|
- name: Smoke installed binaries
|
||||||
|
run: |
|
||||||
|
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph.exe" version
|
||||||
|
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph-server.exe" --help
|
||||||
|
|
|
||||||
72
.github/workflows/release.yml
vendored
72
.github/workflows/release.yml
vendored
|
|
@ -20,6 +20,8 @@ jobs:
|
||||||
asset_name: omnigraph-linux-x86_64
|
asset_name: omnigraph-linux-x86_64
|
||||||
- runner: macos-14
|
- runner: macos-14
|
||||||
asset_name: omnigraph-macos-arm64
|
asset_name: omnigraph-macos-arm64
|
||||||
|
- runner: windows-latest
|
||||||
|
asset_name: omnigraph-windows-x86_64
|
||||||
env:
|
env:
|
||||||
CARGO_TERM_COLOR: always
|
CARGO_TERM_COLOR: always
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -36,6 +38,10 @@ jobs:
|
||||||
if: runner.os == 'macOS'
|
if: runner.os == 'macOS'
|
||||||
run: brew install protobuf
|
run: brew install protobuf
|
||||||
|
|
||||||
|
- name: Install Windows dependencies
|
||||||
|
if: runner.os == 'Windows'
|
||||||
|
run: choco install protoc -y
|
||||||
|
|
||||||
- name: Install Rust stable
|
- name: Install Rust stable
|
||||||
uses: dtolnay/rust-toolchain@stable
|
uses: dtolnay/rust-toolchain@stable
|
||||||
with:
|
with:
|
||||||
|
|
@ -50,7 +56,8 @@ jobs:
|
||||||
- name: Build release binaries
|
- name: Build release binaries
|
||||||
run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server
|
run: cargo build --release --locked -p omnigraph-cli -p omnigraph-server
|
||||||
|
|
||||||
- name: Package release archive
|
- name: Package Unix release archive
|
||||||
|
if: runner.os != 'Windows'
|
||||||
run: |
|
run: |
|
||||||
mkdir -p release
|
mkdir -p release
|
||||||
install -m 0755 target/release/omnigraph release/omnigraph
|
install -m 0755 target/release/omnigraph release/omnigraph
|
||||||
|
|
@ -58,12 +65,27 @@ jobs:
|
||||||
tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server
|
tar -C release -czf "${{ matrix.asset_name }}.tar.gz" omnigraph omnigraph-server
|
||||||
shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256"
|
shasum -a 256 "${{ matrix.asset_name }}.tar.gz" > "${{ matrix.asset_name }}.sha256"
|
||||||
|
|
||||||
|
- name: Package Windows release archive
|
||||||
|
if: runner.os == 'Windows'
|
||||||
|
run: |
|
||||||
|
New-Item -ItemType Directory -Force -Path release | Out-Null
|
||||||
|
Copy-Item target/release/omnigraph.exe release/omnigraph.exe
|
||||||
|
Copy-Item target/release/omnigraph-server.exe release/omnigraph-server.exe
|
||||||
|
Compress-Archive -Path release/omnigraph.exe, release/omnigraph-server.exe -DestinationPath "${{ matrix.asset_name }}.zip" -Force
|
||||||
|
$hash = (Get-FileHash "${{ matrix.asset_name }}.zip" -Algorithm SHA256).Hash.ToLowerInvariant()
|
||||||
|
"$hash ${{ matrix.asset_name }}.zip" | Out-File -FilePath "${{ matrix.asset_name }}.sha256" -Encoding ascii
|
||||||
|
New-Item -ItemType Directory -Force -Path verify | Out-Null
|
||||||
|
Expand-Archive -Path "${{ matrix.asset_name }}.zip" -DestinationPath verify -Force
|
||||||
|
$items = Get-ChildItem -Path verify -File
|
||||||
|
if ($items.Count -ne 2 -or !(Test-Path verify/omnigraph.exe) -or !(Test-Path verify/omnigraph-server.exe)) {
|
||||||
|
throw "Windows release archive is missing expected binaries"
|
||||||
|
}
|
||||||
|
|
||||||
- name: Publish GitHub release assets
|
- name: Publish GitHub release assets
|
||||||
uses: softprops/action-gh-release@v2.5.0
|
uses: softprops/action-gh-release@v2.5.0
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
${{ matrix.asset_name }}.tar.gz
|
${{ matrix.asset_name }}.*
|
||||||
${{ matrix.asset_name }}.sha256
|
|
||||||
|
|
||||||
update_homebrew_tap:
|
update_homebrew_tap:
|
||||||
name: Update Homebrew tap
|
name: Update Homebrew tap
|
||||||
|
|
@ -99,6 +121,31 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
./scripts/update-homebrew-formula.sh "${GITHUB_REF_NAME}" homebrew-tap/Formula/omnigraph.rb
|
./scripts/update-homebrew-formula.sh "${GITHUB_REF_NAME}" homebrew-tap/Formula/omnigraph.rb
|
||||||
|
|
||||||
|
# Diagnostic only: brew is not on PATH on the ubuntu runner by default, so
|
||||||
|
# set it up explicitly. Both this setup and the audit below are best-effort
|
||||||
|
# canaries, not gates — continue-on-error on each keeps a failed/flaky brew
|
||||||
|
# (the action is pinned to a moving @master ref) from skipping the actual
|
||||||
|
# tap publish below. The formula is correct by construction
|
||||||
|
# (update-homebrew-formula.sh), so brew tooling must never block the push.
|
||||||
|
- name: Set up Homebrew
|
||||||
|
if: env.HOMEBREW_TAP_SKIP != '1'
|
||||||
|
continue-on-error: true
|
||||||
|
uses: Homebrew/actions/setup-homebrew@master
|
||||||
|
|
||||||
|
- name: Audit generated formula
|
||||||
|
if: env.HOMEBREW_TAP_SKIP != '1'
|
||||||
|
continue-on-error: true
|
||||||
|
run: |
|
||||||
|
# Audit the checked-out tap by name (brew audit rejects bare paths
|
||||||
|
# and needs tap context). Symlink the checkout into Homebrew's Taps
|
||||||
|
# tree so `modernrelay/tap/omnigraph` resolves to it. Offline audit
|
||||||
|
# (no --online) keeps it deterministic; it still catches the
|
||||||
|
# ComponentsOrder/structure class of problems.
|
||||||
|
tap_dir="$(brew --repository)/Library/Taps/modernrelay/homebrew-tap"
|
||||||
|
mkdir -p "$(dirname "$tap_dir")"
|
||||||
|
ln -sfn "$PWD/homebrew-tap" "$tap_dir"
|
||||||
|
brew audit --strict modernrelay/tap/omnigraph
|
||||||
|
|
||||||
- name: Commit and push formula update
|
- name: Commit and push formula update
|
||||||
if: env.HOMEBREW_TAP_SKIP != '1'
|
if: env.HOMEBREW_TAP_SKIP != '1'
|
||||||
working-directory: homebrew-tap
|
working-directory: homebrew-tap
|
||||||
|
|
@ -113,3 +160,22 @@ jobs:
|
||||||
git add Formula/omnigraph.rb
|
git add Formula/omnigraph.rb
|
||||||
git commit -m "Update Omnigraph formula to ${GITHUB_REF_NAME}"
|
git commit -m "Update Omnigraph formula to ${GITHUB_REF_NAME}"
|
||||||
git push origin HEAD:main
|
git push origin HEAD:main
|
||||||
|
|
||||||
|
smoke_windows_installer:
|
||||||
|
name: Smoke Windows installer
|
||||||
|
needs: build_release
|
||||||
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
|
runs-on: windows-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
steps:
|
||||||
|
- name: Checkout source
|
||||||
|
uses: actions/checkout@v5.0.1
|
||||||
|
|
||||||
|
- name: Install from tagged release
|
||||||
|
run: ./scripts/install.ps1 -Version "$env:GITHUB_REF_NAME" -InstallDir "$env:RUNNER_TEMP/omnigraph-bin"
|
||||||
|
|
||||||
|
- name: Smoke installed binaries
|
||||||
|
run: |
|
||||||
|
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph.exe" version
|
||||||
|
& "$env:RUNNER_TEMP/omnigraph-bin/omnigraph-server.exe" --help
|
||||||
|
|
|
||||||
34
AGENTS.md
34
AGENTS.md
|
|
@ -16,7 +16,7 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th
|
||||||
|
|
||||||
`CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`.
|
`CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`.
|
||||||
|
|
||||||
**Version surveyed:** 0.6.0
|
**Version surveyed:** 0.6.1
|
||||||
**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-policy`, `omnigraph-cli`, `omnigraph-server`
|
**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-policy`, `omnigraph-cli`, `omnigraph-server`
|
||||||
**Storage substrate:** Lance 6.x (columnar, versioned, branchable)
|
**Storage substrate:** Lance 6.x (columnar, versioned, branchable)
|
||||||
**License:** MIT
|
**License:** MIT
|
||||||
|
|
@ -81,7 +81,7 @@ Full diagram and concurrency model: [docs/dev/architecture.md](docs/dev/architec
|
||||||
| Embeddings (compiler + engine clients, env vars, `@embed`) | [docs/user/embeddings.md](docs/user/embeddings.md) |
|
| Embeddings (compiler + engine clients, env vars, `@embed`) | [docs/user/embeddings.md](docs/user/embeddings.md) |
|
||||||
| Branches, commit graph, snapshots, system branches | [docs/user/branches-commits.md](docs/user/branches-commits.md) |
|
| Branches, commit graph, snapshots, system branches | [docs/user/branches-commits.md](docs/user/branches-commits.md) |
|
||||||
| Transactions and atomicity (per-query atomic; branches as multi-query transactions) | [docs/user/transactions.md](docs/user/transactions.md) |
|
| Transactions and atomicity (per-query atomic; branches as multi-query transactions) | [docs/user/transactions.md](docs/user/transactions.md) |
|
||||||
| Direct-publish writes (the former Run state machine, now demoted to publisher CAS) | [docs/dev/runs.md](docs/dev/runs.md) |
|
| Direct-publish write path (staging, D2, recovery sidecars; the former Run state machine) | [docs/dev/writes.md](docs/dev/writes.md) |
|
||||||
| Three-way merge and conflict kinds | [docs/dev/merge.md](docs/dev/merge.md) |
|
| Three-way merge and conflict kinds | [docs/dev/merge.md](docs/dev/merge.md) |
|
||||||
| Diff / change feed (`diff_between`, `diff_commits`) | [docs/user/changes.md](docs/user/changes.md) |
|
| Diff / change feed (`diff_between`, `diff_commits`) | [docs/user/changes.md](docs/user/changes.md) |
|
||||||
| Query execution, mutation execution, bulk loader, `load` vs `ingest` | [docs/dev/execution.md](docs/dev/execution.md) |
|
| Query execution, mutation execution, bulk loader, `load` vs `ingest` | [docs/dev/execution.md](docs/dev/execution.md) |
|
||||||
|
|
@ -164,6 +164,32 @@ If a proposal fits one of these, the burden is on the proposer to justify why th
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Build, test, lint
|
||||||
|
|
||||||
|
Rust stable workspace (edition 2024). `protoc` is a build dependency (`brew install protobuf` / `apt-get install protobuf-compiler libprotobuf-dev`). **Crate dir ≠ package name** for the engine: the directory is `crates/omnigraph` but its Cargo package is `omnigraph-engine` (use that in `-p`). The CLI binary built from `omnigraph-cli` is named `omnigraph`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo build --workspace --locked # build everything
|
||||||
|
cargo test --workspace --locked # the canonical CI gate (matches CI exactly)
|
||||||
|
cargo run -p omnigraph-cli -- <args> # run the `omnigraph` CLI from source
|
||||||
|
cargo run -p omnigraph-server -- <uri> --bind 0.0.0.0:8080 # run the server from source
|
||||||
|
|
||||||
|
# Run one crate / one test file / one test fn
|
||||||
|
cargo test -p omnigraph-engine --test traversal # one integration-test file (see docs/dev/testing.md)
|
||||||
|
cargo test -p omnigraph-engine --test writes concurrent # one test fn by name substring
|
||||||
|
cargo test -p omnigraph-engine some_inline_test -- --nocapture # show stdout
|
||||||
|
|
||||||
|
# Feature-gated suites (each is its own job in CI, not part of the default run)
|
||||||
|
cargo test -p omnigraph-engine --features failpoints --test failpoints # fault injection
|
||||||
|
cargo build -p omnigraph-server --features aws # AWS Secrets Manager bearer-token source
|
||||||
|
```
|
||||||
|
|
||||||
|
S3-backed tests (`s3_storage`, and the S3 paths in server/CLI system tests) **skip** unless `OMNIGRAPH_S3_TEST_BUCKET` + `AWS_*` (incl. `AWS_ENDPOINT_URL_S3` for non-AWS) are set; CI runs them against containerized RustFS. `scripts/local-rustfs-bootstrap.sh` stands up a local S3 environment.
|
||||||
|
|
||||||
|
CI does **not** run `clippy` or `rustfmt` as gates — but `cargo test --workspace --locked` is the exact gate, so run it before pushing. Two non-test CI checks: `scripts/check-agents-md.sh` (doc cross-link integrity — run it after moving/renaming docs) and OpenAPI drift (`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json`; set `OMNIGRAPH_UPDATE_OPENAPI=1` to update the checked-in copy when a server/API change is intentional).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Quick-reference flows
|
## Quick-reference flows
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -210,8 +236,8 @@ omnigraph policy explain --actor act-alice --action change --branch main
|
||||||
| Columnar storage on object store | ✅ Arrow/Lance | URI normalization, S3 env-var plumbing |
|
| Columnar storage on object store | ✅ Arrow/Lance | URI normalization, S3 env-var plumbing |
|
||||||
| Per-dataset versioning + time travel | ✅ | `snapshot_at_version`, `entity_at`, snapshot-pinned reads across many tables |
|
| Per-dataset versioning + time travel | ✅ | `snapshot_at_version`, `entity_at`, snapshot-pinned reads across many tables |
|
||||||
| Per-dataset branches | ✅ | **Graph-level** branches (atomic across all sub-tables), lazy fork, system branch filtering |
|
| Per-dataset branches | ✅ | **Graph-level** branches (atomic across all sub-tables), lazy fork, system branch filtering |
|
||||||
| Atomic single-dataset commits | ✅ | **Multi-table publish via three layers**, NOT a single Lance primitive: (1) per-table Lance `commit_staged` for the data write, (2) `__manifest` row-level CAS via `ManifestBatchPublisher` for cross-table ordering, (3) the open-time recovery sweep for the residual gap between (1) and (2). All three layers ship; the four migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) write a `__recovery/{ulid}.json` sidecar before Phase B and delete it after Phase C. The next `Omnigraph::open` (gated on `OpenMode::ReadWrite`) runs the sweep in `db/manifest/recovery.rs`: classify, decide all-or-nothing per sidecar, roll forward via single `ManifestBatchPublisher::publish` or roll back via `Dataset::restore`, and record an audit row in `_graph_commit_recoveries.lance` (queryable via `omnigraph commit list --filter actor=omnigraph:recovery`). Continuous in-process recovery (no restart needed between Phase B failure and recovery) is the goal of a future background reconciler. Engine writes route through a sealed `TableStorage` trait exposing `stage_*` + `commit_staged` as the canonical staged-write surface; documented inline-commit residuals (`delete_where`, `create_vector_index`, plus legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` / `create_*_index`) remain on the trait until upstream Lance ships a public two-phase API ([#6658](https://github.com/lance-format/lance/issues/6658), [#6666](https://github.com/lance-format/lance/issues/6666)) and the migration of every call site completes. |
|
| Atomic single-dataset commits | ✅ | **Multi-table publish via three layers**, NOT a single Lance primitive: (1) per-table Lance `commit_staged` for the data write, (2) `__manifest` row-level CAS via `ManifestBatchPublisher` for cross-table ordering, (3) the open-time recovery sweep for the residual gap between (1) and (2). All three layers ship; the five migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`, `optimize_all_tables`) write a `__recovery/{ulid}.json` sidecar before Phase B and delete it after Phase C. The next `Omnigraph::open` (gated on `OpenMode::ReadWrite`) runs the sweep in `db/manifest/recovery.rs`: classify, decide all-or-nothing per sidecar, roll forward via single `ManifestBatchPublisher::publish` or roll back via `Dataset::restore` followed by a manifest publish of the restored version (so both directions converge to `manifest == HEAD` — no residual drift), and record an audit row in `_graph_commit_recoveries.lance` (queryable via `omnigraph commit list --filter actor=omnigraph:recovery`). Continuous in-process recovery (no restart needed between Phase B failure and recovery) is the goal of a future background reconciler. Engine writes route through a sealed `TableStorage` trait exposing `stage_*` + `commit_staged` as the canonical staged-write surface; documented inline-commit residuals (`delete_where`, `create_vector_index`, plus legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` / `create_*_index`) remain on the trait until upstream Lance ships a public two-phase API ([#6658](https://github.com/lance-format/lance/issues/6658), [#6666](https://github.com/lance-format/lance/issues/6666)) and the migration of every call site completes. |
|
||||||
| Compaction (`compact_files`) | ✅ | `omnigraph optimize` orchestrates over all node/edge tables, bounded concurrency |
|
| Compaction (`compact_files`) | ✅ | `omnigraph optimize` orchestrates over all node/edge tables, bounded concurrency; **publishes each compacted table's new version to `__manifest`** (so the manifest tracks the Lance HEAD — required for reads to observe compaction and for schema apply / strict writes to pass their HEAD-vs-manifest precondition), under the per-`(table, main)` write queue with `SidecarKind::Optimize` recovery coverage; **refuses on an unrecovered graph** (errors if a `__recovery` sidecar is pending — recovery may roll back a partial write, so optimize requires `manifest == HEAD` going in); **skips blob-bearing tables** (reported via `TableOptimizeStats.skipped`, not silent), gated on `LANCE_SUPPORTS_BLOB_COMPACTION` until the upstream blob-v2 compaction-decode bug is fixed (see [docs/dev/invariants.md](docs/dev/invariants.md) Known Gaps) |
|
||||||
| Cleanup (`cleanup_old_versions`) | ✅ | `omnigraph cleanup` with `--keep` / `--older-than` policy |
|
| Cleanup (`cleanup_old_versions`) | ✅ | `omnigraph cleanup` with `--keep` / `--older-than` policy |
|
||||||
| BTREE / inverted (FTS) / vector indexes | ✅ | `ensure_indices` builds them on every relevant column; idempotent; lazy across branches |
|
| BTREE / inverted (FTS) / vector indexes | ✅ | `ensure_indices` builds them on every relevant column; idempotent; lazy across branches |
|
||||||
| `merge_insert` upsert | ✅ | `LoadMode::Merge`, mutation `update`/`insert`/`delete` lowering |
|
| `merge_insert` upsert | ✅ | `LoadMode::Merge`, mutation `update`/`insert`/`delete` lowering |
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,29 @@
|
||||||
# Contributing
|
# Contributing
|
||||||
|
|
||||||
Small bug fixes and documentation improvements are welcome directly through pull
|
Thanks for your interest in OmniGraph. This page is the practical how-to; the
|
||||||
requests.
|
rules and decision authority behind it live in [GOVERNANCE.md](GOVERNANCE.md).
|
||||||
|
|
||||||
For larger changes, please open an issue or design discussion first so the
|
## Start in the right place
|
||||||
proposed direction is clear before implementation starts.
|
|
||||||
|
| I want to… | Go to | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| **Report a bug** or wrong behavior | **[Open an Issue](../../issues/new/choose)** | Concrete and reproducible. A maintainer triages it; once labelled **`accepted`** it's open for a PR. |
|
||||||
|
| **Suggest a feature / share an idea / ask** | **[Start a Discussion](../../discussions)** | Ideas and questions live here, not in Issues. |
|
||||||
|
| **Propose a design / RFC** | **An RFC pull request** | Anyone can author one — see [docs/rfcs/README.md](docs/rfcs/README.md). A maintainer merging it is acceptance. |
|
||||||
|
| **Fix something / implement a change** | **A pull request** | Must link an `accepted` issue or an accepted RFC — unless it's trivial (below). |
|
||||||
|
| **Report a security vulnerability** | **[SECURITY.md](SECURITY.md)** | Do **not** open a public Issue. |
|
||||||
|
|
||||||
|
### When can I just open a PR?
|
||||||
|
The **trivial fast-lane** — open directly, no prior issue/RFC needed: typo and
|
||||||
|
wording fixes, doc corrections, dependency bumps, comment fixes, obvious
|
||||||
|
one-line CI tweaks. Anything more substantial needs a backing `accepted` issue
|
||||||
|
or accepted RFC first, so the *why* is agreed before the *how* is reviewed. A PR
|
||||||
|
that turns out to be non-trivial will be redirected — that's about process, not
|
||||||
|
the merit of the change.
|
||||||
|
|
||||||
|
> **Maintainers (ModernRelay team)** follow a separate internal process and are
|
||||||
|
> not bound by the intake rules above. Everyone is bound by review, CODEOWNERS,
|
||||||
|
> branch protection, and CI.
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
|
|
@ -49,6 +68,11 @@ CI runs both.
|
||||||
|
|
||||||
## Pull Requests
|
## Pull Requests
|
||||||
|
|
||||||
- keep changes focused
|
- **Link the backing issue or RFC** (`Closes #123`, or reference the RFC) — or
|
||||||
- include tests for behavior changes when practical
|
mark the PR as trivial per the fast-lane.
|
||||||
- update public docs when the user-facing surface changes
|
- Keep changes focused; one logical change per PR.
|
||||||
|
- Include tests for behavior changes when practical.
|
||||||
|
- Update public docs when the user-facing surface changes.
|
||||||
|
|
||||||
|
New to the codebase? Read [AGENTS.md](AGENTS.md) — the architecture map and the
|
||||||
|
always-on invariants every change is reviewed against.
|
||||||
|
|
|
||||||
10
Cargo.lock
generated
10
Cargo.lock
generated
|
|
@ -4543,7 +4543,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "omnigraph-cli"
|
name = "omnigraph-cli"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"assert_cmd",
|
"assert_cmd",
|
||||||
"clap",
|
"clap",
|
||||||
|
|
@ -4565,7 +4565,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "omnigraph-compiler"
|
name = "omnigraph-compiler"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
|
|
@ -4586,7 +4586,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "omnigraph-engine"
|
name = "omnigraph-engine"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
|
|
@ -4627,7 +4627,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "omnigraph-policy"
|
name = "omnigraph-policy"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cedar-policy",
|
"cedar-policy",
|
||||||
"clap",
|
"clap",
|
||||||
|
|
@ -4640,7 +4640,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "omnigraph-server"
|
name = "omnigraph-server"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
|
|
||||||
106
GOVERNANCE.md
Normal file
106
GOVERNANCE.md
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
# Governance
|
||||||
|
|
||||||
|
This document describes how **external contributions** to OmniGraph are
|
||||||
|
proposed, accepted, and merged. It exists so an outside contributor can answer,
|
||||||
|
without asking: *where does my report/idea/change go, who decides, and what has
|
||||||
|
to happen before code lands?*
|
||||||
|
|
||||||
|
> **Scope.** This governs the public contribution surface — Issues,
|
||||||
|
> Discussions, RFCs, and pull requests from people outside the ModernRelay
|
||||||
|
> team. **Maintainers operate under a separate internal process** and are not
|
||||||
|
> bound by the intake gates below. Everyone, maintainer or not, is still bound
|
||||||
|
> by the universal gates: branch protection on `main` and CODEOWNERS review
|
||||||
|
> (see [docs/dev/branch-protection.md](docs/dev/branch-protection.md) and
|
||||||
|
> [docs/dev/codeowners.md](docs/dev/codeowners.md)).
|
||||||
|
|
||||||
|
## Roles
|
||||||
|
|
||||||
|
| Role | Who | Authority |
|
||||||
|
|---|---|---|
|
||||||
|
| **Maintainer** | The code owners in [`.github/CODEOWNERS`](.github/CODEOWNERS) (generated from [`.github/codeowners-roles.yml`](.github/codeowners-roles.yml)) | Validate issues, accept/reject RFCs, review and merge PRs, set direction. Final decision authority. |
|
||||||
|
| **Contributor** | Anyone else | Report problems (Issues), propose ideas (Discussions), author RFCs, and open pull requests. |
|
||||||
|
|
||||||
|
Decision authority rests with the maintainers. CODEOWNERS is the single source
|
||||||
|
of truth for who that is; this document does not duplicate the list.
|
||||||
|
|
||||||
|
## The three channels
|
||||||
|
|
||||||
|
Each channel has one job. Using the right one is the first thing we ask of a
|
||||||
|
contribution.
|
||||||
|
|
||||||
|
| Channel | Purpose | Not for |
|
||||||
|
|---|---|---|
|
||||||
|
| **[Issues](../../issues)** | **Report a problem** — a bug, a regression, a documented behavior that's wrong. Something concrete and reproducible. | Feature requests, ideas, questions, or design proposals (→ Discussions). |
|
||||||
|
| **[Discussions](../../discussions)** | **Propose and explore** — new ideas, feature requests, questions, and the incubation of RFCs. | Bug reports (→ Issues). |
|
||||||
|
| **Pull requests** | **Land a sanctioned change** — a fix for a *validated* issue, an *accepted* RFC, or a trivial change (see fast-lane). | Substantive change with no backing issue/RFC — it will be redirected. |
|
||||||
|
|
||||||
|
## How a change becomes mergeable
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────── bug ───────────┐ ┌──────── idea / feature ────────┐
|
||||||
|
▼ │ ▼ │
|
||||||
|
Issue (problem report) │ Discussion (idea / RFC incubation) │
|
||||||
|
│ │ │ │
|
||||||
|
maintainer triage │ rough consensus │
|
||||||
|
│ │ │ graduate │
|
||||||
|
▼ │ ▼ │
|
||||||
|
label: accepted ──────────┐ │ RFC PR (docs/rfcs/NNNN-*.md) │
|
||||||
|
│ │ │ │ │
|
||||||
|
│ │ │ maintainer review │
|
||||||
|
▼ ▼ │ ▼ │
|
||||||
|
Pull request ◀──────────┴──────────│── merged == accepted │
|
||||||
|
(links the issue or the accepted RFC) ◀───────┘ (implementation PRs reference it) │
|
||||||
|
│
|
||||||
|
review + CODEOWNERS + branch protection
|
||||||
|
▼
|
||||||
|
merged
|
||||||
|
```
|
||||||
|
|
||||||
|
### Issues → validated
|
||||||
|
A new issue starts unlabelled. A maintainer triages it and, if it's a real,
|
||||||
|
in-scope problem, applies the **`accepted`** label. **Only `accepted` issues are
|
||||||
|
open for a contributor PR.** This prevents the "I fixed an issue you hadn't
|
||||||
|
agreed was a problem" rejection. Want to fix something? Get the issue accepted
|
||||||
|
first, or pick one already labelled `accepted` / `help wanted`.
|
||||||
|
|
||||||
|
### Discussions → RFCs → accepted
|
||||||
|
Ideas and feature requests start in **Discussions**. Anyone — including external
|
||||||
|
contributors — may then **author an RFC** by opening a pull request that adds
|
||||||
|
`docs/rfcs/NNNN-title.md` (see [docs/rfcs/README.md](docs/rfcs/README.md)). The
|
||||||
|
RFC is reviewed as code; **a maintainer merging it is the act of acceptance**
|
||||||
|
(it becomes the durable decision record). Implementation PRs then reference the
|
||||||
|
accepted RFC.
|
||||||
|
|
||||||
|
Authoring an RFC is open to everyone; **accepting one is a maintainer
|
||||||
|
decision.** Maintainers may also decline an RFC, with rationale, by closing it.
|
||||||
|
|
||||||
|
### Pull requests → sanctioned
|
||||||
|
A contributor PR must do one of:
|
||||||
|
1. link a maintainer-**`accepted`** issue it fixes, or
|
||||||
|
2. be (or reference) an **accepted RFC**, or
|
||||||
|
3. qualify for the **trivial fast-lane**.
|
||||||
|
|
||||||
|
**Trivial fast-lane** — these may be opened directly, no prior issue/RFC:
|
||||||
|
typo and wording fixes, documentation corrections, dependency bumps, comment
|
||||||
|
fixes, and obviously-correct one-line CI tweaks. When in doubt, open an Issue or
|
||||||
|
Discussion first; a PR that turns out to be non-trivial will be asked to do so.
|
||||||
|
|
||||||
|
A substantive PR with no backing issue/RFC will be closed with a pointer to the
|
||||||
|
right channel — not as a judgment of the idea, but to keep design discussion
|
||||||
|
where it's reviewable.
|
||||||
|
|
||||||
|
## What maintainers do *not* gate
|
||||||
|
Maintainers' own changes do not pass through the intake gates above — the team
|
||||||
|
runs a separate internal process. The universal gates (review, CODEOWNERS,
|
||||||
|
branch protection, CI) apply to everyone. Enforcement of the intake rules is, to
|
||||||
|
start, **by convention and review** (PR template + labels); an automated check
|
||||||
|
keyed to author association may be added later if volume warrants.
|
||||||
|
|
||||||
|
## Code of conduct & security
|
||||||
|
- Conduct: [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md).
|
||||||
|
- Security issues are **not** public Issues — see [SECURITY.md](SECURITY.md).
|
||||||
|
|
||||||
|
## Changing this document
|
||||||
|
Governance changes the same way code does: a pull request, reviewed by
|
||||||
|
maintainers. This file describes the external surface; the internal maintainer
|
||||||
|
process is intentionally out of scope here.
|
||||||
75
README.md
75
README.md
|
|
@ -5,33 +5,35 @@
|
||||||
[](https://crates.io/crates/omnigraph-cli)
|
[](https://crates.io/crates/omnigraph-cli)
|
||||||
[](https://github.com/ModernRelay/omnigraph/actions/workflows/ci.yml)
|
[](https://github.com/ModernRelay/omnigraph/actions/workflows/ci.yml)
|
||||||
|
|
||||||
**Object-storage native knowledge graph with git-style workflows. Designed for agents and humans to collaborate on shared structured knowledge.**
|
**Lakehouse native graph engine built for context assembly**
|
||||||
|
|
||||||
Turns fragmented context into a live graph, lets humans and agents coordinate through that graph, and uses branches so agent-generated changes can be reviewed and merged safely.
|
Omnigraph acts as the operational state & coordination layer for agents
|
||||||
|
|
||||||
Built on Rust, Arrow, DataFusion and Lance.
|
- Git-style versioning & branching
|
||||||
|
- Multimodal retrieval (graph+vector/fts+filters) optimized for context assembly
|
||||||
Join the [Omnigraph Slack community](https://join.slack.com/t/omnigraphworkspace/shared_invite/zt-3wfpglyxj-lHvJGhuySPfqLtN35uJZNw)
|
- Object storage native (S3, RustFS)
|
||||||
|
|
||||||
## Use Cases
|
|
||||||
|
|
||||||
- Company brain / [Second brain](https://github.com/ModernRelay/omnigraph-cookbooks/tree/main/second-brain)
|
|
||||||
- Context graph
|
|
||||||
- Knowledge base for multi-agent research
|
|
||||||
- Incident response graph
|
|
||||||
- Compliance & audit graph
|
|
||||||
|
|
||||||
|
|
||||||
## Capabilities
|
|
||||||
|
|
||||||
- Typed schema, typed queries, and typed mutations
|
|
||||||
- Native blob-as-data support (docs, images, videos, etc)
|
- Native blob-as-data support (docs, images, videos, etc)
|
||||||
- Schema-as-code, query validation and linting
|
- VPC, On-prem, hybrid deployment
|
||||||
- Git-style graph workflows: branches, commits, merges, and transactional runs
|
- [`Lance`](https://github.com/lance-format/lance) format as open storage layer
|
||||||
- Local, on-prem & cloud S3-native storage with snapshot-pinned reads
|
|
||||||
- Graph traversal + text, fuzzy, BM25, vector, and RRF search in one runtime
|
| AS CODE | What it means |
|
||||||
- Policy-as-code for server-side access control
|
|---|---|
|
||||||
- Single CLI for multiple deployments
|
| **Schema AS CODE** | Typed `.pg` schemas, planned, applied, enforced |
|
||||||
|
| **Context AS CODE** | Linted queries & agentic nudges, versioned and reusable |
|
||||||
|
| **Security AS CODE** | Cedar policies enforced server-side on every mutation |
|
||||||
|
| **Dashboards AS CODE** | Declarative views & controls over the graph *(coming)* |
|
||||||
|
|
||||||
|
## Core Use Cases
|
||||||
|
|
||||||
|
| Use case | What it's for |
|
||||||
|
|---|---|
|
||||||
|
| **Company brain** | Org knowledge unified into one queryable graph |
|
||||||
|
| **Context graph** | Decision traces and codified tribal knowledge |
|
||||||
|
| **Agentic memory** | Durable, versioned memory for long-running agents |
|
||||||
|
| **Dev graph** | Issues & dependency model for coding agents |
|
||||||
|
| **R&D data layer** | Experiments & trials data written into branches |
|
||||||
|
| **ML workflows** | Versioned, branchable graphs for training & eval |
|
||||||
|
| **Karpathy's LLM wiki** | A living, agent-updatable knowledge base |
|
||||||
|
|
||||||
## Quick Install
|
## Quick Install
|
||||||
|
|
||||||
|
|
@ -86,12 +88,29 @@ omnigraph branch create --from main feature-x ./graph.omni
|
||||||
omnigraph branch merge feature-x --into main ./graph.omni
|
omnigraph branch merge feature-x --into main ./graph.omni
|
||||||
```
|
```
|
||||||
|
|
||||||
See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, ingest, runs, and policy commands.
|
See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, ingest, commits, and policy commands.
|
||||||
|
|
||||||
|
## Clients
|
||||||
|
|
||||||
|
For programmatic access to a running `omnigraph-server`:
|
||||||
|
|
||||||
|
- **TypeScript SDK** — [`@modernrelay/omnigraph`](https://www.npmjs.com/package/@modernrelay/omnigraph) ([source](https://github.com/ModernRelay/omnigraph-ts/tree/main/packages/sdk)). Instance-per-client, typed errors, camelCase types, async-iterator streaming export.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install @modernrelay/omnigraph
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Model Context Protocol server** — [`@modernrelay/omnigraph-mcp`](https://www.npmjs.com/package/@modernrelay/omnigraph-mcp) ([source](https://github.com/ModernRelay/omnigraph-ts/tree/main/packages/mcp)). Bridges Omnigraph to LLM hosts (Claude Desktop, Claude Code, …) over stdio. Exposes tools and resources for schema, branches, queries, mutations, ingest, and bundles curated best-practices guidance from the cookbook.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install -g @modernrelay/omnigraph-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
Both packages are versioned in lockstep with `omnigraph-server` on major.minor: `@modernrelay/omnigraph@X.Y.*` targets `omnigraph-server@X.Y.*`. See [`ModernRelay/omnigraph-ts`](https://github.com/ModernRelay/omnigraph-ts) for the monorepo.
|
||||||
|
|
||||||
## Docs
|
## Docs
|
||||||
|
|
||||||
- [Install guide](docs/user/install.md)
|
- [Install guide](docs/user/install.md)
|
||||||
- [CLI guide](docs/user/cli.md)
|
|
||||||
- [Deployment guide](docs/user/deployment.md)
|
- [Deployment guide](docs/user/deployment.md)
|
||||||
|
|
||||||
## Build And Test
|
## Build And Test
|
||||||
|
|
@ -113,8 +132,8 @@ Notes:
|
||||||
|
|
||||||
- `crates/omnigraph-compiler`: shared schema/query parser, typechecker, catalog, and IR lowering
|
- `crates/omnigraph-compiler`: shared schema/query parser, typechecker, catalog, and IR lowering
|
||||||
- `crates/omnigraph`: storage/runtime, branching, merge, change detection, and query execution
|
- `crates/omnigraph`: storage/runtime, branching, merge, change detection, and query execution
|
||||||
- `crates/omnigraph-cli`: CLI for init/load/ingest/read/change/branch/snapshot/export/policy operations
|
- `crates/omnigraph-cli`: CLI for graph lifecycle (init/load/ingest), query/mutate, branch/commit/merge, schema/lint, snapshot/export, policy, and maintenance (optimize/cleanup)
|
||||||
- `crates/omnigraph-server`: Axum HTTP server for remote reads, changes, ingest, export, branches, commits, and runs
|
- `crates/omnigraph-server`: Axum HTTP server for remote reads, changes, ingest, export, branches, and commits
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "omnigraph-cli"
|
name = "omnigraph-cli"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
description = "CLI for the Omnigraph graph database."
|
description = "CLI for the Omnigraph graph database."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
@ -13,10 +13,10 @@ name = "omnigraph"
|
||||||
path = "src/main.rs"
|
path = "src/main.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
|
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
|
||||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
|
||||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
|
||||||
omnigraph-server = { path = "../omnigraph-server", version = "0.6.0" }
|
omnigraph-server = { path = "../omnigraph-server", version = "0.6.1" }
|
||||||
clap = { workspace = true }
|
clap = { workspace = true }
|
||||||
color-eyre = { workspace = true }
|
color-eyre = { workspace = true }
|
||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ use clap::{Arg, ArgAction, Args, CommandFactory, FromArgMatches, Parser, Subcomm
|
||||||
use color_eyre::eyre::{Result, bail};
|
use color_eyre::eyre::{Result, bail};
|
||||||
use omnigraph::db::{Omnigraph, ReadTarget, SnapshotId};
|
use omnigraph::db::{Omnigraph, ReadTarget, SnapshotId};
|
||||||
use omnigraph::loader::LoadMode;
|
use omnigraph::loader::LoadMode;
|
||||||
|
use omnigraph::storage::normalize_root_uri;
|
||||||
use omnigraph_compiler::query::parser::parse_query;
|
use omnigraph_compiler::query::parser::parse_query;
|
||||||
use omnigraph_compiler::schema::parser::parse_schema;
|
use omnigraph_compiler::schema::parser::parse_schema;
|
||||||
use omnigraph_compiler::{
|
use omnigraph_compiler::{
|
||||||
|
|
@ -24,9 +25,10 @@ use omnigraph_server::api::{
|
||||||
SnapshotTableOutput, commit_output, ingest_output, read_output, schema_apply_output,
|
SnapshotTableOutput, commit_output, ingest_output, read_output, schema_apply_output,
|
||||||
snapshot_payload,
|
snapshot_payload,
|
||||||
};
|
};
|
||||||
|
use omnigraph_server::queries::{QueryRegistry, check, format_check_breakages};
|
||||||
use omnigraph_server::{
|
use omnigraph_server::{
|
||||||
AliasCommand, OmnigraphConfig, PolicyAction, PolicyDecision, PolicyEngine, PolicyRequest,
|
AliasCommand, OmnigraphConfig, PolicyAction, PolicyDecision, PolicyEngine, PolicyRequest,
|
||||||
PolicyTestConfig, ReadOutputFormat, load_config,
|
PolicyTestConfig, ReadOutputFormat, graph_resource_id_for_selection, load_config,
|
||||||
};
|
};
|
||||||
use reqwest::Method;
|
use reqwest::Method;
|
||||||
use reqwest::header::AUTHORIZATION;
|
use reqwest::header::AUTHORIZATION;
|
||||||
|
|
@ -153,6 +155,11 @@ enum Command {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
json: bool,
|
json: bool,
|
||||||
},
|
},
|
||||||
|
/// Operate on the server-side stored-query registry (`queries:`).
|
||||||
|
Queries {
|
||||||
|
#[command(subcommand)]
|
||||||
|
command: QueriesCommand,
|
||||||
|
},
|
||||||
/// Show graph snapshot
|
/// Show graph snapshot
|
||||||
Snapshot {
|
Snapshot {
|
||||||
/// Graph URI
|
/// Graph URI
|
||||||
|
|
@ -502,6 +509,35 @@ enum PolicyCommand {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Subcommand)]
|
||||||
|
enum QueriesCommand {
|
||||||
|
/// Type-check the stored-query registry against the live schema.
|
||||||
|
///
|
||||||
|
/// Distinct from `omnigraph lint` (which lints one `.gq` file):
|
||||||
|
/// this validates the whole `queries:` registry — opening the graph
|
||||||
|
/// to read its schema and confirming every stored query still
|
||||||
|
/// type-checks. Exits non-zero on any breakage.
|
||||||
|
Validate {
|
||||||
|
/// Graph URI
|
||||||
|
uri: Option<String>,
|
||||||
|
#[arg(long)]
|
||||||
|
target: Option<String>,
|
||||||
|
#[arg(long)]
|
||||||
|
config: Option<PathBuf>,
|
||||||
|
#[arg(long)]
|
||||||
|
json: bool,
|
||||||
|
},
|
||||||
|
/// List the registered stored queries (name, MCP exposure, params).
|
||||||
|
List {
|
||||||
|
#[arg(long)]
|
||||||
|
target: Option<String>,
|
||||||
|
#[arg(long)]
|
||||||
|
config: Option<PathBuf>,
|
||||||
|
#[arg(long)]
|
||||||
|
json: bool,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Args, Clone)]
|
#[derive(Debug, Args, Clone)]
|
||||||
struct ParamsArgs {
|
struct ParamsArgs {
|
||||||
#[arg(long, conflicts_with = "params_file")]
|
#[arg(long, conflicts_with = "params_file")]
|
||||||
|
|
@ -743,25 +779,66 @@ fn load_cli_config(config_path: Option<&PathBuf>) -> Result<OmnigraphConfig> {
|
||||||
Ok(config)
|
Ok(config)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_policy_engine(config: &OmnigraphConfig) -> Result<PolicyEngine> {
|
#[derive(Debug, Clone)]
|
||||||
let policy_file = config
|
struct ResolvedCliGraph {
|
||||||
.resolve_policy_file()
|
uri: String,
|
||||||
.ok_or_else(|| color_eyre::eyre::eyre!("policy.file must be set in omnigraph.yaml"))?;
|
selected: Option<String>,
|
||||||
PolicyEngine::load_graph(&policy_file, &policy_graph_id(config))
|
graph_id: String,
|
||||||
|
policy_file: Option<PathBuf>,
|
||||||
|
is_remote: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Open a local-URI graph and, when `policy.file` is configured in
|
impl ResolvedCliGraph {
|
||||||
/// `omnigraph.yaml`, install the resolved `PolicyEngine` on the engine
|
fn selected(&self) -> Option<&str> {
|
||||||
/// handle so every direct-engine write goes through
|
self.selected.as_deref()
|
||||||
/// `Omnigraph::enforce(...)` (MR-722). Without a configured policy this
|
}
|
||||||
/// is identical to a bare `Omnigraph::open`.
|
}
|
||||||
///
|
|
||||||
/// Returns owned `Omnigraph`; chained on top of `Omnigraph::open(...)`'s
|
struct ResolvedPolicyContext {
|
||||||
/// existing future to keep call sites narrow.
|
policy_file: PathBuf,
|
||||||
async fn open_local_db_with_policy(uri: &str, config: &OmnigraphConfig) -> Result<Omnigraph> {
|
graph_id: String,
|
||||||
let db = Omnigraph::open(uri).await?;
|
}
|
||||||
if config.resolve_policy_file().is_some() {
|
|
||||||
let engine = Arc::new(resolve_policy_engine(config)?);
|
fn resolve_policy_context(config: &OmnigraphConfig) -> Result<ResolvedPolicyContext> {
|
||||||
|
let selected = config.resolve_policy_tooling_graph_selection()?;
|
||||||
|
let policy_file = config
|
||||||
|
.resolve_policy_file_for(selected)
|
||||||
|
.ok_or_else(|| {
|
||||||
|
color_eyre::eyre::eyre!(
|
||||||
|
"policy.file or graphs.<name>.policy.file must be set in omnigraph.yaml"
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let graph_id = match selected {
|
||||||
|
Some(name) => graph_resource_id_for_selection(Some(name), ""),
|
||||||
|
None => graph_resource_id_for_selection(None, "default"),
|
||||||
|
};
|
||||||
|
Ok(ResolvedPolicyContext {
|
||||||
|
policy_file,
|
||||||
|
graph_id,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_policy_engine(context: &ResolvedPolicyContext) -> Result<PolicyEngine> {
|
||||||
|
PolicyEngine::load_graph(&context.policy_file, &context.graph_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_policy_engine_for_graph(graph: &ResolvedCliGraph) -> Result<PolicyEngine> {
|
||||||
|
let policy_file = graph.policy_file.as_ref().ok_or_else(|| {
|
||||||
|
color_eyre::eyre::eyre!(
|
||||||
|
"policy.file or graphs.<name>.policy.file must be set in omnigraph.yaml"
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
PolicyEngine::load_graph(policy_file, &graph.graph_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Open a local graph and install the policy resolved for the same graph
|
||||||
|
/// identity that produced the URI. A named graph uses
|
||||||
|
/// `graphs.<name>.policy.file`; an explicit positional URI is anonymous and
|
||||||
|
/// uses the legacy top-level `policy.file`.
|
||||||
|
async fn open_local_db_with_policy(graph: &ResolvedCliGraph) -> Result<Omnigraph> {
|
||||||
|
let db = Omnigraph::open(&graph.uri).await?;
|
||||||
|
if graph.policy_file.is_some() {
|
||||||
|
let engine = Arc::new(resolve_policy_engine_for_graph(graph)?);
|
||||||
Ok(db.with_policy(engine as Arc<dyn omnigraph_policy::PolicyChecker>))
|
Ok(db.with_policy(engine as Arc<dyn omnigraph_policy::PolicyChecker>))
|
||||||
} else {
|
} else {
|
||||||
Ok(db)
|
Ok(db)
|
||||||
|
|
@ -778,22 +855,16 @@ fn resolve_cli_actor<'a>(cli_as: Option<&'a str>, config: &'a OmnigraphConfig) -
|
||||||
cli_as.or(config.cli.actor.as_deref())
|
cli_as.or(config.cli.actor.as_deref())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_policy_tests_path(config: &OmnigraphConfig) -> Result<PathBuf> {
|
fn resolve_policy_tests_path(context: &ResolvedPolicyContext) -> PathBuf {
|
||||||
config.resolve_policy_tests_file().ok_or_else(|| {
|
context.policy_file.with_file_name("policy.tests.yaml")
|
||||||
color_eyre::eyre::eyre!(
|
|
||||||
"policy.tests.yaml requires policy.file to be set in omnigraph.yaml"
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn policy_graph_id(config: &OmnigraphConfig) -> String {
|
fn normalize_policy_graph_uri(uri: &str) -> Result<String> {
|
||||||
if let Some(name) = &config.project.name {
|
if is_remote_uri(uri) {
|
||||||
return name.clone();
|
Ok(uri.trim_end_matches('/').to_string())
|
||||||
|
} else {
|
||||||
|
Ok(normalize_root_uri(uri)?)
|
||||||
}
|
}
|
||||||
config
|
|
||||||
.resolve_target_uri(None, None, config.server_graph_name())
|
|
||||||
.or_else(|_| config.resolve_target_uri(None, None, config.cli_graph_name()))
|
|
||||||
.unwrap_or_else(|_| "default".to_string())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_remote_bearer_token(
|
fn resolve_remote_bearer_token(
|
||||||
|
|
@ -877,6 +948,47 @@ fn resolve_uri(
|
||||||
config.resolve_target_uri(cli_uri, cli_target, config.cli_graph_name())
|
config.resolve_target_uri(cli_uri, cli_target, config.cli_graph_name())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn resolve_cli_graph(
|
||||||
|
config: &OmnigraphConfig,
|
||||||
|
cli_uri: Option<String>,
|
||||||
|
cli_target: Option<&str>,
|
||||||
|
) -> Result<ResolvedCliGraph> {
|
||||||
|
let selected = if cli_uri.is_some() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
cli_target
|
||||||
|
.map(str::to_string)
|
||||||
|
.or_else(|| config.cli_graph_name().map(str::to_string))
|
||||||
|
};
|
||||||
|
config.resolve_graph_selection(selected.as_deref())?;
|
||||||
|
let uri = resolve_uri(config, cli_uri, cli_target)?;
|
||||||
|
let normalized_uri = normalize_policy_graph_uri(&uri)?;
|
||||||
|
let graph_id = graph_resource_id_for_selection(selected.as_deref(), &normalized_uri);
|
||||||
|
Ok(ResolvedCliGraph {
|
||||||
|
graph_id,
|
||||||
|
is_remote: is_remote_uri(&uri),
|
||||||
|
policy_file: config.resolve_policy_file_for(selected.as_deref()),
|
||||||
|
selected,
|
||||||
|
uri,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_local_graph(
|
||||||
|
config: &OmnigraphConfig,
|
||||||
|
cli_uri: Option<String>,
|
||||||
|
cli_target: Option<&str>,
|
||||||
|
operation: &str,
|
||||||
|
) -> Result<ResolvedCliGraph> {
|
||||||
|
let graph = resolve_cli_graph(config, cli_uri, cli_target)?;
|
||||||
|
if graph.is_remote {
|
||||||
|
bail!(
|
||||||
|
"{} is only supported against local graph URIs in this milestone",
|
||||||
|
operation
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(graph)
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a Go-style compact duration: `7d`, `24h`, `30m`, `90s`, or a plain
|
/// Parse a Go-style compact duration: `7d`, `24h`, `30m`, `90s`, or a plain
|
||||||
/// integer as seconds. Used by the `cleanup --older-than` flag.
|
/// integer as seconds. Used by the `cleanup --older-than` flag.
|
||||||
fn parse_duration_arg(s: &str) -> Result<std::time::Duration> {
|
fn parse_duration_arg(s: &str) -> Result<std::time::Duration> {
|
||||||
|
|
@ -915,14 +1027,7 @@ fn resolve_local_uri(
|
||||||
cli_target: Option<&str>,
|
cli_target: Option<&str>,
|
||||||
operation: &str,
|
operation: &str,
|
||||||
) -> Result<String> {
|
) -> Result<String> {
|
||||||
let uri = resolve_uri(config, cli_uri, cli_target)?;
|
Ok(resolve_local_graph(config, cli_uri, cli_target, operation)?.uri)
|
||||||
if is_remote_uri(&uri) {
|
|
||||||
bail!(
|
|
||||||
"{} is only supported against local graph URIs in this milestone",
|
|
||||||
operation
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Ok(uri)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_branch(
|
fn resolve_branch(
|
||||||
|
|
@ -1609,6 +1714,248 @@ async fn execute_query_lint(
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
struct QueriesIssue {
|
||||||
|
query: String,
|
||||||
|
message: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
struct QueriesValidateOutput {
|
||||||
|
ok: bool,
|
||||||
|
breakages: Vec<QueriesIssue>,
|
||||||
|
warnings: Vec<QueriesIssue>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
struct QueriesParam {
|
||||||
|
name: String,
|
||||||
|
#[serde(rename = "type")]
|
||||||
|
type_name: String,
|
||||||
|
nullable: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
struct QueriesListItem {
|
||||||
|
name: String,
|
||||||
|
mcp_expose: bool,
|
||||||
|
tool_name: Option<String>,
|
||||||
|
mutation: bool,
|
||||||
|
params: Vec<QueriesParam>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
struct QueriesListOutput {
|
||||||
|
queries: Vec<QueriesListItem>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve the selected graph to `(local URI, registry selection)` from one
|
||||||
|
/// precedence, so a command's schema and its stored-query registry can never
|
||||||
|
/// come from different graphs. A **positional URI is anonymous** (top-level
|
||||||
|
/// registry, ignoring the configured default graph); otherwise `--target`
|
||||||
|
/// or the configured `cli.graph` names the graph (its per-graph block).
|
||||||
|
/// Mirrors the server's single-mode identity rule.
|
||||||
|
fn resolve_selected_graph(
|
||||||
|
config: &OmnigraphConfig,
|
||||||
|
cli_uri: Option<String>,
|
||||||
|
cli_target: Option<&str>,
|
||||||
|
operation: &str,
|
||||||
|
) -> Result<(String, Option<String>)> {
|
||||||
|
let graph = resolve_local_graph(config, cli_uri, cli_target, operation)?;
|
||||||
|
Ok((graph.uri, graph.selected))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load the stored-query registry for an already-resolved graph selection
|
||||||
|
/// (`None` = anonymous → top-level; `Some(name)` = that graph's block).
|
||||||
|
fn load_registry_or_report(
|
||||||
|
config: &OmnigraphConfig,
|
||||||
|
selected: Option<&str>,
|
||||||
|
) -> Result<QueryRegistry> {
|
||||||
|
QueryRegistry::load(config, config.query_entries_for(selected)).map_err(|errors| {
|
||||||
|
color_eyre::eyre::eyre!(
|
||||||
|
"stored-query registry failed to load:\n {}",
|
||||||
|
errors
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.to_string())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n ")
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn graph_query_registry_names(config: &OmnigraphConfig) -> Vec<&str> {
|
||||||
|
config
|
||||||
|
.graphs
|
||||||
|
.iter()
|
||||||
|
.filter_map(|(name, graph)| (!graph.queries.is_empty()).then_some(name.as_str()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_registry_selection_for_list(
|
||||||
|
config: &OmnigraphConfig,
|
||||||
|
target: Option<&str>,
|
||||||
|
) -> Result<Option<String>> {
|
||||||
|
let selected = target
|
||||||
|
.map(str::to_string)
|
||||||
|
.or_else(|| config.cli_graph_name().map(str::to_string));
|
||||||
|
if let Some(name) = selected.as_deref() {
|
||||||
|
config.resolve_graph_selection(Some(name))?;
|
||||||
|
return Ok(selected);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !config.query_entries().is_empty() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let graph_names = graph_query_registry_names(config);
|
||||||
|
if graph_names.is_empty() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
bail!(
|
||||||
|
"stored-query registries are configured for graph{} {} but no graph was selected. Pass `--target {}` or set `cli.graph`.",
|
||||||
|
if graph_names.len() == 1 { "" } else { "s" },
|
||||||
|
graph_names.join(", "),
|
||||||
|
graph_names[0],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_registry_for_catalog(
|
||||||
|
registry: &QueryRegistry,
|
||||||
|
catalog: &omnigraph_compiler::catalog::Catalog,
|
||||||
|
label: &str,
|
||||||
|
) -> omnigraph::error::Result<()> {
|
||||||
|
let report = check(registry, catalog);
|
||||||
|
if report.has_breakages() {
|
||||||
|
return Err(omnigraph::error::OmniError::manifest(
|
||||||
|
format_check_breakages(label, &report),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn execute_queries_validate(
|
||||||
|
uri: Option<String>,
|
||||||
|
target: Option<String>,
|
||||||
|
config_path: Option<&PathBuf>,
|
||||||
|
json: bool,
|
||||||
|
) -> Result<()> {
|
||||||
|
let config = load_cli_config(config_path)?;
|
||||||
|
// One selection drives both the schema URI and the registry, so a
|
||||||
|
// positional URI and a `--target` can't validate different graphs.
|
||||||
|
let (uri, selected) =
|
||||||
|
resolve_selected_graph(&config, uri, target.as_deref(), "queries validate")?;
|
||||||
|
let registry = load_registry_or_report(&config, selected.as_deref())?;
|
||||||
|
let db = Omnigraph::open(&uri).await?;
|
||||||
|
let report = check(&registry, &db.catalog());
|
||||||
|
|
||||||
|
let output = QueriesValidateOutput {
|
||||||
|
ok: !report.has_breakages(),
|
||||||
|
breakages: report
|
||||||
|
.breakages
|
||||||
|
.iter()
|
||||||
|
.map(|b| QueriesIssue {
|
||||||
|
query: b.query.clone(),
|
||||||
|
message: b.message.clone(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
warnings: report
|
||||||
|
.warnings
|
||||||
|
.iter()
|
||||||
|
.map(|w| QueriesIssue {
|
||||||
|
query: w.query.clone(),
|
||||||
|
message: w.message.clone(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if json {
|
||||||
|
print_json(&output)?;
|
||||||
|
} else {
|
||||||
|
if output.breakages.is_empty() {
|
||||||
|
println!(
|
||||||
|
"OK {} stored quer{} type-check against the schema",
|
||||||
|
registry.len(),
|
||||||
|
if registry.len() == 1 { "y" } else { "ies" }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
for issue in &output.breakages {
|
||||||
|
println!("ERROR query '{}': {}", issue.query, issue.message);
|
||||||
|
}
|
||||||
|
for issue in &output.warnings {
|
||||||
|
println!("WARN query '{}': {}", issue.query, issue.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if report.has_breakages() {
|
||||||
|
io::stdout().flush()?;
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn execute_queries_list(
|
||||||
|
target: Option<String>,
|
||||||
|
config_path: Option<&PathBuf>,
|
||||||
|
json: bool,
|
||||||
|
) -> Result<()> {
|
||||||
|
let config = load_cli_config(config_path)?;
|
||||||
|
let selected = resolve_registry_selection_for_list(&config, target.as_deref())?;
|
||||||
|
let registry = load_registry_or_report(&config, selected.as_deref())?;
|
||||||
|
|
||||||
|
let output = QueriesListOutput {
|
||||||
|
queries: registry
|
||||||
|
.iter()
|
||||||
|
.map(|q| QueriesListItem {
|
||||||
|
name: q.name.clone(),
|
||||||
|
mcp_expose: q.expose,
|
||||||
|
tool_name: q.tool_name.clone(),
|
||||||
|
mutation: q.is_mutation(),
|
||||||
|
params: q
|
||||||
|
.decl
|
||||||
|
.params
|
||||||
|
.iter()
|
||||||
|
.map(|p| QueriesParam {
|
||||||
|
name: p.name.clone(),
|
||||||
|
type_name: p.type_name.clone(),
|
||||||
|
nullable: p.nullable,
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if json {
|
||||||
|
print_json(&output)?;
|
||||||
|
} else if output.queries.is_empty() {
|
||||||
|
println!("(no stored queries registered)");
|
||||||
|
} else {
|
||||||
|
for q in &output.queries {
|
||||||
|
let kind = if q.mutation { "mutation" } else { "read" };
|
||||||
|
let params = q
|
||||||
|
.params
|
||||||
|
.iter()
|
||||||
|
.map(|p| {
|
||||||
|
format!(
|
||||||
|
"${}: {}{}",
|
||||||
|
p.name,
|
||||||
|
p.type_name,
|
||||||
|
if p.nullable { "?" } else { "" }
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(", ");
|
||||||
|
let mcp = if q.mcp_expose {
|
||||||
|
format!(" [mcp: {}]", q.tool_name.as_deref().unwrap_or(&q.name))
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
println!("{kind} {}({params}){mcp}", q.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
async fn execute_read(
|
async fn execute_read(
|
||||||
uri: &str,
|
uri: &str,
|
||||||
query_source: &str,
|
query_source: &str,
|
||||||
|
|
@ -1655,7 +2002,7 @@ async fn execute_read_remote(
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn execute_change(
|
async fn execute_change(
|
||||||
uri: &str,
|
graph: &ResolvedCliGraph,
|
||||||
query_source: &str,
|
query_source: &str,
|
||||||
query_name: Option<&str>,
|
query_name: Option<&str>,
|
||||||
branch: &str,
|
branch: &str,
|
||||||
|
|
@ -1665,7 +2012,7 @@ async fn execute_change(
|
||||||
) -> Result<ChangeOutput> {
|
) -> Result<ChangeOutput> {
|
||||||
let (selected_name, query_params) = select_named_query(query_source, query_name)?;
|
let (selected_name, query_params) = select_named_query(query_source, query_name)?;
|
||||||
let params = query_params_from_json(&query_params, params_json)?;
|
let params = query_params_from_json(&query_params, params_json)?;
|
||||||
let db = open_local_db_with_policy(uri, config).await?;
|
let db = open_local_db_with_policy(graph).await?;
|
||||||
let actor = resolve_cli_actor(cli_as_actor, config);
|
let actor = resolve_cli_actor(cli_as_actor, config);
|
||||||
let result = db
|
let result = db
|
||||||
.mutate_as(branch, query_source, &selected_name, &params, actor)
|
.mutate_as(branch, query_source, &selected_name, &params, actor)
|
||||||
|
|
@ -1893,9 +2240,10 @@ async fn main() -> Result<()> {
|
||||||
json,
|
json,
|
||||||
} => {
|
} => {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let uri = resolve_local_uri(&config, uri, target.as_deref(), "load")?;
|
let graph = resolve_local_graph(&config, uri, target.as_deref(), "load")?;
|
||||||
|
let uri = graph.uri.clone();
|
||||||
let branch = resolve_branch(&config, branch, None, "main");
|
let branch = resolve_branch(&config, branch, None, "main");
|
||||||
let db = open_local_db_with_policy(&uri, &config).await?;
|
let db = open_local_db_with_policy(&graph).await?;
|
||||||
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
||||||
let result = db
|
let result = db
|
||||||
.load_file_as(&branch, &data.to_string_lossy(), mode.into(), actor)
|
.load_file_as(&branch, &data.to_string_lossy(), mode.into(), actor)
|
||||||
|
|
@ -1936,10 +2284,11 @@ async fn main() -> Result<()> {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let bearer_token =
|
let bearer_token =
|
||||||
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
||||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
let graph = resolve_cli_graph(&config, uri, target.as_deref())?;
|
||||||
|
let uri = graph.uri.clone();
|
||||||
let branch = resolve_branch(&config, branch, None, "main");
|
let branch = resolve_branch(&config, branch, None, "main");
|
||||||
let from = resolve_branch(&config, from, None, "main");
|
let from = resolve_branch(&config, from, None, "main");
|
||||||
let payload = if is_remote_uri(&uri) {
|
let payload = if graph.is_remote {
|
||||||
let data = fs::read_to_string(&data)?;
|
let data = fs::read_to_string(&data)?;
|
||||||
remote_json::<IngestOutput>(
|
remote_json::<IngestOutput>(
|
||||||
&http_client,
|
&http_client,
|
||||||
|
|
@ -1955,7 +2304,7 @@ async fn main() -> Result<()> {
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
} else {
|
} else {
|
||||||
let db = open_local_db_with_policy(&uri, &config).await?;
|
let db = open_local_db_with_policy(&graph).await?;
|
||||||
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
||||||
let result = db
|
let result = db
|
||||||
.ingest_file_as(
|
.ingest_file_as(
|
||||||
|
|
@ -1986,9 +2335,10 @@ async fn main() -> Result<()> {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let bearer_token =
|
let bearer_token =
|
||||||
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
||||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
let graph = resolve_cli_graph(&config, uri, target.as_deref())?;
|
||||||
|
let uri = graph.uri.clone();
|
||||||
let from = resolve_branch(&config, from, None, "main");
|
let from = resolve_branch(&config, from, None, "main");
|
||||||
let payload = if is_remote_uri(&uri) {
|
let payload = if graph.is_remote {
|
||||||
remote_json::<BranchCreateOutput>(
|
remote_json::<BranchCreateOutput>(
|
||||||
&http_client,
|
&http_client,
|
||||||
Method::POST,
|
Method::POST,
|
||||||
|
|
@ -2001,7 +2351,7 @@ async fn main() -> Result<()> {
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
} else {
|
} else {
|
||||||
let db = open_local_db_with_policy(&uri, &config).await?;
|
let db = open_local_db_with_policy(&graph).await?;
|
||||||
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
||||||
db.branch_create_from_as(ReadTarget::branch(&from), &name, actor)
|
db.branch_create_from_as(ReadTarget::branch(&from), &name, actor)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
@ -2027,8 +2377,9 @@ async fn main() -> Result<()> {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let bearer_token =
|
let bearer_token =
|
||||||
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
||||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
let graph = resolve_cli_graph(&config, uri, target.as_deref())?;
|
||||||
let payload = if is_remote_uri(&uri) {
|
let uri = graph.uri.clone();
|
||||||
|
let payload = if graph.is_remote {
|
||||||
remote_json::<BranchListOutput>(
|
remote_json::<BranchListOutput>(
|
||||||
&http_client,
|
&http_client,
|
||||||
Method::GET,
|
Method::GET,
|
||||||
|
|
@ -2061,8 +2412,9 @@ async fn main() -> Result<()> {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let bearer_token =
|
let bearer_token =
|
||||||
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
||||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
let graph = resolve_cli_graph(&config, uri, target.as_deref())?;
|
||||||
let payload = if is_remote_uri(&uri) {
|
let uri = graph.uri.clone();
|
||||||
|
let payload = if graph.is_remote {
|
||||||
remote_json::<BranchDeleteOutput>(
|
remote_json::<BranchDeleteOutput>(
|
||||||
&http_client,
|
&http_client,
|
||||||
Method::DELETE,
|
Method::DELETE,
|
||||||
|
|
@ -2072,7 +2424,7 @@ async fn main() -> Result<()> {
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
} else {
|
} else {
|
||||||
let db = open_local_db_with_policy(&uri, &config).await?;
|
let db = open_local_db_with_policy(&graph).await?;
|
||||||
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
||||||
db.branch_delete_as(&name, actor).await?;
|
db.branch_delete_as(&name, actor).await?;
|
||||||
BranchDeleteOutput {
|
BranchDeleteOutput {
|
||||||
|
|
@ -2098,9 +2450,10 @@ async fn main() -> Result<()> {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let bearer_token =
|
let bearer_token =
|
||||||
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
||||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
let graph = resolve_cli_graph(&config, uri, target.as_deref())?;
|
||||||
|
let uri = graph.uri.clone();
|
||||||
let into = resolve_branch(&config, into, None, "main");
|
let into = resolve_branch(&config, into, None, "main");
|
||||||
let payload = if is_remote_uri(&uri) {
|
let payload = if graph.is_remote {
|
||||||
remote_json::<BranchMergeOutput>(
|
remote_json::<BranchMergeOutput>(
|
||||||
&http_client,
|
&http_client,
|
||||||
Method::POST,
|
Method::POST,
|
||||||
|
|
@ -2113,7 +2466,7 @@ async fn main() -> Result<()> {
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
} else {
|
} else {
|
||||||
let db = open_local_db_with_policy(&uri, &config).await?;
|
let db = open_local_db_with_policy(&graph).await?;
|
||||||
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
||||||
let outcome = db.branch_merge_as(&source, &into, actor).await?;
|
let outcome = db.branch_merge_as(&source, &into, actor).await?;
|
||||||
BranchMergeOutput {
|
BranchMergeOutput {
|
||||||
|
|
@ -2248,9 +2601,10 @@ async fn main() -> Result<()> {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let bearer_token =
|
let bearer_token =
|
||||||
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
resolve_remote_bearer_token(&config, uri.as_deref(), target.as_deref())?;
|
||||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
let graph = resolve_cli_graph(&config, uri, target.as_deref())?;
|
||||||
|
let uri = graph.uri.clone();
|
||||||
let schema_source = fs::read_to_string(&schema)?;
|
let schema_source = fs::read_to_string(&schema)?;
|
||||||
let output = if is_remote_uri(&uri) {
|
let output = if graph.is_remote {
|
||||||
// MR-694 PR B: SchemaApplyRequest gained an
|
// MR-694 PR B: SchemaApplyRequest gained an
|
||||||
// allow_data_loss field so Hard-mode drops are no
|
// allow_data_loss field so Hard-mode drops are no
|
||||||
// longer CLI-only. The previous bail is gone; the
|
// longer CLI-only. The previous bail is gone; the
|
||||||
|
|
@ -2268,13 +2622,22 @@ async fn main() -> Result<()> {
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
} else {
|
} else {
|
||||||
let db = open_local_db_with_policy(&uri, &config).await?;
|
let db = open_local_db_with_policy(&graph).await?;
|
||||||
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
let actor = resolve_cli_actor(cli.as_actor.as_deref(), &config);
|
||||||
|
let registry = load_registry_or_report(&config, graph.selected())?;
|
||||||
|
let registry = (!registry.is_empty()).then_some(registry);
|
||||||
|
let label = graph.selected().unwrap_or(&uri).to_string();
|
||||||
let result = db
|
let result = db
|
||||||
.apply_schema_as(
|
.apply_schema_as_with_catalog_check(
|
||||||
&schema_source,
|
&schema_source,
|
||||||
omnigraph::db::SchemaApplyOptions { allow_data_loss },
|
omnigraph::db::SchemaApplyOptions { allow_data_loss },
|
||||||
actor,
|
actor,
|
||||||
|
|catalog| {
|
||||||
|
if let Some(registry) = registry.as_ref() {
|
||||||
|
validate_registry_for_catalog(registry, catalog, &label)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
schema_apply_output(&uri, result)
|
schema_apply_output(&uri, result)
|
||||||
|
|
@ -2331,6 +2694,23 @@ async fn main() -> Result<()> {
|
||||||
.await?;
|
.await?;
|
||||||
finish_query_lint(&output, json)?;
|
finish_query_lint(&output, json)?;
|
||||||
}
|
}
|
||||||
|
Command::Queries { command } => match command {
|
||||||
|
QueriesCommand::Validate {
|
||||||
|
uri,
|
||||||
|
target,
|
||||||
|
config,
|
||||||
|
json,
|
||||||
|
} => {
|
||||||
|
execute_queries_validate(uri, target, config.as_ref(), json).await?;
|
||||||
|
}
|
||||||
|
QueriesCommand::List {
|
||||||
|
target,
|
||||||
|
config,
|
||||||
|
json,
|
||||||
|
} => {
|
||||||
|
execute_queries_list(target, config.as_ref(), json)?;
|
||||||
|
}
|
||||||
|
},
|
||||||
Command::Snapshot {
|
Command::Snapshot {
|
||||||
uri,
|
uri,
|
||||||
target,
|
target,
|
||||||
|
|
@ -2436,7 +2816,8 @@ async fn main() -> Result<()> {
|
||||||
.as_deref()
|
.as_deref()
|
||||||
.or_else(|| alias_config.and_then(|alias| alias.graph.as_deref()));
|
.or_else(|| alias_config.and_then(|alias| alias.graph.as_deref()));
|
||||||
let bearer_token = resolve_remote_bearer_token(&config, uri.as_deref(), target_name)?;
|
let bearer_token = resolve_remote_bearer_token(&config, uri.as_deref(), target_name)?;
|
||||||
let uri = resolve_uri(&config, uri, target_name)?;
|
let graph = resolve_cli_graph(&config, uri, target_name)?;
|
||||||
|
let uri = graph.uri.clone();
|
||||||
let query_source = resolve_query_source(
|
let query_source = resolve_query_source(
|
||||||
&config,
|
&config,
|
||||||
query.as_ref(),
|
query.as_ref(),
|
||||||
|
|
@ -2458,7 +2839,7 @@ async fn main() -> Result<()> {
|
||||||
alias_config.and_then(|alias| alias.branch.clone()),
|
alias_config.and_then(|alias| alias.branch.clone()),
|
||||||
)?;
|
)?;
|
||||||
let query_name = name.or_else(|| alias_config.and_then(|alias| alias.name.clone()));
|
let query_name = name.or_else(|| alias_config.and_then(|alias| alias.name.clone()));
|
||||||
let output = if is_remote_uri(&uri) {
|
let output = if graph.is_remote {
|
||||||
execute_read_remote(
|
execute_read_remote(
|
||||||
&http_client,
|
&http_client,
|
||||||
&uri,
|
&uri,
|
||||||
|
|
@ -2521,7 +2902,8 @@ async fn main() -> Result<()> {
|
||||||
.as_deref()
|
.as_deref()
|
||||||
.or_else(|| alias_config.and_then(|alias| alias.graph.as_deref()));
|
.or_else(|| alias_config.and_then(|alias| alias.graph.as_deref()));
|
||||||
let bearer_token = resolve_remote_bearer_token(&config, uri.as_deref(), target_name)?;
|
let bearer_token = resolve_remote_bearer_token(&config, uri.as_deref(), target_name)?;
|
||||||
let uri = resolve_uri(&config, uri, target_name)?;
|
let graph = resolve_cli_graph(&config, uri, target_name)?;
|
||||||
|
let uri = graph.uri.clone();
|
||||||
let query_source = resolve_query_source(
|
let query_source = resolve_query_source(
|
||||||
&config,
|
&config,
|
||||||
query.as_ref(),
|
query.as_ref(),
|
||||||
|
|
@ -2543,7 +2925,7 @@ async fn main() -> Result<()> {
|
||||||
"main",
|
"main",
|
||||||
);
|
);
|
||||||
let query_name = name.or_else(|| alias_config.and_then(|alias| alias.name.clone()));
|
let query_name = name.or_else(|| alias_config.and_then(|alias| alias.name.clone()));
|
||||||
let output = if is_remote_uri(&uri) {
|
let output = if graph.is_remote {
|
||||||
execute_change_remote(
|
execute_change_remote(
|
||||||
&http_client,
|
&http_client,
|
||||||
&uri,
|
&uri,
|
||||||
|
|
@ -2556,7 +2938,7 @@ async fn main() -> Result<()> {
|
||||||
.await?
|
.await?
|
||||||
} else {
|
} else {
|
||||||
execute_change(
|
execute_change(
|
||||||
&uri,
|
&graph,
|
||||||
&query_source,
|
&query_source,
|
||||||
query_name.as_deref(),
|
query_name.as_deref(),
|
||||||
&branch,
|
&branch,
|
||||||
|
|
@ -2575,20 +2957,19 @@ async fn main() -> Result<()> {
|
||||||
Command::Policy { command } => match command {
|
Command::Policy { command } => match command {
|
||||||
PolicyCommand::Validate { config } => {
|
PolicyCommand::Validate { config } => {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let engine = resolve_policy_engine(&config)?;
|
let context = resolve_policy_context(&config)?;
|
||||||
let policy_file = config
|
let engine = resolve_policy_engine(&context)?;
|
||||||
.resolve_policy_file()
|
|
||||||
.expect("policy file should exist after resolve_policy_engine");
|
|
||||||
println!(
|
println!(
|
||||||
"policy valid: {} [{} actors]",
|
"policy valid: {} [{} actors]",
|
||||||
policy_file.display(),
|
context.policy_file.display(),
|
||||||
engine.known_actor_count()
|
engine.known_actor_count()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
PolicyCommand::Test { config } => {
|
PolicyCommand::Test { config } => {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let engine = resolve_policy_engine(&config)?;
|
let context = resolve_policy_context(&config)?;
|
||||||
let tests_path = resolve_policy_tests_path(&config)?;
|
let engine = resolve_policy_engine(&context)?;
|
||||||
|
let tests_path = resolve_policy_tests_path(&context);
|
||||||
let tests = PolicyTestConfig::load(&tests_path)?;
|
let tests = PolicyTestConfig::load(&tests_path)?;
|
||||||
engine.run_tests(&tests)?;
|
engine.run_tests(&tests)?;
|
||||||
println!("policy tests passed: {} cases", tests.cases.len());
|
println!("policy tests passed: {} cases", tests.cases.len());
|
||||||
|
|
@ -2601,7 +2982,8 @@ async fn main() -> Result<()> {
|
||||||
target_branch,
|
target_branch,
|
||||||
} => {
|
} => {
|
||||||
let config = load_cli_config(config.as_ref())?;
|
let config = load_cli_config(config.as_ref())?;
|
||||||
let engine = resolve_policy_engine(&config)?;
|
let context = resolve_policy_context(&config)?;
|
||||||
|
let engine = resolve_policy_engine(&context)?;
|
||||||
let request = PolicyRequest {
|
let request = PolicyRequest {
|
||||||
action,
|
action,
|
||||||
branch,
|
branch,
|
||||||
|
|
@ -2629,18 +3011,19 @@ async fn main() -> Result<()> {
|
||||||
"fragments_removed": s.fragments_removed,
|
"fragments_removed": s.fragments_removed,
|
||||||
"fragments_added": s.fragments_added,
|
"fragments_added": s.fragments_added,
|
||||||
"committed": s.committed,
|
"committed": s.committed,
|
||||||
|
"skipped": s.skipped.map(|r| r.as_str()),
|
||||||
})).collect::<Vec<_>>(),
|
})).collect::<Vec<_>>(),
|
||||||
});
|
});
|
||||||
print_json(&value)?;
|
print_json(&value)?;
|
||||||
} else {
|
} else {
|
||||||
println!("optimize {} — {} tables", uri, stats.len());
|
println!("optimize {} — {} tables", uri, stats.len());
|
||||||
for s in &stats {
|
for s in &stats {
|
||||||
if s.committed {
|
if let Some(reason) = s.skipped {
|
||||||
|
println!(" {:<40} skipped ({reason})", s.table_key);
|
||||||
|
} else if s.committed {
|
||||||
println!(
|
println!(
|
||||||
" {:<40} frags {} → {} ✓",
|
" {:<40} frags {} → {} ✓",
|
||||||
s.table_key,
|
s.table_key, s.fragments_removed, s.fragments_added
|
||||||
s.fragments_removed + s.fragments_added - s.fragments_added,
|
|
||||||
s.fragments_added
|
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
println!(" {:<40} no-op", s.table_key);
|
println!(" {:<40} no-op", s.table_key);
|
||||||
|
|
@ -2699,20 +3082,33 @@ async fn main() -> Result<()> {
|
||||||
"table_key": s.table_key,
|
"table_key": s.table_key,
|
||||||
"bytes_removed": s.bytes_removed,
|
"bytes_removed": s.bytes_removed,
|
||||||
"old_versions_removed": s.old_versions_removed,
|
"old_versions_removed": s.old_versions_removed,
|
||||||
|
"error": s.error,
|
||||||
})).collect::<Vec<_>>(),
|
})).collect::<Vec<_>>(),
|
||||||
});
|
});
|
||||||
print_json(&value)?;
|
print_json(&value)?;
|
||||||
} else {
|
} else {
|
||||||
let total_bytes: u64 = stats.iter().map(|s| s.bytes_removed).sum();
|
let total_bytes: u64 = stats.iter().map(|s| s.bytes_removed).sum();
|
||||||
let total_versions: u64 = stats.iter().map(|s| s.old_versions_removed).sum();
|
let total_versions: u64 = stats.iter().map(|s| s.old_versions_removed).sum();
|
||||||
|
let failed: Vec<&str> = stats
|
||||||
|
.iter()
|
||||||
|
.filter(|s| s.error.is_some())
|
||||||
|
.map(|s| s.table_key.as_str())
|
||||||
|
.collect();
|
||||||
println!(
|
println!(
|
||||||
"cleanup {} ({}) — removed {} versions ({} bytes) across {} tables",
|
"cleanup {} ({}) — removed {} versions ({} bytes) across {} tables",
|
||||||
uri,
|
uri,
|
||||||
policy_desc,
|
policy_desc,
|
||||||
total_versions,
|
total_versions,
|
||||||
total_bytes,
|
total_bytes,
|
||||||
stats.len()
|
stats.len() - failed.len()
|
||||||
);
|
);
|
||||||
|
if !failed.is_empty() {
|
||||||
|
println!(
|
||||||
|
" {} table(s) failed and will be retried on the next cleanup: {}",
|
||||||
|
failed.len(),
|
||||||
|
failed.join(", ")
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Command::Graphs { command } => match command {
|
Command::Graphs { command } => match command {
|
||||||
|
|
@ -2761,7 +3157,8 @@ mod tests {
|
||||||
use super::{
|
use super::{
|
||||||
DEFAULT_BEARER_TOKEN_ENV, apply_bearer_token, bearer_token_from_env_file,
|
DEFAULT_BEARER_TOKEN_ENV, apply_bearer_token, bearer_token_from_env_file,
|
||||||
legacy_change_request_body, load_cli_config, load_env_file_into_process,
|
legacy_change_request_body, load_cli_config, load_env_file_into_process,
|
||||||
normalize_bearer_token, parse_env_assignment, resolve_remote_bearer_token,
|
normalize_bearer_token, parse_env_assignment, resolve_policy_context,
|
||||||
|
resolve_cli_graph, resolve_remote_bearer_token,
|
||||||
};
|
};
|
||||||
use omnigraph_server::load_config;
|
use omnigraph_server::load_config;
|
||||||
use reqwest::header::AUTHORIZATION;
|
use reqwest::header::AUTHORIZATION;
|
||||||
|
|
@ -3021,4 +3418,150 @@ graphs:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn graph_identity_resolve_policy_context_named_cli_graph_uses_graph_key_not_project_name_or_uri() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
r#"
|
||||||
|
project:
|
||||||
|
name: misleading-project
|
||||||
|
graphs:
|
||||||
|
local:
|
||||||
|
uri: /tmp/local-policy-graph.omni
|
||||||
|
policy:
|
||||||
|
file: ./policy.yaml
|
||||||
|
cli:
|
||||||
|
graph: local
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let config = load_config(Some(&config_path)).unwrap();
|
||||||
|
let context = resolve_policy_context(&config).unwrap();
|
||||||
|
assert_eq!(context.graph_id, "local");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn graph_identity_resolve_policy_context_server_graph_uses_graph_key_when_cli_graph_absent() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
r#"
|
||||||
|
project:
|
||||||
|
name: misleading-project
|
||||||
|
graphs:
|
||||||
|
local:
|
||||||
|
uri: /tmp/local-policy-graph.omni
|
||||||
|
policy:
|
||||||
|
file: ./server-policy.yaml
|
||||||
|
server:
|
||||||
|
graph: local
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let config = load_config(Some(&config_path)).unwrap();
|
||||||
|
let context = resolve_policy_context(&config).unwrap();
|
||||||
|
assert_eq!(context.graph_id, "local");
|
||||||
|
assert!(context.policy_file.ends_with("server-policy.yaml"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn graph_identity_resolve_policy_context_anonymous_uses_top_level_default_identity() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
r#"
|
||||||
|
project:
|
||||||
|
name: misleading-project
|
||||||
|
graphs:
|
||||||
|
local:
|
||||||
|
uri: /tmp/local-policy-graph.omni
|
||||||
|
policy:
|
||||||
|
file: ./top-policy.yaml
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let config = load_config(Some(&config_path)).unwrap();
|
||||||
|
let context = resolve_policy_context(&config).unwrap();
|
||||||
|
assert_eq!(context.graph_id, "default");
|
||||||
|
assert!(context.policy_file.ends_with("top-policy.yaml"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn graph_identity_resolve_cli_graph_named_target_uses_graph_key_not_project_name_or_uri() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
r#"
|
||||||
|
project:
|
||||||
|
name: misleading-project
|
||||||
|
graphs:
|
||||||
|
prod:
|
||||||
|
uri: s3://bucket/prod-graph/
|
||||||
|
policy:
|
||||||
|
file: ./prod-policy.yaml
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let config = load_config(Some(&config_path)).unwrap();
|
||||||
|
let graph = resolve_cli_graph(&config, None, Some("prod")).unwrap();
|
||||||
|
assert_eq!(graph.selected(), Some("prod"));
|
||||||
|
assert_eq!(graph.graph_id, "prod");
|
||||||
|
assert_eq!(graph.uri, "s3://bucket/prod-graph/");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn graph_identity_resolve_cli_graph_positional_uri_uses_anonymous_normalized_uri() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
r#"
|
||||||
|
project:
|
||||||
|
name: misleading-project
|
||||||
|
graphs:
|
||||||
|
local:
|
||||||
|
uri: /tmp/configured-graph.omni
|
||||||
|
policy:
|
||||||
|
file: ./policy.yaml
|
||||||
|
cli:
|
||||||
|
graph: local
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let config = load_config(Some(&config_path)).unwrap();
|
||||||
|
let local_graph_path = temp.path().join("explicit-graph.omni");
|
||||||
|
let local_graph = resolve_cli_graph(
|
||||||
|
&config,
|
||||||
|
Some(format!("file://{}", local_graph_path.display())),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(local_graph.selected(), None);
|
||||||
|
assert_eq!(
|
||||||
|
local_graph.graph_id,
|
||||||
|
local_graph_path.to_string_lossy().as_ref()
|
||||||
|
);
|
||||||
|
assert_eq!(local_graph.policy_file, None);
|
||||||
|
|
||||||
|
let s3_graph = resolve_cli_graph(
|
||||||
|
&config,
|
||||||
|
Some("s3://bucket/anonymous-graph/".to_string()),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(s3_graph.selected(), None);
|
||||||
|
assert_eq!(s3_graph.graph_id, "s3://bucket/anonymous-graph");
|
||||||
|
assert_eq!(s3_graph.policy_file, None);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2376,3 +2376,295 @@ fn graphs_list_against_local_uri_errors_with_remote_only_message() {
|
||||||
"expected 'remote multi-graph server URL' rejection in stderr; got:\n{stderr}"
|
"expected 'remote multi-graph server URL' rejection in stderr; got:\n{stderr}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn queries_test_config(graph_uri: &str, entry: &str, gq_file: &str) -> String {
|
||||||
|
format!(
|
||||||
|
"graphs:\n local:\n uri: '{}'\n queries:\n {entry}:\n file: ./{gq_file}\n\
|
||||||
|
cli:\n graph: local\npolicy: {{}}\n",
|
||||||
|
graph_uri.replace('\'', "''")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_validate_exits_zero_on_clean_registry() {
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"find_person.gq",
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
&queries_test_config(&graph.path().to_string_lossy(), "find_person", "find_person.gq"),
|
||||||
|
);
|
||||||
|
let output = output_success(cli().arg("queries").arg("validate").arg("--config").arg(&config));
|
||||||
|
let stdout = stdout_string(&output);
|
||||||
|
assert!(stdout.contains("OK"), "stdout:\n{stdout}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_validate_exits_nonzero_on_type_broken_query() {
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
// `Widget` is not in the fixture schema.
|
||||||
|
graph.write_query("ghost.gq", "query ghost() { match { $w: Widget } return { $w.name } }");
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
&queries_test_config(&graph.path().to_string_lossy(), "ghost", "ghost.gq"),
|
||||||
|
);
|
||||||
|
let output = output_failure(cli().arg("queries").arg("validate").arg("--config").arg(&config));
|
||||||
|
let stdout = stdout_string(&output);
|
||||||
|
assert!(
|
||||||
|
stdout.contains("ghost"),
|
||||||
|
"validation should name the broken query; stdout:\n{stdout}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_list_prints_registered_query() {
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"find_person.gq",
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
// Exposed with an explicit tool name so the list shows the MCP suffix.
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
&format!(
|
||||||
|
concat!(
|
||||||
|
"graphs:\n",
|
||||||
|
" local:\n",
|
||||||
|
" uri: '{}'\n",
|
||||||
|
" queries:\n",
|
||||||
|
" find_person:\n",
|
||||||
|
" file: ./find_person.gq\n",
|
||||||
|
" mcp: {{ expose: true, tool_name: lookup_person }}\n",
|
||||||
|
"cli:\n",
|
||||||
|
" graph: local\n",
|
||||||
|
"policy: {{}}\n",
|
||||||
|
),
|
||||||
|
graph.path().to_string_lossy().replace('\'', "''")
|
||||||
|
),
|
||||||
|
);
|
||||||
|
let output = output_success(cli().arg("queries").arg("list").arg("--config").arg(&config));
|
||||||
|
let stdout = stdout_string(&output);
|
||||||
|
assert!(stdout.contains("find_person"), "stdout:\n{stdout}");
|
||||||
|
assert!(
|
||||||
|
stdout.contains("$name: String"),
|
||||||
|
"list should show typed params; stdout:\n{stdout}"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
stdout.contains("[mcp: lookup_person]"),
|
||||||
|
"list should show the MCP tool name for exposed queries; stdout:\n{stdout}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_list_requires_graph_selection_for_per_graph_only_registries() {
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"find_person.gq",
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
&format!(
|
||||||
|
concat!(
|
||||||
|
"graphs:\n",
|
||||||
|
" local:\n",
|
||||||
|
" uri: '{}'\n",
|
||||||
|
" queries:\n",
|
||||||
|
" find_person:\n",
|
||||||
|
" file: ./find_person.gq\n",
|
||||||
|
"policy: {{}}\n",
|
||||||
|
),
|
||||||
|
graph.path().to_string_lossy().replace('\'', "''")
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
let output = output_failure(cli().arg("queries").arg("list").arg("--config").arg(&config));
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
assert!(
|
||||||
|
stderr.contains("local") && stderr.contains("--target local"),
|
||||||
|
"error must name the graph and give a concrete selection hint; stderr:\n{stderr}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_list_without_graph_selection_lists_top_level_registry() {
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"top_find.gq",
|
||||||
|
"query top_find($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
concat!(
|
||||||
|
"queries:\n",
|
||||||
|
" top_find:\n",
|
||||||
|
" file: ./top_find.gq\n",
|
||||||
|
"policy: {}\n",
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
let output = output_success(cli().arg("queries").arg("list").arg("--config").arg(&config));
|
||||||
|
let stdout = stdout_string(&output);
|
||||||
|
assert!(stdout.contains("top_find"), "stdout:\n{stdout}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_list_unknown_target_errors() {
|
||||||
|
// `queries list` opens no graph URI, so unknown-graph validation can't ride
|
||||||
|
// along on URI resolution the way it does for every other command. An
|
||||||
|
// unknown `--target` must still error (naming the graph) instead of
|
||||||
|
// silently falling back to the top-level registry and showing the wrong
|
||||||
|
// (or empty) catalog.
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"find_person.gq",
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
&queries_test_config(&graph.path().to_string_lossy(), "find_person", "find_person.gq"),
|
||||||
|
);
|
||||||
|
let output = output_failure(
|
||||||
|
cli()
|
||||||
|
.arg("queries")
|
||||||
|
.arg("list")
|
||||||
|
.arg("--target")
|
||||||
|
.arg("nonexistent")
|
||||||
|
.arg("--config")
|
||||||
|
.arg(&config),
|
||||||
|
);
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
assert!(
|
||||||
|
stderr.contains("nonexistent"),
|
||||||
|
"error must name the unknown graph; stderr:\n{stderr}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_commands_reject_named_graph_with_populated_top_level_block() {
|
||||||
|
// A named graph (here via `cli.graph`) uses its own `graphs.<name>` block,
|
||||||
|
// so a populated top-level `queries:` block would be silently ignored — a
|
||||||
|
// config the server REFUSES to boot. `queries validate`/`list` must reject
|
||||||
|
// it too (matching boot) instead of validating/listing the per-graph block
|
||||||
|
// and giving a false green.
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"find_person.gq",
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
&format!(
|
||||||
|
concat!(
|
||||||
|
"graphs:\n",
|
||||||
|
" local:\n",
|
||||||
|
" uri: '{}'\n",
|
||||||
|
" queries:\n",
|
||||||
|
" find_person:\n",
|
||||||
|
" file: ./find_person.gq\n",
|
||||||
|
"cli:\n",
|
||||||
|
" graph: local\n",
|
||||||
|
"queries:\n", // populated top-level block: the coherence violation
|
||||||
|
" legacy:\n",
|
||||||
|
" file: ./legacy.gq\n",
|
||||||
|
"policy: {{}}\n",
|
||||||
|
),
|
||||||
|
graph.path().to_string_lossy().replace('\'', "''")
|
||||||
|
),
|
||||||
|
);
|
||||||
|
// Both resolve `local` from cli.graph (no positional URI), so both must
|
||||||
|
// error and name the graph + the ignored block — like server boot does.
|
||||||
|
for sub in ["validate", "list"] {
|
||||||
|
let output = output_failure(cli().arg("queries").arg(sub).arg("--config").arg(&config));
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
assert!(
|
||||||
|
stderr.contains("local") && stderr.contains("queries"),
|
||||||
|
"`queries {sub}` must reject a named graph with a populated top-level block; stderr:\n{stderr}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_validate_exits_nonzero_on_duplicate_tool_name() {
|
||||||
|
// Two exposed queries claiming one MCP tool name is a load-time
|
||||||
|
// collision — `queries validate` must fail (offline, before the engine
|
||||||
|
// opens) and name both queries plus the contested tool.
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query("a.gq", "query a() { match { $p: Person } return { $p.name } }");
|
||||||
|
graph.write_query("b.gq", "query b() { match { $p: Person } return { $p.name } }");
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
&format!(
|
||||||
|
concat!(
|
||||||
|
"graphs:\n",
|
||||||
|
" local:\n",
|
||||||
|
" uri: '{}'\n",
|
||||||
|
" queries:\n",
|
||||||
|
" a:\n",
|
||||||
|
" file: ./a.gq\n",
|
||||||
|
" mcp: {{ expose: true, tool_name: dup }}\n",
|
||||||
|
" b:\n",
|
||||||
|
" file: ./b.gq\n",
|
||||||
|
" mcp: {{ expose: true, tool_name: dup }}\n",
|
||||||
|
"cli:\n",
|
||||||
|
" graph: local\n",
|
||||||
|
"policy: {{}}\n",
|
||||||
|
),
|
||||||
|
graph.path().to_string_lossy().replace('\'', "''")
|
||||||
|
),
|
||||||
|
);
|
||||||
|
let output = output_failure(cli().arg("queries").arg("validate").arg("--config").arg(&config));
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
assert!(
|
||||||
|
stderr.contains("dup") && stderr.contains("'a'") && stderr.contains("'b'"),
|
||||||
|
"duplicate tool name should be reported naming both queries; stderr:\n{stderr}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_validate_positional_uri_ignores_default_graph() {
|
||||||
|
// A positional URI is anonymous → the schema AND the registry both come
|
||||||
|
// from top-level, even when `cli.graph` names a graph whose per-graph
|
||||||
|
// queries would fail. Pins that the URI and registry can't diverge.
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"clean.gq",
|
||||||
|
"query clean($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
// `Widget` is not in the fixture schema — the default graph's per-graph
|
||||||
|
// query would break validate if it were (wrongly) selected.
|
||||||
|
graph.write_query("broken.gq", "query broken() { match { $w: Widget } return { $w.name } }");
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph.yaml",
|
||||||
|
concat!(
|
||||||
|
"cli:\n graph: prod\n",
|
||||||
|
"graphs:\n",
|
||||||
|
" prod:\n",
|
||||||
|
" uri: /nonexistent-prod.omni\n",
|
||||||
|
" queries:\n",
|
||||||
|
" broken:\n",
|
||||||
|
" file: ./broken.gq\n",
|
||||||
|
"queries:\n",
|
||||||
|
" clean:\n",
|
||||||
|
" file: ./clean.gq\n",
|
||||||
|
"policy: {}\n",
|
||||||
|
),
|
||||||
|
);
|
||||||
|
// Positional URI = the real loaded graph; selection is anonymous, so the
|
||||||
|
// CLEAN top-level registry validates (not prod's broken one).
|
||||||
|
let output = output_success(
|
||||||
|
cli()
|
||||||
|
.arg("queries")
|
||||||
|
.arg("validate")
|
||||||
|
.arg(graph.path())
|
||||||
|
.arg("--config")
|
||||||
|
.arg(&config),
|
||||||
|
);
|
||||||
|
let stdout = stdout_string(&output);
|
||||||
|
assert!(
|
||||||
|
stdout.contains("OK"),
|
||||||
|
"positional URI must validate the top-level registry, not the cli.graph default; stdout:\n{stdout}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -74,14 +74,36 @@ project:
|
||||||
graphs:
|
graphs:
|
||||||
local:
|
local:
|
||||||
uri: {}
|
uri: {}
|
||||||
|
policy:
|
||||||
|
file: ./policy.yaml
|
||||||
cli:
|
cli:
|
||||||
graph: local
|
graph: local
|
||||||
branch: main
|
branch: main
|
||||||
query:
|
query:
|
||||||
roots:
|
roots:
|
||||||
- .
|
- .
|
||||||
policy:
|
",
|
||||||
file: ./policy.yaml
|
yaml_string(&graph.path().to_string_lossy())
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn local_policy_server_graph_config(graph: &SystemGraph) -> String {
|
||||||
|
format!(
|
||||||
|
"\
|
||||||
|
project:
|
||||||
|
name: policy-e2e-local
|
||||||
|
graphs:
|
||||||
|
local:
|
||||||
|
uri: {}
|
||||||
|
policy:
|
||||||
|
file: ./policy.yaml
|
||||||
|
server:
|
||||||
|
graph: local
|
||||||
|
cli:
|
||||||
|
branch: main
|
||||||
|
query:
|
||||||
|
roots:
|
||||||
|
- .
|
||||||
",
|
",
|
||||||
yaml_string(&graph.path().to_string_lossy())
|
yaml_string(&graph.path().to_string_lossy())
|
||||||
)
|
)
|
||||||
|
|
@ -991,7 +1013,7 @@ query vector_search($q: String) {
|
||||||
|
|
||||||
// The publisher CAS conflict shape is verified end-to-end at the engine
|
// The publisher CAS conflict shape is verified end-to-end at the engine
|
||||||
// level in
|
// level in
|
||||||
// `crates/omnigraph/tests/runs.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
|
// `crates/omnigraph/tests/writes.rs::concurrent_writers_one_succeeds_one_gets_expected_version_mismatch`
|
||||||
// and at the HTTP boundary in
|
// and at the HTTP boundary in
|
||||||
// `crates/omnigraph-server/tests/server.rs::change_conflict_returns_manifest_conflict_409`.
|
// `crates/omnigraph-server/tests/server.rs::change_conflict_returns_manifest_conflict_409`.
|
||||||
// A CLI-level race would be timing-dependent; with direct-publish the
|
// A CLI-level race would be timing-dependent; with direct-publish the
|
||||||
|
|
@ -1000,49 +1022,55 @@ query vector_search($q: String) {
|
||||||
#[test]
|
#[test]
|
||||||
fn local_cli_policy_tooling_is_end_to_end() {
|
fn local_cli_policy_tooling_is_end_to_end() {
|
||||||
// Sanity check for the read-only policy CLI surfaces. These don't
|
// Sanity check for the read-only policy CLI surfaces. These don't
|
||||||
// mutate the graph — they just parse and evaluate the policy file —
|
// mutate the graph; they parse and evaluate the effective policy for
|
||||||
// so they don't depend on PR #4's engine-side enforcement.
|
// named graph selections, including per-graph policy files.
|
||||||
let graph = SystemGraph::loaded();
|
let graph = SystemGraph::loaded();
|
||||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||||
|
let server_graph_config = graph.write_config(
|
||||||
|
"omnigraph-policy-server.yaml",
|
||||||
|
&local_policy_server_graph_config(&graph),
|
||||||
|
);
|
||||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||||
graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
|
graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
|
||||||
|
|
||||||
let validate = output_success(
|
for config in [&config, &server_graph_config] {
|
||||||
cli()
|
let validate = output_success(
|
||||||
.arg("policy")
|
cli()
|
||||||
.arg("validate")
|
.arg("policy")
|
||||||
.arg("--config")
|
.arg("validate")
|
||||||
.arg(&config),
|
.arg("--config")
|
||||||
);
|
.arg(config),
|
||||||
assert!(stdout_string(&validate).contains("policy valid:"));
|
);
|
||||||
|
assert!(stdout_string(&validate).contains("policy valid:"));
|
||||||
|
|
||||||
let tests = output_success(cli().arg("policy").arg("test").arg("--config").arg(&config));
|
let tests = output_success(cli().arg("policy").arg("test").arg("--config").arg(config));
|
||||||
assert!(stdout_string(&tests).contains("policy tests passed: 2 cases"));
|
assert!(stdout_string(&tests).contains("policy tests passed: 2 cases"));
|
||||||
|
|
||||||
let explain = output_success(
|
let explain = output_success(
|
||||||
cli()
|
cli()
|
||||||
.arg("policy")
|
.arg("policy")
|
||||||
.arg("explain")
|
.arg("explain")
|
||||||
.arg("--config")
|
.arg("--config")
|
||||||
.arg(&config)
|
.arg(config)
|
||||||
.arg("--actor")
|
.arg("--actor")
|
||||||
.arg("act-bruno")
|
.arg("act-bruno")
|
||||||
.arg("--action")
|
.arg("--action")
|
||||||
.arg("change")
|
.arg("change")
|
||||||
.arg("--branch")
|
.arg("--branch")
|
||||||
.arg("main"),
|
.arg("main"),
|
||||||
);
|
);
|
||||||
let explain_stdout = stdout_string(&explain);
|
let explain_stdout = stdout_string(&explain);
|
||||||
assert!(explain_stdout.contains("decision: deny"));
|
assert!(explain_stdout.contains("decision: deny"));
|
||||||
assert!(explain_stdout.contains("branch: main"));
|
assert!(explain_stdout.contains("branch: main"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn local_cli_change_enforces_engine_layer_policy() {
|
fn local_cli_change_enforces_engine_layer_policy() {
|
||||||
// Asserts MR-722 PR #4: when `policy.file` is configured in
|
// Asserts MR-722 PR #4: when the selected graph has a configured
|
||||||
// `omnigraph.yaml`, the CLI loads PolicyEngine into Omnigraph and
|
// policy file, the CLI loads PolicyEngine into Omnigraph and every
|
||||||
// every direct-engine write hits `enforce(action, scope, actor)` —
|
// direct-engine write hits `enforce(action, scope, actor)` — identical
|
||||||
// identical to what the HTTP server gets, regardless of transport.
|
// to what the HTTP server gets, regardless of transport.
|
||||||
//
|
//
|
||||||
// Three cases, each discriminating:
|
// Three cases, each discriminating:
|
||||||
//
|
//
|
||||||
|
|
@ -1135,6 +1163,32 @@ fn local_cli_change_enforces_engine_layer_policy() {
|
||||||
assert_eq!(verify["rows"][0]["p.name"], "RagnorOnMain");
|
assert_eq!(verify["rows"][0]["p.name"], "RagnorOnMain");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn local_cli_positional_uri_does_not_inherit_default_graph_policy() {
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||||
|
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||||
|
let mutation_file = insert_person_query(&graph, "system-local-policy-positional.gq");
|
||||||
|
|
||||||
|
let allowed = parse_stdout_json(&output_success(
|
||||||
|
cli()
|
||||||
|
.arg("--as")
|
||||||
|
.arg("act-bruno")
|
||||||
|
.arg("change")
|
||||||
|
.arg("--config")
|
||||||
|
.arg(&config)
|
||||||
|
.arg("--uri")
|
||||||
|
.arg(graph.path())
|
||||||
|
.arg("--query")
|
||||||
|
.arg(&mutation_file)
|
||||||
|
.arg("--params")
|
||||||
|
.arg(r#"{"name":"PositionalUriBruno","age":4}"#)
|
||||||
|
.arg("--json"),
|
||||||
|
));
|
||||||
|
assert_eq!(allowed["affected_nodes"], 1);
|
||||||
|
assert_eq!(allowed["actor_id"], "act-bruno");
|
||||||
|
}
|
||||||
|
|
||||||
// ─── MR-722 PR A: CLI×writer matrix ───────────────────────────────────────
|
// ─── MR-722 PR A: CLI×writer matrix ───────────────────────────────────────
|
||||||
//
|
//
|
||||||
// The change writer is covered above by `local_cli_change_enforces_engine_layer_policy`.
|
// The change writer is covered above by `local_cli_change_enforces_engine_layer_policy`.
|
||||||
|
|
@ -1293,6 +1347,62 @@ fn local_cli_schema_apply_enforces_engine_layer_policy() {
|
||||||
assert_eq!(allowed["applied"], true);
|
assert_eq!(allowed["applied"], true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn local_cli_schema_apply_rejects_stored_query_breakage_before_publish() {
|
||||||
|
let graph = SystemGraph::loaded();
|
||||||
|
graph.write_query(
|
||||||
|
"stored-find-person.gq",
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
);
|
||||||
|
let config = graph.write_config(
|
||||||
|
"omnigraph-stored-query-schema.yaml",
|
||||||
|
&format!(
|
||||||
|
"\
|
||||||
|
graphs:
|
||||||
|
local:
|
||||||
|
uri: {}
|
||||||
|
queries:
|
||||||
|
find_person:
|
||||||
|
file: ./stored-find-person.gq
|
||||||
|
cli:
|
||||||
|
graph: local
|
||||||
|
branch: main
|
||||||
|
query:
|
||||||
|
roots:
|
||||||
|
- .
|
||||||
|
policy: {{}}
|
||||||
|
",
|
||||||
|
yaml_string(&graph.path().to_string_lossy())
|
||||||
|
),
|
||||||
|
);
|
||||||
|
let renamed_schema = std::fs::read_to_string(fixture("test.pg"))
|
||||||
|
.unwrap()
|
||||||
|
.replace("age: I32?", "years: I32? @rename_from(\"age\")");
|
||||||
|
let schema_path = graph.write_file("stored-query-breaks.pg", &renamed_schema);
|
||||||
|
|
||||||
|
let rejected = output_failure(
|
||||||
|
cli()
|
||||||
|
.arg("schema")
|
||||||
|
.arg("apply")
|
||||||
|
.arg("--config")
|
||||||
|
.arg(&config)
|
||||||
|
.arg("--schema")
|
||||||
|
.arg(&schema_path)
|
||||||
|
.arg("--json"),
|
||||||
|
);
|
||||||
|
let stderr = String::from_utf8_lossy(&rejected.stderr);
|
||||||
|
assert!(
|
||||||
|
stderr.contains("find_person") && stderr.contains("schema check"),
|
||||||
|
"schema apply should reject the stored-query breakage before publish; stderr: {stderr}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let schema = stdout_string(&output_success(
|
||||||
|
cli().arg("schema").arg("show").arg("--config").arg(&config),
|
||||||
|
));
|
||||||
|
assert!(schema.contains("age: I32?"));
|
||||||
|
assert!(!schema.contains("years: I32?"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn local_cli_branch_create_enforces_engine_layer_policy() {
|
fn local_cli_branch_create_enforces_engine_layer_policy() {
|
||||||
let graph = SystemGraph::loaded();
|
let graph = SystemGraph::loaded();
|
||||||
|
|
@ -1448,6 +1558,8 @@ project:
|
||||||
graphs:
|
graphs:
|
||||||
local:
|
local:
|
||||||
uri: {}
|
uri: {}
|
||||||
|
policy:
|
||||||
|
file: ./policy.yaml
|
||||||
cli:
|
cli:
|
||||||
graph: local
|
graph: local
|
||||||
branch: main
|
branch: main
|
||||||
|
|
@ -1455,8 +1567,6 @@ cli:
|
||||||
query:
|
query:
|
||||||
roots:
|
roots:
|
||||||
- .
|
- .
|
||||||
policy:
|
|
||||||
file: ./policy.yaml
|
|
||||||
",
|
",
|
||||||
yaml_string(&graph.path().to_string_lossy()),
|
yaml_string(&graph.path().to_string_lossy()),
|
||||||
actor,
|
actor,
|
||||||
|
|
|
||||||
|
|
@ -60,10 +60,10 @@ project:
|
||||||
graphs:
|
graphs:
|
||||||
local:
|
local:
|
||||||
uri: {}
|
uri: {}
|
||||||
|
policy:
|
||||||
|
file: ./policy.yaml
|
||||||
server:
|
server:
|
||||||
graph: local
|
graph: local
|
||||||
policy:
|
|
||||||
file: ./policy.yaml
|
|
||||||
",
|
",
|
||||||
yaml_string(&graph.path().to_string_lossy())
|
yaml_string(&graph.path().to_string_lossy())
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "omnigraph-compiler"
|
name = "omnigraph-compiler"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
description = "Schema/query compiler for Omnigraph. Zero Lance dependency."
|
description = "Schema/query compiler for Omnigraph. Zero Lance dependency."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "omnigraph-policy"
|
name = "omnigraph-policy"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum."
|
description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,21 @@ pub enum PolicyAction {
|
||||||
/// from v0.6.0; operators add and remove graphs by editing
|
/// from v0.6.0; operators add and remove graphs by editing
|
||||||
/// `omnigraph.yaml` and restarting.
|
/// `omnigraph.yaml` and restarting.
|
||||||
GraphList,
|
GraphList,
|
||||||
|
/// Gates invoking a server-side stored query by name. Per-graph and
|
||||||
|
/// **graph-scoped** (no branch dimension, like `Admin`): the per-branch
|
||||||
|
/// access of the query body is enforced by the inner `Read`/`Change`
|
||||||
|
/// gate, so branch-scoping this outer gate would be redundant (and was
|
||||||
|
/// wrong for snapshot reads). A rule that sets `branch_scope` on
|
||||||
|
/// `invoke_query` is rejected by `validate()`. In this release it is
|
||||||
|
/// **coarse**: an `invoke_query` allow rule permits *any* stored query
|
||||||
|
/// on the graph (no per-query dimension yet); a future, additive
|
||||||
|
/// refinement adds an optional query-name scope.
|
||||||
|
///
|
||||||
|
/// This gate sits at the HTTP boundary. The engine `_as` writers still
|
||||||
|
/// enforce `Read`/`Change` per the query body, so a stored *mutation*
|
||||||
|
/// is double-gated: `invoke_query` to reach the tool, plus `change` for
|
||||||
|
/// the write itself.
|
||||||
|
InvokeQuery,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PolicyAction {
|
impl PolicyAction {
|
||||||
|
|
@ -70,6 +85,7 @@ impl PolicyAction {
|
||||||
Self::BranchMerge => "branch_merge",
|
Self::BranchMerge => "branch_merge",
|
||||||
Self::Admin => "admin",
|
Self::Admin => "admin",
|
||||||
Self::GraphList => "graph_list",
|
Self::GraphList => "graph_list",
|
||||||
|
Self::InvokeQuery => "invoke_query",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -99,7 +115,8 @@ impl PolicyAction {
|
||||||
| Self::BranchCreate
|
| Self::BranchCreate
|
||||||
| Self::BranchDelete
|
| Self::BranchDelete
|
||||||
| Self::BranchMerge
|
| Self::BranchMerge
|
||||||
| Self::Admin => PolicyResourceKind::Graph,
|
| Self::Admin
|
||||||
|
| Self::InvokeQuery => PolicyResourceKind::Graph,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -155,6 +172,7 @@ impl FromStr for PolicyAction {
|
||||||
"branch_merge" => Ok(Self::BranchMerge),
|
"branch_merge" => Ok(Self::BranchMerge),
|
||||||
"admin" => Ok(Self::Admin),
|
"admin" => Ok(Self::Admin),
|
||||||
"graph_list" => Ok(Self::GraphList),
|
"graph_list" => Ok(Self::GraphList),
|
||||||
|
"invoke_query" => Ok(Self::InvokeQuery),
|
||||||
other => bail!("unknown policy action '{other}'"),
|
other => bail!("unknown policy action '{other}'"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -806,6 +824,7 @@ namespace Omnigraph {
|
||||||
action "branch_delete" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
action "branch_delete" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||||
action "branch_merge" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
action "branch_merge" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||||
action "admin" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
action "admin" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||||
|
action "invoke_query" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||||
|
|
||||||
action "graph_list" appliesTo { principal: Actor, resource: Server, context: RequestContext };
|
action "graph_list" appliesTo { principal: Actor, resource: Server, context: RequestContext };
|
||||||
}
|
}
|
||||||
|
|
@ -1264,6 +1283,80 @@ rules:
|
||||||
assert!(!deny.allowed);
|
assert!(!deny.allowed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn invoke_query_authorizes_per_graph() {
|
||||||
|
let policy: PolicyConfig = serde_yaml::from_str(
|
||||||
|
r#"
|
||||||
|
version: 1
|
||||||
|
groups:
|
||||||
|
team: [act-alice]
|
||||||
|
others: [act-bruno]
|
||||||
|
rules:
|
||||||
|
- id: team-invoke-queries
|
||||||
|
allow:
|
||||||
|
actors: { group: team }
|
||||||
|
actions: [invoke_query]
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||||
|
|
||||||
|
let allow = engine
|
||||||
|
.authorize(
|
||||||
|
"act-alice",
|
||||||
|
&PolicyRequest {
|
||||||
|
action: PolicyAction::InvokeQuery,
|
||||||
|
branch: None,
|
||||||
|
target_branch: None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert!(allow.allowed);
|
||||||
|
assert_eq!(
|
||||||
|
allow.matched_rule_id.as_deref(),
|
||||||
|
Some("team-invoke-queries")
|
||||||
|
);
|
||||||
|
|
||||||
|
// Actor outside the group → deny.
|
||||||
|
let deny = engine
|
||||||
|
.authorize(
|
||||||
|
"act-bruno",
|
||||||
|
&PolicyRequest {
|
||||||
|
action: PolicyAction::InvokeQuery,
|
||||||
|
branch: None,
|
||||||
|
target_branch: None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
assert!(!deny.allowed);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn invoke_query_rejects_branch_scope() {
|
||||||
|
// invoke_query is graph-scoped (like admin) — per-branch access is
|
||||||
|
// enforced by the inner read/change gate — so a rule that puts a
|
||||||
|
// `branch_scope` qualifier on it is rejected at validate().
|
||||||
|
let policy: PolicyConfig = serde_yaml::from_str(
|
||||||
|
r#"
|
||||||
|
version: 1
|
||||||
|
groups:
|
||||||
|
team: [act-alice]
|
||||||
|
rules:
|
||||||
|
- id: team-invoke-any-branch
|
||||||
|
allow:
|
||||||
|
actors: { group: team }
|
||||||
|
actions: [invoke_query]
|
||||||
|
branch_scope: any
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let err = policy.validate().unwrap_err().to_string();
|
||||||
|
assert!(
|
||||||
|
err.contains("branch_scope") && err.contains("invoke_query"),
|
||||||
|
"branch_scope on invoke_query must be rejected: {err}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn server_scoped_rule_cannot_use_branch_scope() {
|
fn server_scoped_rule_cannot_use_branch_scope() {
|
||||||
let policy: PolicyConfig = serde_yaml::from_str(
|
let policy: PolicyConfig = serde_yaml::from_str(
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "omnigraph-server"
|
name = "omnigraph-server"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
description = "HTTP server for the Omnigraph graph database."
|
description = "HTTP server for the Omnigraph graph database."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
@ -19,9 +19,9 @@ default = []
|
||||||
aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]
|
aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
|
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.1" }
|
||||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
|
||||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
|
||||||
axum = { workspace = true }
|
axum = { workspace = true }
|
||||||
clap = { workspace = true }
|
clap = { workspace = true }
|
||||||
color-eyre = { workspace = true }
|
color-eyre = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
use omnigraph::db::{GraphCommit, MergeOutcome, ReadTarget, SchemaApplyResult, Snapshot};
|
use omnigraph::db::{GraphCommit, MergeOutcome, ReadTarget, SchemaApplyResult, Snapshot};
|
||||||
use omnigraph::error::{MergeConflict, MergeConflictKind};
|
use omnigraph::error::{MergeConflict, MergeConflictKind};
|
||||||
use omnigraph::loader::{IngestResult, LoadMode};
|
use omnigraph::loader::{IngestResult, LoadMode};
|
||||||
|
use crate::queries::StoredQuery;
|
||||||
use omnigraph_compiler::SchemaMigrationStep;
|
use omnigraph_compiler::SchemaMigrationStep;
|
||||||
|
use omnigraph_compiler::query::ast::Param;
|
||||||
use omnigraph_compiler::result::QueryResult;
|
use omnigraph_compiler::result::QueryResult;
|
||||||
|
use omnigraph_compiler::types::{PropType, ScalarType};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use utoipa::{IntoParams, ToSchema};
|
use utoipa::{IntoParams, ToSchema};
|
||||||
|
|
@ -300,6 +303,162 @@ pub struct ChangeRequest {
|
||||||
pub branch: Option<String>,
|
pub branch: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Body for `POST /queries/{name}` — invokes the server-side stored query
|
||||||
|
/// named in the path. The query source and name come from the registry,
|
||||||
|
/// never the body; only the runtime inputs are supplied here.
|
||||||
|
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||||
|
pub struct InvokeStoredQueryRequest {
|
||||||
|
/// JSON object whose keys match the stored query's declared parameters.
|
||||||
|
#[serde(default)]
|
||||||
|
pub params: Option<Value>,
|
||||||
|
/// Branch to run against. Defaults to `main`; for a stored mutation the
|
||||||
|
/// write targets this branch.
|
||||||
|
#[serde(default)]
|
||||||
|
pub branch: Option<String>,
|
||||||
|
/// Snapshot id to read from (read queries only — rejected for a stored
|
||||||
|
/// mutation). Mutually exclusive with `branch`.
|
||||||
|
#[serde(default)]
|
||||||
|
pub snapshot: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Response for `POST /queries/{name}`: the read envelope for a stored
|
||||||
|
/// read, or the mutation envelope for a stored mutation. Serialized
|
||||||
|
/// **untagged**, so the wire shape is exactly [`ReadOutput`] or
|
||||||
|
/// [`ChangeOutput`] — classification follows the stored query, not a
|
||||||
|
/// wrapper field.
|
||||||
|
#[derive(Debug, Serialize, ToSchema)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum InvokeStoredQueryResponse {
|
||||||
|
Read(ReadOutput),
|
||||||
|
Change(ChangeOutput),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kind of a stored-query parameter, decomposed so a client (e.g. an
|
||||||
|
/// MCP server) can build a typed input schema with a closed `match` and
|
||||||
|
/// never re-parse omnigraph's type spelling. `bigint`/`date`/`datetime`/
|
||||||
|
/// `blob` are carried as JSON strings on the wire: a 64-bit integer past
|
||||||
|
/// 2^53 loses precision as a JSON number, and Date/DateTime are ISO
|
||||||
|
/// strings, Blob a blob-URI string.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum ParamKind {
|
||||||
|
String,
|
||||||
|
Bool,
|
||||||
|
Int,
|
||||||
|
#[serde(rename = "bigint")]
|
||||||
|
BigInt,
|
||||||
|
Float,
|
||||||
|
Date,
|
||||||
|
#[serde(rename = "datetime")]
|
||||||
|
DateTime,
|
||||||
|
Blob,
|
||||||
|
Vector,
|
||||||
|
List,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One declared parameter of a stored query, projected for the catalog.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||||
|
pub struct ParamDescriptor {
|
||||||
|
pub name: String,
|
||||||
|
pub kind: ParamKind,
|
||||||
|
/// Element kind when `kind == list` (always a scalar — the grammar
|
||||||
|
/// forbids lists of vectors or nested lists).
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub item_kind: Option<ParamKind>,
|
||||||
|
/// Dimension when `kind == vector`.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub vector_dim: Option<u32>,
|
||||||
|
/// `false` → the caller must supply it; `true` → optional.
|
||||||
|
pub nullable: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One entry in the stored-query catalog (`GET /queries`).
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||||
|
pub struct QueryCatalogEntry {
|
||||||
|
/// Registry key / invoke path segment (`POST /queries/{name}`).
|
||||||
|
pub name: String,
|
||||||
|
/// MCP tool id (the `tool_name` override, else `name`).
|
||||||
|
pub tool_name: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub description: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub instruction: Option<String>,
|
||||||
|
/// `true` for a stored mutation → an MCP read-only hint of `false`.
|
||||||
|
pub mutation: bool,
|
||||||
|
pub params: Vec<ParamDescriptor>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Response for `GET /queries`: the `mcp.expose` subset of a graph's
|
||||||
|
/// stored-query registry, each with typed parameters.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||||
|
pub struct QueriesCatalogOutput {
|
||||||
|
pub queries: Vec<QueryCatalogEntry>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Total map from a resolved scalar to its catalog kind. Exhaustive on
|
||||||
|
/// purpose: a new `ScalarType` is a compile error here until catalogued.
|
||||||
|
fn scalar_kind(scalar: ScalarType) -> ParamKind {
|
||||||
|
match scalar {
|
||||||
|
ScalarType::String => ParamKind::String,
|
||||||
|
ScalarType::Bool => ParamKind::Bool,
|
||||||
|
ScalarType::I32 | ScalarType::U32 => ParamKind::Int,
|
||||||
|
ScalarType::I64 | ScalarType::U64 => ParamKind::BigInt,
|
||||||
|
ScalarType::F32 | ScalarType::F64 => ParamKind::Float,
|
||||||
|
ScalarType::Date => ParamKind::Date,
|
||||||
|
ScalarType::DateTime => ParamKind::DateTime,
|
||||||
|
ScalarType::Blob => ParamKind::Blob,
|
||||||
|
ScalarType::Vector(_) => ParamKind::Vector,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn param_descriptor(param: &Param) -> ParamDescriptor {
|
||||||
|
match PropType::from_param_type_name(&param.type_name, param.nullable) {
|
||||||
|
Some(pt) if pt.list => ParamDescriptor {
|
||||||
|
name: param.name.clone(),
|
||||||
|
kind: ParamKind::List,
|
||||||
|
item_kind: Some(scalar_kind(pt.scalar)),
|
||||||
|
vector_dim: None,
|
||||||
|
nullable: param.nullable,
|
||||||
|
},
|
||||||
|
Some(pt) => {
|
||||||
|
let (kind, vector_dim) = match pt.scalar {
|
||||||
|
ScalarType::Vector(dim) => (ParamKind::Vector, Some(dim)),
|
||||||
|
other => (scalar_kind(other), None),
|
||||||
|
};
|
||||||
|
ParamDescriptor {
|
||||||
|
name: param.name.clone(),
|
||||||
|
kind,
|
||||||
|
item_kind: None,
|
||||||
|
vector_dim,
|
||||||
|
nullable: param.nullable,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Unreachable for a parsed query (every declared param type is
|
||||||
|
// grammatical); fall back to an opaque string so the field is still
|
||||||
|
// usable rather than dropped.
|
||||||
|
None => ParamDescriptor {
|
||||||
|
name: param.name.clone(),
|
||||||
|
kind: ParamKind::String,
|
||||||
|
item_kind: None,
|
||||||
|
vector_dim: None,
|
||||||
|
nullable: param.nullable,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Project a loaded stored query into its catalog entry (typed params,
|
||||||
|
/// MCP tool name, read/mutate flag, description/instruction).
|
||||||
|
pub fn query_catalog_entry(query: &StoredQuery) -> QueryCatalogEntry {
|
||||||
|
QueryCatalogEntry {
|
||||||
|
name: query.name.clone(),
|
||||||
|
tool_name: query.effective_tool_name().to_string(),
|
||||||
|
description: query.decl.description.clone(),
|
||||||
|
instruction: query.decl.instruction.clone(),
|
||||||
|
mutation: query.is_mutation(),
|
||||||
|
params: query.decl.params.iter().map(param_descriptor).collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||||
pub struct SchemaApplyRequest {
|
pub struct SchemaApplyRequest {
|
||||||
/// Project schema in `.pg` source form. The diff against the current
|
/// Project schema in `.pg` source form. The diff against the current
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,13 @@ use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
pub const DEFAULT_CONFIG_FILE: &str = "omnigraph.yaml";
|
pub const DEFAULT_CONFIG_FILE: &str = "omnigraph.yaml";
|
||||||
|
|
||||||
|
pub fn graph_resource_id_for_selection(
|
||||||
|
selected_graph: Option<&str>,
|
||||||
|
normalized_uri: &str,
|
||||||
|
) -> String {
|
||||||
|
selected_graph.unwrap_or(normalized_uri).to_string()
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||||
pub struct ProjectConfig {
|
pub struct ProjectConfig {
|
||||||
pub name: Option<String>,
|
pub name: Option<String>,
|
||||||
|
|
@ -24,6 +31,14 @@ pub struct TargetConfig {
|
||||||
/// graph's HTTP-layer Cedar enforcement.
|
/// graph's HTTP-layer Cedar enforcement.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub policy: PolicySettings,
|
pub policy: PolicySettings,
|
||||||
|
/// Per-graph stored-query registry: an inline `name -> entry`
|
||||||
|
/// map. Mirrors the per-graph `policy` shape — each
|
||||||
|
/// `graphs.<id>.queries` declares that graph's stored queries. Absent
|
||||||
|
/// (or empty) = no stored queries for the graph. v1 is inline-only;
|
||||||
|
/// an external `queries.yaml` manifest indirection is a deferred
|
||||||
|
/// convenience.
|
||||||
|
#[serde(default)]
|
||||||
|
pub queries: BTreeMap<String, QueryEntry>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Serialize, Deserialize, ValueEnum)]
|
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Serialize, Deserialize, ValueEnum)]
|
||||||
|
|
@ -90,6 +105,50 @@ pub struct PolicySettings {
|
||||||
pub file: Option<String>,
|
pub file: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// One stored-query registry entry. The map **key** is the query's
|
||||||
|
/// identity — it must equal the `query <name>` symbol declared inside
|
||||||
|
/// the referenced `.gq` file (asserted when the registry loads).
|
||||||
|
/// Renaming the key (or the symbol) is a breaking change to callers, by
|
||||||
|
/// design.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct QueryEntry {
|
||||||
|
/// Path to the `.gq` file (relative to the config's `base_dir`). The
|
||||||
|
/// file may declare several queries; the registry selects the one
|
||||||
|
/// whose symbol matches the map key.
|
||||||
|
pub file: String,
|
||||||
|
#[serde(default)]
|
||||||
|
pub mcp: McpSettings,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// MCP exposure for a stored query. A *deployment* concern (the same
|
||||||
|
/// `.gq` may be exposed in one graph and hidden in another), so it lives
|
||||||
|
/// in YAML rather than in the `.gq` source. **Default `expose: true`** —
|
||||||
|
/// declaring a query in the manifest *is* the opt-in, so it appears in the
|
||||||
|
/// MCP tool catalog (`GET /queries`) by default; set `expose: false` to
|
||||||
|
/// keep a query HTTP/service-callable but hidden from the agent tool list.
|
||||||
|
/// `expose` governs catalog membership only — it is **not** an
|
||||||
|
/// authorization gate (invocation is gated by `invoke_query`), so a hidden
|
||||||
|
/// query is still invocable by name with the right permission.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct McpSettings {
|
||||||
|
#[serde(default = "mcp_expose_default")]
|
||||||
|
pub expose: bool,
|
||||||
|
pub tool_name: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mcp_expose_default() -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for McpSettings {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
expose: mcp_expose_default(),
|
||||||
|
tool_name: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
|
||||||
#[serde(rename_all = "snake_case")]
|
#[serde(rename_all = "snake_case")]
|
||||||
pub enum AliasCommand {
|
pub enum AliasCommand {
|
||||||
|
|
@ -137,6 +196,12 @@ pub struct OmnigraphConfig {
|
||||||
pub aliases: BTreeMap<String, AliasConfig>,
|
pub aliases: BTreeMap<String, AliasConfig>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub policy: PolicySettings,
|
pub policy: PolicySettings,
|
||||||
|
/// Top-level stored-query registry, used in single-graph
|
||||||
|
/// mode — mirrors how the top-level `policy` applies to the single
|
||||||
|
/// graph. In multi-graph mode this is unused; each graph's
|
||||||
|
/// `graphs.<id>.queries` applies instead.
|
||||||
|
#[serde(default)]
|
||||||
|
pub queries: BTreeMap<String, QueryEntry>,
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
base_dir: PathBuf,
|
base_dir: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
@ -152,6 +217,7 @@ impl Default for OmnigraphConfig {
|
||||||
query: QueryDefaults::default(),
|
query: QueryDefaults::default(),
|
||||||
aliases: BTreeMap::new(),
|
aliases: BTreeMap::new(),
|
||||||
policy: PolicySettings::default(),
|
policy: PolicySettings::default(),
|
||||||
|
queries: BTreeMap::new(),
|
||||||
base_dir: PathBuf::new(),
|
base_dir: PathBuf::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -244,6 +310,124 @@ impl OmnigraphConfig {
|
||||||
.map(|path| self.resolve_config_path(path))
|
.map(|path| self.resolve_config_path(path))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The top-level stored-query registry entries (single-graph mode).
|
||||||
|
pub fn query_entries(&self) -> &BTreeMap<String, QueryEntry> {
|
||||||
|
&self.queries
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The per-graph stored-query registry entries for a named target
|
||||||
|
/// (multi-graph mode). Returns `None` if the target is unknown.
|
||||||
|
pub fn target_query_entries(
|
||||||
|
&self,
|
||||||
|
target_name: &str,
|
||||||
|
) -> Option<&BTreeMap<String, QueryEntry>> {
|
||||||
|
self.graphs.get(target_name).map(|target| &target.queries)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The stored-query registry entries that apply for a graph
|
||||||
|
/// selection — the single definition of "which `queries:` block
|
||||||
|
/// governs graph X", shared by server boot and the CLI so the two
|
||||||
|
/// can't drift. A named graph present in `graphs:` uses its
|
||||||
|
/// per-graph block; everything else (no selection, or a name that is
|
||||||
|
/// not a known graph, e.g. a bare URI) falls back to the top-level
|
||||||
|
/// block (single-graph mode).
|
||||||
|
pub fn query_entries_for(&self, graph: Option<&str>) -> &BTreeMap<String, QueryEntry> {
|
||||||
|
match graph {
|
||||||
|
Some(name) if self.graphs.contains_key(name) => &self.graphs[name].queries,
|
||||||
|
_ => &self.queries,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The single CLI gate that turns a raw graph selection into a *validated*
|
||||||
|
/// one — the fallible counterpart to the infallible
|
||||||
|
/// [`OmnigraphConfig::query_entries_for`]. Both `queries` subcommands route
|
||||||
|
/// their selection through here so neither can skip a check the other (or
|
||||||
|
/// server boot) applies:
|
||||||
|
/// * a known name passes through, but only after the same coherence check
|
||||||
|
/// server boot enforces
|
||||||
|
/// ([`OmnigraphConfig::ensure_top_level_blocks_honored`]) — a named graph
|
||||||
|
/// with a populated top-level block is rejected;
|
||||||
|
/// * an unknown name errors with the **same** message
|
||||||
|
/// [`OmnigraphConfig::resolve_target_uri`] produces, so a command that
|
||||||
|
/// opens no URI rejects an unknown `--target` exactly like the
|
||||||
|
/// URI-resolving commands do;
|
||||||
|
/// * an anonymous selection (`None`, e.g. a bare URI) stays anonymous,
|
||||||
|
/// resolving to the top-level registry downstream (top-level honored).
|
||||||
|
pub fn resolve_graph_selection<'a>(&self, graph: Option<&'a str>) -> Result<Option<&'a str>> {
|
||||||
|
match graph {
|
||||||
|
Some(name) if self.graphs.contains_key(name) => {
|
||||||
|
self.ensure_top_level_blocks_honored(Some(name))?;
|
||||||
|
Ok(Some(name))
|
||||||
|
}
|
||||||
|
Some(name) => bail!("graph '{}' not found in {}", name, DEFAULT_CONFIG_FILE),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn resolve_policy_tooling_graph_selection(&self) -> Result<Option<&str>> {
|
||||||
|
self.resolve_graph_selection(self.cli_graph_name().or_else(|| self.server_graph_name()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The policy file that applies for a graph selection — the policy
|
||||||
|
/// sibling of [`OmnigraphConfig::query_entries_for`], so policy and
|
||||||
|
/// queries resolve by the same identity rule. A named graph in
|
||||||
|
/// `graphs:` uses its per-graph `policy.file` with **no** top-level
|
||||||
|
/// fallback (a named graph with no per-graph policy has no policy —
|
||||||
|
/// that keeps the boot-time coherence check meaningful); anything else
|
||||||
|
/// (no selection, or a bare URI) uses the top-level `policy.file`.
|
||||||
|
pub fn resolve_policy_file_for(&self, graph: Option<&str>) -> Option<PathBuf> {
|
||||||
|
match graph {
|
||||||
|
Some(name) if self.graphs.contains_key(name) => self.resolve_target_policy_file(name),
|
||||||
|
_ => self.resolve_policy_file(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Names of any top-level config blocks (`policy.file`, `queries:`)
|
||||||
|
/// that are populated. Used by the boot-time coherence check: when a
|
||||||
|
/// **named** graph is served (single-mode by name, or multi-mode),
|
||||||
|
/// the top-level blocks are not honored, so a populated one is a
|
||||||
|
/// configuration error rather than a silent no-op.
|
||||||
|
pub fn populated_top_level_blocks(&self) -> Vec<&'static str> {
|
||||||
|
let mut blocks = Vec::new();
|
||||||
|
if self.policy.file.is_some() {
|
||||||
|
blocks.push("policy.file");
|
||||||
|
}
|
||||||
|
if !self.queries.is_empty() {
|
||||||
|
blocks.push("queries");
|
||||||
|
}
|
||||||
|
blocks
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named graph uses its own `graphs.<name>` block, so a populated
|
||||||
|
/// top-level block would be silently ignored — a config error. The single
|
||||||
|
/// definition of that rule, shared by server boot and the CLI selection
|
||||||
|
/// gate ([`OmnigraphConfig::resolve_graph_selection`]) so the two can't
|
||||||
|
/// drift. An anonymous selection (`None`, e.g. a bare URI) legitimately
|
||||||
|
/// honors the top-level blocks, so it is never rejected here.
|
||||||
|
pub fn ensure_top_level_blocks_honored(&self, selected: Option<&str>) -> Result<()> {
|
||||||
|
if let Some(name) = selected {
|
||||||
|
let unhonored = self.populated_top_level_blocks();
|
||||||
|
if !unhonored.is_empty() {
|
||||||
|
bail!(
|
||||||
|
"named graph '{name}' uses its own `graphs.{name}.…` block, but top-level {} \
|
||||||
|
{} set and would be ignored. Move it to `graphs.{name}` (e.g. \
|
||||||
|
`graphs.{name}.policy.file`, `graphs.{name}.queries`).",
|
||||||
|
unhonored.join(" and "),
|
||||||
|
if unhonored.len() == 1 { "is" } else { "are" },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve a stored-query `.gq` file path (from a registry entry),
|
||||||
|
/// relative to the config's `base_dir`. Mirrors policy-file
|
||||||
|
/// resolution; the registry loader calls this to turn each entry's
|
||||||
|
/// `file:` value into an absolute path.
|
||||||
|
pub fn resolve_query_file(&self, value: &str) -> PathBuf {
|
||||||
|
self.resolve_config_path(value)
|
||||||
|
}
|
||||||
|
|
||||||
/// Resolve the server-level policy file path (used by management
|
/// Resolve the server-level policy file path (used by management
|
||||||
/// endpoints). Returns `None` if `server.policy.file` is not set.
|
/// endpoints). Returns `None` if `server.policy.file` is not set.
|
||||||
pub fn resolve_server_policy_file(&self) -> Option<PathBuf> {
|
pub fn resolve_server_policy_file(&self) -> Option<PathBuf> {
|
||||||
|
|
@ -387,7 +571,9 @@ mod tests {
|
||||||
|
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
use super::{ReadOutputFormat, TableCellLayout, load_config_in};
|
use super::{
|
||||||
|
ReadOutputFormat, TableCellLayout, graph_resource_id_for_selection, load_config_in,
|
||||||
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn load_config_reads_yaml_defaults_from_current_dir() {
|
fn load_config_reads_yaml_defaults_from_current_dir() {
|
||||||
|
|
@ -451,6 +637,114 @@ policy: {}
|
||||||
assert!(config.graphs.is_empty());
|
assert!(config.graphs.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn graph_resource_id_for_selection_uses_name_or_anonymous_uri() {
|
||||||
|
assert_eq!(
|
||||||
|
graph_resource_id_for_selection(Some("local"), "/tmp/graph.omni"),
|
||||||
|
"local"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
graph_resource_id_for_selection(None, "/tmp/graph.omni"),
|
||||||
|
"/tmp/graph.omni"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resolve_graph_selection_validates_membership_and_coherence() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
"graphs:\n local:\n uri: ./demo.omni\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
|
||||||
|
// A known graph passes through unchanged.
|
||||||
|
assert_eq!(config.resolve_graph_selection(Some("local")).unwrap(), Some("local"));
|
||||||
|
// An anonymous selection stays anonymous (→ top-level registry downstream).
|
||||||
|
assert_eq!(config.resolve_graph_selection(None).unwrap(), None);
|
||||||
|
// An unknown name errors, naming the graph (matching resolve_target_uri).
|
||||||
|
let err = config.resolve_graph_selection(Some("ghost")).unwrap_err().to_string();
|
||||||
|
assert!(
|
||||||
|
err.contains("ghost") && err.contains("not found"),
|
||||||
|
"unknown graph must error naming it: {err}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Coherence: a named graph plus a populated top-level block is the
|
||||||
|
// config server boot refuses, so the gate rejects it too (shared rule
|
||||||
|
// via ensure_top_level_blocks_honored). An anonymous selection still
|
||||||
|
// passes — top-level is honored when no graph is named.
|
||||||
|
let temp2 = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp2.path().join("omnigraph.yaml"),
|
||||||
|
"graphs:\n local:\n uri: ./demo.omni\npolicy:\n file: ./top.yaml\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let incoherent = load_config_in(temp2.path(), None).unwrap();
|
||||||
|
let err = incoherent
|
||||||
|
.resolve_graph_selection(Some("local"))
|
||||||
|
.unwrap_err()
|
||||||
|
.to_string();
|
||||||
|
assert!(
|
||||||
|
err.contains("local") && err.contains("policy.file"),
|
||||||
|
"named graph + populated top-level block must be rejected, naming both: {err}"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
incoherent.resolve_graph_selection(None).unwrap(),
|
||||||
|
None,
|
||||||
|
"anonymous selection still honors top-level"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn policy_tooling_graph_selection_prefers_cli_then_server_and_validates() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
"graphs:\n local:\n uri: ./local.omni\n prod:\n uri: ./prod.omni\n\
|
||||||
|
server:\n graph: local\ncli:\n graph: prod\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
config.resolve_policy_tooling_graph_selection().unwrap(),
|
||||||
|
Some("prod")
|
||||||
|
);
|
||||||
|
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
"graphs:\n local:\n uri: ./local.omni\nserver:\n graph: local\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
config.resolve_policy_tooling_graph_selection().unwrap(),
|
||||||
|
Some("local")
|
||||||
|
);
|
||||||
|
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(temp.path().join("omnigraph.yaml"), "policy: {}\n").unwrap();
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
assert_eq!(config.resolve_policy_tooling_graph_selection().unwrap(), None);
|
||||||
|
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
"graphs:\n local:\n uri: ./local.omni\nserver:\n graph: ghost\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
let err = config
|
||||||
|
.resolve_policy_tooling_graph_selection()
|
||||||
|
.unwrap_err()
|
||||||
|
.to_string();
|
||||||
|
assert!(
|
||||||
|
err.contains("ghost") && err.contains("not found"),
|
||||||
|
"unknown server.graph must use graph-selection validation: {err}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn resolve_query_path_searches_config_roots() {
|
fn resolve_query_path_searches_config_roots() {
|
||||||
let temp = tempdir().unwrap();
|
let temp = tempdir().unwrap();
|
||||||
|
|
@ -489,6 +783,118 @@ policy: {}
|
||||||
assert_eq!(resolved, config_dir.join("local.gq"));
|
assert_eq!(resolved, config_dir.join("local.gq"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_block_round_trips_inline_and_per_graph() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
r#"
|
||||||
|
graphs:
|
||||||
|
prod:
|
||||||
|
uri: s3://bucket/prod
|
||||||
|
queries:
|
||||||
|
find_user:
|
||||||
|
file: ./queries/find_user.gq
|
||||||
|
mcp:
|
||||||
|
expose: true
|
||||||
|
tool_name: lookup_user
|
||||||
|
internal_audit:
|
||||||
|
file: ./queries/audit.gq
|
||||||
|
queries:
|
||||||
|
single_mode_q:
|
||||||
|
file: ./q.gq
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
|
||||||
|
// Per-graph registry (multi-graph mode).
|
||||||
|
let prod = config.target_query_entries("prod").unwrap();
|
||||||
|
assert_eq!(prod.len(), 2);
|
||||||
|
let find_user = &prod["find_user"];
|
||||||
|
assert_eq!(find_user.file, "./queries/find_user.gq");
|
||||||
|
assert!(find_user.mcp.expose);
|
||||||
|
assert_eq!(find_user.mcp.tool_name.as_deref(), Some("lookup_user"));
|
||||||
|
// Default exposure is true (the manifest entry is the opt-in); tool_name absent.
|
||||||
|
let audit = &prod["internal_audit"];
|
||||||
|
assert!(audit.mcp.expose);
|
||||||
|
assert!(audit.mcp.tool_name.is_none());
|
||||||
|
|
||||||
|
// Top-level registry (single-graph mode).
|
||||||
|
assert_eq!(config.query_entries().len(), 1);
|
||||||
|
|
||||||
|
// The shared selector resolves the same blocks the server boot
|
||||||
|
// and the CLI use: a known graph → its per-graph block; no
|
||||||
|
// selection or an unknown name → the top-level block (the latter
|
||||||
|
// pins the behavior of the CLI's now-deleted fallback arm).
|
||||||
|
assert_eq!(config.query_entries_for(Some("prod")).len(), 2);
|
||||||
|
assert_eq!(config.query_entries_for(None).len(), 1);
|
||||||
|
assert_eq!(config.query_entries_for(Some("nonexistent")).len(), 1);
|
||||||
|
|
||||||
|
// Path resolution joins against base_dir, like policy files.
|
||||||
|
assert_eq!(
|
||||||
|
config.resolve_query_file(&find_user.file),
|
||||||
|
temp.path().join("./queries/find_user.gq")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resolve_policy_file_for_follows_identity() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
"policy:\n file: ./top.yaml\ngraphs:\n prod:\n uri: s3://b/prod\n \
|
||||||
|
policy:\n file: ./prod.yaml\n bare:\n uri: s3://b/bare\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
|
||||||
|
// Named graph with its own policy → per-graph (not top-level).
|
||||||
|
assert!(
|
||||||
|
config
|
||||||
|
.resolve_policy_file_for(Some("prod"))
|
||||||
|
.unwrap()
|
||||||
|
.ends_with("prod.yaml")
|
||||||
|
);
|
||||||
|
// Named graph with NO per-graph policy → None (no top-level fallback;
|
||||||
|
// load-bearing for the boot coherence check).
|
||||||
|
assert!(config.resolve_policy_file_for(Some("bare")).is_none());
|
||||||
|
// Anonymous (bare URI) or an unknown name → top-level.
|
||||||
|
assert!(
|
||||||
|
config
|
||||||
|
.resolve_policy_file_for(None)
|
||||||
|
.unwrap()
|
||||||
|
.ends_with("top.yaml")
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
config
|
||||||
|
.resolve_policy_file_for(Some("nope"))
|
||||||
|
.unwrap()
|
||||||
|
.ends_with("top.yaml")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn queries_block_absent_yields_empty_registry() {
|
||||||
|
let temp = tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
"graphs:\n local:\n uri: ./demo.omni\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let config = load_config_in(temp.path(), None).unwrap();
|
||||||
|
// Additive: no `queries:` anywhere → empty registries everywhere.
|
||||||
|
assert!(config.query_entries().is_empty());
|
||||||
|
assert!(
|
||||||
|
config
|
||||||
|
.target_query_entries("local")
|
||||||
|
.unwrap()
|
||||||
|
.is_empty()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn policy_block_accepts_non_empty_mapping() {
|
fn policy_block_accepts_non_empty_mapping() {
|
||||||
let temp = tempdir().unwrap();
|
let temp = tempdir().unwrap();
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ pub mod config;
|
||||||
pub mod graph_id;
|
pub mod graph_id;
|
||||||
pub mod identity;
|
pub mod identity;
|
||||||
pub mod policy;
|
pub mod policy;
|
||||||
|
pub mod queries;
|
||||||
pub mod registry;
|
pub mod registry;
|
||||||
pub mod workload;
|
pub mod workload;
|
||||||
|
|
||||||
|
|
@ -11,6 +12,8 @@ pub use graph_id::GraphId;
|
||||||
pub use identity::{AuthSource, GraphKey, ResolvedActor, Scope, TenantId};
|
pub use identity::{AuthSource, GraphKey, ResolvedActor, Scope, TenantId};
|
||||||
pub use registry::{GraphHandle, GraphRegistry, InsertError, RegistryLookup, RegistrySnapshot};
|
pub use registry::{GraphHandle, GraphRegistry, InsertError, RegistryLookup, RegistrySnapshot};
|
||||||
|
|
||||||
|
use crate::queries::{QueryRegistry, check, format_check_breakages};
|
||||||
|
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
@ -22,7 +25,8 @@ use api::{
|
||||||
BranchCreateOutput, BranchCreateRequest, BranchDeleteOutput, BranchListOutput,
|
BranchCreateOutput, BranchCreateRequest, BranchDeleteOutput, BranchListOutput,
|
||||||
BranchMergeOutput, BranchMergeRequest, ChangeOutput, ChangeRequest, CommitListOutput,
|
BranchMergeOutput, BranchMergeRequest, ChangeOutput, ChangeRequest, CommitListOutput,
|
||||||
CommitListQuery, ErrorCode, ErrorOutput, ExportRequest, GraphInfo, GraphListResponse,
|
CommitListQuery, ErrorCode, ErrorOutput, ExportRequest, GraphInfo, GraphListResponse,
|
||||||
HealthOutput, IngestOutput, IngestRequest, QueryRequest, ReadOutput, ReadRequest,
|
HealthOutput, IngestOutput, IngestRequest, InvokeStoredQueryRequest,
|
||||||
|
InvokeStoredQueryResponse, QueriesCatalogOutput, QueryRequest, ReadOutput, ReadRequest,
|
||||||
SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotQuery, ingest_output,
|
SchemaApplyOutput, SchemaApplyRequest, SchemaOutput, SnapshotQuery, ingest_output,
|
||||||
schema_apply_output, snapshot_payload,
|
schema_apply_output, snapshot_payload,
|
||||||
};
|
};
|
||||||
|
|
@ -40,12 +44,13 @@ use color_eyre::eyre::{Result, WrapErr, bail};
|
||||||
pub use config::{
|
pub use config::{
|
||||||
AliasCommand, AliasConfig, CliDefaults, DEFAULT_CONFIG_FILE, OmnigraphConfig, PolicySettings,
|
AliasCommand, AliasConfig, CliDefaults, DEFAULT_CONFIG_FILE, OmnigraphConfig, PolicySettings,
|
||||||
ProjectConfig, QueryDefaults, ReadOutputFormat, ServerDefaults, TableCellLayout, TargetConfig,
|
ProjectConfig, QueryDefaults, ReadOutputFormat, ServerDefaults, TableCellLayout, TargetConfig,
|
||||||
load_config,
|
graph_resource_id_for_selection, load_config,
|
||||||
};
|
};
|
||||||
use futures::stream;
|
use futures::stream;
|
||||||
use omnigraph::db::{Omnigraph, ReadTarget};
|
use omnigraph::db::{Omnigraph, ReadTarget};
|
||||||
use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
|
use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
|
||||||
use omnigraph::storage::normalize_root_uri;
|
use omnigraph::storage::normalize_root_uri;
|
||||||
|
use omnigraph_compiler::catalog::Catalog;
|
||||||
use omnigraph_compiler::json_params_to_param_map;
|
use omnigraph_compiler::json_params_to_param_map;
|
||||||
use omnigraph_compiler::query::parser::parse_query;
|
use omnigraph_compiler::query::parser::parse_query;
|
||||||
use omnigraph_compiler::{JsonParamMode, ParamMap};
|
use omnigraph_compiler::{JsonParamMode, ParamMap};
|
||||||
|
|
@ -93,6 +98,8 @@ fn hash_bearer_token(token: &str) -> BearerTokenHash {
|
||||||
server_export,
|
server_export,
|
||||||
#[allow(deprecated)] server_change,
|
#[allow(deprecated)] server_change,
|
||||||
server_mutate,
|
server_mutate,
|
||||||
|
server_list_queries,
|
||||||
|
server_invoke_query,
|
||||||
server_schema_apply,
|
server_schema_apply,
|
||||||
server_schema_get,
|
server_schema_get,
|
||||||
server_ingest,
|
server_ingest,
|
||||||
|
|
@ -157,8 +164,16 @@ pub enum ServerConfigMode {
|
||||||
/// set to a named target.
|
/// set to a named target.
|
||||||
Single {
|
Single {
|
||||||
uri: String,
|
uri: String,
|
||||||
|
/// Cedar graph resource id for the single graph. A named selection
|
||||||
|
/// uses the graph name; an anonymous URI uses the normalized URI to
|
||||||
|
/// preserve legacy single-graph policy identity.
|
||||||
|
graph_id: String,
|
||||||
/// Top-level `policy.file` (single-graph Cedar policy).
|
/// Top-level `policy.file` (single-graph Cedar policy).
|
||||||
policy_file: Option<PathBuf>,
|
policy_file: Option<PathBuf>,
|
||||||
|
/// Top-level stored-query registry, loaded and identity-checked
|
||||||
|
/// at settings-build time; type-checked against the schema when
|
||||||
|
/// the engine opens.
|
||||||
|
queries: QueryRegistry,
|
||||||
},
|
},
|
||||||
/// Multi-graph invocation — `--config omnigraph.yaml` with a
|
/// Multi-graph invocation — `--config omnigraph.yaml` with a
|
||||||
/// non-empty `graphs:` map and no single-mode selector.
|
/// non-empty `graphs:` map and no single-mode selector.
|
||||||
|
|
@ -185,6 +200,10 @@ pub struct GraphStartupConfig {
|
||||||
pub graph_id: String,
|
pub graph_id: String,
|
||||||
pub uri: String,
|
pub uri: String,
|
||||||
pub policy_file: Option<PathBuf>,
|
pub policy_file: Option<PathBuf>,
|
||||||
|
/// Per-graph stored-query registry, loaded and identity-checked at
|
||||||
|
/// settings-build time; type-checked against the schema when this
|
||||||
|
/// graph's engine opens.
|
||||||
|
pub queries: QueryRegistry,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Runtime routing for the server. Single mode = legacy
|
/// Runtime routing for the server. Single mode = legacy
|
||||||
|
|
@ -285,7 +304,31 @@ impl AppState {
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let bearer_tokens = hash_bearer_tokens(bearer_tokens);
|
let bearer_tokens = hash_bearer_tokens(bearer_tokens);
|
||||||
let per_graph_policy = policy_engine.map(Arc::new);
|
let per_graph_policy = policy_engine.map(Arc::new);
|
||||||
Self::build_single_mode(uri, db, bearer_tokens, per_graph_policy, Arc::new(workload))
|
Self::build_single_mode(uri, db, bearer_tokens, per_graph_policy, Arc::new(workload), None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like `new_single`, but attaches a pre-validated stored-query
|
||||||
|
/// registry. Private — the production single-mode boot path
|
||||||
|
/// (`open_single_with_queries`) is the only caller; every public
|
||||||
|
/// `new_*` constructor builds with no stored queries.
|
||||||
|
fn new_single_with_queries(
|
||||||
|
uri: String,
|
||||||
|
db: Omnigraph,
|
||||||
|
bearer_tokens: Vec<(String, String)>,
|
||||||
|
policy_engine: Option<PolicyEngine>,
|
||||||
|
workload: workload::WorkloadController,
|
||||||
|
queries: Option<Arc<QueryRegistry>>,
|
||||||
|
) -> Self {
|
||||||
|
let bearer_tokens = hash_bearer_tokens(bearer_tokens);
|
||||||
|
let per_graph_policy = policy_engine.map(Arc::new);
|
||||||
|
Self::build_single_mode(
|
||||||
|
uri,
|
||||||
|
db,
|
||||||
|
bearer_tokens,
|
||||||
|
per_graph_policy,
|
||||||
|
Arc::new(workload),
|
||||||
|
queries,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(uri: String, db: Omnigraph) -> Self {
|
pub fn new(uri: String, db: Omnigraph) -> Self {
|
||||||
|
|
@ -377,6 +420,39 @@ impl AppState {
|
||||||
uri: impl Into<String>,
|
uri: impl Into<String>,
|
||||||
bearer_tokens: Vec<(String, String)>,
|
bearer_tokens: Vec<(String, String)>,
|
||||||
policy_file: Option<&PathBuf>,
|
policy_file: Option<&PathBuf>,
|
||||||
|
) -> Result<Self> {
|
||||||
|
Self::open_single_with_queries(
|
||||||
|
uri,
|
||||||
|
bearer_tokens,
|
||||||
|
policy_file,
|
||||||
|
QueryRegistry::default(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Single-mode boot with a stored-query registry: open the engine,
|
||||||
|
/// **type-check the registry against the live schema and refuse to
|
||||||
|
/// start on a breakage** (same posture as bad policy YAML), log
|
||||||
|
/// non-blocking warnings, then attach the registry to the handle.
|
||||||
|
/// With an empty registry the check is a no-op and no registry is
|
||||||
|
/// attached — that is the path `open_with_bearer_tokens_and_policy`
|
||||||
|
/// (no stored queries) takes.
|
||||||
|
pub async fn open_single_with_queries(
|
||||||
|
uri: impl Into<String>,
|
||||||
|
bearer_tokens: Vec<(String, String)>,
|
||||||
|
policy_file: Option<&PathBuf>,
|
||||||
|
queries: QueryRegistry,
|
||||||
|
) -> Result<Self> {
|
||||||
|
Self::open_single_with_queries_for_graph_id(uri, bearer_tokens, policy_file, queries, None)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn open_single_with_queries_for_graph_id(
|
||||||
|
uri: impl Into<String>,
|
||||||
|
bearer_tokens: Vec<(String, String)>,
|
||||||
|
policy_file: Option<&PathBuf>,
|
||||||
|
queries: QueryRegistry,
|
||||||
|
graph_id: Option<String>,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
// The "policy requires tokens" invariant is enforced once by
|
// The "policy requires tokens" invariant is enforced once by
|
||||||
// `classify_server_runtime_state` in `serve()`, before either
|
// `classify_server_runtime_state` in `serve()`, before either
|
||||||
|
|
@ -384,16 +460,24 @@ impl AppState {
|
||||||
// time we get here, the (policy, no-tokens) combination has
|
// time we get here, the (policy, no-tokens) combination has
|
||||||
// already been rejected — no second bail needed.
|
// already been rejected — no second bail needed.
|
||||||
let uri = normalize_root_uri(&uri.into()).wrap_err("normalize graph URI")?;
|
let uri = normalize_root_uri(&uri.into()).wrap_err("normalize graph URI")?;
|
||||||
|
let graph_id = graph_id.unwrap_or_else(|| uri.clone());
|
||||||
let db = Omnigraph::open(&uri).await?;
|
let db = Omnigraph::open(&uri).await?;
|
||||||
|
|
||||||
|
// Validate the registry against the live schema and resolve it to
|
||||||
|
// an attachable handle (refuse boot on breakage).
|
||||||
|
let registry = validate_and_attach(queries, &db.catalog(), &graph_id)?;
|
||||||
|
|
||||||
let policy_engine = match policy_file {
|
let policy_engine = match policy_file {
|
||||||
Some(path) => Some(PolicyEngine::load_graph(path, &uri)?),
|
Some(path) => Some(PolicyEngine::load_graph(path, &graph_id)?),
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
Ok(Self::new_with_bearer_tokens_and_policy(
|
Ok(Self::new_single_with_queries(
|
||||||
uri,
|
uri,
|
||||||
db,
|
db,
|
||||||
bearer_tokens,
|
bearer_tokens,
|
||||||
policy_engine,
|
policy_engine,
|
||||||
|
workload::WorkloadController::from_env(),
|
||||||
|
registry,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -408,6 +492,7 @@ impl AppState {
|
||||||
bearer_tokens: Arc<[(BearerTokenHash, Arc<str>)]>,
|
bearer_tokens: Arc<[(BearerTokenHash, Arc<str>)]>,
|
||||||
policy_engine: Option<Arc<PolicyEngine>>,
|
policy_engine: Option<Arc<PolicyEngine>>,
|
||||||
workload: Arc<workload::WorkloadController>,
|
workload: Arc<workload::WorkloadController>,
|
||||||
|
queries: Option<Arc<QueryRegistry>>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
// Engine-layer policy gate (MR-722). With a per-graph policy
|
// Engine-layer policy gate (MR-722). With a per-graph policy
|
||||||
// installed, every `_as` writer on `Omnigraph` calls into the
|
// installed, every `_as` writer on `Omnigraph` calls into the
|
||||||
|
|
@ -436,6 +521,7 @@ impl AppState {
|
||||||
uri,
|
uri,
|
||||||
engine: Arc::new(db),
|
engine: Arc::new(db),
|
||||||
policy: policy_engine,
|
policy: policy_engine,
|
||||||
|
queries,
|
||||||
});
|
});
|
||||||
Self {
|
Self {
|
||||||
routing: GraphRouting::Single { handle },
|
routing: GraphRouting::Single { handle },
|
||||||
|
|
@ -750,6 +836,58 @@ pub fn init_tracing() {
|
||||||
let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
|
let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Log each non-blocking advisory from a registry check report.
|
||||||
|
fn log_registry_warnings(label: &str, report: &queries::CheckReport) {
|
||||||
|
for warning in &report.warnings {
|
||||||
|
warn!(graph = label, query = %warning.query, "stored query: {}", warning.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_registry_against_catalog(
|
||||||
|
registry: &QueryRegistry,
|
||||||
|
catalog: &Catalog,
|
||||||
|
label: &str,
|
||||||
|
) -> omnigraph::error::Result<()> {
|
||||||
|
let report = check(registry, catalog);
|
||||||
|
if report.has_breakages() {
|
||||||
|
return Err(OmniError::manifest(format_check_breakages(label, &report)));
|
||||||
|
}
|
||||||
|
log_registry_warnings(label, &report);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Validate a loaded stored-query registry against the live schema and
|
||||||
|
/// resolve it to an attachable handle. Refuses boot on any breakage
|
||||||
|
/// (same posture as bad policy YAML), logs the non-blocking warnings,
|
||||||
|
/// and collapses an empty registry to `None` (nothing attached). This is
|
||||||
|
/// the single gate every open path funnels through, so no opener can
|
||||||
|
/// attach a registry that has not been schema-checked. `label` names the
|
||||||
|
/// graph in messages.
|
||||||
|
fn validate_and_attach(
|
||||||
|
queries: QueryRegistry,
|
||||||
|
catalog: &Catalog,
|
||||||
|
label: &str,
|
||||||
|
) -> Result<Option<Arc<QueryRegistry>>> {
|
||||||
|
validate_registry_against_catalog(&queries, catalog, label)
|
||||||
|
.map_err(|err| color_eyre::eyre::eyre!(err.to_string()))?;
|
||||||
|
Ok(if queries.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(Arc::new(queries))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Format every load error (parse / identity failure) into a multi-line
|
||||||
|
/// boot-abort message.
|
||||||
|
fn format_registry_load_errors(label: &str, errors: &[queries::LoadError]) -> String {
|
||||||
|
let joined = errors
|
||||||
|
.iter()
|
||||||
|
.map(|e| e.to_string())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n ");
|
||||||
|
format!("graph '{label}': stored-query registry failed to load:\n {joined}")
|
||||||
|
}
|
||||||
|
|
||||||
pub fn load_server_settings(
|
pub fn load_server_settings(
|
||||||
config_path: Option<&PathBuf>,
|
config_path: Option<&PathBuf>,
|
||||||
cli_uri: Option<String>,
|
cli_uri: Option<String>,
|
||||||
|
|
@ -799,15 +937,43 @@ pub fn load_server_settings(
|
||||||
let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| {
|
let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| {
|
||||||
format!("normalize single-graph URI '{raw_uri}' from server settings")
|
format!("normalize single-graph URI '{raw_uri}' from server settings")
|
||||||
})?;
|
})?;
|
||||||
let policy_file = config.resolve_policy_file();
|
// Config follows graph IDENTITY, not mode: a bare URI is anonymous
|
||||||
ServerConfigMode::Single { uri, policy_file }
|
// (top-level config); a graph chosen by name uses its per-graph
|
||||||
|
// `graphs.<name>.{policy,queries}`. `resolve_target_uri` already
|
||||||
|
// errored on an unknown name, so a `Some(name)` here is a known graph.
|
||||||
|
let selected: Option<&str> = if has_cli_uri {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
cli_target.as_deref().or_else(|| config.server_graph_name())
|
||||||
|
};
|
||||||
|
// A named selection must not leave a populated top-level block
|
||||||
|
// silently unused — refuse boot and point at the per-graph block. The
|
||||||
|
// same rule the CLI selection gate enforces, shared via one helper so
|
||||||
|
// the boot check and `omnigraph queries validate`/`list` can't drift.
|
||||||
|
config.ensure_top_level_blocks_honored(selected)?;
|
||||||
|
// Load + identity-check now (no engine needed); the schema
|
||||||
|
// type-check happens when the engine opens.
|
||||||
|
let policy_file = config.resolve_policy_file_for(selected);
|
||||||
|
let queries = QueryRegistry::load(&config, config.query_entries_for(selected))
|
||||||
|
.map_err(|errs| color_eyre::eyre::eyre!(format_registry_load_errors(&uri, &errs)))?;
|
||||||
|
let graph_id = graph_resource_id_for_selection(selected, &uri);
|
||||||
|
ServerConfigMode::Single {
|
||||||
|
uri,
|
||||||
|
graph_id,
|
||||||
|
policy_file,
|
||||||
|
queries,
|
||||||
|
}
|
||||||
} else if has_explicit_config && has_graphs_map {
|
} else if has_explicit_config && has_graphs_map {
|
||||||
if config.resolve_policy_file().is_some() {
|
// Multi mode: every graph uses its per-graph block; top-level
|
||||||
|
// policy/queries are never honored, so a populated one is an error.
|
||||||
|
let unhonored = config.populated_top_level_blocks();
|
||||||
|
if !unhonored.is_empty() {
|
||||||
bail!(
|
bail!(
|
||||||
"top-level `policy.file` is single-graph/CLI-local policy only; \
|
"multi-graph mode: top-level {} {} not honored — each graph uses its own \
|
||||||
in multi-graph mode move per-graph rules to \
|
`graphs.<graph_id>.…` block. Move per-graph rules there (and any \
|
||||||
`graphs.<graph_id>.policy.file` and move `graph_list` rules to \
|
`graph_list` policy to `server.policy.file`).",
|
||||||
`server.policy.file`."
|
unhonored.join(" and "),
|
||||||
|
if unhonored.len() == 1 { "is" } else { "are" },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
// Rule 4 → Multi mode. Build a startup config per graph.
|
// Rule 4 → Multi mode. Build a startup config per graph.
|
||||||
|
|
@ -823,10 +989,17 @@ pub fn load_server_settings(
|
||||||
let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| {
|
let uri = normalize_root_uri(&raw_uri).wrap_err_with(|| {
|
||||||
format!("normalize URI '{raw_uri}' for graph '{name}' in omnigraph.yaml")
|
format!("normalize URI '{raw_uri}' for graph '{name}' in omnigraph.yaml")
|
||||||
})?;
|
})?;
|
||||||
|
// Per-graph `queries:`, selected through the shared
|
||||||
|
// `query_entries_for` so server and CLI resolve identically.
|
||||||
|
// Load + identity-check now; the schema type-check happens
|
||||||
|
// when this graph's engine opens.
|
||||||
|
let queries = QueryRegistry::load(&config, config.query_entries_for(Some(name.as_str())))
|
||||||
|
.map_err(|errs| color_eyre::eyre::eyre!(format_registry_load_errors(name, &errs)))?;
|
||||||
graphs.push(GraphStartupConfig {
|
graphs.push(GraphStartupConfig {
|
||||||
graph_id: name.clone(),
|
graph_id: name.clone(),
|
||||||
uri,
|
uri,
|
||||||
policy_file: config.resolve_target_policy_file(name),
|
policy_file: config.resolve_target_policy_file(name),
|
||||||
|
queries,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
let config_path = config_path
|
let config_path = config_path
|
||||||
|
|
@ -949,6 +1122,8 @@ pub fn build_app(state: AppState) -> Router {
|
||||||
server_change
|
server_change
|
||||||
}))
|
}))
|
||||||
.route("/mutate", post(server_mutate))
|
.route("/mutate", post(server_mutate))
|
||||||
|
.route("/queries", get(server_list_queries))
|
||||||
|
.route("/queries/{name}", post(server_invoke_query))
|
||||||
.route("/schema", get(server_schema_get))
|
.route("/schema", get(server_schema_get))
|
||||||
.route("/schema/apply", post(server_schema_apply))
|
.route("/schema/apply", post(server_schema_apply))
|
||||||
.route(
|
.route(
|
||||||
|
|
@ -1046,10 +1221,28 @@ pub async fn serve(config: ServerConfig) -> Result<()> {
|
||||||
|
|
||||||
let bind = config.bind.clone();
|
let bind = config.bind.clone();
|
||||||
let state = match config.mode {
|
let state = match config.mode {
|
||||||
ServerConfigMode::Single { uri, policy_file } => {
|
ServerConfigMode::Single {
|
||||||
|
uri,
|
||||||
|
graph_id,
|
||||||
|
policy_file,
|
||||||
|
queries,
|
||||||
|
} => {
|
||||||
let uri_for_log = uri.clone();
|
let uri_for_log = uri.clone();
|
||||||
info!(uri = %uri_for_log, bind = %bind, mode = "single", "serving omnigraph");
|
info!(
|
||||||
AppState::open_with_bearer_tokens_and_policy(uri, tokens, policy_file.as_ref()).await?
|
uri = %uri_for_log,
|
||||||
|
graph_id = %graph_id,
|
||||||
|
bind = %bind,
|
||||||
|
mode = "single",
|
||||||
|
"serving omnigraph"
|
||||||
|
);
|
||||||
|
AppState::open_single_with_queries_for_graph_id(
|
||||||
|
uri,
|
||||||
|
tokens,
|
||||||
|
policy_file.as_ref(),
|
||||||
|
queries,
|
||||||
|
Some(graph_id),
|
||||||
|
)
|
||||||
|
.await?
|
||||||
}
|
}
|
||||||
ServerConfigMode::Multi {
|
ServerConfigMode::Multi {
|
||||||
graphs,
|
graphs,
|
||||||
|
|
@ -1131,6 +1324,12 @@ async fn open_single_graph(cfg: GraphStartupConfig) -> Result<Arc<GraphHandle>>
|
||||||
.await
|
.await
|
||||||
.map_err(|err| color_eyre::eyre::eyre!("open graph '{}' at {}: {err}", graph_id, uri))?;
|
.map_err(|err| color_eyre::eyre::eyre!("open graph '{}' at {}: {err}", graph_id, uri))?;
|
||||||
|
|
||||||
|
// Validate this graph's stored queries against the live schema and
|
||||||
|
// resolve them to an attachable handle (refuse boot on breakage).
|
||||||
|
// Done before the policy match rebinds `db`; the catalog handle is an
|
||||||
|
// owned `Arc`, so no borrow of `db` survives into the match.
|
||||||
|
let queries = validate_and_attach(cfg.queries, &db.catalog(), graph_id.as_str())?;
|
||||||
|
|
||||||
let (policy_arc, db) = match &cfg.policy_file {
|
let (policy_arc, db) = match &cfg.policy_file {
|
||||||
Some(path) => {
|
Some(path) => {
|
||||||
let policy = PolicyEngine::load_graph(path, graph_id.as_str())?;
|
let policy = PolicyEngine::load_graph(path, graph_id.as_str())?;
|
||||||
|
|
@ -1146,6 +1345,7 @@ async fn open_single_graph(cfg: GraphStartupConfig) -> Result<Arc<GraphHandle>>
|
||||||
uri,
|
uri,
|
||||||
engine: Arc::new(db),
|
engine: Arc::new(db),
|
||||||
policy: policy_arc,
|
policy: policy_arc,
|
||||||
|
queries,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1479,7 +1679,21 @@ fn log_policy_decision(actor_id: &str, request: &PolicyRequest, decision: &Polic
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// HTTP-layer Cedar policy gate. Two sources of the policy engine:
|
/// The allow/deny **decision** an authorization check produces, kept
|
||||||
|
/// separate from the operational failures (`Err`) that can occur while
|
||||||
|
/// computing it. [`authorize_request`] collapses `Denied` to a 403; a caller
|
||||||
|
/// that needs to remap a denial without also remapping operational failures
|
||||||
|
/// (the stored-query invoke handler hides a denial as a 404) matches on this
|
||||||
|
/// directly, so a real 401 (missing bearer) or 500 (policy-evaluation error)
|
||||||
|
/// keeps its true status instead of being masked as the denial's response.
|
||||||
|
enum Authz {
|
||||||
|
Allowed,
|
||||||
|
Denied(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// HTTP-layer Cedar policy gate, returning the allow/deny [`Authz`] decision
|
||||||
|
/// and reserving `Err` for operational failures (401 missing bearer, 500
|
||||||
|
/// policy-evaluation error). Two sources of the policy engine:
|
||||||
/// * Per-graph handler — passes `handle.policy.as_deref()` so the
|
/// * Per-graph handler — passes `handle.policy.as_deref()` so the
|
||||||
/// graph's Cedar rules govern read/change/branch_*/schema_apply.
|
/// graph's Cedar rules govern read/change/branch_*/schema_apply.
|
||||||
/// * Management handler — passes `state.server_policy.as_deref()` so
|
/// * Management handler — passes `state.server_policy.as_deref()` so
|
||||||
|
|
@ -1493,11 +1707,11 @@ fn log_policy_decision(actor_id: &str, request: &PolicyRequest, decision: &Polic
|
||||||
/// dropped from the type), so handlers cannot smuggle it through the
|
/// dropped from the type), so handlers cannot smuggle it through the
|
||||||
/// request. See `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
|
/// request. See `actor_id_resolves_from_bearer_token_ignoring_client_supplied_headers`
|
||||||
/// at `tests/server.rs`.
|
/// at `tests/server.rs`.
|
||||||
fn authorize_request(
|
fn authorize(
|
||||||
actor: Option<&ResolvedActor>,
|
actor: Option<&ResolvedActor>,
|
||||||
policy: Option<&PolicyEngine>,
|
policy: Option<&PolicyEngine>,
|
||||||
request: PolicyRequest,
|
request: PolicyRequest,
|
||||||
) -> std::result::Result<(), ApiError> {
|
) -> std::result::Result<Authz, ApiError> {
|
||||||
let Some(engine) = policy else {
|
let Some(engine) = policy else {
|
||||||
// No PolicyEngine installed. Three runtime states can reach this:
|
// No PolicyEngine installed. Three runtime states can reach this:
|
||||||
//
|
//
|
||||||
|
|
@ -1524,21 +1738,23 @@ fn authorize_request(
|
||||||
// operator's only path to enabling it is configuring an
|
// operator's only path to enabling it is configuring an
|
||||||
// explicit `server.policy.file` in omnigraph.yaml.
|
// explicit `server.policy.file` in omnigraph.yaml.
|
||||||
if request.action.resource_kind() == PolicyResourceKind::Server {
|
if request.action.resource_kind() == PolicyResourceKind::Server {
|
||||||
return Err(ApiError::forbidden(
|
return Ok(Authz::Denied(
|
||||||
"server-scoped actions require an explicit `server.policy.file` \
|
"server-scoped actions require an explicit `server.policy.file` \
|
||||||
configured in omnigraph.yaml — the management surface is closed \
|
configured in omnigraph.yaml — the management surface is closed \
|
||||||
by default in every runtime state, including --unauthenticated, \
|
by default in every runtime state, including --unauthenticated, \
|
||||||
so that server topology is never exposed without operator opt-in.",
|
so that server topology is never exposed without operator opt-in."
|
||||||
|
.to_string(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
if actor.is_some() && request.action != PolicyAction::Read {
|
if actor.is_some() && request.action != PolicyAction::Read {
|
||||||
return Err(ApiError::forbidden(
|
return Ok(Authz::Denied(
|
||||||
"server runs in default-deny mode (bearer tokens configured but no \
|
"server runs in default-deny mode (bearer tokens configured but no \
|
||||||
policy file). Only `read` actions are permitted; configure \
|
policy file). Only `read` actions are permitted; configure \
|
||||||
`policy.file` in omnigraph.yaml to enable other actions.",
|
`policy.file` in omnigraph.yaml to enable other actions."
|
||||||
|
.to_string(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
return Ok(());
|
return Ok(Authz::Allowed);
|
||||||
};
|
};
|
||||||
let Some(actor) = actor else {
|
let Some(actor) = actor else {
|
||||||
return Err(ApiError::unauthorized("missing bearer token"));
|
return Err(ApiError::unauthorized("missing bearer token"));
|
||||||
|
|
@ -1560,9 +1776,26 @@ fn authorize_request(
|
||||||
.map_err(|err| ApiError::internal(format!("policy: {err}")))?;
|
.map_err(|err| ApiError::internal(format!("policy: {err}")))?;
|
||||||
log_policy_decision(actor_id, &request, &decision);
|
log_policy_decision(actor_id, &request, &decision);
|
||||||
if decision.allowed {
|
if decision.allowed {
|
||||||
Ok(())
|
Ok(Authz::Allowed)
|
||||||
} else {
|
} else {
|
||||||
Err(ApiError::forbidden(decision.message))
|
Ok(Authz::Denied(decision.message))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Thin wrapper over [`authorize`] for the handlers that treat any denial as a
|
||||||
|
/// 403: a denial becomes `ApiError::forbidden`, and operational failures
|
||||||
|
/// (401 missing bearer, 500 policy-evaluation error) propagate unchanged. The
|
||||||
|
/// stored-query invoke handler does **not** use this — it consumes the
|
||||||
|
/// [`Authz`] decision directly to hide a denial as a 404 while letting an
|
||||||
|
/// operational failure keep its true status.
|
||||||
|
fn authorize_request(
|
||||||
|
actor: Option<&ResolvedActor>,
|
||||||
|
policy: Option<&PolicyEngine>,
|
||||||
|
request: PolicyRequest,
|
||||||
|
) -> std::result::Result<(), ApiError> {
|
||||||
|
match authorize(actor, policy, request)? {
|
||||||
|
Authz::Allowed => Ok(()),
|
||||||
|
Authz::Denied(message) => Err(ApiError::forbidden(message)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2001,6 +2234,194 @@ async fn server_mutate(
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Path parameter for `POST /queries/{name}`.
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct QueryNamePath {
|
||||||
|
name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_optional_invoke_body(
|
||||||
|
body: Bytes,
|
||||||
|
) -> std::result::Result<InvokeStoredQueryRequest, ApiError> {
|
||||||
|
if body.is_empty() {
|
||||||
|
return Ok(InvokeStoredQueryRequest::default());
|
||||||
|
}
|
||||||
|
serde_json::from_slice::<Option<InvokeStoredQueryRequest>>(&body)
|
||||||
|
.map(|request| request.unwrap_or_default())
|
||||||
|
.map_err(|err| {
|
||||||
|
ApiError::bad_request(format!("invalid stored-query invocation body: {err}"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[utoipa::path(
|
||||||
|
post,
|
||||||
|
path = "/queries/{name}",
|
||||||
|
tag = "queries",
|
||||||
|
operation_id = "invoke_query",
|
||||||
|
params(("name" = String, Path, description = "Stored query name (the registry key)")),
|
||||||
|
request_body = Option<InvokeStoredQueryRequest>,
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "Read envelope (ReadOutput) or mutation envelope (ChangeOutput), serialized untagged", body = InvokeStoredQueryResponse),
|
||||||
|
(status = 400, description = "Bad request (param type error; snapshot on a stored mutation)", body = ErrorOutput),
|
||||||
|
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||||
|
(status = 403, description = "Forbidden (the inner `change` gate for a stored mutation)", body = ErrorOutput),
|
||||||
|
(status = 404, description = "Unknown stored query, or `invoke_query` denied — indistinguishable to a caller without the grant", body = ErrorOutput),
|
||||||
|
(status = 409, description = "Merge conflict", body = ErrorOutput),
|
||||||
|
(status = 429, description = "Per-actor admission cap exceeded; honor `Retry-After` header", body = ErrorOutput),
|
||||||
|
(status = 500, description = "Policy evaluation error (a denial is reported as 404, not 500)", body = ErrorOutput),
|
||||||
|
),
|
||||||
|
security(("bearer_token" = [])),
|
||||||
|
)]
|
||||||
|
/// Invoke a curated, server-side stored query by name.
|
||||||
|
///
|
||||||
|
/// The query source comes from the graph's `queries:` registry, not the
|
||||||
|
/// request body — callers send only runtime inputs (`params`, `branch`,
|
||||||
|
/// `snapshot`). Gated by the `invoke_query` Cedar action at the boundary;
|
||||||
|
/// a stored *mutation* additionally passes the engine's `change` gate
|
||||||
|
/// (double-gated). An actor **without** `invoke_query` cannot tell a denied
|
||||||
|
/// query from a missing one — both return the same 404, so the catalog
|
||||||
|
/// can't be probed without the grant. Once `invoke_query` is held, the
|
||||||
|
/// inner `read`/`change` gate may surface a 403 for an existing query the
|
||||||
|
/// actor can't run (the intended double-gate signal).
|
||||||
|
async fn server_invoke_query(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(handle): Extension<Arc<GraphHandle>>,
|
||||||
|
actor: Option<Extension<ResolvedActor>>,
|
||||||
|
Path(QueryNamePath { name }): Path<QueryNamePath>,
|
||||||
|
body: Bytes,
|
||||||
|
) -> std::result::Result<Json<InvokeStoredQueryResponse>, ApiError> {
|
||||||
|
let req = parse_optional_invoke_body(body)?;
|
||||||
|
// A caller without `invoke_query` can't tell a denial from a missing
|
||||||
|
// query: both 404 with this exact message, so the catalog can't be
|
||||||
|
// probed without the grant. (A caller that holds invoke_query may still
|
||||||
|
// see the inner gate's 403 for an existing query it can't run — intended.)
|
||||||
|
const NOT_FOUND: &str = "stored query not found";
|
||||||
|
let actor_ref = actor.as_ref().map(|Extension(actor)| actor);
|
||||||
|
|
||||||
|
// Boundary gate (authentication already ran in `require_bearer_auth`).
|
||||||
|
// A denial is hidden as 404 (deny == missing, so the catalog can't be
|
||||||
|
// probed without the grant), but operational failures (401 missing bearer,
|
||||||
|
// 500 policy-evaluation error) propagate with their true status via `?`
|
||||||
|
// rather than being masked as a missing query.
|
||||||
|
match authorize(
|
||||||
|
actor_ref,
|
||||||
|
handle.policy.as_deref(),
|
||||||
|
PolicyRequest {
|
||||||
|
action: PolicyAction::InvokeQuery,
|
||||||
|
// Graph-scoped: no branch dimension. The per-branch/snapshot
|
||||||
|
// access is enforced by the inner read/change gate in the
|
||||||
|
// runner, so the outer gate must not resolve a branch (doing so
|
||||||
|
// was wrong for snapshot reads).
|
||||||
|
branch: None,
|
||||||
|
target_branch: None,
|
||||||
|
},
|
||||||
|
)? {
|
||||||
|
Authz::Allowed => {}
|
||||||
|
Authz::Denied(_) => return Err(ApiError::not_found(NOT_FOUND)),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve against the per-graph registry (same 404 on a miss).
|
||||||
|
let stored = handle
|
||||||
|
.queries
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|registry| registry.lookup(&name))
|
||||||
|
.ok_or_else(|| ApiError::not_found(NOT_FOUND))?;
|
||||||
|
|
||||||
|
// Detach what we need before `handle` moves into the runner — the
|
||||||
|
// registry borrow lives inside `handle`.
|
||||||
|
let source = Arc::clone(&stored.source);
|
||||||
|
let query_name = stored.name.clone();
|
||||||
|
let is_mutation = stored.is_mutation();
|
||||||
|
|
||||||
|
info!(
|
||||||
|
graph = %handle.uri,
|
||||||
|
actor = ?actor_ref.map(|a| a.actor_id.as_ref()),
|
||||||
|
query = %query_name,
|
||||||
|
kind = if is_mutation { "mutate" } else { "read" },
|
||||||
|
"stored query invoked"
|
||||||
|
);
|
||||||
|
|
||||||
|
if is_mutation {
|
||||||
|
if req.snapshot.is_some() {
|
||||||
|
return Err(ApiError::bad_request(
|
||||||
|
"stored mutation cannot target a snapshot",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let branch = req.branch.unwrap_or_else(|| "main".to_string());
|
||||||
|
let output = run_mutate(
|
||||||
|
state,
|
||||||
|
handle,
|
||||||
|
actor_ref,
|
||||||
|
&source,
|
||||||
|
Some(&query_name),
|
||||||
|
req.params.as_ref(),
|
||||||
|
branch,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(Json(InvokeStoredQueryResponse::Change(output)))
|
||||||
|
} else {
|
||||||
|
let (selected, target, result) = run_query(
|
||||||
|
handle,
|
||||||
|
actor_ref,
|
||||||
|
&source,
|
||||||
|
Some(&query_name),
|
||||||
|
req.params.as_ref(),
|
||||||
|
req.branch,
|
||||||
|
req.snapshot,
|
||||||
|
true,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(Json(InvokeStoredQueryResponse::Read(api::read_output(
|
||||||
|
selected, &target, result,
|
||||||
|
))))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/queries",
|
||||||
|
tag = "queries",
|
||||||
|
operation_id = "list_queries",
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "Stored-query catalog (the mcp.expose subset, with typed params)", body = QueriesCatalogOutput),
|
||||||
|
(status = 401, description = "Unauthorized", body = ErrorOutput),
|
||||||
|
(status = 403, description = "Forbidden", body = ErrorOutput),
|
||||||
|
),
|
||||||
|
security(("bearer_token" = [])),
|
||||||
|
)]
|
||||||
|
/// List the graph's exposed stored queries as a typed tool catalog.
|
||||||
|
///
|
||||||
|
/// Returns the `mcp.expose == true` subset of the `queries:` registry, each
|
||||||
|
/// with its MCP tool name, read/mutate flag, description/instruction, and
|
||||||
|
/// typed parameters — enough for a client to register them as tools without
|
||||||
|
/// fetching `.gq` source. Read-gated; the catalog is graph-wide (branch
|
||||||
|
/// independent — `read` is authorized against `main`). **Not** Cedar-filtered
|
||||||
|
/// per query yet, so it can list a query whose `invoke_query` the caller
|
||||||
|
/// lacks (a known gap until per-query authorization lands).
|
||||||
|
async fn server_list_queries(
|
||||||
|
Extension(handle): Extension<Arc<GraphHandle>>,
|
||||||
|
actor: Option<Extension<ResolvedActor>>,
|
||||||
|
) -> std::result::Result<Json<QueriesCatalogOutput>, ApiError> {
|
||||||
|
authorize_request(
|
||||||
|
actor.as_ref().map(|Extension(actor)| actor),
|
||||||
|
handle.policy.as_deref(),
|
||||||
|
PolicyRequest {
|
||||||
|
action: PolicyAction::Read,
|
||||||
|
branch: Some("main".to_string()),
|
||||||
|
target_branch: None,
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
let queries = match handle.queries.as_ref() {
|
||||||
|
Some(registry) => registry
|
||||||
|
.iter()
|
||||||
|
.filter(|q| q.expose)
|
||||||
|
.map(api::query_catalog_entry)
|
||||||
|
.collect(),
|
||||||
|
None => Vec::new(),
|
||||||
|
};
|
||||||
|
Ok(Json(QueriesCatalogOutput { queries }))
|
||||||
|
}
|
||||||
|
|
||||||
#[utoipa::path(
|
#[utoipa::path(
|
||||||
get,
|
get,
|
||||||
path = "/schema",
|
path = "/schema",
|
||||||
|
|
@ -2088,18 +2509,26 @@ async fn server_schema_apply(
|
||||||
.map_err(ApiError::from_workload_reject)?;
|
.map_err(ApiError::from_workload_reject)?;
|
||||||
let result = {
|
let result = {
|
||||||
let db = &handle.engine;
|
let db = &handle.engine;
|
||||||
|
let registry = handle.queries.as_deref();
|
||||||
|
let label = handle.key.graph_id.as_str().to_string();
|
||||||
// Engine-layer policy enforcement (MR-722): pass the resolved
|
// Engine-layer policy enforcement (MR-722): pass the resolved
|
||||||
// actor through so apply_schema_as can call enforce() with the
|
// actor through so apply_schema_as can call enforce() with the
|
||||||
// authoritative identity. With a policy installed in AppState,
|
// authoritative identity. With a policy installed in AppState,
|
||||||
// engine-side enforcement re-checks the same decision the
|
// engine-side enforcement re-checks the same decision the
|
||||||
// HTTP-layer authorize_request just made above. PR #3 collapses
|
// HTTP-layer authorize_request just made above. PR #3 collapses
|
||||||
// the redundancy.
|
// the redundancy.
|
||||||
db.apply_schema_as(
|
db.apply_schema_as_with_catalog_check(
|
||||||
&request.schema_source,
|
&request.schema_source,
|
||||||
omnigraph::db::SchemaApplyOptions {
|
omnigraph::db::SchemaApplyOptions {
|
||||||
allow_data_loss: request.allow_data_loss,
|
allow_data_loss: request.allow_data_loss,
|
||||||
},
|
},
|
||||||
actor_id,
|
actor_id,
|
||||||
|
|catalog| {
|
||||||
|
if let Some(registry) = registry {
|
||||||
|
validate_registry_against_catalog(registry, catalog, &label)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map_err(ApiError::from_omni)?
|
.map_err(ApiError::from_omni)?
|
||||||
|
|
@ -2658,12 +3087,133 @@ mod tests {
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
/// `authorize` returns the allow/deny **decision** (`Authz`) and reserves
|
||||||
|
/// `Err` for operational failures, so the invoke handler can hide a denial
|
||||||
|
/// as 404 without also masking a 401/500. Pins each outcome.
|
||||||
|
#[test]
|
||||||
|
fn authorize_splits_decision_from_operational_error() {
|
||||||
|
use super::{Authz, PolicyAction, PolicyCompiler, PolicyConfig, PolicyRequest, ResolvedActor, authorize};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
fn req(action: PolicyAction) -> PolicyRequest {
|
||||||
|
PolicyRequest { action, branch: None, target_branch: None }
|
||||||
|
}
|
||||||
|
let actor = ResolvedActor::cluster_static(Arc::from("act-alice"));
|
||||||
|
|
||||||
|
// --- No policy engine installed (open / default-deny modes) ---
|
||||||
|
// A server-scoped action is denied in every no-policy state.
|
||||||
|
assert!(matches!(
|
||||||
|
authorize(Some(&actor), None, req(PolicyAction::GraphList)).unwrap(),
|
||||||
|
Authz::Denied(_)
|
||||||
|
));
|
||||||
|
// Authenticated actor + a non-read per-graph action → default-deny.
|
||||||
|
assert!(matches!(
|
||||||
|
authorize(Some(&actor), None, req(PolicyAction::Change)).unwrap(),
|
||||||
|
Authz::Denied(_)
|
||||||
|
));
|
||||||
|
// `read` is the one per-graph action permitted without a policy.
|
||||||
|
assert!(matches!(
|
||||||
|
authorize(Some(&actor), None, req(PolicyAction::Read)).unwrap(),
|
||||||
|
Authz::Allowed
|
||||||
|
));
|
||||||
|
// Open mode (no actor, no policy) → allowed.
|
||||||
|
assert!(matches!(
|
||||||
|
authorize(None, None, req(PolicyAction::Read)).unwrap(),
|
||||||
|
Authz::Allowed
|
||||||
|
));
|
||||||
|
|
||||||
|
// --- Policy engine installed ---
|
||||||
|
let policy: PolicyConfig = serde_yaml::from_str(
|
||||||
|
"version: 1\n\
|
||||||
|
groups:\n team: [act-alice]\n\
|
||||||
|
rules:\n - id: team-read\n allow:\n actors: { group: team }\n actions: [read]\n branch_scope: any\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||||
|
|
||||||
|
// A matched allow rule → Allowed.
|
||||||
|
assert!(matches!(
|
||||||
|
authorize(
|
||||||
|
Some(&actor),
|
||||||
|
Some(&engine),
|
||||||
|
PolicyRequest { action: PolicyAction::Read, branch: Some("main".to_string()), target_branch: None },
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
Authz::Allowed
|
||||||
|
));
|
||||||
|
// Known actor, no matching allow rule → Denied, carrying the decision message.
|
||||||
|
match authorize(
|
||||||
|
Some(&actor),
|
||||||
|
Some(&engine),
|
||||||
|
PolicyRequest { action: PolicyAction::Change, branch: Some("main".to_string()), target_branch: None },
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
{
|
||||||
|
Authz::Denied(message) => assert!(!message.is_empty(), "a deny carries its decision message"),
|
||||||
|
Authz::Allowed => panic!("change must be denied: only read is allowed"),
|
||||||
|
}
|
||||||
|
// Policy installed but no actor → operational failure (`Err`), NOT a
|
||||||
|
// decision. This is the split that keeps a 401/500 from being masked
|
||||||
|
// as the denial's response in the invoke handler.
|
||||||
|
assert!(
|
||||||
|
authorize(None, Some(&engine), req(PolicyAction::Read)).is_err(),
|
||||||
|
"a missing actor with a policy installed is an operational error, not a deny"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_bearer_token_produces_32_byte_output() {
|
fn hash_bearer_token_produces_32_byte_output() {
|
||||||
let hash = hash_bearer_token("any-token");
|
let hash = hash_bearer_token("any-token");
|
||||||
assert_eq!(hash.len(), 32);
|
assert_eq!(hash.len(), 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The single gate both open paths funnel through: it refuses a
|
||||||
|
/// schema breakage (naming the graph label + query), attaches a clean
|
||||||
|
/// registry, and collapses an empty one to `None`. Pure over its args
|
||||||
|
/// (no engine), so it covers the multi-graph path's logic too — the
|
||||||
|
/// only per-path difference is the `label`, asserted here.
|
||||||
|
#[test]
|
||||||
|
fn validate_and_attach_gates_on_schema_and_collapses_empty() {
|
||||||
|
use crate::queries::{QueryRegistry, RegistrySpec};
|
||||||
|
use omnigraph_compiler::catalog::build_catalog;
|
||||||
|
use omnigraph_compiler::schema::parser::parse_schema;
|
||||||
|
|
||||||
|
let schema = parse_schema("node User {\nname: String\n}\n").unwrap();
|
||||||
|
let catalog = build_catalog(&schema).unwrap();
|
||||||
|
let spec = |name: &str, source: &str| RegistrySpec {
|
||||||
|
name: name.to_string(),
|
||||||
|
source: source.to_string(),
|
||||||
|
expose: false,
|
||||||
|
tool_name: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Empty registry → nothing attached, no error.
|
||||||
|
let empty =
|
||||||
|
super::validate_and_attach(QueryRegistry::default(), &catalog, "g").unwrap();
|
||||||
|
assert!(empty.is_none());
|
||||||
|
|
||||||
|
// A query that type-checks → attached.
|
||||||
|
let ok = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"find_user",
|
||||||
|
"query find_user() { match { $u: User } return { $u.name } }",
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
assert!(super::validate_and_attach(ok, &catalog, "g").unwrap().is_some());
|
||||||
|
|
||||||
|
// A query referencing a type the schema lacks → boot refusal that
|
||||||
|
// names both the graph label and the offending query.
|
||||||
|
let broken = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"ghost",
|
||||||
|
"query ghost() { match { $w: Widget } return { $w.name } }",
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let err = super::validate_and_attach(broken, &catalog, "graph-x").unwrap_err();
|
||||||
|
let msg = err.to_string();
|
||||||
|
assert!(msg.contains("graph-x"), "labels the graph: {msg}");
|
||||||
|
assert!(msg.contains("ghost"), "names the query: {msg}");
|
||||||
|
assert!(msg.contains("schema check"), "mentions the schema check: {msg}");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_bearer_token_is_deterministic() {
|
fn hash_bearer_token_is_deterministic() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
@ -2707,7 +3257,10 @@ server:
|
||||||
|
|
||||||
let settings = load_server_settings(Some(&config), None, None, None, false).unwrap();
|
let settings = load_server_settings(Some(&config), None, None, None, false).unwrap();
|
||||||
match &settings.mode {
|
match &settings.mode {
|
||||||
ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/demo.omni"),
|
ServerConfigMode::Single { uri, graph_id, .. } => {
|
||||||
|
assert_eq!(uri, "/tmp/demo.omni");
|
||||||
|
assert_eq!(graph_id, "local");
|
||||||
|
}
|
||||||
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
||||||
}
|
}
|
||||||
assert_eq!(settings.bind, "0.0.0.0:9090");
|
assert_eq!(settings.bind, "0.0.0.0:9090");
|
||||||
|
|
@ -2739,7 +3292,10 @@ server:
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
match &settings.mode {
|
match &settings.mode {
|
||||||
ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "/tmp/override.omni"),
|
ServerConfigMode::Single { uri, graph_id, .. } => {
|
||||||
|
assert_eq!(uri, "/tmp/override.omni");
|
||||||
|
assert_eq!(graph_id, "/tmp/override.omni");
|
||||||
|
}
|
||||||
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
||||||
}
|
}
|
||||||
assert_eq!(settings.bind, "0.0.0.0:9999");
|
assert_eq!(settings.bind, "0.0.0.0:9999");
|
||||||
|
|
@ -2768,7 +3324,10 @@ server:
|
||||||
load_server_settings(Some(&config), None, Some("dev".to_string()), None, false)
|
load_server_settings(Some(&config), None, Some("dev".to_string()), None, false)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
match &settings.mode {
|
match &settings.mode {
|
||||||
ServerConfigMode::Single { uri, .. } => assert_eq!(uri, "http://127.0.0.1:8080"),
|
ServerConfigMode::Single { uri, graph_id, .. } => {
|
||||||
|
assert_eq!(uri, "http://127.0.0.1:8080");
|
||||||
|
assert_eq!(graph_id, "dev");
|
||||||
|
}
|
||||||
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
ServerConfigMode::Multi { .. } => panic!("expected Single mode, got Multi"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2848,6 +3407,7 @@ server:
|
||||||
.to_string_lossy()
|
.to_string_lossy()
|
||||||
.into_owned(),
|
.into_owned(),
|
||||||
policy_file: None,
|
policy_file: None,
|
||||||
|
queries: crate::queries::QueryRegistry::default(),
|
||||||
}],
|
}],
|
||||||
config_path: temp.path().join("omnigraph.yaml"),
|
config_path: temp.path().join("omnigraph.yaml"),
|
||||||
server_policy_file: Some(policy_path),
|
server_policy_file: Some(policy_path),
|
||||||
|
|
@ -2895,7 +3455,9 @@ server:
|
||||||
.join("graph.omni")
|
.join("graph.omni")
|
||||||
.to_string_lossy()
|
.to_string_lossy()
|
||||||
.into_owned(),
|
.into_owned(),
|
||||||
|
graph_id: "default".to_string(),
|
||||||
policy_file: None,
|
policy_file: None,
|
||||||
|
queries: crate::queries::QueryRegistry::default(),
|
||||||
},
|
},
|
||||||
bind: "127.0.0.1:0".to_string(),
|
bind: "127.0.0.1:0".to_string(),
|
||||||
allow_unauthenticated: false,
|
allow_unauthenticated: false,
|
||||||
|
|
|
||||||
688
crates/omnigraph-server/src/queries.rs
Normal file
688
crates/omnigraph-server/src/queries.rs
Normal file
|
|
@ -0,0 +1,688 @@
|
||||||
|
//! Stored-query registry.
|
||||||
|
//!
|
||||||
|
//! A server-side registry of named, parameter-typed `.gq` queries that
|
||||||
|
//! operators declare in `omnigraph.yaml` (per-graph, or top-level in
|
||||||
|
//! single mode) and the server loads at startup. Each entry is parsed
|
||||||
|
//! and its identity asserted here (`load`); type-checking against the
|
||||||
|
//! live schema happens separately (a `check` pass) so the loader stays
|
||||||
|
//! callable without an open engine (the CLI's offline `queries check`).
|
||||||
|
//!
|
||||||
|
//! Identity is the query **name**: the manifest key must equal the
|
||||||
|
//! `query <name>` symbol declared in the referenced `.gq` file. The two
|
||||||
|
//! are asserted equal at load — one name, two places that must agree.
|
||||||
|
//! Renaming either is a breaking change to callers, by design.
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::fs;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use omnigraph_compiler::catalog::Catalog;
|
||||||
|
use omnigraph_compiler::query::ast::QueryDecl;
|
||||||
|
use omnigraph_compiler::query::parser::parse_query;
|
||||||
|
use omnigraph_compiler::query::typecheck::typecheck_query_decl;
|
||||||
|
use omnigraph_compiler::types::{PropType, ScalarType};
|
||||||
|
|
||||||
|
use crate::config::{OmnigraphConfig, QueryEntry};
|
||||||
|
|
||||||
|
/// One loaded stored query. `source` is the full `.gq` file text — the
|
||||||
|
/// invocation handler hands it to `run_query` / `run_mutate` verbatim,
|
||||||
|
/// which reuse the same parse/IR/exec path as the inline routes (no
|
||||||
|
/// parallel implementation).
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct StoredQuery {
|
||||||
|
/// Identity: manifest key == `query <name>` symbol.
|
||||||
|
pub name: String,
|
||||||
|
/// Full `.gq` source text the query was selected from.
|
||||||
|
pub source: Arc<str>,
|
||||||
|
/// Parsed declaration (params, mutations, description, …).
|
||||||
|
pub decl: QueryDecl,
|
||||||
|
/// Whether this query is listed in the MCP tool catalog (`GET /queries`).
|
||||||
|
/// Default `true` (the manifest entry is the opt-in); `expose: false`
|
||||||
|
/// keeps it HTTP/service-callable but hidden from the agent tool list.
|
||||||
|
/// Catalog membership only — not an authorization gate.
|
||||||
|
pub expose: bool,
|
||||||
|
/// Optional MCP tool-name override; defaults to `name`.
|
||||||
|
pub tool_name: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StoredQuery {
|
||||||
|
/// `true` if the selected declaration contains insert/update/delete
|
||||||
|
/// statements — drives read-vs-mutate routing at invocation time.
|
||||||
|
pub fn is_mutation(&self) -> bool {
|
||||||
|
!self.decl.mutations.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The MCP tool name this query is catalogued under: the explicit
|
||||||
|
/// `tool_name` override, else the query `name`. The catalog key —
|
||||||
|
/// enforced unique across exposed queries at load. Server-side
|
||||||
|
/// consumers (the uniqueness check, the future catalog projection) read
|
||||||
|
/// this; the CLI `queries list` resolves the same rule on its own DTO.
|
||||||
|
pub fn effective_tool_name(&self) -> &str {
|
||||||
|
self.tool_name.as_deref().unwrap_or(&self.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A loaded, identity-checked stored-query registry for one graph.
|
||||||
|
#[derive(Debug, Clone, Default)]
|
||||||
|
pub struct QueryRegistry {
|
||||||
|
by_name: BTreeMap<String, StoredQuery>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// In-memory registry entry before file I/O. Used by [`QueryRegistry::load`]
|
||||||
|
/// (after reading each `.gq` from disk) and directly by tests.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct RegistrySpec {
|
||||||
|
pub name: String,
|
||||||
|
pub source: String,
|
||||||
|
pub expose: bool,
|
||||||
|
pub tool_name: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single registry load failure. Collected (not fail-fast) so a bad
/// `omnigraph.yaml` surfaces every broken entry at once, matching the
/// bad-policy-YAML posture.
#[derive(Debug, Clone)]
pub struct LoadError {
    /// The offending query name, when the failure is entry-scoped; `None`
    /// for a registry-wide failure.
    pub query: Option<String>,
    /// Human-readable description of what went wrong.
    pub message: String,
}

impl std::fmt::Display for LoadError {
    /// Renders `stored query '<name>': <message>` for an entry-scoped error
    /// and `stored query registry: <message>` otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.query {
            Some(name) => write!(f, "stored query '{name}': {}", self.message),
            None => write!(f, "stored query registry: {}", self.message),
        }
    }
}

// Lets `LoadError` participate in `?` / `Box<dyn Error>` based error
// handling; there is no underlying cause, so the default `source()` applies.
impl std::error::Error for LoadError {}
|
||||||
|
|
||||||
|
impl QueryRegistry {
|
||||||
|
/// Build a registry from in-memory specs: parse each source, select
|
||||||
|
/// the declaration whose symbol equals the manifest key, and assert
|
||||||
|
/// they agree. Collects every failure. No schema type-checking here
|
||||||
|
/// — that is [`check`].
|
||||||
|
pub fn from_specs(specs: Vec<RegistrySpec>) -> Result<Self, Vec<LoadError>> {
|
||||||
|
let mut by_name = BTreeMap::new();
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
|
||||||
|
for spec in specs {
|
||||||
|
match parse_query(&spec.source) {
|
||||||
|
Ok(file) => {
|
||||||
|
match file.queries.into_iter().find(|q| q.name == spec.name) {
|
||||||
|
Some(decl) => {
|
||||||
|
by_name.insert(
|
||||||
|
spec.name.clone(),
|
||||||
|
StoredQuery {
|
||||||
|
name: spec.name,
|
||||||
|
source: Arc::from(spec.source),
|
||||||
|
decl,
|
||||||
|
expose: spec.expose,
|
||||||
|
tool_name: spec.tool_name,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
None => errors.push(LoadError {
|
||||||
|
query: Some(spec.name.clone()),
|
||||||
|
message: format!(
|
||||||
|
"no `query {}` declaration found in its `.gq` file \
|
||||||
|
(the registry key must match the query symbol)",
|
||||||
|
spec.name
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => errors.push(LoadError {
|
||||||
|
query: Some(spec.name),
|
||||||
|
message: format!("parse error: {err}"),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exposed queries are catalogued under their effective tool name;
|
||||||
|
// two claiming one name is an MCP-namespace collision. Refuse it at
|
||||||
|
// load (collected, not fail-fast), naming the loser and the winner.
|
||||||
|
// Iterating the `BTreeMap` makes the winner deterministic (the
|
||||||
|
// lexicographically-first query name; config is a map, so YAML
|
||||||
|
// declaration order isn't preserved anyway) and the error order
|
||||||
|
// stable. Scoped to a block so these borrows of `by_name` end
|
||||||
|
// before it is moved into `Self`.
|
||||||
|
{
|
||||||
|
let mut claimed: BTreeMap<&str, &str> = BTreeMap::new();
|
||||||
|
for query in by_name.values().filter(|q| q.expose) {
|
||||||
|
let tool = query.effective_tool_name();
|
||||||
|
if let Some(winner) = claimed.insert(tool, &query.name) {
|
||||||
|
errors.push(LoadError {
|
||||||
|
query: Some(query.name.clone()),
|
||||||
|
message: format!(
|
||||||
|
"MCP tool name '{tool}' already claimed by exposed query '{winner}'"
|
||||||
|
),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if errors.is_empty() {
|
||||||
|
Ok(Self { by_name })
|
||||||
|
} else {
|
||||||
|
Err(errors)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read each registry entry's `.gq` file from disk and build the
|
||||||
|
/// registry. `entries` is either the top-level `queries` map (single
|
||||||
|
/// mode) or a graph's `queries` map (multi mode); `config` resolves
|
||||||
|
/// each entry's relative `file:` path against `base_dir`.
|
||||||
|
pub fn load(
|
||||||
|
config: &OmnigraphConfig,
|
||||||
|
entries: &BTreeMap<String, QueryEntry>,
|
||||||
|
) -> Result<Self, Vec<LoadError>> {
|
||||||
|
let mut specs = Vec::with_capacity(entries.len());
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
for (name, entry) in entries {
|
||||||
|
let path = config.resolve_query_file(&entry.file);
|
||||||
|
match fs::read_to_string(&path) {
|
||||||
|
Ok(source) => specs.push(RegistrySpec {
|
||||||
|
name: name.clone(),
|
||||||
|
source,
|
||||||
|
expose: entry.mcp.expose,
|
||||||
|
tool_name: entry.mcp.tool_name.clone(),
|
||||||
|
}),
|
||||||
|
Err(err) => errors.push(LoadError {
|
||||||
|
query: Some(name.clone()),
|
||||||
|
message: format!("cannot read '{}': {err}", path.display()),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse/identity/uniqueness-check the readable specs even when some
|
||||||
|
// files failed to read, so every broken entry (I/O, parse, identity,
|
||||||
|
// tool-name collision) surfaces in one pass rather than one per
|
||||||
|
// restart. I/O errors come first (in `entries` key order), then the
|
||||||
|
// spec errors. A non-empty `errors` always fails the load.
|
||||||
|
match Self::from_specs(specs) {
|
||||||
|
Ok(registry) if errors.is_empty() => Ok(registry),
|
||||||
|
Ok(_) => Err(errors),
|
||||||
|
Err(spec_errors) => {
|
||||||
|
errors.extend(spec_errors);
|
||||||
|
Err(errors)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn lookup(&self, name: &str) -> Option<&StoredQuery> {
|
||||||
|
self.by_name.get(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = &StoredQuery> {
|
||||||
|
self.by_name.values()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.by_name.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.by_name.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A stored query that fails to type-check against the live schema —
/// e.g. it references a node/edge type or property that was renamed or
/// removed by a migration. Breakages **block server boot** (same posture
/// as bad policy YAML), surfacing schema drift at the deploy boundary
/// rather than silently at invocation time.
#[derive(Debug, Clone)]
pub struct Breakage {
    /// Name of the stored query that failed the check.
    pub query: String,
    /// The type-checker's error, rendered as text.
    pub message: String,
}

/// A non-blocking advisory found during validation. Logged at boot;
/// never blocks startup. Currently: an MCP-exposed query that declares a
/// parameter an agent cannot realistically supply.
#[derive(Debug, Clone)]
pub struct Warning {
    /// Name of the stored query the advisory is about.
    pub query: String,
    /// Human-readable advisory text.
    pub message: String,
}

/// Outcome of validating a registry against a schema. Breakages are
/// fatal (boot refuses); warnings are advisory.
#[derive(Debug, Clone, Default)]
pub struct CheckReport {
    pub breakages: Vec<Breakage>,
    pub warnings: Vec<Warning>,
}

impl CheckReport {
    /// `true` when at least one breakage was recorded.
    pub fn has_breakages(&self) -> bool {
        !self.breakages.is_empty()
    }

    /// `true` only when there are neither breakages nor warnings.
    pub fn is_clean(&self) -> bool {
        self.breakages.is_empty() && self.warnings.is_empty()
    }
}
|
||||||
|
|
||||||
|
/// Validate a loaded registry against the live schema.
|
||||||
|
///
|
||||||
|
/// Pure over `(registry, catalog)` — takes an already-parsed registry and
|
||||||
|
/// a catalog, so it is callable both at server boot (with the engine's
|
||||||
|
/// `catalog()`) and offline from the CLI (`omnigraph queries check`),
|
||||||
|
/// without coupling to server config or an open engine connection.
|
||||||
|
///
|
||||||
|
/// Every query is type-checked via the same `typecheck_query_decl` the
|
||||||
|
/// engine runs for inline queries — no parallel implementation. Failures
|
||||||
|
/// are **collected, not fail-fast**, so an operator sees every broken
|
||||||
|
/// query in one pass.
|
||||||
|
///
|
||||||
|
/// Advisory lint (warn, never block): an `mcp.expose: true` query that
|
||||||
|
/// declares a `Vector(N)` parameter. An LLM cannot supply a raw embedding
|
||||||
|
/// vector; such a query should take a `String` parameter and let the
|
||||||
|
/// engine embed it server-side at query time. Service-to-service callers
|
||||||
|
/// may legitimately pass vectors, so this warns rather than rejects.
|
||||||
|
pub fn check(registry: &QueryRegistry, catalog: &Catalog) -> CheckReport {
|
||||||
|
let mut report = CheckReport::default();
|
||||||
|
for query in registry.iter() {
|
||||||
|
if let Err(err) = typecheck_query_decl(catalog, &query.decl) {
|
||||||
|
report.breakages.push(Breakage {
|
||||||
|
query: query.name.clone(),
|
||||||
|
message: err.to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if query.expose {
|
||||||
|
for param in &query.decl.params {
|
||||||
|
// Resolve to the structured type via the compiler's own
|
||||||
|
// resolver rather than string-matching `Vector(` — one
|
||||||
|
// canonical definition of "is a vector", so this lint can't
|
||||||
|
// drift from how the parser/type system spells the type.
|
||||||
|
let is_vector = PropType::from_param_type_name(¶m.type_name, param.nullable)
|
||||||
|
.is_some_and(|pt| matches!(pt.scalar, ScalarType::Vector(_)));
|
||||||
|
if is_vector {
|
||||||
|
report.warnings.push(Warning {
|
||||||
|
query: query.name.clone(),
|
||||||
|
message: format!(
|
||||||
|
"MCP-exposed query declares a `{}` parameter `${}` that agents \
|
||||||
|
cannot supply; use a `String` parameter for server-side embedding",
|
||||||
|
param.type_name, param.name
|
||||||
|
),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Format every breakage in a registry check report into a multi-line
|
||||||
|
/// operator-facing message, naming each offending query.
|
||||||
|
pub fn format_check_breakages(label: &str, report: &CheckReport) -> String {
|
||||||
|
let joined = report
|
||||||
|
.breakages
|
||||||
|
.iter()
|
||||||
|
.map(|b| format!("query '{}': {}", b.query, b.message))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n ");
|
||||||
|
format!(
|
||||||
|
"graph '{label}': {} stored quer{} failed the schema check:\n {joined}",
|
||||||
|
report.breakages.len(),
|
||||||
|
if report.breakages.len() == 1 {
|
||||||
|
"y"
|
||||||
|
} else {
|
||||||
|
"ies"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn spec(name: &str, source: &str, expose: bool) -> RegistrySpec {
|
||||||
|
RegistrySpec {
|
||||||
|
name: name.to_string(),
|
||||||
|
source: source.to_string(),
|
||||||
|
expose,
|
||||||
|
tool_name: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn spec_tool(name: &str, source: &str, expose: bool, tool_name: &str) -> RegistrySpec {
|
||||||
|
RegistrySpec {
|
||||||
|
name: name.to_string(),
|
||||||
|
source: source.to_string(),
|
||||||
|
expose,
|
||||||
|
tool_name: Some(tool_name.to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn key_equal_symbol_loads() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"find_user",
|
||||||
|
"query find_user($id: String) { match { $u: User } return { $u.name } }",
|
||||||
|
true,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let q = reg.lookup("find_user").unwrap();
|
||||||
|
assert_eq!(q.name, "find_user");
|
||||||
|
assert!(q.expose);
|
||||||
|
assert_eq!(q.decl.params.len(), 1);
|
||||||
|
assert!(!q.is_mutation());
|
||||||
|
// No override → the effective tool name is the query name.
|
||||||
|
assert_eq!(q.effective_tool_name(), "find_user");
|
||||||
|
|
||||||
|
// An explicit override is what the catalog keys on.
|
||||||
|
let with_tool = QueryRegistry::from_specs(vec![spec_tool(
|
||||||
|
"find_user",
|
||||||
|
"query find_user($id: String) { match { $u: User } return { $u.name } }",
|
||||||
|
true,
|
||||||
|
"lookup_user",
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
with_tool.lookup("find_user").unwrap().effective_tool_name(),
|
||||||
|
"lookup_user"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn key_mismatch_is_an_identity_error() {
|
||||||
|
let errors = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"find_user",
|
||||||
|
// symbol is `lookup`, key is `find_user` — must be rejected.
|
||||||
|
"query lookup($id: String) { match { $u: User } return { $u.name } }",
|
||||||
|
false,
|
||||||
|
)])
|
||||||
|
.unwrap_err();
|
||||||
|
assert_eq!(errors.len(), 1);
|
||||||
|
assert_eq!(errors[0].query.as_deref(), Some("find_user"));
|
||||||
|
assert!(errors[0].message.contains("must match the query symbol"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn multi_query_file_selects_the_matching_symbol() {
|
||||||
|
let source = "query a($x: I64) { match { $u: User } return { $u.name } }\n\
|
||||||
|
query b($y: String) { match { $u: User } return { $u.name } }";
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec("b", source, false)]).unwrap();
|
||||||
|
let q = reg.lookup("b").unwrap();
|
||||||
|
assert_eq!(q.name, "b");
|
||||||
|
assert_eq!(q.decl.params[0].name, "y");
|
||||||
|
assert!(reg.lookup("a").is_none(), "only the selected symbol is registered");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn duplicate_exposed_tool_name_is_a_load_error() {
|
||||||
|
// Two MCP-exposed queries claiming one tool name is an ambiguity in
|
||||||
|
// the catalog key space — refused at load, naming both queries and
|
||||||
|
// the contested tool.
|
||||||
|
let errors = QueryRegistry::from_specs(vec![
|
||||||
|
spec_tool("a", "query a() { match { $u: User } return { $u.name } }", true, "dup"),
|
||||||
|
spec_tool("b", "query b() { match { $u: User } return { $u.name } }", true, "dup"),
|
||||||
|
])
|
||||||
|
.unwrap_err();
|
||||||
|
assert_eq!(errors.len(), 1);
|
||||||
|
let msg = errors[0].to_string();
|
||||||
|
assert!(msg.contains("'dup'"), "names the contested tool: {msg}");
|
||||||
|
assert!(msg.contains("'a'"), "names the winning query: {msg}");
|
||||||
|
assert!(msg.contains("'b'"), "names the losing query: {msg}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn duplicate_tool_name_among_unexposed_is_allowed() {
|
||||||
|
// Unexposed queries have no MCP tool, so a shared effective tool
|
||||||
|
// name is inert — must not error (pins the exposed-only scope).
|
||||||
|
let reg = QueryRegistry::from_specs(vec![
|
||||||
|
spec_tool("a", "query a() { match { $u: User } return { $u.name } }", false, "dup"),
|
||||||
|
spec_tool("b", "query b() { match { $u: User } return { $u.name } }", false, "dup"),
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(reg.len(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_error_surfaces_per_entry() {
|
||||||
|
let errors =
|
||||||
|
QueryRegistry::from_specs(vec![spec("broken", "query broken( {{ not valid", false)])
|
||||||
|
.unwrap_err();
|
||||||
|
assert_eq!(errors[0].query.as_deref(), Some("broken"));
|
||||||
|
assert!(errors[0].message.contains("parse error"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn errors_collect_rather_than_fail_fast() {
|
||||||
|
let errors = QueryRegistry::from_specs(vec![
|
||||||
|
spec("good", "query good() { match { $u: User } return { $u.name } }", false),
|
||||||
|
spec("mismatch", "query other() { match { $u: User } return { $u.name } }", false),
|
||||||
|
spec("broken", "query broken(", false),
|
||||||
|
])
|
||||||
|
.unwrap_err();
|
||||||
|
// `good` loads cleanly; only the mismatch and the parse error are
|
||||||
|
// reported, and both surface in one pass (not fail-fast).
|
||||||
|
assert_eq!(errors.len(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn mutation_body_classifies_as_mutation() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"add_user",
|
||||||
|
"query add_user($name: String) { insert User { name: $name } }",
|
||||||
|
false,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
assert!(reg.lookup("add_user").unwrap().is_mutation());
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- check(registry, catalog) ---
|
||||||
|
|
||||||
|
use omnigraph_compiler::catalog::build_catalog;
|
||||||
|
use omnigraph_compiler::schema::parser::parse_schema;
|
||||||
|
|
||||||
|
fn test_catalog() -> Catalog {
|
||||||
|
let schema = parse_schema(
|
||||||
|
r#"
|
||||||
|
node User {
|
||||||
|
name: String
|
||||||
|
age: I32?
|
||||||
|
embedding: Vector(4)
|
||||||
|
}
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
build_catalog(&schema).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_passes_for_valid_query() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"find_user",
|
||||||
|
"query find_user($name: String) { match { $u: User { name: $name } } return { $u.age } }",
|
||||||
|
false,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let report = check(&reg, &test_catalog());
|
||||||
|
assert!(report.is_clean(), "unexpected: {:?}", report);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_reports_unknown_type_as_breakage() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"ghost",
|
||||||
|
// `Widget` is not in the schema.
|
||||||
|
"query ghost() { match { $w: Widget } return { $w.name } }",
|
||||||
|
false,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let report = check(&reg, &test_catalog());
|
||||||
|
assert!(report.has_breakages());
|
||||||
|
assert_eq!(report.breakages[0].query, "ghost");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_reports_unknown_property_as_breakage() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"bad_prop",
|
||||||
|
// `User` exists but has no `nickname`.
|
||||||
|
"query bad_prop() { match { $u: User } return { $u.nickname } }",
|
||||||
|
false,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let report = check(&reg, &test_catalog());
|
||||||
|
assert!(report.has_breakages());
|
||||||
|
assert_eq!(report.breakages[0].query, "bad_prop");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_collects_every_breakage_not_fail_fast() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![
|
||||||
|
spec("a", "query a() { match { $w: Widget } return { $w.x } }", false),
|
||||||
|
spec("b", "query b() { match { $g: Gadget } return { $g.y } }", false),
|
||||||
|
spec(
|
||||||
|
"ok",
|
||||||
|
"query ok() { match { $u: User } return { $u.name } }",
|
||||||
|
false,
|
||||||
|
),
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
let report = check(&reg, &test_catalog());
|
||||||
|
assert_eq!(report.breakages.len(), 2, "both bad queries reported: {:?}", report);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn vector_param_on_exposed_query_warns() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"vec_search",
|
||||||
|
"query vec_search($q: Vector(4)) { match { $u: User } return { $u.name } \
|
||||||
|
order { nearest($u.embedding, $q) } limit 3 }",
|
||||||
|
true, // mcp.expose
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let report = check(&reg, &test_catalog());
|
||||||
|
assert!(!report.has_breakages(), "valid query: {:?}", report);
|
||||||
|
assert_eq!(report.warnings.len(), 1);
|
||||||
|
assert_eq!(report.warnings[0].query, "vec_search");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn vector_param_on_unexposed_query_is_silent() {
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"vec_search",
|
||||||
|
"query vec_search($q: Vector(4)) { match { $u: User } return { $u.name } \
|
||||||
|
order { nearest($u.embedding, $q) } limit 3 }",
|
||||||
|
false, // not exposed — vector param is fine for service-to-service callers
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let report = check(&reg, &test_catalog());
|
||||||
|
assert!(report.is_clean(), "unexpected: {:?}", report);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn non_vector_param_on_exposed_query_does_not_warn() {
|
||||||
|
// The recommended `String` alternative on an exposed query does not
|
||||||
|
// resolve to a Vector, so the embedding advisory stays silent. Guards
|
||||||
|
// the structured type check against a false positive (and pins that
|
||||||
|
// only `Vector(_)` triggers the warning).
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"search",
|
||||||
|
"query search($name: String) { match { $u: User { name: $name } } return { $u.name } }",
|
||||||
|
true,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let report = check(&reg, &test_catalog());
|
||||||
|
assert!(report.is_clean(), "no breakage or warning expected: {:?}", report);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- catalog projection (api::query_catalog_entry) ---
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn catalog_entry_projects_every_param_kind() {
|
||||||
|
use crate::api::{self, ParamKind};
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec_tool(
|
||||||
|
"all_types",
|
||||||
|
"query all_types($s: String, $i: I32, $big: I64, $u: U64, $f: F64, $b: Bool, \
|
||||||
|
$d: Date, $dt: DateTime, $blob: Blob, $opt: String?, $list: [I32], $vec: Vector(4)) \
|
||||||
|
{ match { $x: User } return { $x.name } }",
|
||||||
|
true,
|
||||||
|
"all",
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let entry = api::query_catalog_entry(reg.lookup("all_types").unwrap());
|
||||||
|
assert_eq!(entry.name, "all_types");
|
||||||
|
assert_eq!(entry.tool_name, "all");
|
||||||
|
assert!(!entry.mutation);
|
||||||
|
|
||||||
|
let by: std::collections::HashMap<_, _> =
|
||||||
|
entry.params.iter().map(|p| (p.name.as_str(), p)).collect();
|
||||||
|
assert_eq!(by["s"].kind, ParamKind::String);
|
||||||
|
assert_eq!(by["i"].kind, ParamKind::Int);
|
||||||
|
assert_eq!(by["big"].kind, ParamKind::BigInt, "I64 → bigint (string on the wire)");
|
||||||
|
assert_eq!(by["u"].kind, ParamKind::BigInt, "U64 → bigint");
|
||||||
|
assert_eq!(by["f"].kind, ParamKind::Float);
|
||||||
|
assert_eq!(by["b"].kind, ParamKind::Bool);
|
||||||
|
assert_eq!(by["d"].kind, ParamKind::Date);
|
||||||
|
assert_eq!(by["dt"].kind, ParamKind::DateTime);
|
||||||
|
assert_eq!(by["blob"].kind, ParamKind::Blob);
|
||||||
|
assert!(!by["s"].nullable);
|
||||||
|
assert!(by["opt"].nullable, "String? → nullable");
|
||||||
|
assert_eq!(by["list"].kind, ParamKind::List);
|
||||||
|
assert_eq!(by["list"].item_kind, Some(ParamKind::Int), "[I32] → list of int");
|
||||||
|
assert_eq!(by["vec"].kind, ParamKind::Vector);
|
||||||
|
assert_eq!(by["vec"].vector_dim, Some(4));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn catalog_entry_flags_mutation_and_empty_params() {
|
||||||
|
use crate::api;
|
||||||
|
let reg = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"add_user",
|
||||||
|
"query add_user($name: String) { insert User { name: $name } }",
|
||||||
|
true,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let entry = api::query_catalog_entry(reg.lookup("add_user").unwrap());
|
||||||
|
assert!(entry.mutation, "insert body → mutation flag");
|
||||||
|
|
||||||
|
let reg2 = QueryRegistry::from_specs(vec![spec(
|
||||||
|
"no_params",
|
||||||
|
"query no_params() { match { $u: User } return { $u.name } }",
|
||||||
|
true,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
let entry2 = api::query_catalog_entry(reg2.lookup("no_params").unwrap());
|
||||||
|
assert!(entry2.params.is_empty(), "no declared params → empty list");
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- load() error collection (file I/O + parse in one pass) ---
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn load_collects_io_and_parse_errors_in_one_pass() {
|
||||||
|
use crate::config::load_config;
|
||||||
|
let temp = tempfile::tempdir().unwrap();
|
||||||
|
std::fs::write(
|
||||||
|
temp.path().join("good.gq"),
|
||||||
|
"query good() { match { $u: User } return { $u.name } }",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
std::fs::write(temp.path().join("broken.gq"), "query broken( {{ not valid").unwrap();
|
||||||
|
// `missing.gq` is deliberately not written (an I/O failure).
|
||||||
|
std::fs::write(
|
||||||
|
temp.path().join("omnigraph.yaml"),
|
||||||
|
"queries:\n good:\n file: ./good.gq\n \
|
||||||
|
missing:\n file: ./missing.gq\n broken:\n file: ./broken.gq\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let config = load_config(Some(&temp.path().join("omnigraph.yaml"))).unwrap();
|
||||||
|
|
||||||
|
let errors = QueryRegistry::load(&config, config.query_entries()).unwrap_err();
|
||||||
|
let joined = errors.iter().map(|e| e.to_string()).collect::<Vec<_>>().join("\n");
|
||||||
|
// Both the missing file AND the parse error surface in one pass —
|
||||||
|
// the I/O failure must not mask the parse failure.
|
||||||
|
assert!(joined.contains("missing"), "I/O error must surface: {joined}");
|
||||||
|
assert!(
|
||||||
|
joined.contains("broken") && joined.contains("parse error"),
|
||||||
|
"the parse error in a readable file must surface in the same pass: {joined}"
|
||||||
|
);
|
||||||
|
assert!(!joined.contains("'good'"), "the valid entry is not an error: {joined}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -29,6 +29,7 @@ use tokio::sync::Mutex;
|
||||||
|
|
||||||
use crate::identity::GraphKey;
|
use crate::identity::GraphKey;
|
||||||
use crate::policy::PolicyEngine;
|
use crate::policy::PolicyEngine;
|
||||||
|
use crate::queries::QueryRegistry;
|
||||||
|
|
||||||
/// Open handle for a single graph in the registry. Cheap to clone (`Arc`-wrapped
|
/// Open handle for a single graph in the registry. Cheap to clone (`Arc`-wrapped
|
||||||
/// engine + policy). Cluster-mode handlers extract this via
|
/// engine + policy). Cluster-mode handlers extract this via
|
||||||
|
|
@ -47,6 +48,11 @@ pub struct GraphHandle {
|
||||||
/// `_as` writers"; the HTTP-layer `require_bearer_auth` middleware still
|
/// `_as` writers"; the HTTP-layer `require_bearer_auth` middleware still
|
||||||
/// runs regardless.
|
/// runs regardless.
|
||||||
pub policy: Option<Arc<PolicyEngine>>,
|
pub policy: Option<Arc<PolicyEngine>>,
|
||||||
|
/// Per-graph stored-query registry, loaded and validated at
|
||||||
|
/// startup. `None` means the operator declared no stored queries for
|
||||||
|
/// this graph — `POST /queries/{name}` then 404s. Mirrors the
|
||||||
|
/// optional `policy` shape.
|
||||||
|
pub queries: Option<Arc<QueryRegistry>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Immutable snapshot of the registry's current state. Replaced atomically
|
/// Immutable snapshot of the registry's current state. Replaced atomically
|
||||||
|
|
@ -245,6 +251,7 @@ fn canonicalize_handle_uri(
|
||||||
uri: canonical_uri.clone(),
|
uri: canonical_uri.clone(),
|
||||||
engine: Arc::clone(&handle.engine),
|
engine: Arc::clone(&handle.engine),
|
||||||
policy: handle.policy.clone(),
|
policy: handle.policy.clone(),
|
||||||
|
queries: handle.queries.clone(),
|
||||||
});
|
});
|
||||||
Ok((canonical_uri, canonical_handle))
|
Ok((canonical_uri, canonical_handle))
|
||||||
}
|
}
|
||||||
|
|
@ -276,6 +283,7 @@ mod tests {
|
||||||
uri: graph_uri,
|
uri: graph_uri,
|
||||||
engine: Arc::new(engine),
|
engine: Arc::new(engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -340,12 +348,14 @@ mod tests {
|
||||||
uri: shared_uri.clone(),
|
uri: shared_uri.clone(),
|
||||||
engine: Arc::clone(&engine),
|
engine: Arc::clone(&engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
let h2 = Arc::new(GraphHandle {
|
let h2 = Arc::new(GraphHandle {
|
||||||
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
|
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
|
||||||
uri: shared_uri,
|
uri: shared_uri,
|
||||||
engine,
|
engine,
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
|
|
||||||
let registry = GraphRegistry::new();
|
let registry = GraphRegistry::new();
|
||||||
|
|
@ -411,12 +421,14 @@ mod tests {
|
||||||
uri: shared_uri.clone(),
|
uri: shared_uri.clone(),
|
||||||
engine: Arc::clone(&engine),
|
engine: Arc::clone(&engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
let h2 = Arc::new(GraphHandle {
|
let h2 = Arc::new(GraphHandle {
|
||||||
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
|
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
|
||||||
uri: shared_uri,
|
uri: shared_uri,
|
||||||
engine,
|
engine,
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
let err = match GraphRegistry::from_handles(vec![h1, h2]) {
|
let err = match GraphRegistry::from_handles(vec![h1, h2]) {
|
||||||
Ok(_) => panic!("expected DuplicateUri, got Ok"),
|
Ok(_) => panic!("expected DuplicateUri, got Ok"),
|
||||||
|
|
|
||||||
|
|
@ -168,6 +168,8 @@ const EXPECTED_PATHS: &[&str] = &[
|
||||||
"/export",
|
"/export",
|
||||||
"/change",
|
"/change",
|
||||||
"/mutate",
|
"/mutate",
|
||||||
|
"/queries",
|
||||||
|
"/queries/{name}",
|
||||||
"/schema",
|
"/schema",
|
||||||
"/schema/apply",
|
"/schema/apply",
|
||||||
"/ingest",
|
"/ingest",
|
||||||
|
|
@ -701,6 +703,8 @@ fn protected_endpoints_reference_bearer_token_security() {
|
||||||
("/read", "post"),
|
("/read", "post"),
|
||||||
("/change", "post"),
|
("/change", "post"),
|
||||||
("/schema/apply", "post"),
|
("/schema/apply", "post"),
|
||||||
|
("/queries", "get"),
|
||||||
|
("/queries/{name}", "post"),
|
||||||
("/ingest", "post"),
|
("/ingest", "post"),
|
||||||
("/export", "post"),
|
("/export", "post"),
|
||||||
("/snapshot", "get"),
|
("/snapshot", "get"),
|
||||||
|
|
@ -913,6 +917,34 @@ fn post_endpoints_have_request_body() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The OpenAPI document must describe `POST /queries/{name}` with a request
/// body that is present, not marked required, and typed as
/// `InvokeStoredQueryRequest` (either directly via `$ref` or as one of the
/// `oneOf` alternatives).
#[test]
fn invoke_stored_query_request_body_is_optional() {
    let doc = openapi_json();
    let request_body = &doc["paths"]["/queries/{name}"]["post"]["requestBody"];
    assert!(
        request_body.is_object(),
        "POST /queries/{{name}} should document its optional request body"
    );
    // A missing `required` key counts as optional, hence the `false` default.
    assert!(
        !request_body["required"].as_bool().unwrap_or(false),
        "stored-query invocation body should be optional"
    );
    let schema = &request_body["content"]["application/json"]["schema"];
    let ref_path = schema["$ref"]
        .as_str()
        .or_else(|| {
            // Fall back to the first `$ref` among the `oneOf` alternatives.
            schema["oneOf"]
                .as_array()
                .and_then(|alternatives| {
                    alternatives
                        .iter()
                        .find_map(|candidate| candidate["$ref"].as_str())
                })
        })
        .expect("POST /queries/{name} requestBody schema should carry a $ref");
    assert!(
        ref_path.contains("InvokeStoredQueryRequest"),
        "POST /queries/{{name}} requestBody should reference InvokeStoredQueryRequest, got {ref_path}"
    );
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Serialization round-trip test
|
// Serialization round-trip test
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
@ -1117,6 +1149,7 @@ async fn app_for_multi_mode(graph_ids: &[&str]) -> (Vec<tempfile::TempDir>, Rout
|
||||||
uri: graph_uri,
|
uri: graph_uri,
|
||||||
engine: Arc::new(engine),
|
engine: Arc::new(engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
}));
|
}));
|
||||||
dirs.push(dir);
|
dirs.push(dir);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ use axum::body::{Body, to_bytes};
|
||||||
use axum::http::header::AUTHORIZATION;
|
use axum::http::header::AUTHORIZATION;
|
||||||
use axum::http::{Method, Request, StatusCode};
|
use axum::http::{Method, Request, StatusCode};
|
||||||
use lance::index::DatasetIndexExt;
|
use lance::index::DatasetIndexExt;
|
||||||
use omnigraph::db::{Omnigraph, ReadTarget, SchemaApplyOptions};
|
use omnigraph::db::{Omnigraph, ReadTarget};
|
||||||
use omnigraph::error::OmniError;
|
use omnigraph::error::OmniError;
|
||||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||||
use omnigraph_policy::{PolicyChecker, PolicyEngine};
|
use omnigraph_policy::{PolicyChecker, PolicyEngine};
|
||||||
|
|
@ -16,6 +16,7 @@ use omnigraph_server::api::{
|
||||||
BranchCreateRequest, BranchMergeRequest, ChangeRequest, ErrorOutput, ExportRequest,
|
BranchCreateRequest, BranchMergeRequest, ChangeRequest, ErrorOutput, ExportRequest,
|
||||||
IngestRequest, QueryRequest, ReadRequest, SchemaApplyRequest, SchemaOutput,
|
IngestRequest, QueryRequest, ReadRequest, SchemaApplyRequest, SchemaOutput,
|
||||||
};
|
};
|
||||||
|
use omnigraph_server::queries::{QueryRegistry, RegistrySpec};
|
||||||
use omnigraph_server::{AppState, build_app};
|
use omnigraph_server::{AppState, build_app};
|
||||||
use serde_json::{Value, json};
|
use serde_json::{Value, json};
|
||||||
use serial_test::serial;
|
use serial_test::serial;
|
||||||
|
|
@ -141,6 +142,469 @@ fn graph_path(root: &Path) -> PathBuf {
|
||||||
root.join("server.omni")
|
root.join("server.omni")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn stored_query_registry(specs: &[(&str, &str, bool)]) -> QueryRegistry {
|
||||||
|
QueryRegistry::from_specs(
|
||||||
|
specs
|
||||||
|
.iter()
|
||||||
|
.map(|(name, source, expose)| RegistrySpec {
|
||||||
|
name: name.to_string(),
|
||||||
|
source: source.to_string(),
|
||||||
|
expose: *expose,
|
||||||
|
tool_name: None,
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
)
|
||||||
|
.expect("specs parse and key==symbol")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn server_boots_with_a_valid_stored_query_registry() {
|
||||||
|
// A stored query that type-checks against the fixture schema
|
||||||
|
// (`Person { name, age }`) must let the server boot.
|
||||||
|
let temp = init_loaded_graph().await;
|
||||||
|
let graph = graph_path(temp.path());
|
||||||
|
let registry = stored_query_registry(&[(
|
||||||
|
"find_person",
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }",
|
||||||
|
false,
|
||||||
|
)]);
|
||||||
|
let state = AppState::open_single_with_queries(
|
||||||
|
graph.to_string_lossy().to_string(),
|
||||||
|
vec![],
|
||||||
|
None,
|
||||||
|
registry,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert!(state.is_ok(), "valid registry should boot: {:?}", state.err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn server_refuses_boot_on_type_broken_stored_query() {
|
||||||
|
// A stored query referencing a type not in the schema (`Widget`)
|
||||||
|
// must abort boot, naming the offending query.
|
||||||
|
let temp = init_loaded_graph().await;
|
||||||
|
let graph = graph_path(temp.path());
|
||||||
|
let registry = stored_query_registry(&[(
|
||||||
|
"ghost",
|
||||||
|
"query ghost() { match { $w: Widget } return { $w.name } }",
|
||||||
|
false,
|
||||||
|
)]);
|
||||||
|
let result = AppState::open_single_with_queries(
|
||||||
|
graph.to_string_lossy().to_string(),
|
||||||
|
vec![],
|
||||||
|
None,
|
||||||
|
registry,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
// `AppState` is not `Debug`, so match rather than `expect_err`.
|
||||||
|
let err = match result {
|
||||||
|
Ok(_) => panic!("type-broken stored query must refuse boot"),
|
||||||
|
Err(err) => err,
|
||||||
|
};
|
||||||
|
let msg = err.to_string();
|
||||||
|
assert!(msg.contains("ghost"), "error should name the broken query: {msg}");
|
||||||
|
assert!(
|
||||||
|
msg.contains("schema check"),
|
||||||
|
"error should mention the schema check: {msg}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a single-mode app with a stored-query registry plus a bearer→actor
|
||||||
|
/// pairing and a policy, so invoke tests exercise the `invoke_query`
|
||||||
|
/// boundary gate and the inner read/change gates together.
|
||||||
|
async fn app_with_stored_queries(
|
||||||
|
specs: &[(&str, &str, bool)],
|
||||||
|
tokens: &[(&str, &str)],
|
||||||
|
policy: &str,
|
||||||
|
) -> (tempfile::TempDir, Router) {
|
||||||
|
let temp = init_loaded_graph().await;
|
||||||
|
let graph = graph_path(temp.path());
|
||||||
|
let policy_path = temp.path().join("policy.yaml");
|
||||||
|
fs::write(&policy_path, policy).unwrap();
|
||||||
|
let registry = stored_query_registry(specs);
|
||||||
|
let state = AppState::open_single_with_queries(
|
||||||
|
graph.to_string_lossy().to_string(),
|
||||||
|
tokens
|
||||||
|
.iter()
|
||||||
|
.map(|(actor, token)| ((*actor).to_string(), (*token).to_string()))
|
||||||
|
.collect(),
|
||||||
|
Some(&policy_path),
|
||||||
|
registry,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
(temp, build_app(state))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// - `act-invoke`: invoke_query + read (stored reads, not mutations)
|
||||||
|
/// - `act-full`: invoke_query + read + change (stored mutations)
|
||||||
|
/// - `act-noinvoke`: read only, no invoke_query (boundary-denied)
|
||||||
|
/// - `act-invokeonly`: invoke_query only, no read (clears the boundary, inner read denies)
|
||||||
|
const INVOKE_POLICY_YAML: &str = r#"
|
||||||
|
version: 1
|
||||||
|
groups:
|
||||||
|
invokers: ["act-invoke"]
|
||||||
|
full: ["act-full"]
|
||||||
|
readers: ["act-noinvoke"]
|
||||||
|
invoke_only: ["act-invokeonly"]
|
||||||
|
protected_branches: [main]
|
||||||
|
rules:
|
||||||
|
# invoke_query is graph-scoped — its own rules, no branch_scope.
|
||||||
|
- id: invokers-can-invoke
|
||||||
|
allow:
|
||||||
|
actors: { group: invokers }
|
||||||
|
actions: [invoke_query]
|
||||||
|
- id: full-can-invoke
|
||||||
|
allow:
|
||||||
|
actors: { group: full }
|
||||||
|
actions: [invoke_query]
|
||||||
|
- id: invoke-only-can-invoke
|
||||||
|
allow:
|
||||||
|
actors: { group: invoke_only }
|
||||||
|
actions: [invoke_query]
|
||||||
|
# read / change are branch-scoped.
|
||||||
|
- id: invokers-can-read
|
||||||
|
allow:
|
||||||
|
actors: { group: invokers }
|
||||||
|
actions: [read]
|
||||||
|
branch_scope: any
|
||||||
|
- id: full-can-read-change
|
||||||
|
allow:
|
||||||
|
actors: { group: full }
|
||||||
|
actions: [read, change]
|
||||||
|
branch_scope: any
|
||||||
|
- id: readers-can-read
|
||||||
|
allow:
|
||||||
|
actors: { group: readers }
|
||||||
|
actions: [read]
|
||||||
|
branch_scope: any
|
||||||
|
"#;
|
||||||
|
|
||||||
|
const STORED_QUERY_SCHEMA_APPLY_POLICY_YAML: &str = r#"
|
||||||
|
version: 1
|
||||||
|
groups:
|
||||||
|
admins: [act-ragnor]
|
||||||
|
protected_branches: [main]
|
||||||
|
rules:
|
||||||
|
- id: admins-can-invoke
|
||||||
|
allow:
|
||||||
|
actors: { group: admins }
|
||||||
|
actions: [invoke_query]
|
||||||
|
- id: admins-can-read
|
||||||
|
allow:
|
||||||
|
actors: { group: admins }
|
||||||
|
actions: [read]
|
||||||
|
branch_scope: any
|
||||||
|
- id: admins-can-schema-apply
|
||||||
|
allow:
|
||||||
|
actors: { group: admins }
|
||||||
|
actions: [schema_apply]
|
||||||
|
target_branch_scope: protected
|
||||||
|
"#;
|
||||||
|
|
||||||
|
const FIND_PERSON_GQ: &str =
|
||||||
|
"query find_person($name: String) { match { $p: Person { name: $name } } return { $p.age } }";
|
||||||
|
|
||||||
|
fn invoke_request(name: &str, token: &str, body: Value) -> Request<Body> {
|
||||||
|
Request::builder()
|
||||||
|
.uri(format!("/queries/{name}"))
|
||||||
|
.method(Method::POST)
|
||||||
|
.header("content-type", "application/json")
|
||||||
|
.header("authorization", format!("Bearer {token}"))
|
||||||
|
.body(Body::from(serde_json::to_vec(&body).unwrap()))
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn invoke_request_bytes(
|
||||||
|
name: &str,
|
||||||
|
token: &str,
|
||||||
|
body: impl Into<Body>,
|
||||||
|
content_type: Option<&str>,
|
||||||
|
) -> Request<Body> {
|
||||||
|
let mut builder = Request::builder()
|
||||||
|
.uri(format!("/queries/{name}"))
|
||||||
|
.method(Method::POST)
|
||||||
|
.header("authorization", format!("Bearer {token}"));
|
||||||
|
if let Some(content_type) = content_type {
|
||||||
|
builder = builder.header("content-type", content_type);
|
||||||
|
}
|
||||||
|
builder.body(body.into()).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn invoke_stored_read_returns_rows() {
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[("find_person", FIND_PERSON_GQ, false)],
|
||||||
|
&[("act-invoke", "t-invoke")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request("find_person", "t-invoke", json!({ "params": { "name": "Alice" } })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||||
|
assert_eq!(body["query_name"], "find_person");
|
||||||
|
assert_eq!(body["row_count"], 1, "Alice is in the fixture; body: {body}");
|
||||||
|
assert!(body["rows"].is_array(), "read envelope shape; body: {body}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn invoke_stored_read_accepts_absent_or_empty_body() {
|
||||||
|
let no_param_query = "query list_people() { match { $p: Person } return { $p.name } }";
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[("list_people", no_param_query, false)],
|
||||||
|
&[("act-invoke", "t-invoke")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request_bytes("list_people", "t-invoke", Body::empty(), None),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||||
|
assert_eq!(body["query_name"], "list_people");
|
||||||
|
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request_bytes(
|
||||||
|
"list_people",
|
||||||
|
"t-invoke",
|
||||||
|
Body::empty(),
|
||||||
|
Some("application/json"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||||
|
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request_bytes(
|
||||||
|
"list_people",
|
||||||
|
"t-invoke",
|
||||||
|
Body::from("{}"),
|
||||||
|
Some("application/json"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||||
|
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request_bytes(
|
||||||
|
"list_people",
|
||||||
|
"t-invoke",
|
||||||
|
Body::from("{"),
|
||||||
|
Some("application/json"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
|
||||||
|
assert!(
|
||||||
|
body["error"]
|
||||||
|
.as_str()
|
||||||
|
.unwrap_or_default()
|
||||||
|
.contains("invalid stored-query invocation body"),
|
||||||
|
"malformed JSON should be rejected as bad request; body: {body}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn invoke_stored_mutation_double_gates_on_change() {
|
||||||
|
let specs: &[(&str, &str, bool)] = &[(
|
||||||
|
"add_person",
|
||||||
|
"query add_person($name: String) { insert Person { name: $name } }",
|
||||||
|
false,
|
||||||
|
)];
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
specs,
|
||||||
|
&[("act-invoke", "t-invoke"), ("act-full", "t-full")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Has invoke_query but NOT change → the inner change gate denies (403).
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request("add_person", "t-invoke", json!({ "params": { "name": "Eve" } })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(
|
||||||
|
status,
|
||||||
|
StatusCode::FORBIDDEN,
|
||||||
|
"invoke_query without change must 403; body: {body}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Has invoke_query + change → applied.
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request("add_person", "t-full", json!({ "params": { "name": "Eve" } })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||||
|
assert_eq!(body["affected_nodes"], 1, "body: {body}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn invoke_stored_query_bad_param_is_400() {
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[("find_person", FIND_PERSON_GQ, false)],
|
||||||
|
&[("act-invoke", "t-invoke")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
// `name` is declared String; pass a number.
|
||||||
|
let (status, body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request("find_person", "t-invoke", json!({ "params": { "name": 123 } })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::BAD_REQUEST, "body: {body}");
|
||||||
|
assert!(
|
||||||
|
body["error"].as_str().unwrap_or_default().contains("name"),
|
||||||
|
"400 should name the offending param; body: {body}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn invoke_unknown_query_and_denied_actor_return_identical_404() {
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[("find_person", FIND_PERSON_GQ, false)],
|
||||||
|
&[("act-invoke", "t-invoke"), ("act-noinvoke", "t-noinvoke")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Authorized actor, unknown query name → 404.
|
||||||
|
let (unknown_status, unknown_body) =
|
||||||
|
json_response(&app, invoke_request("does_not_exist", "t-invoke", json!({}))).await;
|
||||||
|
// Denied actor (no invoke_query), real query name → 404.
|
||||||
|
let (denied_status, denied_body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request("find_person", "t-noinvoke", json!({ "params": { "name": "Alice" } })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
assert_eq!(unknown_status, StatusCode::NOT_FOUND);
|
||||||
|
assert_eq!(denied_status, StatusCode::NOT_FOUND);
|
||||||
|
assert_eq!(
|
||||||
|
unknown_body, denied_body,
|
||||||
|
"deny must be byte-identical to a missing query (no catalog probing)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn invoke_query_holder_without_read_sees_403_not_404() {
|
||||||
|
// The 404-hiding is for callers WITHOUT invoke_query. An actor that
|
||||||
|
// HOLDS invoke_query but lacks `read` clears the boundary gate, then the
|
||||||
|
// inner read gate denies → 403 for an EXISTING read query, vs 404 for an
|
||||||
|
// unknown one. Existence is visible to grant-holders by design (the
|
||||||
|
// documented double-gate); this pins that actual contract.
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[("find_person", FIND_PERSON_GQ, false)],
|
||||||
|
&[("act-invokeonly", "t-invokeonly")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let (exists_status, _) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request("find_person", "t-invokeonly", json!({ "params": { "name": "Alice" } })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let (absent_status, _) =
|
||||||
|
json_response(&app, invoke_request("does_not_exist", "t-invokeonly", json!({}))).await;
|
||||||
|
assert_eq!(
|
||||||
|
exists_status,
|
||||||
|
StatusCode::FORBIDDEN,
|
||||||
|
"an existing read query the holder can't read → inner-gate 403"
|
||||||
|
);
|
||||||
|
assert_eq!(absent_status, StatusCode::NOT_FOUND, "unknown query still 404s");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_request(uri: &str, token: &str) -> Request<Body> {
|
||||||
|
Request::builder()
|
||||||
|
.uri(uri)
|
||||||
|
.method(Method::GET)
|
||||||
|
.header("authorization", format!("Bearer {token}"))
|
||||||
|
.body(Body::empty())
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn list_queries_returns_only_exposed_with_typed_params() {
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[
|
||||||
|
("find_person", FIND_PERSON_GQ, true),
|
||||||
|
(
|
||||||
|
"add_person",
|
||||||
|
"query add_person($name: String) { insert Person { name: $name } }",
|
||||||
|
true,
|
||||||
|
),
|
||||||
|
("hidden", "query hidden() { match { $p: Person } return { $p.name } }", false),
|
||||||
|
],
|
||||||
|
&[("act-invoke", "t-invoke")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let (status, body) = json_response(&app, get_request("/queries", "t-invoke")).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||||
|
|
||||||
|
let entries = body["queries"].as_array().unwrap();
|
||||||
|
let names: Vec<&str> = entries.iter().map(|q| q["name"].as_str().unwrap()).collect();
|
||||||
|
assert!(
|
||||||
|
names.contains(&"find_person") && names.contains(&"add_person"),
|
||||||
|
"exposed queries listed: {names:?}"
|
||||||
|
);
|
||||||
|
assert!(!names.contains(&"hidden"), "non-exposed query hidden from the catalog: {names:?}");
|
||||||
|
|
||||||
|
let fp = entries.iter().find(|q| q["name"] == "find_person").unwrap();
|
||||||
|
assert_eq!(fp["mutation"], false);
|
||||||
|
assert_eq!(fp["tool_name"], "find_person");
|
||||||
|
assert_eq!(fp["params"][0]["name"], "name");
|
||||||
|
assert_eq!(fp["params"][0]["kind"], "string");
|
||||||
|
let ap = entries.iter().find(|q| q["name"] == "add_person").unwrap();
|
||||||
|
assert_eq!(ap["mutation"], true, "stored insert → mutation");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn list_queries_is_read_gated_so_a_non_invoker_can_list() {
|
||||||
|
// The catalog is read-gated (not invoke_query-gated), so a reader who
|
||||||
|
// lacks invoke_query still enumerates the exposed queries — the
|
||||||
|
// documented probe-oracle gap until per-query Cedar filtering lands.
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[("find_person", FIND_PERSON_GQ, true)],
|
||||||
|
&[("act-noinvoke", "t-noinvoke")],
|
||||||
|
INVOKE_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let (status, body) = json_response(&app, get_request("/queries", "t-noinvoke")).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "read-gated catalog; body: {body}");
|
||||||
|
let names: Vec<&str> = body["queries"]
|
||||||
|
.as_array()
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|q| q["name"].as_str().unwrap())
|
||||||
|
.collect();
|
||||||
|
assert!(
|
||||||
|
names.contains(&"find_person"),
|
||||||
|
"a reader lists the catalog despite lacking invoke_query: {names:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn list_queries_is_empty_when_no_registry() {
|
||||||
|
let (_temp, app) = app_for_loaded_graph_with_auth("demo-token").await;
|
||||||
|
let (status, body) = json_response(&app, get_request("/queries", "demo-token")).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {body}");
|
||||||
|
assert!(
|
||||||
|
body["queries"].as_array().unwrap().is_empty(),
|
||||||
|
"no stored-query registry → empty catalog"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn drifted_test_schema() -> String {
|
fn drifted_test_schema() -> String {
|
||||||
fs::read_to_string(fixture("test.pg"))
|
fs::read_to_string(fixture("test.pg"))
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
@ -423,6 +887,83 @@ async fn schema_apply_route_updates_graph_for_authorized_admin() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn schema_apply_route_rejects_stored_query_breakage_before_publish() {
|
||||||
|
let (temp, app) = app_with_stored_queries(
|
||||||
|
&[("find_person", FIND_PERSON_GQ, true)],
|
||||||
|
&[("act-ragnor", "admin-token")],
|
||||||
|
STORED_QUERY_SCHEMA_APPLY_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let request = Request::builder()
|
||||||
|
.method(Method::POST)
|
||||||
|
.uri("/schema/apply")
|
||||||
|
.header("content-type", "application/json")
|
||||||
|
.header("authorization", "Bearer admin-token")
|
||||||
|
.body(Body::from(
|
||||||
|
serde_json::to_vec(&SchemaApplyRequest {
|
||||||
|
schema_source: renamed_age_schema(),
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
|
.unwrap(),
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
let (status, payload) = json_response(&app, request).await;
|
||||||
|
assert_eq!(status, StatusCode::BAD_REQUEST, "body: {payload}");
|
||||||
|
let message = payload["error"].as_str().unwrap_or_default();
|
||||||
|
assert!(
|
||||||
|
message.contains("find_person") && message.contains("schema check"),
|
||||||
|
"registry breakage should name the stored query; body: {payload}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let reopened = Omnigraph::open(graph_path(temp.path()).to_str().unwrap())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let person = &reopened.catalog().node_types["Person"];
|
||||||
|
assert!(person.properties.contains_key("age"));
|
||||||
|
assert!(!person.properties.contains_key("years"));
|
||||||
|
|
||||||
|
let (invoke_status, invoke_body) = json_response(
|
||||||
|
&app,
|
||||||
|
invoke_request(
|
||||||
|
"find_person",
|
||||||
|
"admin-token",
|
||||||
|
json!({ "params": { "name": "Alice" } }),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(invoke_status, StatusCode::OK, "body: {invoke_body}");
|
||||||
|
assert_eq!(invoke_body["row_count"], 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn schema_apply_route_noop_keeps_valid_stored_query_registry() {
|
||||||
|
let (_temp, app) = app_with_stored_queries(
|
||||||
|
&[("find_person", FIND_PERSON_GQ, true)],
|
||||||
|
&[("act-ragnor", "admin-token")],
|
||||||
|
STORED_QUERY_SCHEMA_APPLY_POLICY_YAML,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let request = Request::builder()
|
||||||
|
.method(Method::POST)
|
||||||
|
.uri("/schema/apply")
|
||||||
|
.header("content-type", "application/json")
|
||||||
|
.header("authorization", "Bearer admin-token")
|
||||||
|
.body(Body::from(
|
||||||
|
serde_json::to_vec(&SchemaApplyRequest {
|
||||||
|
schema_source: fs::read_to_string(fixture("test.pg")).unwrap(),
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
|
.unwrap(),
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
let (status, payload) = json_response(&app, request).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "body: {payload}");
|
||||||
|
assert_eq!(payload["applied"], false);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn schema_apply_route_requires_schema_apply_policy_permission() {
|
async fn schema_apply_route_requires_schema_apply_policy_permission() {
|
||||||
let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
|
let (_temp, app) = app_for_graph_with_auth_tokens_and_policy(
|
||||||
|
|
@ -4690,6 +5231,7 @@ mod multi_graph_startup {
|
||||||
uri: graph_uri,
|
uri: graph_uri,
|
||||||
engine: Arc::new(engine),
|
engine: Arc::new(engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
}));
|
}));
|
||||||
dirs.push(dir);
|
dirs.push(dir);
|
||||||
}
|
}
|
||||||
|
|
@ -4985,12 +5527,14 @@ graphs:
|
||||||
uri: graph_uri.clone(),
|
uri: graph_uri.clone(),
|
||||||
engine: Arc::clone(&engine),
|
engine: Arc::clone(&engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
let beta = Arc::new(GraphHandle {
|
let beta = Arc::new(GraphHandle {
|
||||||
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
|
key: GraphKey::cluster(GraphId::try_from("beta").unwrap()),
|
||||||
uri: format!("file://{graph_uri}/"),
|
uri: format!("file://{graph_uri}/"),
|
||||||
engine,
|
engine,
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
|
|
||||||
match GraphRegistry::from_handles(vec![alpha, beta]) {
|
match GraphRegistry::from_handles(vec![alpha, beta]) {
|
||||||
|
|
@ -5016,6 +5560,7 @@ graphs:
|
||||||
uri: format!("file://{graph_uri}/"),
|
uri: format!("file://{graph_uri}/"),
|
||||||
engine: Arc::new(engine),
|
engine: Arc::new(engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
|
|
||||||
let registry = GraphRegistry::from_handles(vec![handle]).unwrap();
|
let registry = GraphRegistry::from_handles(vec![handle]).unwrap();
|
||||||
|
|
@ -5138,11 +5683,11 @@ graphs:
|
||||||
let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err();
|
let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err();
|
||||||
let msg = err.to_string();
|
let msg = err.to_string();
|
||||||
assert!(
|
assert!(
|
||||||
msg.contains("top-level `policy.file` is single-graph/CLI-local policy only"),
|
msg.contains("top-level") && msg.contains("policy.file") && msg.contains("not honored"),
|
||||||
"expected single-graph policy guidance, got: {msg}"
|
"expected top-level-not-honored guidance, got: {msg}"
|
||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
msg.contains("graphs.<graph_id>.policy.file"),
|
msg.contains("graphs.<graph_id>"),
|
||||||
"expected per-graph migration guidance, got: {msg}"
|
"expected per-graph migration guidance, got: {msg}"
|
||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
|
|
@ -5151,6 +5696,88 @@ graphs:
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn mode_inference_multi_rejects_top_level_queries() {
|
||||||
|
// Symmetric to the policy guard: a top-level `queries:` block in
|
||||||
|
// multi-graph mode is not honored (each graph uses its own), so it
|
||||||
|
// is a loud error rather than a silent no-op.
|
||||||
|
let temp = tempfile::tempdir().unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
"queries:\n q:\n file: ./q.gq\ngraphs:\n alpha:\n uri: /tmp/alpha.omni\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let err = load_server_settings(Some(&config_path), None, None, None, true).unwrap_err();
|
||||||
|
let msg = err.to_string();
|
||||||
|
assert!(
|
||||||
|
msg.contains("queries") && msg.contains("not honored"),
|
||||||
|
"top-level queries must be rejected in multi-graph mode: {msg}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn single_mode_named_graph_rejects_top_level_blocks() {
|
||||||
|
// Serving a graph by name (`--target`/`server.graph`) uses its
|
||||||
|
// per-graph block; a populated top-level block would be silently
|
||||||
|
// shadowed, so boot refuses and names the per-graph location.
|
||||||
|
let temp = tempfile::tempdir().unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
"policy:\n file: ./top.yaml\ngraphs:\n prod:\n uri: /tmp/prod.omni\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let err =
|
||||||
|
load_server_settings(Some(&config_path), None, Some("prod".to_string()), None, true)
|
||||||
|
.unwrap_err();
|
||||||
|
let msg = err.to_string();
|
||||||
|
assert!(
|
||||||
|
msg.contains("prod") && msg.contains("policy.file") && msg.contains("graphs.prod"),
|
||||||
|
"named single-mode + top-level policy must refuse, naming the graph: {msg}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn single_mode_named_graph_uses_per_graph_policy_and_queries() {
|
||||||
|
// The identity rule: `--target prod` attaches `graphs.prod`'s own
|
||||||
|
// policy + queries, not the top-level ones (which are absent here).
|
||||||
|
let temp = tempfile::tempdir().unwrap();
|
||||||
|
fs::write(
|
||||||
|
temp.path().join("prod.gq"),
|
||||||
|
"query pq() { match { $u: User } return { $u.name } }",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let config_path = temp.path().join("omnigraph.yaml");
|
||||||
|
fs::write(
|
||||||
|
&config_path,
|
||||||
|
"graphs:\n prod:\n uri: /tmp/prod.omni\n policy:\n file: ./prod-policy.yaml\n \
|
||||||
|
queries:\n pq:\n file: ./prod.gq\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let settings =
|
||||||
|
load_server_settings(Some(&config_path), None, Some("prod".to_string()), None, true)
|
||||||
|
.unwrap();
|
||||||
|
match settings.mode {
|
||||||
|
ServerConfigMode::Single {
|
||||||
|
graph_id,
|
||||||
|
policy_file,
|
||||||
|
queries,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
assert_eq!(graph_id, "prod", "named single-mode keeps graph identity");
|
||||||
|
assert!(
|
||||||
|
policy_file
|
||||||
|
.as_ref()
|
||||||
|
.is_some_and(|p| p.ends_with("prod-policy.yaml")),
|
||||||
|
"per-graph policy attached: {policy_file:?}"
|
||||||
|
);
|
||||||
|
assert!(queries.lookup("pq").is_some(), "per-graph query attached");
|
||||||
|
}
|
||||||
|
other => panic!("expected Single mode, got {other:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn mode_inference_normalizes_multi_graph_uris() {
|
fn mode_inference_normalizes_multi_graph_uris() {
|
||||||
let temp = tempfile::tempdir().unwrap();
|
let temp = tempfile::tempdir().unwrap();
|
||||||
|
|
@ -5383,6 +6010,7 @@ graphs:
|
||||||
uri: graph_uri,
|
uri: graph_uri,
|
||||||
engine: Arc::new(engine),
|
engine: Arc::new(engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
});
|
});
|
||||||
let tokens = vec![("act-andrew".to_string(), "secret-token".to_string())];
|
let tokens = vec![("act-andrew".to_string(), "secret-token".to_string())];
|
||||||
let workload = omnigraph_server::workload::WorkloadController::from_env();
|
let workload = omnigraph_server::workload::WorkloadController::from_env();
|
||||||
|
|
@ -5450,6 +6078,7 @@ graphs:
|
||||||
uri: graph_uri,
|
uri: graph_uri,
|
||||||
engine: Arc::new(engine),
|
engine: Arc::new(engine),
|
||||||
policy: None,
|
policy: None,
|
||||||
|
queries: None,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "omnigraph-engine"
|
name = "omnigraph-engine"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
description = "Runtime engine for the Omnigraph graph database."
|
description = "Runtime engine for the Omnigraph graph database."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
@ -16,8 +16,8 @@ default = []
|
||||||
failpoints = ["dep:fail", "fail/failpoints"]
|
failpoints = ["dep:fail", "fail/failpoints"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
|
||||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.1" }
|
||||||
lance = { workspace = true }
|
lance = { workspace = true }
|
||||||
lance-datafusion = { workspace = true }
|
lance-datafusion = { workspace = true }
|
||||||
datafusion = { workspace = true }
|
datafusion = { workspace = true }
|
||||||
|
|
@ -51,7 +51,7 @@ chrono = { workspace = true }
|
||||||
arc-swap = { workspace = true }
|
arc-swap = { workspace = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.1" }
|
||||||
tokio = { workspace = true }
|
tokio = { workspace = true }
|
||||||
lance-namespace-impls = { workspace = true }
|
lance-namespace-impls = { workspace = true }
|
||||||
serial_test = "3"
|
serial_test = "3"
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,37 @@ impl CommitGraph {
|
||||||
self.refresh().await
|
self.refresh().await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Idempotently drop the commit-graph branch `name`, tolerating an
|
||||||
|
/// already-absent branch (see [`TableStore::force_delete_branch`] for the
|
||||||
|
/// same semantics). Used by the best-effort reclaim in `branch_delete` and
|
||||||
|
/// the `cleanup` orphan reconciler. `RefConflict` (referencing descendants)
|
||||||
|
/// is still surfaced.
|
||||||
|
pub async fn force_delete_branch(&mut self, name: &str) -> Result<()> {
|
||||||
|
let mut ds = Dataset::open(&graph_commits_uri(&self.root_uri))
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
match ds.force_delete_branch(name).await {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => {}
|
||||||
|
Err(e) => return Err(OmniError::Lance(e.to_string())),
|
||||||
|
}
|
||||||
|
self.refresh().await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List the named branches present on the commit-graph dataset. The
|
||||||
|
/// `cleanup` reconciler diffs this against the manifest branch set to find
|
||||||
|
/// orphaned commit-graph branches to reclaim.
|
||||||
|
pub async fn list_branches(&self) -> Result<Vec<String>> {
|
||||||
|
let ds = Dataset::open(&graph_commits_uri(&self.root_uri))
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
let branches = ds
|
||||||
|
.list_branches()
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
Ok(branches.into_keys().collect())
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn append_commit(
|
pub async fn append_commit(
|
||||||
&mut self,
|
&mut self,
|
||||||
manifest_branch: Option<&str>,
|
manifest_branch: Option<&str>,
|
||||||
|
|
@ -345,7 +376,7 @@ impl CommitGraph {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn graph_commits_uri(root_uri: &str) -> String {
|
pub(crate) fn graph_commits_uri(root_uri: &str) -> String {
|
||||||
format!("{}/{}", root_uri.trim_end_matches('/'), GRAPH_COMMITS_DIR)
|
format!("{}/{}", root_uri.trim_end_matches('/'), GRAPH_COMMITS_DIR)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -211,14 +211,47 @@ impl GraphCoordinator {
|
||||||
let branch = normalize_branch_name(name)?
|
let branch = normalize_branch_name(name)?
|
||||||
.ok_or_else(|| OmniError::manifest("cannot create branch 'main'".to_string()))?;
|
.ok_or_else(|| OmniError::manifest("cannot create branch 'main'".to_string()))?;
|
||||||
self.ensure_commit_graph_initialized().await?;
|
self.ensure_commit_graph_initialized().await?;
|
||||||
|
|
||||||
|
// Manifest authority flip first.
|
||||||
self.manifest.create_branch(&branch).await?;
|
self.manifest.create_branch(&branch).await?;
|
||||||
failpoints::maybe_fail("branch_create.after_manifest_branch_create")?;
|
|
||||||
if let Some(commit_graph) = &mut self.commit_graph {
|
// Derived commit-graph branch. If anything after the authority flip
|
||||||
commit_graph.create_branch(&branch).await?;
|
// fails, roll back the manifest branch so the branch never half-exists
|
||||||
|
// (a manifest branch with no commit-graph branch breaks the next write).
|
||||||
|
if let Err(err) = self.create_commit_graph_branch(&branch).await {
|
||||||
|
if let Err(rollback_err) = self.manifest.delete_branch(&branch).await {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::branch_create",
|
||||||
|
branch = %branch,
|
||||||
|
error = %rollback_err,
|
||||||
|
"rollback of manifest branch failed after commit-graph create failure",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return Err(err);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create the derived commit-graph branch for `branch`, healing a zombie ref
|
||||||
|
/// left by an incomplete prior delete. The manifest branch was just created
|
||||||
|
/// fresh, so any existing commit-graph branch with this name is provably
|
||||||
|
/// orphaned and is force-dropped before recreating.
|
||||||
|
async fn create_commit_graph_branch(&mut self, branch: &str) -> Result<()> {
|
||||||
|
failpoints::maybe_fail("branch_create.after_manifest_branch_create")?;
|
||||||
|
let Some(commit_graph) = &mut self.commit_graph else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
if commit_graph
|
||||||
|
.list_branches()
|
||||||
|
.await?
|
||||||
|
.iter()
|
||||||
|
.any(|existing| existing == branch)
|
||||||
|
{
|
||||||
|
commit_graph.force_delete_branch(branch).await?;
|
||||||
|
}
|
||||||
|
commit_graph.create_branch(branch).await
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn branch_delete(&mut self, name: &str) -> Result<()> {
|
pub async fn branch_delete(&mut self, name: &str) -> Result<()> {
|
||||||
let branch = normalize_branch_name(name)?
|
let branch = normalize_branch_name(name)?
|
||||||
.ok_or_else(|| OmniError::manifest("cannot delete branch 'main'".to_string()))?;
|
.ok_or_else(|| OmniError::manifest("cannot delete branch 'main'".to_string()))?;
|
||||||
|
|
@ -229,20 +262,43 @@ impl GraphCoordinator {
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Manifest authority flip — the single atomic op that makes the branch
|
||||||
|
// cease to exist. Must succeed; everything after is derived state
|
||||||
|
// reclaimed best-effort.
|
||||||
self.manifest.delete_branch(&branch).await?;
|
self.manifest.delete_branch(&branch).await?;
|
||||||
|
|
||||||
|
// Commit-graph branch is derived state. Reclaim best-effort with the
|
||||||
|
// idempotent force variant: a failure here (or a missing dataset) is
|
||||||
|
// reconciled by `cleanup` and must not fail the delete after the
|
||||||
|
// authority already flipped.
|
||||||
|
if let Err(err) = self.reclaim_commit_graph_branch(&branch).await {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::branch_delete::cleanup",
|
||||||
|
branch = %branch,
|
||||||
|
error = %err,
|
||||||
|
"best-effort commit-graph branch reclaim failed; cleanup will reconcile",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Best-effort, idempotent reclaim of the commit-graph branch `branch`.
|
||||||
|
/// Tolerates an absent commit-graph dataset (a graph that never committed).
|
||||||
|
async fn reclaim_commit_graph_branch(&mut self, branch: &str) -> Result<()> {
|
||||||
|
failpoints::maybe_fail("branch_delete.before_commit_graph_reclaim")?;
|
||||||
if let Some(commit_graph) = &mut self.commit_graph {
|
if let Some(commit_graph) = &mut self.commit_graph {
|
||||||
commit_graph.delete_branch(&branch).await?;
|
commit_graph.force_delete_branch(branch).await
|
||||||
} else if self
|
} else if self
|
||||||
.storage
|
.storage
|
||||||
.exists(&graph_commits_uri(self.root_uri()))
|
.exists(&graph_commits_uri(self.root_uri()))
|
||||||
.await?
|
.await?
|
||||||
{
|
{
|
||||||
let mut commit_graph = CommitGraph::open(self.root_uri()).await?;
|
let mut commit_graph = CommitGraph::open(self.root_uri()).await?;
|
||||||
commit_graph.delete_branch(&branch).await?;
|
commit_graph.force_delete_branch(branch).await
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn snapshot_at_version(&self, version: u64) -> Result<Snapshot> {
|
pub async fn snapshot_at_version(&self, version: u64) -> Result<Snapshot> {
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
|
||||||
pub(crate) use recovery::{
|
pub(crate) use recovery::{
|
||||||
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
||||||
SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
|
SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
|
||||||
new_sidecar, recover_manifest_drift, write_sidecar,
|
list_sidecars, new_sidecar, recover_manifest_drift, write_sidecar,
|
||||||
};
|
};
|
||||||
pub use state::SubTableEntry;
|
pub use state::SubTableEntry;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
@ -48,6 +48,22 @@ const OBJECT_TYPE_TABLE_VERSION: &str = "table_version";
|
||||||
const OBJECT_TYPE_TABLE_TOMBSTONE: &str = "table_tombstone";
|
const OBJECT_TYPE_TABLE_TOMBSTONE: &str = "table_tombstone";
|
||||||
const TABLE_VERSION_MANAGEMENT_KEY: &str = "table_version_management";
|
const TABLE_VERSION_MANAGEMENT_KEY: &str = "table_version_management";
|
||||||
|
|
||||||
|
/// Apply pending internal-schema migrations against `__manifest` on the
|
||||||
|
/// open-for-write path, independent of a publish.
|
||||||
|
///
|
||||||
|
/// `Omnigraph::open(ReadWrite)` calls this before the coordinator reads branch
|
||||||
|
/// state, so branch-observing code (`branch_list`, the schema-apply
|
||||||
|
/// blocking-branch checks) sees the post-migration graph. In particular the
|
||||||
|
/// v2→v3 step sweeps legacy `__run__*` staging branches off `__manifest`
|
||||||
|
/// (MR-770); running it here closes the window where those branches would
|
||||||
|
/// otherwise block schema apply before the first publish runs the migration.
|
||||||
|
///
|
||||||
|
/// Idempotent: a no-op stamp read when the on-disk version already matches.
|
||||||
|
pub(crate) async fn migrate_on_open(root_uri: &str) -> Result<()> {
|
||||||
|
let mut dataset = open_manifest_dataset(root_uri, None).await?;
|
||||||
|
migrations::migrate_internal_schema(&mut dataset).await
|
||||||
|
}
|
||||||
|
|
||||||
/// Immutable point-in-time view of the database.
|
/// Immutable point-in-time view of the database.
|
||||||
///
|
///
|
||||||
/// Cheap to create (no storage I/O). All reads within a query go through one
|
/// Cheap to create (no storage I/O). All reads within a query go through one
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,11 @@ use crate::error::{OmniError, Result};
|
||||||
/// - v2 — `__manifest.object_id` carries the unenforced-PK annotation,
|
/// - v2 — `__manifest.object_id` carries the unenforced-PK annotation,
|
||||||
/// engaging Lance's bloom-filter conflict resolver at commit time. Added
|
/// engaging Lance's bloom-filter conflict resolver at commit time. Added
|
||||||
/// alongside `expected_table_versions` OCC on `ManifestBatchPublisher::publish`.
|
/// alongside `expected_table_versions` OCC on `ManifestBatchPublisher::publish`.
|
||||||
pub(super) const INTERNAL_MANIFEST_SCHEMA_VERSION: u32 = 2;
|
/// - v3 — one-time sweep of legacy `__run__<id>` staging branches left on the
|
||||||
|
/// `__manifest` dataset by the pre-v0.4.0 Run state machine (removed in
|
||||||
|
/// MR-771). Once swept, the `is_internal_run_branch` defense-in-depth guard
|
||||||
|
/// is no longer needed (MR-770).
|
||||||
|
pub(super) const INTERNAL_MANIFEST_SCHEMA_VERSION: u32 = 3;
|
||||||
|
|
||||||
const INTERNAL_SCHEMA_VERSION_KEY: &str = "omnigraph:internal_schema_version";
|
const INTERNAL_SCHEMA_VERSION_KEY: &str = "omnigraph:internal_schema_version";
|
||||||
const OBJECT_ID_PK_KEY: &str = "lance-schema:unenforced-primary-key";
|
const OBJECT_ID_PK_KEY: &str = "lance-schema:unenforced-primary-key";
|
||||||
|
|
@ -89,6 +93,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
|
||||||
migrate_v1_to_v2(dataset).await?;
|
migrate_v1_to_v2(dataset).await?;
|
||||||
current = 2;
|
current = 2;
|
||||||
}
|
}
|
||||||
|
2 => {
|
||||||
|
migrate_v2_to_v3(dataset).await?;
|
||||||
|
current = 3;
|
||||||
|
}
|
||||||
other => {
|
other => {
|
||||||
return Err(OmniError::manifest_internal(format!(
|
return Err(OmniError::manifest_internal(format!(
|
||||||
"no internal-schema migration registered for v{} → v{}",
|
"no internal-schema migration registered for v{} → v{}",
|
||||||
|
|
@ -122,6 +130,51 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
|
||||||
set_stamp(dataset, 2).await
|
set_stamp(dataset, 2).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// v2 → v3: sweep legacy `__run__<id>` staging branches off the `__manifest`
|
||||||
|
/// dataset, then bump the stamp.
|
||||||
|
///
|
||||||
|
/// The pre-v0.4.0 Run state machine (removed in MR-771) created graph-level
|
||||||
|
/// staging branches named `__run__<ulid>` on `__manifest`. MR-771 stopped
|
||||||
|
/// creating them but left any pre-existing ones in place; Lance's
|
||||||
|
/// `list_branches` still enumerates them, so they leak into `branch_list()`
|
||||||
|
/// and count as blocking branches at schema-apply time. This one-time sweep
|
||||||
|
/// removes them so the `is_internal_run_branch` guard can retire (MR-770).
|
||||||
|
///
|
||||||
|
/// The `"__run__"` prefix is inlined here on purpose: this migration must keep
|
||||||
|
/// working after the `run_registry` module (the guard) is deleted, so it does
|
||||||
|
/// not depend on it.
|
||||||
|
///
|
||||||
|
/// Idempotent under both sequential retry and concurrent runners: each run
|
||||||
|
/// re-enumerates `list_branches` fresh, and `force_delete_branch` tolerates a
|
||||||
|
/// branch that is already gone — so a crash before the stamp bump, or a second
|
||||||
|
/// process opening the same legacy graph at the same time, never errors out.
|
||||||
|
async fn migrate_v2_to_v3(dataset: &mut Dataset) -> Result<()> {
|
||||||
|
const LEGACY_RUN_BRANCH_PREFIX: &str = "__run__";
|
||||||
|
let branches = dataset
|
||||||
|
.list_branches()
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
let run_branches: Vec<String> = branches
|
||||||
|
.into_keys()
|
||||||
|
.filter(|name| {
|
||||||
|
name.trim_start_matches('/')
|
||||||
|
.starts_with(LEGACY_RUN_BRANCH_PREFIX)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
for name in run_branches {
|
||||||
|
// `force_delete_branch` deletes even when the `BranchContents` is
|
||||||
|
// already gone. Plain `delete_branch` errors "BranchContents not
|
||||||
|
// found", which would fail a second concurrent open (or a retry that
|
||||||
|
// raced another runner) after the first one swept the branch. Force is
|
||||||
|
// exactly Lance's documented path for cleaning up zombie branches.
|
||||||
|
dataset
|
||||||
|
.force_delete_branch(&name)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
}
|
||||||
|
set_stamp(dataset, 3).await
|
||||||
|
}
|
||||||
|
|
||||||
async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> {
|
async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> {
|
||||||
dataset
|
dataset
|
||||||
.update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())])
|
.update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())])
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
//!
|
//!
|
||||||
//! This module implements the building blocks of the per-sidecar recovery
|
//! This module implements the building blocks of the per-sidecar recovery
|
||||||
//! sweep that closes the documented Phase B → Phase C residual (see
|
//! sweep that closes the documented Phase B → Phase C residual (see
|
||||||
//! `docs/dev/runs.md` "Open-time recovery sweep"). The high-level shape:
|
//! `docs/dev/writes.md` "Open-time recovery sweep"). The high-level shape:
|
||||||
//!
|
//!
|
||||||
//! 1. Each writer that performs a multi-table commit writes a small JSON
|
//! 1. Each writer that performs a multi-table commit writes a small JSON
|
||||||
//! sidecar at `__recovery/{ulid}.json` BEFORE its per-table
|
//! sidecar at `__recovery/{ulid}.json` BEFORE its per-table
|
||||||
|
|
@ -106,6 +106,12 @@ pub(crate) enum SidecarKind {
|
||||||
BranchMerge,
|
BranchMerge,
|
||||||
/// `ensure_indices_for_branch` — index lifecycle commits.
|
/// `ensure_indices_for_branch` — index lifecycle commits.
|
||||||
EnsureIndices,
|
EnsureIndices,
|
||||||
|
/// `optimize_all_tables` — Lance `compact_files` (reserve-fragments +
|
||||||
|
/// rewrite commits) followed by a manifest publish of the compacted
|
||||||
|
/// version. Loose-match like the other multi-commit writers; roll-forward
|
||||||
|
/// is always safe because compaction is content-preserving (Lance
|
||||||
|
/// `Operation::Rewrite` "reorganizes data without semantic modification").
|
||||||
|
Optimize,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// One table's contribution to a sidecar's intended commit. The classifier
|
/// One table's contribution to a sidecar's intended commit. The classifier
|
||||||
|
|
@ -412,11 +418,13 @@ pub(crate) fn parse_sidecar(sidecar_uri: &str, body: &str) -> Result<RecoverySid
|
||||||
/// - **Strict** (`Mutation`, `Load`): exactly one `commit_staged` per
|
/// - **Strict** (`Mutation`, `Load`): exactly one `commit_staged` per
|
||||||
/// table, so `lance_head == manifest_pinned + 1` AND
|
/// table, so `lance_head == manifest_pinned + 1` AND
|
||||||
/// `post_commit_pin == lance_head` is required.
|
/// `post_commit_pin == lance_head` is required.
|
||||||
/// - **Loose** (`SchemaApply`, `EnsureIndices`, `BranchMerge`): the
|
/// - **Loose** (`SchemaApply`, `EnsureIndices`, `BranchMerge`,
|
||||||
/// writer may run N ≥ 1 `commit_staged` calls per table (one per
|
/// `Optimize`): the writer advances the Lance HEAD by N ≥ 1 commits
|
||||||
/// index built + one for the overwrite, etc.; merge tables run
|
/// per table (one per index built + one for the overwrite, etc.;
|
||||||
/// merge_insert + delete_where + index rebuilds) and the exact N
|
/// merge tables run merge_insert + delete_where + index rebuilds;
|
||||||
/// is hard to compute at sidecar-write time. The loose match accepts
|
/// `Optimize` runs `compact_files`, which commits reserve-fragments +
|
||||||
|
/// rewrite) and the exact N is hard to compute at sidecar-write time.
|
||||||
|
/// The loose match accepts
|
||||||
/// any `lance_head > manifest_pinned` as `RolledPastExpected` when
|
/// any `lance_head > manifest_pinned` as `RolledPastExpected` when
|
||||||
/// `pin.expected_version == manifest_pinned` (the writer's CAS
|
/// `pin.expected_version == manifest_pinned` (the writer's CAS
|
||||||
/// target matches what the manifest currently shows). The risk this
|
/// target matches what the manifest currently shows). The risk this
|
||||||
|
|
@ -494,9 +502,12 @@ pub(crate) fn decide(classifications: &[TableClassification]) -> SidecarDecision
|
||||||
/// Skipping the restore in those cases would leave Lance HEAD ahead of
|
/// Skipping the restore in those cases would leave Lance HEAD ahead of
|
||||||
/// the manifest with no recovery artifact left.
|
/// the manifest with no recovery artifact left.
|
||||||
///
|
///
|
||||||
/// Cost: under repeated mid-rollback crashes (rare), Lance HEAD
|
/// Cost: a successful roll-back appends one restore commit and then publishes
|
||||||
/// accumulates extra restore commits that `omnigraph cleanup` reclaims.
|
/// the manifest to match (`roll_back_sidecar`), so the table converges
|
||||||
/// Bounded by the number of recovery iterations — typically 1.
|
/// (`manifest == HEAD`) in one pass. Only repeated crashes *between* the restore
|
||||||
|
/// and that publish (rare) accumulate extra restore commits; each re-classified
|
||||||
|
/// roll-back restores again and `omnigraph cleanup` reclaims the surplus.
|
||||||
|
/// Bounded by the number of interrupted recovery iterations — typically 0.
|
||||||
pub(crate) async fn restore_table_to_version(
|
pub(crate) async fn restore_table_to_version(
|
||||||
table_path: &str,
|
table_path: &str,
|
||||||
branch: Option<&str>,
|
branch: Option<&str>,
|
||||||
|
|
@ -801,13 +812,24 @@ async fn roll_back_sidecar(
|
||||||
sidecar: &RecoverySidecar,
|
sidecar: &RecoverySidecar,
|
||||||
states: &[ClassifiedTable],
|
states: &[ClassifiedTable],
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// Restore every table whose Lance HEAD has drifted from the
|
// Restore every drifted table (RolledPastExpected / UnexpectedAtP1 /
|
||||||
// manifest pin (RolledPastExpected, UnexpectedAtP1,
|
// UnexpectedMultistep) to its manifest-pinned content, then PUBLISH so
|
||||||
// UnexpectedMultistep). NoMovement tables are already at the
|
// `manifest == Lance HEAD` for each — symmetric with roll-forward. The
|
||||||
// manifest pin — no action. Restore is unconditional; repeated
|
// restore commit's content equals the manifest-pinned version, so re-pinning
|
||||||
// mid-rollback crashes accumulate a few extra Lance commits that
|
// the manifest to the new (restored) HEAD is content-correct and closes the
|
||||||
// `omnigraph cleanup` reclaims.
|
// orphaned-drift class (`HEAD > manifest` with no covering sidecar). This is
|
||||||
|
// what makes a failed-then-retried schema_apply converge: after one
|
||||||
|
// roll-back `manifest == HEAD`, so the retry's precondition passes instead of
|
||||||
|
// failing one version higher each iteration.
|
||||||
|
//
|
||||||
|
// NoMovement tables are already at the pin — excluded from both the restore
|
||||||
|
// and the publish. The audit `to_version` stays the *logical* rolled-back-to
|
||||||
|
// version (`manifest_pinned`), while the manifest is published at
|
||||||
|
// `manifest_pinned + 1` (the restore commit, same content) — keep that
|
||||||
|
// asymmetry so the audit records the drift (`from_version > to_version`).
|
||||||
let mut outcomes = Vec::with_capacity(sidecar.tables.len());
|
let mut outcomes = Vec::with_capacity(sidecar.tables.len());
|
||||||
|
let mut updates: Vec<ManifestChange> = Vec::with_capacity(sidecar.tables.len());
|
||||||
|
let mut expected: HashMap<String, u64> = HashMap::with_capacity(sidecar.tables.len());
|
||||||
for (pin, state) in sidecar.tables.iter().zip(states.iter()) {
|
for (pin, state) in sidecar.tables.iter().zip(states.iter()) {
|
||||||
if matches!(
|
if matches!(
|
||||||
state.classification,
|
state.classification,
|
||||||
|
|
@ -821,10 +843,20 @@ async fn roll_back_sidecar(
|
||||||
state.manifest_pinned,
|
state.manifest_pinned,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
// `from_version` records the Lance HEAD observed BEFORE the
|
// Publish the post-restore HEAD, CAS against the current (unmoved)
|
||||||
// restore (the actual drift), not the manifest pin. Operators
|
// manifest pin — the same helper roll-forward uses.
|
||||||
// reading `_graph_commit_recoveries.lance` see "rolled back
|
push_table_update_at_head(
|
||||||
// from v7 to v5" rather than "v5 → v5".
|
root_uri,
|
||||||
|
&pin.table_key,
|
||||||
|
&pin.table_path,
|
||||||
|
pin.table_branch.as_deref(),
|
||||||
|
state.manifest_pinned,
|
||||||
|
&mut updates,
|
||||||
|
&mut expected,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
// `from_version` records the Lance HEAD observed BEFORE the restore
|
||||||
|
// (the actual drift); `to_version` the logical pin we rolled back to.
|
||||||
outcomes.push(TableOutcome {
|
outcomes.push(TableOutcome {
|
||||||
table_key: pin.table_key.clone(),
|
table_key: pin.table_key.clone(),
|
||||||
from_version: state.lance_head,
|
from_version: state.lance_head,
|
||||||
|
|
@ -832,13 +864,23 @@ async fn roll_back_sidecar(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Manifest pin doesn't move on rollback; record an audit-only
|
// Publish the restored HEADs so manifest == HEAD. A degenerate all-NoMovement
|
||||||
// commit at the existing version so operators can correlate via
|
// roll-back restores nothing — there's nothing to publish, and the audit
|
||||||
// `omnigraph commit list --filter actor=omnigraph:recovery`.
|
// records the unchanged snapshot version.
|
||||||
|
let manifest_version = if updates.is_empty() {
|
||||||
|
snapshot.version()
|
||||||
|
} else {
|
||||||
|
let publisher = GraphNamespacePublisher::new(root_uri, sidecar.branch.as_deref());
|
||||||
|
publisher
|
||||||
|
.publish(&updates, &expected)
|
||||||
|
.await?
|
||||||
|
.version()
|
||||||
|
.version
|
||||||
|
};
|
||||||
record_audit(
|
record_audit(
|
||||||
root_uri,
|
root_uri,
|
||||||
sidecar,
|
sidecar,
|
||||||
snapshot.version(),
|
manifest_version,
|
||||||
RecoveryKind::RolledBack,
|
RecoveryKind::RolledBack,
|
||||||
outcomes,
|
outcomes,
|
||||||
)
|
)
|
||||||
|
|
@ -919,44 +961,20 @@ async fn roll_forward_all(
|
||||||
HashMap::with_capacity(sidecar.tables.len() + sidecar.additional_registrations.len());
|
HashMap::with_capacity(sidecar.tables.len() + sidecar.additional_registrations.len());
|
||||||
|
|
||||||
for pin in &sidecar.tables {
|
for pin in &sidecar.tables {
|
||||||
// Open the dataset at its CURRENT Lance HEAD on the pin's branch
|
// Publish to the table's CURRENT Lance HEAD on the pin's branch (not the
|
||||||
// (not at the sidecar's post_commit_pin). For strict-match writers
|
// sidecar's `post_commit_pin`, a lower bound for loose-match writers that
|
||||||
// (Mutation/Load) HEAD == post_commit_pin by construction. For
|
// run multiple commit_staged calls per table). CAS against the pin's
|
||||||
// loose-match writers (SchemaApply/EnsureIndices/BranchMerge) HEAD
|
// pre-write `expected_version`.
|
||||||
// may be higher than post_commit_pin (multiple commit_staged
|
let head_version = push_table_update_at_head(
|
||||||
// calls per table); we want to publish to the actual current HEAD.
|
|
||||||
let head_ds = Dataset::open(&pin.table_path)
|
|
||||||
.await
|
|
||||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
|
||||||
let head_ds = match pin.table_branch.as_deref() {
|
|
||||||
Some(b) if b != "main" => head_ds
|
|
||||||
.checkout_branch(b)
|
|
||||||
.await
|
|
||||||
.map_err(|e| OmniError::Lance(e.to_string()))?,
|
|
||||||
_ => head_ds,
|
|
||||||
};
|
|
||||||
let head_version = head_ds.version().version;
|
|
||||||
|
|
||||||
let row_count = head_ds
|
|
||||||
.count_rows(None)
|
|
||||||
.await
|
|
||||||
.map_err(|e| OmniError::Lance(e.to_string()))? as u64;
|
|
||||||
|
|
||||||
let table_relative_path = super::table_path_for_table_key(&pin.table_key)?;
|
|
||||||
let version_metadata = super::metadata::TableVersionMetadata::from_dataset(
|
|
||||||
root_uri,
|
root_uri,
|
||||||
&table_relative_path,
|
&pin.table_key,
|
||||||
&head_ds,
|
&pin.table_path,
|
||||||
)?;
|
pin.table_branch.as_deref(),
|
||||||
|
pin.expected_version,
|
||||||
updates.push(ManifestChange::Update(SubTableUpdate {
|
&mut updates,
|
||||||
table_key: pin.table_key.clone(),
|
&mut expected,
|
||||||
table_version: head_version,
|
)
|
||||||
table_branch: pin.table_branch.clone(),
|
.await?;
|
||||||
row_count,
|
|
||||||
version_metadata,
|
|
||||||
}));
|
|
||||||
expected.insert(pin.table_key.clone(), pin.expected_version);
|
|
||||||
published_versions.insert(pin.table_key.clone(), head_version);
|
published_versions.insert(pin.table_key.clone(), head_version);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1047,6 +1065,57 @@ async fn roll_forward_all(
|
||||||
Ok((new_dataset.version().version, published_versions))
|
Ok((new_dataset.version().version, published_versions))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Open `table_path` at its branch HEAD, read the current Lance HEAD version,
|
||||||
|
/// row count, and version metadata, and push a `ManifestChange::Update` (plus
|
||||||
|
/// its CAS `expected` entry) that re-pins the manifest to that HEAD. Returns the
|
||||||
|
/// published HEAD version.
|
||||||
|
///
|
||||||
|
/// Shared by `roll_forward_all` (where `expected_version` is the sidecar's
|
||||||
|
/// pre-write pin) and `roll_back_sidecar` (where it is the manifest-pinned
|
||||||
|
/// version the table was just restored to). The HEAD is read AFTER any restore
|
||||||
|
/// in the same single-threaded sweep, so no concurrent writer can have advanced
|
||||||
|
/// it.
|
||||||
|
async fn push_table_update_at_head(
|
||||||
|
root_uri: &str,
|
||||||
|
table_key: &str,
|
||||||
|
table_path: &str,
|
||||||
|
branch: Option<&str>,
|
||||||
|
expected_version: u64,
|
||||||
|
updates: &mut Vec<ManifestChange>,
|
||||||
|
expected: &mut HashMap<String, u64>,
|
||||||
|
) -> Result<u64> {
|
||||||
|
let head_ds = Dataset::open(table_path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
let head_ds = match branch {
|
||||||
|
Some(b) if b != "main" => head_ds
|
||||||
|
.checkout_branch(b)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?,
|
||||||
|
_ => head_ds,
|
||||||
|
};
|
||||||
|
let head_version = head_ds.version().version;
|
||||||
|
let row_count = head_ds
|
||||||
|
.count_rows(None)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))? as u64;
|
||||||
|
let table_relative_path = super::table_path_for_table_key(table_key)?;
|
||||||
|
let version_metadata = super::metadata::TableVersionMetadata::from_dataset(
|
||||||
|
root_uri,
|
||||||
|
&table_relative_path,
|
||||||
|
&head_ds,
|
||||||
|
)?;
|
||||||
|
updates.push(ManifestChange::Update(SubTableUpdate {
|
||||||
|
table_key: table_key.to_string(),
|
||||||
|
table_version: head_version,
|
||||||
|
table_branch: branch.map(str::to_string),
|
||||||
|
row_count,
|
||||||
|
version_metadata,
|
||||||
|
}));
|
||||||
|
expected.insert(table_key.to_string(), expected_version);
|
||||||
|
Ok(head_version)
|
||||||
|
}
|
||||||
|
|
||||||
/// Append the audit row describing this recovery action.
|
/// Append the audit row describing this recovery action.
|
||||||
///
|
///
|
||||||
/// Two-part write: (a) `_graph_commits.lance` row anchored on the recovery
|
/// Two-part write: (a) `_graph_commits.lance` row anchored on the recovery
|
||||||
|
|
|
||||||
|
|
@ -1461,6 +1461,80 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
|
||||||
assert!(reopened.snapshot().entry("node:Person").is_some());
|
assert!(reopened.snapshot().entry("node:Person").is_some());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_v2_to_v3_sweeps_legacy_run_branches_on_write_open() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap();
|
||||||
|
let catalog = build_test_catalog();
|
||||||
|
let mut mc = ManifestCoordinator::init(uri, &catalog).await.unwrap();
|
||||||
|
|
||||||
|
// Synthesize a pre-MR-770 graph: several stale `__run__` staging branches
|
||||||
|
// left on `__manifest` (a real legacy graph accumulates one per run), plus
|
||||||
|
// a real user branch that must survive the sweep. Multiple run branches
|
||||||
|
// exercise the migration's delete loop on a single reused dataset handle.
|
||||||
|
mc.create_branch("__run__01J9LEGACY").await.unwrap();
|
||||||
|
mc.create_branch("__run__01J9SECOND").await.unwrap();
|
||||||
|
mc.create_branch("__run__01J9THIRD").await.unwrap();
|
||||||
|
mc.create_branch("feature").await.unwrap();
|
||||||
|
let before = mc.list_branches().await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
before.iter().filter(|b| b.starts_with("__run__")).count(),
|
||||||
|
3,
|
||||||
|
"precondition: three legacy run branches exist on __manifest; got {before:?}",
|
||||||
|
);
|
||||||
|
|
||||||
|
// Rewind the internal-schema stamp to v2 so the next write-open runs the
|
||||||
|
// v2 → v3 sweep arm (init stamps at the current version, which is past it).
|
||||||
|
{
|
||||||
|
let mut ds = open_manifest_dataset(uri, None).await.unwrap();
|
||||||
|
ds.update_schema_metadata([(
|
||||||
|
"omnigraph:internal_schema_version".to_string(),
|
||||||
|
Some("2".to_string()),
|
||||||
|
)])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let post = open_manifest_dataset(uri, None).await.unwrap();
|
||||||
|
assert_eq!(super::migrations::read_stamp(&post), 2, "stamp rewound to v2");
|
||||||
|
}
|
||||||
|
|
||||||
|
// A no-op publish forces the open-for-write path, which runs the migration.
|
||||||
|
let mut expected = HashMap::new();
|
||||||
|
expected.insert("node:Person".to_string(), 1);
|
||||||
|
GraphNamespacePublisher::new(uri, None)
|
||||||
|
.publish(&[], &expected)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Stamp advanced to current; the legacy run branch is physically gone from
|
||||||
|
// `__manifest` (checked via the raw, unfiltered manifest list — not the
|
||||||
|
// guard-filtered `branch_list`), and the real branch + `main` survive.
|
||||||
|
let post = open_manifest_dataset(uri, None).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
super::migrations::read_stamp(&post),
|
||||||
|
super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
|
||||||
|
);
|
||||||
|
let reopened = ManifestCoordinator::open(uri).await.unwrap();
|
||||||
|
let after = reopened.list_branches().await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!after.iter().any(|b| b.starts_with("__run__")),
|
||||||
|
"legacy run branch must be swept; got {after:?}",
|
||||||
|
);
|
||||||
|
assert!(after.iter().any(|b| b == "feature"), "user branch must survive");
|
||||||
|
assert!(after.iter().any(|b| b == "main"), "main must survive");
|
||||||
|
|
||||||
|
// Idempotent: a second write-open finds the stamp at current and does not
|
||||||
|
// re-run the sweep or error.
|
||||||
|
GraphNamespacePublisher::new(uri, None)
|
||||||
|
.publish(&[], &expected)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let final_ds = open_manifest_dataset(uri, None).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
super::migrations::read_stamp(&final_ds),
|
||||||
|
super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_publish_rejects_manifest_stamped_at_future_version() {
|
async fn test_publish_rejects_manifest_stamped_at_future_version() {
|
||||||
let dir = tempfile::tempdir().unwrap();
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ pub mod graph_coordinator;
|
||||||
pub mod manifest;
|
pub mod manifest;
|
||||||
mod omnigraph;
|
mod omnigraph;
|
||||||
mod recovery_audit;
|
mod recovery_audit;
|
||||||
mod run_registry;
|
|
||||||
mod schema_state;
|
mod schema_state;
|
||||||
pub(crate) mod write_queue;
|
pub(crate) mod write_queue;
|
||||||
|
|
||||||
|
|
@ -13,9 +12,8 @@ pub use manifest::{Snapshot, SubTableEntry, SubTableUpdate};
|
||||||
pub(crate) use omnigraph::ensure_public_branch_ref;
|
pub(crate) use omnigraph::ensure_public_branch_ref;
|
||||||
pub use omnigraph::{
|
pub use omnigraph::{
|
||||||
CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyOptions,
|
CleanupPolicyOptions, InitOptions, MergeOutcome, Omnigraph, OpenMode, SchemaApplyOptions,
|
||||||
SchemaApplyResult, TableCleanupStats, TableOptimizeStats,
|
SchemaApplyResult, SkipReason, TableCleanupStats, TableOptimizeStats,
|
||||||
};
|
};
|
||||||
pub(crate) use run_registry::is_internal_run_branch;
|
|
||||||
|
|
||||||
pub(crate) const SCHEMA_APPLY_LOCK_BRANCH: &str = "__schema_apply_lock__";
|
pub(crate) const SCHEMA_APPLY_LOCK_BRANCH: &str = "__schema_apply_lock__";
|
||||||
|
|
||||||
|
|
@ -69,5 +67,8 @@ pub(crate) fn is_schema_apply_lock_branch(name: &str) -> bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn is_internal_system_branch(name: &str) -> bool {
|
pub(crate) fn is_internal_system_branch(name: &str) -> bool {
|
||||||
is_internal_run_branch(name) || is_schema_apply_lock_branch(name)
|
// Legacy `__run__*` staging branches (Run state machine, removed MR-771)
|
||||||
|
// are swept off `__manifest` by the v2→v3 internal-schema migration, so the
|
||||||
|
// only internal branch the engine still creates is the schema-apply lock.
|
||||||
|
is_schema_apply_lock_branch(name)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ mod optimize;
|
||||||
mod schema_apply;
|
mod schema_apply;
|
||||||
mod table_ops;
|
mod table_ops;
|
||||||
|
|
||||||
pub use optimize::{CleanupPolicyOptions, TableCleanupStats, TableOptimizeStats};
|
pub use optimize::{CleanupPolicyOptions, SkipReason, TableCleanupStats, TableOptimizeStats};
|
||||||
pub use schema_apply::SchemaApplyOptions;
|
pub use schema_apply::SchemaApplyOptions;
|
||||||
|
|
||||||
use super::commit_graph::GraphCommit;
|
use super::commit_graph::GraphCommit;
|
||||||
|
|
@ -67,6 +67,12 @@ pub struct SchemaApplyResult {
|
||||||
pub steps: Vec<SchemaMigrationStep>,
|
pub steps: Vec<SchemaMigrationStep>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct SchemaApplyPreview {
|
||||||
|
pub plan: SchemaMigrationPlan,
|
||||||
|
pub catalog: Catalog,
|
||||||
|
}
|
||||||
|
|
||||||
/// Top-level handle to an Omnigraph database.
|
/// Top-level handle to an Omnigraph database.
|
||||||
///
|
///
|
||||||
/// An Omnigraph is a Lance-native graph database with git-style branching.
|
/// An Omnigraph is a Lance-native graph database with git-style branching.
|
||||||
|
|
@ -340,6 +346,16 @@ impl Omnigraph {
|
||||||
mode: OpenMode,
|
mode: OpenMode,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let root = normalize_root_uri(uri)?;
|
let root = normalize_root_uri(uri)?;
|
||||||
|
// Apply pending internal-schema migrations before the coordinator reads
|
||||||
|
// branch state, so `branch_list` and the schema-apply blocking-branch
|
||||||
|
// checks observe the post-migration graph — notably the v2→v3 sweep of
|
||||||
|
// legacy `__run__*` staging branches (MR-770). ReadWrite only: a
|
||||||
|
// read-only open must not trigger object-store writes, so a read-only
|
||||||
|
// open of an unmigrated legacy graph still lists `__run__*` until its
|
||||||
|
// first read-write open (an accepted, documented limitation).
|
||||||
|
if matches!(mode, OpenMode::ReadWrite) {
|
||||||
|
crate::db::manifest::migrate_on_open(&root).await?;
|
||||||
|
}
|
||||||
// Open the coordinator first so the schema-staging recovery sweep can
|
// Open the coordinator first so the schema-staging recovery sweep can
|
||||||
// compare its snapshot against any leftover staging files.
|
// compare its snapshot against any leftover staging files.
|
||||||
let mut coordinator = GraphCoordinator::open(&root, Arc::clone(&storage)).await?;
|
let mut coordinator = GraphCoordinator::open(&root, Arc::clone(&storage)).await?;
|
||||||
|
|
@ -493,6 +509,14 @@ impl Omnigraph {
|
||||||
schema_apply::plan_schema(self, desired_schema_source, options).await
|
schema_apply::plan_schema(self, desired_schema_source, options).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn preview_schema_apply_with_options(
|
||||||
|
&self,
|
||||||
|
desired_schema_source: &str,
|
||||||
|
options: SchemaApplyOptions,
|
||||||
|
) -> Result<SchemaApplyPreview> {
|
||||||
|
schema_apply::preview_schema_apply(self, desired_schema_source, options).await
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn apply_schema(&self, desired_schema_source: &str) -> Result<SchemaApplyResult> {
|
pub async fn apply_schema(&self, desired_schema_source: &str) -> Result<SchemaApplyResult> {
|
||||||
self.apply_schema_as(desired_schema_source, SchemaApplyOptions::default(), None)
|
self.apply_schema_as(desired_schema_source, SchemaApplyOptions::default(), None)
|
||||||
.await
|
.await
|
||||||
|
|
@ -523,7 +547,28 @@ impl Omnigraph {
|
||||||
options: SchemaApplyOptions,
|
options: SchemaApplyOptions,
|
||||||
actor: Option<&str>,
|
actor: Option<&str>,
|
||||||
) -> Result<SchemaApplyResult> {
|
) -> Result<SchemaApplyResult> {
|
||||||
schema_apply::apply_schema(self, desired_schema_source, options, actor).await
|
self.apply_schema_as_with_catalog_check(desired_schema_source, options, actor, |_| Ok(()))
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn apply_schema_as_with_catalog_check<F>(
|
||||||
|
&self,
|
||||||
|
desired_schema_source: &str,
|
||||||
|
options: SchemaApplyOptions,
|
||||||
|
actor: Option<&str>,
|
||||||
|
validate_catalog: F,
|
||||||
|
) -> Result<SchemaApplyResult>
|
||||||
|
where
|
||||||
|
F: FnOnce(&Catalog) -> Result<()>,
|
||||||
|
{
|
||||||
|
schema_apply::apply_schema(
|
||||||
|
self,
|
||||||
|
desired_schema_source,
|
||||||
|
options,
|
||||||
|
actor,
|
||||||
|
validate_catalog,
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn ensure_schema_apply_idle(&self, operation: &str) -> Result<()> {
|
pub(crate) async fn ensure_schema_apply_idle(&self, operation: &str) -> Result<()> {
|
||||||
|
|
@ -1058,11 +1103,14 @@ impl Omnigraph {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn cleanup_deleted_branch_tables(
|
/// Best-effort reclaim of the per-table Lance forks a just-deleted branch
|
||||||
&self,
|
/// owned. Runs AFTER the manifest authority flip, so the branch is already
|
||||||
branch: &str,
|
/// gone and these forks are unreachable orphans. A failure here (transient
|
||||||
owned_tables: &[(String, String)],
|
/// object-store error, the `branch_delete.before_table_cleanup` failpoint)
|
||||||
) -> Result<()> {
|
/// is logged and swallowed: the `cleanup` reconciler is the guaranteed
|
||||||
|
/// backstop that converges any leftover orphan. Uses `force_delete_branch`
|
||||||
|
/// so a partially-reclaimed retry is idempotent.
|
||||||
|
async fn cleanup_deleted_branch_tables(&self, branch: &str, owned_tables: &[(String, String)]) {
|
||||||
let mut seen_paths = HashSet::new();
|
let mut seen_paths = HashSet::new();
|
||||||
let mut cleanup_targets = owned_tables
|
let mut cleanup_targets = owned_tables
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -1073,15 +1121,21 @@ impl Omnigraph {
|
||||||
|
|
||||||
for (table_key, table_path) in cleanup_targets {
|
for (table_key, table_path) in cleanup_targets {
|
||||||
let dataset_uri = self.table_store.dataset_uri(&table_path);
|
let dataset_uri = self.table_store.dataset_uri(&table_path);
|
||||||
if let Err(err) = self.table_store.delete_branch(&dataset_uri, branch).await {
|
let outcome = match crate::failpoints::maybe_fail("branch_delete.before_table_cleanup")
|
||||||
return Err(OmniError::manifest_internal(format!(
|
{
|
||||||
"branch '{}' was deleted but cleanup failed for {}: {}",
|
Ok(()) => self.table_store.force_delete_branch(&dataset_uri, branch).await,
|
||||||
branch, table_key, err
|
Err(injected) => Err(injected),
|
||||||
)));
|
};
|
||||||
|
if let Err(err) = outcome {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::branch_delete::cleanup",
|
||||||
|
branch = %branch,
|
||||||
|
table = %table_key,
|
||||||
|
error = %err,
|
||||||
|
"best-effort fork reclaim failed; cleanup will reconcile the orphan",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> {
|
async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> {
|
||||||
|
|
@ -1105,9 +1159,12 @@ impl Omnigraph {
|
||||||
.map(|entry| (entry.table_key.clone(), entry.table_path.clone()))
|
.map(|entry| (entry.table_key.clone(), entry.table_path.clone()))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Authority flip (+ best-effort commit-graph reclaim) — must succeed.
|
||||||
self.coordinator.write().await.branch_delete(branch).await?;
|
self.coordinator.write().await.branch_delete(branch).await?;
|
||||||
|
// Best-effort per-table fork reclaim; cleanup reconciles any leftover.
|
||||||
self.cleanup_deleted_branch_tables(branch, &owned_tables)
|
self.cleanup_deleted_branch_tables(branch, &owned_tables)
|
||||||
.await
|
.await;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn normalize_branch_name(branch: &str) -> Result<Option<String>> {
|
pub(crate) fn normalize_branch_name(branch: &str) -> Result<Option<String>> {
|
||||||
|
|
@ -1444,12 +1501,6 @@ pub(crate) fn normalize_branch_name(branch: &str) -> Result<Option<String>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn ensure_public_branch_ref(branch: &str, operation: &str) -> Result<()> {
|
pub(crate) fn ensure_public_branch_ref(branch: &str, operation: &str) -> Result<()> {
|
||||||
if super::is_internal_run_branch(branch) {
|
|
||||||
return Err(OmniError::manifest(format!(
|
|
||||||
"{} does not allow internal run ref '{}'",
|
|
||||||
operation, branch
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
if is_internal_system_branch(branch) {
|
if is_internal_system_branch(branch) {
|
||||||
return Err(OmniError::manifest(format!(
|
return Err(OmniError::manifest(format!(
|
||||||
"{} does not allow internal system ref '{}'",
|
"{} does not allow internal system ref '{}'",
|
||||||
|
|
@ -1853,7 +1904,6 @@ fn json_value_from_array(array: &dyn Array, row: usize) -> Result<serde_json::Va
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::db::is_internal_run_branch;
|
|
||||||
use crate::db::manifest::ManifestCoordinator;
|
use crate::db::manifest::ManifestCoordinator;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
@ -2191,11 +2241,11 @@ edge WorksAt: Person -> Company
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_apply_schema_succeeds_after_load() {
|
async fn test_apply_schema_succeeds_after_load() {
|
||||||
// Historical: schema apply used to be blocked by leftover
|
// Historical: schema apply used to be blocked by leftover
|
||||||
// `__run__` branches. A defense-in-depth filter now skips
|
// `__run__` branches. The Run state machine was removed in
|
||||||
// internal system branches, and run branches were made
|
// MR-771, so a fresh graph never creates a `__run__` branch;
|
||||||
// ephemeral on every terminal state — so in practice no
|
// legacy ones are swept by the v2→v3 manifest migration. This
|
||||||
// `__run__` branch survives publish. The filter still guards
|
// asserts the invariant a current graph upholds: publish leaves
|
||||||
// the invariant.
|
// no `__run__` branch behind, so schema apply proceeds.
|
||||||
let dir = tempfile::tempdir().unwrap();
|
let dir = tempfile::tempdir().unwrap();
|
||||||
let uri = dir.path().to_str().unwrap();
|
let uri = dir.path().to_str().unwrap();
|
||||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||||
|
|
@ -2210,8 +2260,8 @@ edge WorksAt: Person -> Company
|
||||||
|
|
||||||
let all_branches = db.coordinator.read().await.all_branches().await.unwrap();
|
let all_branches = db.coordinator.read().await.all_branches().await.unwrap();
|
||||||
assert!(
|
assert!(
|
||||||
!all_branches.iter().any(|b| is_internal_run_branch(b)),
|
!all_branches.iter().any(|b| b.starts_with("__run__")),
|
||||||
"run branch should be deleted after publish, got: {:?}",
|
"no __run__ branch should exist after publish, got: {:?}",
|
||||||
all_branches
|
all_branches
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -2223,6 +2273,56 @@ edge WorksAt: Person -> Company
|
||||||
assert!(result.applied, "schema apply should have applied");
|
assert!(result.applied, "schema apply should have applied");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression (MR-770): a pre-v0.4.0 graph that still carries a stale
|
||||||
|
/// `__run__*` branch on `__manifest` must not block schema apply. The
|
||||||
|
/// v2→v3 sweep runs in `Omnigraph::open(ReadWrite)` — before the
|
||||||
|
/// schema-apply blocking-branch check — so apply succeeds with no
|
||||||
|
/// intervening publish.
|
||||||
|
///
|
||||||
|
/// Confirmed to fail before the open-time migration landed: the reopened
|
||||||
|
/// graph still listed `__run__legacy`, and `apply_schema` returned
|
||||||
|
/// "found non-main branches: __run__legacy".
|
||||||
|
#[tokio::test]
|
||||||
|
async fn legacy_run_branch_is_swept_on_open_and_does_not_block_schema_apply() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap();
|
||||||
|
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||||
|
|
||||||
|
// Synthesize a legacy graph: a stale `__run__` branch on `__manifest`
|
||||||
|
// plus the manifest stamp rewound to v2 (pre-sweep).
|
||||||
|
db.branch_create("__run__legacy").await.unwrap();
|
||||||
|
drop(db);
|
||||||
|
{
|
||||||
|
let mut ds = lance::Dataset::open(&format!("{}/__manifest", uri))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
ds.update_schema_metadata([(
|
||||||
|
"omnigraph:internal_schema_version".to_string(),
|
||||||
|
Some("2".to_string()),
|
||||||
|
)])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reopen (ReadWrite): the open-time migration must sweep `__run__legacy`
|
||||||
|
// before any branch-observing code runs.
|
||||||
|
let db = Omnigraph::open(uri).await.unwrap();
|
||||||
|
let branches = db.branch_list().await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!branches.iter().any(|b| b.starts_with("__run__")),
|
||||||
|
"open-time migration must sweep legacy __run__ branches; got {branches:?}",
|
||||||
|
);
|
||||||
|
|
||||||
|
// Schema apply must proceed with no intervening publish — the
|
||||||
|
// blocking-branch check no longer sees `__run__legacy`.
|
||||||
|
let desired = TEST_SCHEMA.replace(
|
||||||
|
" age: I32?\n}",
|
||||||
|
" age: I32?\n nickname: String?\n}",
|
||||||
|
);
|
||||||
|
let result = db.apply_schema(&desired).await.unwrap();
|
||||||
|
assert!(result.applied, "schema apply should have applied");
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_apply_schema_adds_index_for_existing_property() {
|
async fn test_apply_schema_adds_index_for_existing_property() {
|
||||||
let dir = tempfile::tempdir().unwrap();
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,14 @@
|
||||||
//! Two dials:
|
//! Two dials:
|
||||||
//!
|
//!
|
||||||
//! * `optimize_all_tables` — Lance `compact_files` on every table. Rewrites
|
//! * `optimize_all_tables` — Lance `compact_files` on every table. Rewrites
|
||||||
//! small fragments into fewer large ones. Non-destructive (creates a new
|
//! small fragments into fewer large ones, then **publishes the compacted
|
||||||
//! version; old fragments remain reachable via older manifest versions).
|
//! version to the `__manifest`** so the manifest's `table_version` tracks the
|
||||||
|
//! compacted Lance HEAD (reads pin the manifest version, so without the
|
||||||
|
//! publish compaction would be invisible to readers and would break the
|
||||||
|
//! HEAD-vs-manifest precondition of schema apply / strict writes). Compaction
|
||||||
|
//! is content-preserving (Lance `Operation::Rewrite` "reorganizes data
|
||||||
|
//! without semantic modification"), so old fragments remain reachable via
|
||||||
|
//! older manifest versions until `cleanup` runs.
|
||||||
//! * `cleanup_all_tables` — Lance `cleanup_old_versions` on every table.
|
//! * `cleanup_all_tables` — Lance `cleanup_old_versions` on every table.
|
||||||
//! Removes manifests (and their unique fragments) older than the configured
|
//! Removes manifests (and their unique fragments) older than the configured
|
||||||
//! retention. Destructive to version history — callers should gate this
|
//! retention. Destructive to version history — callers should gate this
|
||||||
|
|
@ -23,7 +29,9 @@ use std::time::Duration;
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use lance::dataset::cleanup::{CleanupPolicy, RemovalStats};
|
use lance::dataset::cleanup::{CleanupPolicy, RemovalStats};
|
||||||
use lance::dataset::optimize::{CompactionMetrics, CompactionOptions, compact_files};
|
use lance::dataset::optimize::{
|
||||||
|
CompactionMetrics, CompactionOptions, compact_files, plan_compaction,
|
||||||
|
};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
|
@ -40,6 +48,20 @@ fn maint_concurrency() -> usize {
|
||||||
.unwrap_or(DEFAULT_MAINT_CONCURRENCY)
|
.unwrap_or(DEFAULT_MAINT_CONCURRENCY)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the installed Lance can compact a dataset that contains blob
|
||||||
|
/// columns. `false` today: Lance `compact_files` forces
|
||||||
|
/// `BlobHandling::AllBinary` on the read side, and the blob-v2 struct decoder
|
||||||
|
/// mis-counts columns ("there were more fields in the schema than provided
|
||||||
|
/// column indices"), failing even a pristine uniform-V2_2 multi-fragment blob
|
||||||
|
/// table. Reads are unaffected (queries use descriptor handling).
|
||||||
|
///
|
||||||
|
/// While `false`, [`optimize_all_tables`] skips blob-bearing tables and reports
|
||||||
|
/// [`SkipReason::BlobColumnsUnsupportedByLance`] instead of aborting the whole
|
||||||
|
/// sweep. Flip to `true` once the upstream Lance fix ships — the
|
||||||
|
/// `lance_surface_guards.rs::compact_files_still_fails_on_blob_columns` guard
|
||||||
|
/// turns red on that bump and forces this flip. Tracked in `docs/dev/lance.md`.
|
||||||
|
const LANCE_SUPPORTS_BLOB_COMPACTION: bool = false;
|
||||||
|
|
||||||
/// Retention knobs for [`cleanup_all_tables`]. At least one must be set or
|
/// Retention knobs for [`cleanup_all_tables`]. At least one must be set or
|
||||||
/// nothing is cleaned. If both are set, Lance applies them as AND (a manifest
|
/// nothing is cleaned. If both are set, Lance applies them as AND (a manifest
|
||||||
/// is kept if it satisfies either — i.e. only manifests older than BOTH the
|
/// is kept if it satisfies either — i.e. only manifests older than BOTH the
|
||||||
|
|
@ -52,76 +74,314 @@ pub struct CleanupPolicyOptions {
|
||||||
pub older_than: Option<Duration>,
|
pub older_than: Option<Duration>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Per-table outcome of `optimize_all_tables`.
|
/// Why `optimize` did not compact a table. Typed so callers branch on the
|
||||||
|
/// reason rather than sniffing a string. One variant today, gated by
|
||||||
|
/// [`LANCE_SUPPORTS_BLOB_COMPACTION`].
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub enum SkipReason {
|
||||||
|
/// The table has one or more `Blob` columns. Lance `compact_files` forces
|
||||||
|
/// `BlobHandling::AllBinary`, which mis-decodes blob-v2 columns; see
|
||||||
|
/// [`LANCE_SUPPORTS_BLOB_COMPACTION`] and `docs/dev/lance.md`.
|
||||||
|
BlobColumnsUnsupportedByLance,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SkipReason {
|
||||||
|
/// Stable machine-readable token for serialized output (e.g. CLI `--json`).
|
||||||
|
/// Once emitted this is part of the output contract — keep it stable.
|
||||||
|
pub fn as_str(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
SkipReason::BlobColumnsUnsupportedByLance => "blob_columns_unsupported_by_lance",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for SkipReason {
|
||||||
|
/// Human-readable reason for CLI and log output.
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let msg = match self {
|
||||||
|
SkipReason::BlobColumnsUnsupportedByLance => {
|
||||||
|
"blob columns — Lance compaction unsupported"
|
||||||
|
}
|
||||||
|
};
|
||||||
|
f.write_str(msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-table outcome of `optimize_all_tables`. This is a returned result type,
|
||||||
|
/// not built by callers, so it is `#[non_exhaustive]`: future fields stay
|
||||||
|
/// non-breaking and downstream code reads fields rather than constructing it.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
pub struct TableOptimizeStats {
|
pub struct TableOptimizeStats {
|
||||||
pub table_key: String,
|
pub table_key: String,
|
||||||
/// Number of source fragments that were rewritten by Lance.
|
/// Number of source fragments that were rewritten by Lance.
|
||||||
pub fragments_removed: usize,
|
pub fragments_removed: usize,
|
||||||
/// Number of new, larger fragments Lance produced.
|
/// Number of new, larger fragments Lance produced.
|
||||||
pub fragments_added: usize,
|
pub fragments_added: usize,
|
||||||
/// Did this table get a new Lance manifest version from the compaction?
|
/// Did this table get a new manifest version from the compaction? True when
|
||||||
|
/// compaction ran and its compacted version was published to `__manifest`.
|
||||||
pub committed: bool,
|
pub committed: bool,
|
||||||
|
/// `Some(reason)` if this table was deliberately not compacted. When set,
|
||||||
|
/// `fragments_removed == 0`, `fragments_added == 0`, and `!committed`.
|
||||||
|
pub skipped: Option<SkipReason>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Per-table outcome of `cleanup_all_tables`.
|
impl TableOptimizeStats {
|
||||||
|
/// Stat for a table that Lance actually compacted.
|
||||||
|
fn compacted(table_key: String, metrics: &CompactionMetrics, committed: bool) -> Self {
|
||||||
|
Self {
|
||||||
|
table_key,
|
||||||
|
fragments_removed: metrics.fragments_removed,
|
||||||
|
fragments_added: metrics.fragments_added,
|
||||||
|
committed,
|
||||||
|
skipped: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stat for a table that was deliberately skipped (compaction not attempted).
|
||||||
|
fn skipped(table_key: String, reason: SkipReason) -> Self {
|
||||||
|
Self {
|
||||||
|
table_key,
|
||||||
|
fragments_removed: 0,
|
||||||
|
fragments_added: 0,
|
||||||
|
committed: false,
|
||||||
|
skipped: Some(reason),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-table outcome of `cleanup_all_tables`. `error` is `Some` when this
|
||||||
|
/// table's version GC failed; cleanup is fault-isolated per table, so a single
|
||||||
|
/// table's failure is recorded here rather than aborting the whole sweep.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct TableCleanupStats {
|
pub struct TableCleanupStats {
|
||||||
pub table_key: String,
|
pub table_key: String,
|
||||||
pub bytes_removed: u64,
|
pub bytes_removed: u64,
|
||||||
pub old_versions_removed: u64,
|
pub old_versions_removed: u64,
|
||||||
|
pub error: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run Lance `compact_files` on every node + edge table on `main`.
|
/// Run Lance `compact_files` on every node + edge table on `main`, publishing
|
||||||
/// Tables run in parallel (bounded concurrency).
|
/// each compacted table's new version to the `__manifest`. Tables run in
|
||||||
|
/// parallel (bounded concurrency); each is fault-isolated only at the Lance
|
||||||
|
/// level — a publish error is propagated (the recovery sidecar covers it).
|
||||||
pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStats>> {
|
pub async fn optimize_all_tables(db: &Omnigraph) -> Result<Vec<TableOptimizeStats>> {
|
||||||
db.ensure_schema_state_valid().await?;
|
db.ensure_schema_state_valid().await?;
|
||||||
db.ensure_schema_apply_idle("optimize").await?;
|
db.ensure_schema_apply_idle("optimize").await?;
|
||||||
|
|
||||||
|
// Refuse on an unrecovered graph. A pending recovery sidecar means a failed
|
||||||
|
// write left partial state that the open-time sweep must resolve (roll
|
||||||
|
// forward/back) first; compacting + publishing a table covered by such a
|
||||||
|
// sidecar could commit a partial write the sweep would roll back. Reopen the
|
||||||
|
// graph to run recovery, then re-run optimize.
|
||||||
|
if !crate::db::manifest::list_sidecars(db.root_uri(), db.storage_adapter())
|
||||||
|
.await?
|
||||||
|
.is_empty()
|
||||||
|
{
|
||||||
|
return Err(OmniError::manifest_conflict(
|
||||||
|
"optimize requires a clean recovery state; reopen the graph to run the \
|
||||||
|
recovery sweep before optimizing",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
let resolved = db.resolved_branch_target(None).await?;
|
let resolved = db.resolved_branch_target(None).await?;
|
||||||
let snapshot = resolved.snapshot;
|
let snapshot = resolved.snapshot;
|
||||||
|
|
||||||
let table_tasks: Vec<_> = all_table_keys(&db.catalog())
|
// Compute per-table state (path + whether it has blob columns) up front, in
|
||||||
.into_iter()
|
// a scope that drops the catalog handle before the async stream starts.
|
||||||
.filter_map(|table_key| {
|
let table_tasks: Vec<(String, String, bool)> = {
|
||||||
let entry = snapshot.entry(&table_key)?;
|
let catalog = db.catalog();
|
||||||
|
let mut tasks = Vec::new();
|
||||||
|
for table_key in all_table_keys(&catalog) {
|
||||||
|
let Some(entry) = snapshot.entry(&table_key) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
let full_path = format!("{}/{}", db.root_uri, entry.table_path);
|
let full_path = format!("{}/{}", db.root_uri, entry.table_path);
|
||||||
Some((table_key, full_path))
|
let has_blob = !blob_properties_for_table_key(&catalog, &table_key)?.is_empty();
|
||||||
})
|
tasks.push((table_key, full_path, has_blob));
|
||||||
.collect();
|
}
|
||||||
|
tasks
|
||||||
|
};
|
||||||
|
|
||||||
if table_tasks.is_empty() {
|
if table_tasks.is_empty() {
|
||||||
return Ok(Vec::new());
|
return Ok(Vec::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
let concurrency = maint_concurrency().min(table_tasks.len()).max(1);
|
let concurrency = maint_concurrency().min(table_tasks.len()).max(1);
|
||||||
let table_store = &db.table_store;
|
|
||||||
|
|
||||||
let stats: Vec<Result<TableOptimizeStats>> = futures::stream::iter(table_tasks.into_iter())
|
let stats: Vec<Result<TableOptimizeStats>> = futures::stream::iter(table_tasks.into_iter())
|
||||||
.map(|(table_key, full_path)| async move {
|
.map(move |(table_key, full_path, has_blob)| async move {
|
||||||
let mut ds = table_store
|
optimize_one_table(db, table_key, full_path, has_blob).await
|
||||||
.open_dataset_head_for_write(&table_key, &full_path, None)
|
|
||||||
.await?;
|
|
||||||
let version_before = ds.version().version;
|
|
||||||
let metrics: CompactionMetrics =
|
|
||||||
compact_files(&mut ds, CompactionOptions::default(), None)
|
|
||||||
.await
|
|
||||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
|
||||||
let version_after = ds.version().version;
|
|
||||||
Ok(TableOptimizeStats {
|
|
||||||
table_key,
|
|
||||||
fragments_removed: metrics.fragments_removed,
|
|
||||||
fragments_added: metrics.fragments_added,
|
|
||||||
committed: version_after != version_before,
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
.buffer_unordered(concurrency)
|
.buffer_unordered(concurrency)
|
||||||
.collect()
|
.collect()
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
// Invalidate caches for any table that published a compaction — done BEFORE
|
||||||
|
// propagating a sibling table's error, since the published versions are
|
||||||
|
// durable and reads must observe the new fragment layout (Lance invalidates
|
||||||
|
// the original row addresses on rewrite). The CSR/CSC graph topology index
|
||||||
|
// is rebuilt only when an edge table moved. Mirrors schema_apply's
|
||||||
|
// post-publish invalidation.
|
||||||
|
let any_committed = stats
|
||||||
|
.iter()
|
||||||
|
.any(|s| matches!(s, Ok(st) if st.committed));
|
||||||
|
let edge_committed = stats
|
||||||
|
.iter()
|
||||||
|
.any(|s| matches!(s, Ok(st) if st.committed && st.table_key.starts_with("edge:")));
|
||||||
|
if any_committed {
|
||||||
|
db.runtime_cache.invalidate_all().await;
|
||||||
|
if edge_committed {
|
||||||
|
db.invalidate_graph_index().await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
stats.into_iter().collect()
|
stats.into_iter().collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compact one table and publish the compacted version to the `__manifest`.
|
||||||
|
///
|
||||||
|
/// Compaction (`compact_files`) advances the *dataset's* Lance HEAD via a
|
||||||
|
/// reserve-fragments + rewrite commit, but Lance knows nothing about the
|
||||||
|
/// `__manifest`. To keep the manifest the single authority for each table's
|
||||||
|
/// visible version (invariant 2), optimize must publish the compacted version.
|
||||||
|
/// The Lance-HEAD-before-manifest-publish gap is unavoidable (Lance has no
|
||||||
|
/// staged/uncommitted compaction), so it is covered by a recovery sidecar like
|
||||||
|
/// the other multi-commit writers; roll-forward is always safe because
|
||||||
|
/// compaction is content-preserving.
|
||||||
|
async fn optimize_one_table(
|
||||||
|
db: &Omnigraph,
|
||||||
|
table_key: String,
|
||||||
|
full_path: String,
|
||||||
|
has_blob: bool,
|
||||||
|
) -> Result<TableOptimizeStats> {
|
||||||
|
// Lance `compact_files` mis-decodes blob-v2 columns under the forced
|
||||||
|
// `BlobHandling::AllBinary` read (see LANCE_SUPPORTS_BLOB_COMPACTION). Skip
|
||||||
|
// blob-bearing tables and report it rather than aborting the whole sweep.
|
||||||
|
if has_blob && !LANCE_SUPPORTS_BLOB_COMPACTION {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::optimize",
|
||||||
|
table = %table_key,
|
||||||
|
"skipping compaction: table has blob columns the current Lance \
|
||||||
|
cannot rewrite (blob-v2 AllBinary decode bug); other tables \
|
||||||
|
unaffected — rerun after the Lance fix",
|
||||||
|
);
|
||||||
|
return Ok(TableOptimizeStats::skipped(
|
||||||
|
table_key,
|
||||||
|
SkipReason::BlobColumnsUnsupportedByLance,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize the whole compact→publish against concurrent mutations on this
|
||||||
|
// (table, main): compaction is a Rewrite op that retryable-conflicts with a
|
||||||
|
// concurrent Merge/Update/Delete on overlapping fragments, and an
|
||||||
|
// interleaved write would also move the manifest version out from under the
|
||||||
|
// CAS below. Holding the queue makes the CAS baseline read under it exact.
|
||||||
|
let _guard = db
|
||||||
|
.write_queue()
|
||||||
|
.acquire_many(&[(table_key.clone(), None)])
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let mut ds = db
|
||||||
|
.table_store
|
||||||
|
.open_dataset_head_for_write(&table_key, &full_path, None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// CAS baseline: the table's current manifest version, read under the queue
|
||||||
|
// (in-memory coordinator snapshot, no storage I/O — stable for this section).
|
||||||
|
let expected_version = db
|
||||||
|
.snapshot()
|
||||||
|
.await
|
||||||
|
.entry(&table_key)
|
||||||
|
.map(|e| e.table_version)
|
||||||
|
.ok_or_else(|| OmniError::manifest(format!("no manifest entry for {}", table_key)))?;
|
||||||
|
|
||||||
|
// Precise "will it compact?" check — `plan_compaction` also accounts for
|
||||||
|
// deletion materialization (which can rewrite even a single fragment). A
|
||||||
|
// steady-state already-compacted table yields an empty plan and is never
|
||||||
|
// pinned in a sidecar (a zero-commit pin would classify NoMovement on
|
||||||
|
// recovery and force an all-or-nothing rollback). There is no drift to
|
||||||
|
// reconcile here: optimize runs only on a recovered graph (the pending-
|
||||||
|
// sidecar guard above), and recovery roll-back now publishes, so
|
||||||
|
// `HEAD == manifest` holds going in.
|
||||||
|
let options = CompactionOptions::default();
|
||||||
|
let plan = plan_compaction(&ds, &options)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
if plan.num_tasks() == 0 {
|
||||||
|
return Ok(TableOptimizeStats::compacted(
|
||||||
|
table_key,
|
||||||
|
&CompactionMetrics::default(),
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase A: recovery sidecar BEFORE compaction advances the Lance HEAD, so a
|
||||||
|
// crash before the manifest publish rolls forward on next open.
|
||||||
|
let sidecar = crate::db::manifest::new_sidecar(
|
||||||
|
crate::db::manifest::SidecarKind::Optimize,
|
||||||
|
None,
|
||||||
|
// optimize is system-attributed (no `optimize_as` actor API today).
|
||||||
|
None,
|
||||||
|
vec![crate::db::manifest::SidecarTablePin {
|
||||||
|
table_key: table_key.clone(),
|
||||||
|
table_path: full_path.clone(),
|
||||||
|
expected_version,
|
||||||
|
// Lower bound — compaction commits N≥1 versions (reserve + rewrite);
|
||||||
|
// the classifier loose-matches SidecarKind::Optimize.
|
||||||
|
post_commit_pin: expected_version + 1,
|
||||||
|
table_branch: None,
|
||||||
|
}],
|
||||||
|
);
|
||||||
|
let handle =
|
||||||
|
crate::db::manifest::write_sidecar(db.root_uri(), db.storage_adapter(), &sidecar).await?;
|
||||||
|
|
||||||
|
// Phase B: compaction (reserve-fragments + rewrite commits advance HEAD).
|
||||||
|
let version_before = ds.version().version;
|
||||||
|
let metrics: CompactionMetrics = compact_files(&mut ds, options, None)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
let version_after = ds.version().version;
|
||||||
|
let committed = version_after != version_before;
|
||||||
|
|
||||||
|
// Pin the per-writer Phase B → Phase C residual for optimize: Lance HEAD has
|
||||||
|
// advanced but the manifest publish below hasn't run.
|
||||||
|
crate::failpoints::maybe_fail("optimize.post_phase_b_pre_manifest_commit")?;
|
||||||
|
|
||||||
|
// Phase C: publish the compacted version to the manifest (one CAS commit,
|
||||||
|
// expected = the version observed under the queue). On failure the sidecar
|
||||||
|
// is intentionally left for the open-time recovery sweep to roll forward.
|
||||||
|
if committed {
|
||||||
|
let state = db.table_store.table_state(&full_path, &ds).await?;
|
||||||
|
let update = crate::db::SubTableUpdate {
|
||||||
|
table_key: table_key.clone(),
|
||||||
|
table_version: state.version,
|
||||||
|
table_branch: None,
|
||||||
|
row_count: state.row_count,
|
||||||
|
version_metadata: state.version_metadata,
|
||||||
|
};
|
||||||
|
let mut expected = std::collections::HashMap::new();
|
||||||
|
expected.insert(table_key.clone(), expected_version);
|
||||||
|
db.coordinator
|
||||||
|
.write()
|
||||||
|
.await
|
||||||
|
.commit_updates_with_actor_with_expected(&[update], &expected, None)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase D: delete the sidecar (best-effort; recovery resolves a leftover).
|
||||||
|
if let Err(err) = crate::db::manifest::delete_sidecar(&handle, db.storage_adapter()).await {
|
||||||
|
tracing::warn!(
|
||||||
|
error = %err,
|
||||||
|
operation_id = handle.operation_id.as_str(),
|
||||||
|
"optimize recovery sidecar cleanup failed; next open's recovery sweep will resolve it"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(TableOptimizeStats::compacted(table_key, &metrics, committed))
|
||||||
|
}
|
||||||
|
|
||||||
/// Run Lance `cleanup_old_versions` on every node + edge table on `main`,
|
/// Run Lance `cleanup_old_versions` on every node + edge table on `main`,
|
||||||
/// using [`CleanupPolicyOptions`]. The latest manifest is always preserved
|
/// using [`CleanupPolicyOptions`]. The latest manifest is always preserved
|
||||||
/// regardless (Lance invariant).
|
/// regardless (Lance invariant).
|
||||||
|
|
@ -138,6 +398,26 @@ pub async fn cleanup_all_tables(
|
||||||
db.ensure_schema_state_valid().await?;
|
db.ensure_schema_state_valid().await?;
|
||||||
db.ensure_schema_apply_idle("cleanup").await?;
|
db.ensure_schema_apply_idle("cleanup").await?;
|
||||||
|
|
||||||
|
// Reclaim orphaned branch forks (from an incomplete prior `branch_delete`)
|
||||||
|
// before version GC. Authority-derived and idempotent; the eager
|
||||||
|
// best-effort reclaim in `branch_delete` covers the common case, this is
|
||||||
|
// the guaranteed backstop. Logged for observability.
|
||||||
|
let reconciled = reconcile_orphaned_branches(db).await?;
|
||||||
|
if !reconciled.reclaimed.is_empty() {
|
||||||
|
tracing::info!(
|
||||||
|
count = reconciled.reclaimed.len(),
|
||||||
|
reclaimed = ?reconciled.reclaimed,
|
||||||
|
"cleanup reconciled orphaned branch forks"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if !reconciled.failures.is_empty() {
|
||||||
|
tracing::warn!(
|
||||||
|
count = reconciled.failures.len(),
|
||||||
|
failures = ?reconciled.failures,
|
||||||
|
"cleanup could not reconcile some orphaned forks; will retry next cleanup"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let before_timestamp = options.older_than.map(|d| Utc::now() - d);
|
let before_timestamp = options.older_than.map(|d| Utc::now() - d);
|
||||||
let keep_versions = options.keep_versions;
|
let keep_versions = options.keep_versions;
|
||||||
|
|
||||||
|
|
@ -160,36 +440,205 @@ pub async fn cleanup_all_tables(
|
||||||
let concurrency = maint_concurrency().min(table_tasks.len()).max(1);
|
let concurrency = maint_concurrency().min(table_tasks.len()).max(1);
|
||||||
let table_store = &db.table_store;
|
let table_store = &db.table_store;
|
||||||
|
|
||||||
let results: Vec<Result<TableCleanupStats>> = futures::stream::iter(table_tasks.into_iter())
|
// Fault-isolated per table: a single table's GC failure is recorded on its
|
||||||
|
// stats row (`error: Some`) and logged, never aborting the healthy tables.
|
||||||
|
// cleanup is the convergence backstop, so it must do as much as it can and
|
||||||
|
// converge on re-run rather than fail wholesale (invariant 13).
|
||||||
|
let results: Vec<TableCleanupStats> = futures::stream::iter(table_tasks.into_iter())
|
||||||
.map(|(table_key, full_path)| async move {
|
.map(|(table_key, full_path)| async move {
|
||||||
let ds = table_store
|
let outcome: Result<RemovalStats> = async {
|
||||||
.open_dataset_head_for_write(&table_key, &full_path, None)
|
crate::failpoints::maybe_fail("cleanup.table_gc")?;
|
||||||
.await?;
|
let ds = table_store
|
||||||
let before_version = keep_versions
|
.open_dataset_head_for_write(&table_key, &full_path, None)
|
||||||
.map(|n| ds.version().version.saturating_sub(n as u64))
|
.await?;
|
||||||
.filter(|v| *v > 0);
|
let before_version = keep_versions
|
||||||
let policy = CleanupPolicy {
|
.map(|n| ds.version().version.saturating_sub(n as u64))
|
||||||
before_timestamp,
|
.filter(|v| *v > 0);
|
||||||
before_version,
|
let policy = CleanupPolicy {
|
||||||
delete_unverified: false,
|
before_timestamp,
|
||||||
error_if_tagged_old_versions: false,
|
before_version,
|
||||||
clean_referenced_branches: false,
|
delete_unverified: false,
|
||||||
delete_rate_limit: None,
|
error_if_tagged_old_versions: false,
|
||||||
};
|
clean_referenced_branches: false,
|
||||||
let removed: RemovalStats = lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
|
delete_rate_limit: None,
|
||||||
.await
|
};
|
||||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
|
||||||
Ok(TableCleanupStats {
|
.await
|
||||||
table_key,
|
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||||
bytes_removed: removed.bytes_removed,
|
}
|
||||||
old_versions_removed: removed.old_versions,
|
.await;
|
||||||
})
|
match outcome {
|
||||||
|
Ok(removed) => TableCleanupStats {
|
||||||
|
table_key,
|
||||||
|
bytes_removed: removed.bytes_removed,
|
||||||
|
old_versions_removed: removed.old_versions,
|
||||||
|
error: None,
|
||||||
|
},
|
||||||
|
Err(err) => {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::cleanup",
|
||||||
|
table = %table_key,
|
||||||
|
error = %err,
|
||||||
|
"version GC failed for table; other tables unaffected",
|
||||||
|
);
|
||||||
|
TableCleanupStats {
|
||||||
|
table_key,
|
||||||
|
bytes_removed: 0,
|
||||||
|
old_versions_removed: 0,
|
||||||
|
error: Some(err.to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.buffer_unordered(concurrency)
|
.buffer_unordered(concurrency)
|
||||||
.collect()
|
.collect()
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
results.into_iter().collect()
|
Ok(results)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Outcome of [`reconcile_orphaned_branches`]: the `(owner, branch)` pairs
|
||||||
|
/// reclaimed and the `(owner, error)` pairs that failed, where `owner` is a
|
||||||
|
/// table key (e.g. `node:Person`) or `"_graph_commits"`. Per-owner failures are
|
||||||
|
/// isolated and recorded here, not propagated — the next reconcile converges.
|
||||||
|
#[derive(Debug, Clone, Default)]
|
||||||
|
pub struct BranchReconcileStats {
|
||||||
|
pub reclaimed: Vec<(String, String)>,
|
||||||
|
pub failures: Vec<(String, String)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drop every per-table and commit-graph Lance branch that the manifest no
|
||||||
|
/// longer references.
|
||||||
|
///
|
||||||
|
/// Orphaned forks arise when a `branch_delete` flips the manifest authority
|
||||||
|
/// (atomic) but a downstream best-effort reclaim does not complete. They are
|
||||||
|
/// unreachable through any snapshot — no manifest entry can name them — yet
|
||||||
|
/// they pin their `tree/{branch}/` storage and can block reusing the branch
|
||||||
|
/// name. This is the guaranteed convergence backstop: it is idempotent and
|
||||||
|
/// derived purely from the manifest authority, so it no-ops once everything is
|
||||||
|
/// reconciled, and it would harmlessly find nothing if a future Lance atomic
|
||||||
|
/// multi-dataset branch op prevented orphans from forming.
|
||||||
|
///
|
||||||
|
/// The keep-set is the full (unfiltered) manifest branch list, so system
|
||||||
|
/// branches' forks are never reclaimed; `main`/default is not a named Lance
|
||||||
|
/// branch and so is never a candidate. Referencing children are dropped before
|
||||||
|
/// parents (Lance refuses to delete a referenced parent) by ordering longest
|
||||||
|
/// branch names first.
|
||||||
|
pub async fn reconcile_orphaned_branches(db: &Omnigraph) -> Result<BranchReconcileStats> {
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
let keep: HashSet<String> = db
|
||||||
|
.coordinator
|
||||||
|
.read()
|
||||||
|
.await
|
||||||
|
.all_branches()
|
||||||
|
.await?
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let resolved = db.resolved_branch_target(None).await?;
|
||||||
|
let snapshot = resolved.snapshot;
|
||||||
|
let table_targets: Vec<(String, String)> = all_table_keys(&db.catalog())
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|table_key| {
|
||||||
|
let entry = snapshot.entry(&table_key)?;
|
||||||
|
let full_path = format!("{}/{}", db.root_uri, entry.table_path);
|
||||||
|
Some((table_key, full_path))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut stats = BranchReconcileStats::default();
|
||||||
|
|
||||||
|
// Per-table fault isolation: one table's transient failure is recorded and
|
||||||
|
// logged, never aborting the rest of the sweep.
|
||||||
|
for (table_key, full_path) in table_targets {
|
||||||
|
let listed = match db.table_store.list_branches(&full_path).await {
|
||||||
|
Ok(listed) => listed,
|
||||||
|
Err(err) => {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::cleanup",
|
||||||
|
table = %table_key,
|
||||||
|
error = %err,
|
||||||
|
"listing branches failed during reconcile; skipping table",
|
||||||
|
);
|
||||||
|
stats.failures.push((table_key.clone(), err.to_string()));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for branch in orphan_branches(listed, &keep) {
|
||||||
|
let outcome = match crate::failpoints::maybe_fail("cleanup.reconcile_fork") {
|
||||||
|
Ok(()) => db.table_store.force_delete_branch(&full_path, &branch).await,
|
||||||
|
Err(injected) => Err(injected),
|
||||||
|
};
|
||||||
|
match outcome {
|
||||||
|
Ok(()) => stats.reclaimed.push((table_key.clone(), branch)),
|
||||||
|
Err(err) => {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::cleanup",
|
||||||
|
table = %table_key,
|
||||||
|
branch = %branch,
|
||||||
|
error = %err,
|
||||||
|
"reclaiming orphaned fork failed; will retry next cleanup",
|
||||||
|
);
|
||||||
|
stats.failures.push((table_key.clone(), err.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Commit-graph orphans (best-effort: the dataset may not exist on a graph
|
||||||
|
// that has never committed; any failure is isolated and retried next time).
|
||||||
|
if let Err(err) = reconcile_commit_graph_orphans(db, &keep, &mut stats).await {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::cleanup",
|
||||||
|
error = %err,
|
||||||
|
"commit-graph orphan reconcile failed; will retry next cleanup",
|
||||||
|
);
|
||||||
|
stats.failures.push(("_graph_commits".to_string(), err.to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Commit-graph half of [`reconcile_orphaned_branches`], split out so its
|
||||||
|
/// errors can be isolated. Returns `Ok` when the commit-graph dataset is absent.
|
||||||
|
async fn reconcile_commit_graph_orphans(
|
||||||
|
db: &Omnigraph,
|
||||||
|
keep: &std::collections::HashSet<String>,
|
||||||
|
stats: &mut BranchReconcileStats,
|
||||||
|
) -> Result<()> {
|
||||||
|
let commits_uri = crate::db::commit_graph::graph_commits_uri(db.root_uri());
|
||||||
|
if !db.storage_adapter().exists(&commits_uri).await? {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let mut commit_graph = crate::db::commit_graph::CommitGraph::open(db.root_uri()).await?;
|
||||||
|
for branch in orphan_branches(commit_graph.list_branches().await?, keep) {
|
||||||
|
match commit_graph.force_delete_branch(&branch).await {
|
||||||
|
Ok(()) => stats.reclaimed.push(("_graph_commits".to_string(), branch)),
|
||||||
|
Err(err) => {
|
||||||
|
tracing::warn!(
|
||||||
|
target: "omnigraph::cleanup",
|
||||||
|
branch = %branch,
|
||||||
|
error = %err,
|
||||||
|
"reclaiming orphaned commit-graph branch failed; will retry next cleanup",
|
||||||
|
);
|
||||||
|
stats.failures.push(("_graph_commits".to_string(), err.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Filter `present` Lance branches down to those absent from the manifest
|
||||||
|
/// `keep` set, ordered children-before-parents (longest name first) so Lance's
|
||||||
|
/// referenced-parent `RefConflict` cannot block reclamation.
|
||||||
|
fn orphan_branches(present: Vec<String>, keep: &std::collections::HashSet<String>) -> Vec<String> {
|
||||||
|
let mut orphans: Vec<String> = present
|
||||||
|
.into_iter()
|
||||||
|
.filter(|branch| !keep.contains(branch))
|
||||||
|
.collect();
|
||||||
|
orphans.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));
|
||||||
|
orphans
|
||||||
}
|
}
|
||||||
|
|
||||||
fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String> {
|
fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String> {
|
||||||
|
|
|
||||||
|
|
@ -48,57 +48,24 @@ pub(super) async fn plan_schema(
|
||||||
Ok(plan)
|
Ok(plan)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn apply_schema(
|
struct PlannedSchemaApply {
|
||||||
db: &Omnigraph,
|
plan: SchemaMigrationPlan,
|
||||||
desired_schema_source: &str,
|
desired_ir: SchemaIR,
|
||||||
options: SchemaApplyOptions,
|
desired_catalog: Catalog,
|
||||||
actor: Option<&str>,
|
|
||||||
) -> Result<SchemaApplyResult> {
|
|
||||||
// Engine-layer policy gate (MR-722 chassis core).
|
|
||||||
//
|
|
||||||
// Fires BEFORE acquiring the schema-apply lock or doing any other
|
|
||||||
// work. When no PolicyChecker is installed this is a no-op and
|
|
||||||
// the apply path behaves exactly as it did before MR-722. When
|
|
||||||
// a PolicyChecker IS installed and the actor is None, this is a
|
|
||||||
// hard error — see Omnigraph::enforce's docstring for the
|
|
||||||
// forget-the-actor-footgun reasoning.
|
|
||||||
//
|
|
||||||
// Scope is TargetBranch("main") to match the HTTP-layer convention
|
|
||||||
// for SchemaApply: branch=None, target_branch=Some("main"). Cedar
|
|
||||||
// policies in the wild use `target_branch_scope: protected` to
|
|
||||||
// gate schema applies, so the engine-layer call has to set the
|
|
||||||
// target_branch shape that activates that predicate. Wrong scope
|
|
||||||
// here = silent policy mismatch with HTTP. See
|
|
||||||
// `omnigraph_policy::ResourceScope::to_branch_pair` for the mapping.
|
|
||||||
db.enforce(
|
|
||||||
omnigraph_policy::PolicyAction::SchemaApply,
|
|
||||||
&omnigraph_policy::ResourceScope::TargetBranch("main".to_string()),
|
|
||||||
actor,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
acquire_schema_apply_lock(db).await?;
|
|
||||||
let result = apply_schema_with_lock(db, desired_schema_source, options).await;
|
|
||||||
let release_result = release_schema_apply_lock(db).await;
|
|
||||||
match (result, release_result) {
|
|
||||||
(Ok(result), Ok(())) => Ok(result),
|
|
||||||
(Ok(_), Err(err)) => Err(err),
|
|
||||||
(Err(err), Ok(())) => Err(err),
|
|
||||||
(Err(err), Err(_)) => Err(err),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn apply_schema_with_lock(
|
async fn plan_schema_for_apply(
|
||||||
db: &Omnigraph,
|
db: &Omnigraph,
|
||||||
desired_schema_source: &str,
|
desired_schema_source: &str,
|
||||||
options: SchemaApplyOptions,
|
options: SchemaApplyOptions,
|
||||||
) -> Result<SchemaApplyResult> {
|
) -> Result<PlannedSchemaApply> {
|
||||||
db.ensure_schema_state_valid().await?;
|
db.ensure_schema_state_valid().await?;
|
||||||
let branches = db.coordinator.read().await.all_branches().await?;
|
let branches = db.coordinator.read().await.all_branches().await?;
|
||||||
// Skip `main` and internal system branches. The schema-apply lock branch
|
// Skip `main` and internal system branches (the schema-apply lock branch,
|
||||||
// is excluded because it is the cluster-wide schema-apply serializer.
|
// the cluster-wide schema-apply serializer). Legacy `__run__*` staging
|
||||||
// `__run__*` branches are no longer created; the filter remains as
|
// branches were swept off `__manifest` by the v2→v3 migration that runs in
|
||||||
// defense-in-depth for legacy graphs with leftover staging branches.
|
// `Omnigraph::open(ReadWrite)` before this check (MR-770), so they no
|
||||||
// A future production sweep will let this guard go.
|
// longer appear here.
|
||||||
let blocking_branches = branches
|
let blocking_branches = branches
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|branch| branch != "main" && !is_internal_system_branch(branch))
|
.filter(|branch| branch != "main" && !is_internal_system_branch(branch))
|
||||||
|
|
@ -123,6 +90,87 @@ pub(super) async fn apply_schema_with_lock(
|
||||||
.unwrap_or_else(|| "unsupported schema migration plan".to_string());
|
.unwrap_or_else(|| "unsupported schema migration plan".to_string());
|
||||||
return Err(OmniError::manifest(message));
|
return Err(OmniError::manifest(message));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut desired_catalog = build_catalog_from_ir(&desired_ir)?;
|
||||||
|
fixup_blob_schemas(&mut desired_catalog);
|
||||||
|
Ok(PlannedSchemaApply {
|
||||||
|
plan,
|
||||||
|
desired_ir,
|
||||||
|
desired_catalog,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) async fn preview_schema_apply(
|
||||||
|
db: &Omnigraph,
|
||||||
|
desired_schema_source: &str,
|
||||||
|
options: SchemaApplyOptions,
|
||||||
|
) -> Result<SchemaApplyPreview> {
|
||||||
|
let planned = plan_schema_for_apply(db, desired_schema_source, options).await?;
|
||||||
|
Ok(SchemaApplyPreview {
|
||||||
|
plan: planned.plan,
|
||||||
|
catalog: planned.desired_catalog,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) async fn apply_schema<F>(
|
||||||
|
db: &Omnigraph,
|
||||||
|
desired_schema_source: &str,
|
||||||
|
options: SchemaApplyOptions,
|
||||||
|
actor: Option<&str>,
|
||||||
|
validate_catalog: F,
|
||||||
|
) -> Result<SchemaApplyResult>
|
||||||
|
where
|
||||||
|
F: FnOnce(&Catalog) -> Result<()>,
|
||||||
|
{
|
||||||
|
// Engine-layer policy gate (MR-722 chassis core).
|
||||||
|
//
|
||||||
|
// Fires BEFORE acquiring the schema-apply lock or doing any other
|
||||||
|
// work. When no PolicyChecker is installed this is a no-op and
|
||||||
|
// the apply path behaves exactly as it did before MR-722. When
|
||||||
|
// a PolicyChecker IS installed and the actor is None, this is a
|
||||||
|
// hard error — see Omnigraph::enforce's docstring for the
|
||||||
|
// forget-the-actor-footgun reasoning.
|
||||||
|
//
|
||||||
|
// Scope is TargetBranch("main") to match the HTTP-layer convention
|
||||||
|
// for SchemaApply: branch=None, target_branch=Some("main"). Cedar
|
||||||
|
// policies in the wild use `target_branch_scope: protected` to
|
||||||
|
// gate schema applies, so the engine-layer call has to set the
|
||||||
|
// target_branch shape that activates that predicate. Wrong scope
|
||||||
|
// here = silent policy mismatch with HTTP. See
|
||||||
|
// `omnigraph_policy::ResourceScope::to_branch_pair` for the mapping.
|
||||||
|
db.enforce(
|
||||||
|
omnigraph_policy::PolicyAction::SchemaApply,
|
||||||
|
&omnigraph_policy::ResourceScope::TargetBranch("main".to_string()),
|
||||||
|
actor,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
acquire_schema_apply_lock(db).await?;
|
||||||
|
let result = apply_schema_with_lock(db, desired_schema_source, options, validate_catalog).await;
|
||||||
|
let release_result = release_schema_apply_lock(db).await;
|
||||||
|
match (result, release_result) {
|
||||||
|
(Ok(result), Ok(())) => Ok(result),
|
||||||
|
(Ok(_), Err(err)) => Err(err),
|
||||||
|
(Err(err), Ok(())) => Err(err),
|
||||||
|
(Err(err), Err(_)) => Err(err),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) async fn apply_schema_with_lock<F>(
|
||||||
|
db: &Omnigraph,
|
||||||
|
desired_schema_source: &str,
|
||||||
|
options: SchemaApplyOptions,
|
||||||
|
validate_catalog: F,
|
||||||
|
) -> Result<SchemaApplyResult>
|
||||||
|
where
|
||||||
|
F: FnOnce(&Catalog) -> Result<()>,
|
||||||
|
{
|
||||||
|
let planned = plan_schema_for_apply(db, desired_schema_source, options).await?;
|
||||||
|
validate_catalog(&planned.desired_catalog)?;
|
||||||
|
let PlannedSchemaApply {
|
||||||
|
plan,
|
||||||
|
desired_ir,
|
||||||
|
desired_catalog,
|
||||||
|
} = planned;
|
||||||
if plan.steps.is_empty() {
|
if plan.steps.is_empty() {
|
||||||
return Ok(SchemaApplyResult {
|
return Ok(SchemaApplyResult {
|
||||||
supported: true,
|
supported: true,
|
||||||
|
|
@ -132,9 +180,6 @@ pub(super) async fn apply_schema_with_lock(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut desired_catalog = build_catalog_from_ir(&desired_ir)?;
|
|
||||||
fixup_blob_schemas(&mut desired_catalog);
|
|
||||||
|
|
||||||
let snapshot = db.snapshot().await;
|
let snapshot = db.snapshot().await;
|
||||||
let base_manifest_version = snapshot.version();
|
let base_manifest_version = snapshot.version();
|
||||||
let mut added_tables = BTreeSet::new();
|
let mut added_tables = BTreeSet::new();
|
||||||
|
|
|
||||||
|
|
@ -483,6 +483,22 @@ pub(super) async fn open_owned_dataset_for_branch_write(
|
||||||
Ok((ds, Some(active_branch.to_string())))
|
Ok((ds, Some(active_branch.to_string())))
|
||||||
}
|
}
|
||||||
source_branch => {
|
source_branch => {
|
||||||
|
crate::failpoints::maybe_fail("fork.before_classify")?;
|
||||||
|
// Authority check before forking: re-read the live manifest. If this
|
||||||
|
// table is already forked on active_branch, a concurrent first-write
|
||||||
|
// won the race and our snapshot is stale — that is a retryable
|
||||||
|
// conflict, not an orphan. (A zombie fork is never in the manifest,
|
||||||
|
// so this only fires for a live concurrent fork.)
|
||||||
|
let live = db.snapshot_for_branch(Some(active_branch)).await?;
|
||||||
|
if let Some(entry) = live.entry(table_key) {
|
||||||
|
if entry.table_branch.as_deref() == Some(active_branch) {
|
||||||
|
return Err(OmniError::manifest_expected_version_mismatch(
|
||||||
|
table_key,
|
||||||
|
entry_version,
|
||||||
|
entry.table_version,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
fork_dataset_from_entry_state(
|
fork_dataset_from_entry_state(
|
||||||
db,
|
db,
|
||||||
table_key,
|
table_key,
|
||||||
|
|
|
||||||
|
|
@ -1,16 +0,0 @@
|
||||||
// The Run state machine has been removed. Mutations now write directly
|
|
||||||
// to target tables and use the publisher's `expected_table_versions`
|
|
||||||
// CAS for cross-table OCC; `__run__<id>` staging branches and the
|
|
||||||
// `_graph_runs.lance` state machine no longer exist.
|
|
||||||
//
|
|
||||||
// What remains is the branch-name predicate, kept as a defense-in-depth
|
|
||||||
// guard against users naming a public branch `__run__*`. A future
|
|
||||||
// production sweep of legacy `_graph_runs.lance` rows and stale
|
|
||||||
// `__run__*` branches will let this predicate (and this file) go too.
|
|
||||||
|
|
||||||
pub(crate) const INTERNAL_RUN_BRANCH_PREFIX: &str = "__run__";
|
|
||||||
|
|
||||||
pub(crate) fn is_internal_run_branch(name: &str) -> bool {
|
|
||||||
name.trim_start_matches('/')
|
|
||||||
.starts_with(INTERNAL_RUN_BRANCH_PREFIX)
|
|
||||||
}
|
|
||||||
|
|
@ -1087,9 +1087,9 @@ impl Omnigraph {
|
||||||
target: &str,
|
target: &str,
|
||||||
actor_id: Option<&str>,
|
actor_id: Option<&str>,
|
||||||
) -> Result<MergeOutcome> {
|
) -> Result<MergeOutcome> {
|
||||||
if is_internal_run_branch(source) || is_internal_run_branch(target) {
|
if is_internal_system_branch(source) || is_internal_system_branch(target) {
|
||||||
return Err(OmniError::manifest(format!(
|
return Err(OmniError::manifest(format!(
|
||||||
"branch_merge does not allow internal run refs ('{}' -> '{}')",
|
"branch_merge does not allow internal system refs ('{}' -> '{}')",
|
||||||
source, target
|
source, target
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ use time::format_description::well_known::Rfc3339;
|
||||||
|
|
||||||
use crate::db::commit_graph::CommitGraph;
|
use crate::db::commit_graph::CommitGraph;
|
||||||
use crate::db::manifest::ManifestCoordinator;
|
use crate::db::manifest::ManifestCoordinator;
|
||||||
use crate::db::{MergeOutcome, Omnigraph, is_internal_run_branch};
|
use crate::db::{MergeOutcome, Omnigraph, is_internal_system_branch};
|
||||||
use crate::db::{ReadTarget, Snapshot};
|
use crate::db::{ReadTarget, Snapshot};
|
||||||
use crate::embedding::EmbeddingClient;
|
use crate::embedding::EmbeddingClient;
|
||||||
use crate::error::{MergeConflict, MergeConflictKind, OmniError, Result};
|
use crate::error::{MergeConflict, MergeConflictKind, OmniError, Result};
|
||||||
|
|
|
||||||
|
|
@ -288,21 +288,24 @@ async fn load_jsonl_reader<R: BufRead>(
|
||||||
let mut node_rows: HashMap<String, Vec<JsonValue>> = HashMap::new();
|
let mut node_rows: HashMap<String, Vec<JsonValue>> = HashMap::new();
|
||||||
let mut edge_rows: HashMap<String, Vec<(String, String, JsonValue)>> = HashMap::new();
|
let mut edge_rows: HashMap<String, Vec<(String, String, JsonValue)>> = HashMap::new();
|
||||||
|
|
||||||
for (line_num, line) in reader.lines().enumerate() {
|
// Parse a stream of JSON values. Accepts both compact JSONL (one object
|
||||||
let line = line?;
|
// per line) and pretty-printed JSON where a single object spans multiple
|
||||||
let line = line.trim();
|
// lines — serde's streaming deserializer treats any whitespace (including
|
||||||
if line.is_empty() {
|
// newlines) between top-level values as a separator.
|
||||||
continue;
|
for (idx, parsed) in serde_json::Deserializer::from_reader(reader)
|
||||||
}
|
.into_iter::<JsonValue>()
|
||||||
let value: JsonValue = serde_json::from_str(line).map_err(|e| {
|
.enumerate()
|
||||||
OmniError::manifest(format!("invalid JSON on line {}: {}", line_num + 1, e))
|
{
|
||||||
|
let record_num = idx + 1;
|
||||||
|
let value: JsonValue = parsed.map_err(|e| {
|
||||||
|
OmniError::manifest(format!("invalid JSON at record {}: {}", record_num, e))
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if let Some(type_name) = value.get("type").and_then(|v| v.as_str()) {
|
if let Some(type_name) = value.get("type").and_then(|v| v.as_str()) {
|
||||||
if !catalog.node_types.contains_key(type_name) {
|
if !catalog.node_types.contains_key(type_name) {
|
||||||
return Err(OmniError::manifest(format!(
|
return Err(OmniError::manifest(format!(
|
||||||
"line {}: unknown node type '{}'",
|
"record {}: unknown node type '{}'",
|
||||||
line_num + 1,
|
record_num,
|
||||||
type_name
|
type_name
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
@ -317,8 +320,8 @@ async fn load_jsonl_reader<R: BufRead>(
|
||||||
} else if let Some(edge_name) = value.get("edge").and_then(|v| v.as_str()) {
|
} else if let Some(edge_name) = value.get("edge").and_then(|v| v.as_str()) {
|
||||||
if catalog.lookup_edge_by_name(edge_name).is_none() {
|
if catalog.lookup_edge_by_name(edge_name).is_none() {
|
||||||
return Err(OmniError::manifest(format!(
|
return Err(OmniError::manifest(format!(
|
||||||
"line {}: unknown edge type '{}'",
|
"record {}: unknown edge type '{}'",
|
||||||
line_num + 1,
|
record_num,
|
||||||
edge_name
|
edge_name
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
@ -326,14 +329,14 @@ async fn load_jsonl_reader<R: BufRead>(
|
||||||
.get("from")
|
.get("from")
|
||||||
.and_then(|v| v.as_str())
|
.and_then(|v| v.as_str())
|
||||||
.ok_or_else(|| {
|
.ok_or_else(|| {
|
||||||
OmniError::manifest(format!("line {}: edge missing 'from'", line_num + 1))
|
OmniError::manifest(format!("record {}: edge missing 'from'", record_num))
|
||||||
})?
|
})?
|
||||||
.to_string();
|
.to_string();
|
||||||
let to = value
|
let to = value
|
||||||
.get("to")
|
.get("to")
|
||||||
.and_then(|v| v.as_str())
|
.and_then(|v| v.as_str())
|
||||||
.ok_or_else(|| {
|
.ok_or_else(|| {
|
||||||
OmniError::manifest(format!("line {}: edge missing 'to'", line_num + 1))
|
OmniError::manifest(format!("record {}: edge missing 'to'", record_num))
|
||||||
})?
|
})?
|
||||||
.to_string();
|
.to_string();
|
||||||
let data = value
|
let data = value
|
||||||
|
|
@ -347,8 +350,8 @@ async fn load_jsonl_reader<R: BufRead>(
|
||||||
.push((from, to, data));
|
.push((from, to, data));
|
||||||
} else {
|
} else {
|
||||||
return Err(OmniError::manifest(format!(
|
return Err(OmniError::manifest(format!(
|
||||||
"line {}: expected 'type' or 'edge' field",
|
"record {}: expected 'type' or 'edge' field",
|
||||||
line_num + 1
|
record_num
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -613,7 +616,7 @@ async fn load_jsonl_reader<R: BufRead>(
|
||||||
} else {
|
} else {
|
||||||
// LoadMode::Overwrite keeps the legacy inline-commit path —
|
// LoadMode::Overwrite keeps the legacy inline-commit path —
|
||||||
// truncate-then-append doesn't fit the staged shape (see
|
// truncate-then-append doesn't fit the staged shape (see
|
||||||
// `docs/runs.md` "LoadMode::Overwrite residual"). The recovery
|
// `docs/dev/writes.md` "LoadMode::Overwrite residual"). The recovery
|
||||||
// sidecar is not applicable here because the writer doesn't go
|
// sidecar is not applicable here because the writer doesn't go
|
||||||
// through MutationStaging; per-table inline commits + a final
|
// through MutationStaging; per-table inline commits + a final
|
||||||
// manifest publish handle their own residual via the documented
|
// manifest publish handle their own residual via the documented
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ pub struct DeleteState {
|
||||||
/// `exec/mutation.rs`) and the bulk loader (`loader/mod.rs`). The
|
/// `exec/mutation.rs`) and the bulk loader (`loader/mod.rs`). The
|
||||||
/// intent: defer Lance commits to end-of-query so a mid-query failure
|
/// intent: defer Lance commits to end-of-query so a mid-query failure
|
||||||
/// leaves the touched table at the pre-mutation HEAD instead of
|
/// leaves the touched table at the pre-mutation HEAD instead of
|
||||||
/// drifting ahead. See `docs/runs.md` for the publisher-CAS contract
|
/// drifting ahead. See `docs/dev/writes.md` for the publisher-CAS contract
|
||||||
/// this builds on.
|
/// this builds on.
|
||||||
///
|
///
|
||||||
/// `transaction` is opaque from our side — Lance owns its semantics. We
|
/// `transaction` is opaque from our side — Lance owns its semantics. We
|
||||||
|
|
@ -177,6 +177,45 @@ impl TableStore {
|
||||||
.map_err(|e| OmniError::Lance(e.to_string()))
|
.map_err(|e| OmniError::Lance(e.to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// List the named Lance branches present on the dataset at `dataset_uri`.
|
||||||
|
/// The `cleanup` orphan reconciler diffs this against the manifest branch
|
||||||
|
/// set to find orphaned per-table forks. `main`/default is not a named
|
||||||
|
/// branch and never appears here.
|
||||||
|
pub async fn list_branches(&self, dataset_uri: &str) -> Result<Vec<String>> {
|
||||||
|
let ds = Dataset::open(dataset_uri)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
let branches = ds
|
||||||
|
.list_branches()
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
Ok(branches.into_keys().collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Idempotently drop `branch` from the dataset at `dataset_uri`.
|
||||||
|
///
|
||||||
|
/// Unlike [`delete_branch`](Self::delete_branch), this tolerates an
|
||||||
|
/// already-absent branch — both a missing contents ref (Lance's
|
||||||
|
/// `force_delete_branch` handles that) and a missing `tree/{branch}/`
|
||||||
|
/// directory (the local-store `NotFound` quirk pinned by
|
||||||
|
/// `lance_surface_guards::force_delete_branch_semantics`). Safe to call on a
|
||||||
|
/// possibly-orphaned or already-reclaimed fork.
|
||||||
|
///
|
||||||
|
/// A branch that still has referencing descendants (`RefConflict`) is NOT
|
||||||
|
/// tolerated: that is a real ordering error and surfaces as `OmniError::Lance`.
|
||||||
|
/// Used by the eager best-effort reclaim in `cleanup_deleted_branch_tables`
|
||||||
|
/// and the `cleanup` orphan reconciler.
|
||||||
|
pub async fn force_delete_branch(&self, dataset_uri: &str, branch: &str) -> Result<()> {
|
||||||
|
let mut ds = Dataset::open(dataset_uri)
|
||||||
|
.await
|
||||||
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
|
match ds.force_delete_branch(branch).await {
|
||||||
|
Ok(()) => Ok(()),
|
||||||
|
Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => Ok(()),
|
||||||
|
Err(e) => Err(OmniError::Lance(e.to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn open_dataset_at_state(
|
pub async fn open_dataset_at_state(
|
||||||
&self,
|
&self,
|
||||||
table_path: &str,
|
table_path: &str,
|
||||||
|
|
@ -243,21 +282,24 @@ impl TableStore {
|
||||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||||
self.ensure_expected_version(&source_ds, table_key, source_version)?;
|
self.ensure_expected_version(&source_ds, table_key, source_version)?;
|
||||||
|
|
||||||
match source_ds
|
if source_ds
|
||||||
.create_branch(target_branch, source_version, None)
|
.create_branch(target_branch, source_version, None)
|
||||||
.await
|
.await
|
||||||
|
.is_err()
|
||||||
{
|
{
|
||||||
Ok(_) => {}
|
// The target branch ref already exists. The caller
|
||||||
Err(create_err) => match self
|
// (`open_owned_dataset_for_branch_write`) re-reads the live manifest
|
||||||
.open_dataset_head(dataset_uri, Some(target_branch))
|
// before forking and returns a retryable error when a concurrent
|
||||||
.await
|
// writer legitimately holds the fork, so reaching here means the
|
||||||
{
|
// manifest does NOT reference this fork: it is an orphan from an
|
||||||
Ok(ds) => {
|
// incomplete prior `branch_delete`. Surface the actionable cleanup
|
||||||
self.ensure_expected_version(&ds, table_key, source_version)?;
|
// error rather than guessing from Lance branch versions.
|
||||||
return Ok(ds);
|
return Err(OmniError::manifest_conflict(format!(
|
||||||
}
|
"branch '{}' has orphaned table state for '{}' from an incomplete \
|
||||||
Err(_) => return Err(OmniError::Lance(create_err.to_string())),
|
prior delete; run `omnigraph cleanup` to reclaim it before reusing \
|
||||||
},
|
this branch name",
|
||||||
|
target_branch, table_key
|
||||||
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
let ds = self
|
let ds = self
|
||||||
|
|
@ -901,7 +943,7 @@ impl TableStore {
|
||||||
/// Lift path: either a Lance API extension that lets
|
/// Lift path: either a Lance API extension that lets
|
||||||
/// `MergeInsertBuilder` accept additional staged fragments, or an
|
/// `MergeInsertBuilder` accept additional staged fragments, or an
|
||||||
/// in-memory pre-merge here that folds prior staged batches into the
|
/// in-memory pre-merge here that folds prior staged batches into the
|
||||||
/// input stream. See `docs/runs.md`.
|
/// input stream. See `docs/dev/writes.md`.
|
||||||
pub async fn stage_merge_insert(
|
pub async fn stage_merge_insert(
|
||||||
&self,
|
&self,
|
||||||
ds: Dataset,
|
ds: Dataset,
|
||||||
|
|
|
||||||
|
|
@ -294,21 +294,19 @@ async fn composite_flow_canonical_lifecycle() {
|
||||||
);
|
);
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────
|
||||||
// Step 10: optimize the post-merge graph — verify indices stay
|
// Step 10: optimize the post-merge graph — verify compaction is
|
||||||
// valid and queryable.
|
// published to the manifest (so the manifest pin tracks the compacted
|
||||||
|
// Lance HEAD), indices stay valid and queryable, and a post-optimize
|
||||||
|
// strict write commits.
|
||||||
//
|
//
|
||||||
// **Known limitation**: `optimize_all_tables` calls Lance
|
// This step used to carry a "Known limitation": `optimize_all_tables`
|
||||||
// `compact_files` directly — it advances per-table Lance HEAD
|
// ran Lance `compact_files` without publishing the new version to
|
||||||
// without updating the omnigraph `__manifest` pin. After optimize,
|
// `__manifest`, so the manifest pin lagged the Lance HEAD and the next
|
||||||
// the next writer's expected_table_versions captures the
|
// strict write / schema apply failed with `ExpectedVersionMismatch`
|
||||||
// pre-optimize manifest pin, but the publisher's pre-check reads
|
// ("stale view … refresh and retry") — so post-optimize mutations were
|
||||||
// a higher version from the manifest dataset (because some other
|
// deliberately omitted here. optimize now publishes the compacted
|
||||||
// path — possibly schema-state recovery on reopen — wrote a newer
|
// version, and this flow exercises exactly that previously-failing
|
||||||
// __manifest row). The `ExpectedVersionMismatch` is benign
|
// write below.
|
||||||
// (re-issuing the mutation after a snapshot refresh succeeds), but
|
|
||||||
// a composite test cannot reliably exercise post-optimize mutations
|
|
||||||
// until that path is investigated. Coverage of post-optimize
|
|
||||||
// mutations is left to a focused optimize+cleanup integration test.
|
|
||||||
// ─────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────
|
||||||
let optimize_stats = db.optimize().await.unwrap();
|
let optimize_stats = db.optimize().await.unwrap();
|
||||||
assert!(
|
assert!(
|
||||||
|
|
@ -331,6 +329,28 @@ async fn composite_flow_canonical_lifecycle() {
|
||||||
"row counts unchanged by optimize"
|
"row counts unchanged by optimize"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// A strict update on a compacted table is exactly the write that
|
||||||
|
// failed with "stale view" before optimize published its compaction.
|
||||||
|
// It must now commit (Alice is one of the seed Persons; an update
|
||||||
|
// leaves the row count at 6).
|
||||||
|
let post_optimize_update = mutate_main(
|
||||||
|
&mut db,
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"set_age",
|
||||||
|
&mixed_params(&[("$name", "Alice")], &[("$age", 41)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("post-optimize strict update must commit — optimize published the manifest");
|
||||||
|
assert_eq!(
|
||||||
|
post_optimize_update.affected_nodes, 1,
|
||||||
|
"post-optimize update must affect exactly Alice"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
count_rows(&db, "node:Person").await,
|
||||||
|
6,
|
||||||
|
"an update must not change the Person row count"
|
||||||
|
);
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────
|
||||||
// Step 11: cleanup — keep last 10 versions, only purge versions
|
// Step 11: cleanup — keep last 10 versions, only purge versions
|
||||||
// older than 1 hour. With this small test, we have well under 10
|
// older than 1 hour. With this small test, we have well under 10
|
||||||
|
|
@ -373,14 +393,27 @@ async fn composite_flow_canonical_lifecycle() {
|
||||||
branches,
|
branches,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Final query exercise — full read path works post-reopen,
|
// Final exercise — full read AND write path works post-reopen,
|
||||||
// post-cleanup. Post-cleanup mutation is omitted here pending
|
// post-cleanup. (The post-cleanup mutation was previously omitted
|
||||||
// resolution of the optimize-vs-manifest-pin interaction documented
|
// pending resolution of the optimize-vs-manifest-pin interaction in
|
||||||
// in Step 10.
|
// Step 10; that is now fixed, so a strict write here must commit.)
|
||||||
let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(!final_total.batches().is_empty());
|
assert!(!final_total.batches().is_empty());
|
||||||
|
|
||||||
|
let post_reopen_update = mutate_main(
|
||||||
|
&mut db,
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"set_age",
|
||||||
|
&mixed_params(&[("$name", "Alice")], &[("$age", 42)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("post-reopen, post-cleanup strict update must commit");
|
||||||
|
assert_eq!(
|
||||||
|
post_reopen_update.affected_nodes, 1,
|
||||||
|
"post-reopen update must affect exactly Alice"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cross-handle sequence that exercises operations after a schema_apply
|
/// Cross-handle sequence that exercises operations after a schema_apply
|
||||||
|
|
|
||||||
|
|
@ -1933,3 +1933,87 @@ query docs_with_tag($tag: String) {
|
||||||
"contains-pushdown should return exactly the rows whose tags list contains 'red'"
|
"contains-pushdown should return exactly the rows whose tags list contains 'red'"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Maintenance in the full lifecycle: optimize (compaction) ────────────────
|
||||||
|
|
||||||
|
/// `optimize` (Lance compaction) is part of a realistic graph lifecycle: it
|
||||||
|
/// advances the Lance HEAD and publishes the compacted version to the manifest.
|
||||||
|
/// The rest of the flow must keep working across that boundary — reads observe
|
||||||
|
/// the compacted data, strict updates (which check Lance HEAD == manifest
|
||||||
|
/// version) still commit, inserts still commit, and the state survives a reopen
|
||||||
|
/// (the open-time recovery sweep finds no leftover drift). Before optimize
|
||||||
|
/// published its compaction, the manifest lagged the Lance HEAD here and the
|
||||||
|
/// post-optimize update below failed with "stale view ... refresh and retry".
|
||||||
|
#[tokio::test]
|
||||||
|
async fn full_flow_optimize_then_query_update_and_reopen() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let mut db = init_and_load(&dir).await;
|
||||||
|
|
||||||
|
// Build several Person fragments so compaction has something to merge.
|
||||||
|
for (name, age) in [("Eve", 40), ("Frank", 41), ("Grace", 42)] {
|
||||||
|
mutate_main(
|
||||||
|
&mut db,
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", name)], &[("$age", age)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let stats = db.optimize().await.unwrap();
|
||||||
|
assert!(
|
||||||
|
stats.iter().any(|s| s.committed),
|
||||||
|
"a multi-fragment table should have compacted in this flow"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Reads observe the compacted data.
|
||||||
|
let qr = query_main(
|
||||||
|
&mut db,
|
||||||
|
TEST_QUERIES,
|
||||||
|
"get_person",
|
||||||
|
&params(&[("$name", "Alice")]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(qr.num_rows(), 1);
|
||||||
|
|
||||||
|
// Strict update after optimize commits (previously failed with "stale view"
|
||||||
|
// because the manifest lagged the compacted Lance HEAD).
|
||||||
|
let upd = mutate_main(
|
||||||
|
&mut db,
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"set_age",
|
||||||
|
&mixed_params(&[("$name", "Alice")], &[("$age", 31)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(upd.affected_nodes, 1);
|
||||||
|
|
||||||
|
// Insert after optimize also commits.
|
||||||
|
mutate_main(
|
||||||
|
&mut db,
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", "Ivan")], &[("$age", 50)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(count_rows(&db, "node:Person").await, 8); // 4 seed + Eve/Frank/Grace + Ivan
|
||||||
|
|
||||||
|
// State survives a reopen — the recovery sweep runs and finds no drift.
|
||||||
|
drop(db);
|
||||||
|
let reopened = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
assert_eq!(count_rows(&reopened, "node:Person").await, 8);
|
||||||
|
let alice = reopened
|
||||||
|
.entity_at_target(ReadTarget::branch("main"), "node:Person", "Alice")
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
alice["age"],
|
||||||
|
serde_json::json!(31),
|
||||||
|
"Alice's post-optimize age update must persist across reopen"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,452 @@ async fn branch_create_failpoint_triggers() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Branch delete flips the manifest authority first, then reclaims the per-table
|
||||||
|
// forks best-effort. A failure during that reclaim (here, the
|
||||||
|
// `branch_delete.before_table_cleanup` failpoint, standing in for a transient
|
||||||
|
// object-store error) must NOT fail the call: the branch is already gone, and
|
||||||
|
// `cleanup` reconciles the stranded fork. The branch name is reusable after.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn branch_delete_partial_failure_converges_via_cleanup() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let mut main = helpers::init_and_load(&dir).await;
|
||||||
|
|
||||||
|
main.branch_create("feature").await.unwrap();
|
||||||
|
let mut feature = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
helpers::mutate_branch(
|
||||||
|
&mut feature,
|
||||||
|
"feature",
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
drop(feature);
|
||||||
|
|
||||||
|
let person_uri = node_table_uri(&uri, "Person");
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
ds.list_branches().await.unwrap().contains_key("feature"),
|
||||||
|
"precondition: the owned table fork exists before delete"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inject a failure during per-table cleanup, AFTER the manifest authority
|
||||||
|
// flip. branch_delete must still succeed (best-effort reclaim).
|
||||||
|
{
|
||||||
|
let _fp = ScopedFailPoint::new("branch_delete.before_table_cleanup", "return");
|
||||||
|
main.branch_delete("feature").await.expect(
|
||||||
|
"branch_delete is best-effort after the manifest flip: a cleanup-step \
|
||||||
|
failure must not fail the call",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Authority flipped: the branch is gone.
|
||||||
|
assert_eq!(main.branch_list().await.unwrap(), vec!["main".to_string()]);
|
||||||
|
|
||||||
|
// The eager reclaim failed, so the orphan is stranded until cleanup.
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
ds.list_branches().await.unwrap().contains_key("feature"),
|
||||||
|
"failed eager reclaim should leave the orphan for cleanup to reconcile"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanup converges: the orphan is reclaimed.
|
||||||
|
main.cleanup(omnigraph::db::CleanupPolicyOptions {
|
||||||
|
keep_versions: Some(1),
|
||||||
|
older_than: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!ds.list_branches().await.unwrap().contains_key("feature"),
|
||||||
|
"cleanup should reconcile the orphaned fork away"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The name is reusable after cleanup reclaims the orphan.
|
||||||
|
main.branch_create("feature").await.unwrap();
|
||||||
|
let mut feature2 = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
helpers::mutate_branch(
|
||||||
|
&mut feature2,
|
||||||
|
"feature",
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", "Frank")], &[("$age", 41)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reusing a branch name whose delete left an orphaned fork (before `cleanup`
|
||||||
|
// reconciles it) must fail with a clear, actionable error pointing at
|
||||||
|
// `cleanup`, not the opaque `ExpectedVersionMismatch` that leaks from the fork
|
||||||
|
// path. The recreate itself succeeds; the first write to the previously-forked
|
||||||
|
// table is where the stale orphan collides.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn recreate_over_orphaned_fork_before_cleanup_is_actionable() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let mut main = helpers::init_and_load(&dir).await;
|
||||||
|
|
||||||
|
main.branch_create("feature").await.unwrap();
|
||||||
|
let mut feature = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
helpers::mutate_branch(
|
||||||
|
&mut feature,
|
||||||
|
"feature",
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
drop(feature);
|
||||||
|
|
||||||
|
// Partial delete: leaves the Person fork orphaned (cleanup not yet run).
|
||||||
|
{
|
||||||
|
let _fp = ScopedFailPoint::new("branch_delete.before_table_cleanup", "return");
|
||||||
|
main.branch_delete("feature").await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recreate the name and write to the previously-forked table WITHOUT a
|
||||||
|
// cleanup in between.
|
||||||
|
main.branch_create("feature").await.unwrap();
|
||||||
|
let mut feature2 = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
let err = helpers::mutate_branch(
|
||||||
|
&mut feature2,
|
||||||
|
"feature",
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", "Frank")], &[("$age", 41)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect_err("write should collide with the stale orphaned fork");
|
||||||
|
|
||||||
|
let msg = err.to_string();
|
||||||
|
assert!(
|
||||||
|
msg.contains("cleanup")
|
||||||
|
&& (msg.contains("orphan") || msg.contains("incomplete prior delete")),
|
||||||
|
"expected an actionable orphaned-fork error pointing at cleanup, got: {msg}"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
!msg.contains("expected manifest table version"),
|
||||||
|
"should not surface the opaque ExpectedVersionMismatch, got: {msg}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanup is the guaranteed convergence backstop, so one table's transient
|
||||||
|
// failure must not abort the whole sweep. Inject a one-shot version-GC failure
|
||||||
|
// for a single table and assert: cleanup still succeeds, the failure is
|
||||||
|
// surfaced per-table in the returned stats, and the independent reconcile pass
|
||||||
|
// still reclaimed an orphan.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn cleanup_isolates_single_table_failure() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let mut db = helpers::init_and_load(&dir).await;
|
||||||
|
|
||||||
|
// Forge an orphaned fork on the Person table (a reconcile target).
|
||||||
|
let person_uri = node_table_uri(&uri, "Person");
|
||||||
|
{
|
||||||
|
let mut ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
let base = ds.version().version;
|
||||||
|
ds.create_branch("ghost", base, None).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// One table's version GC fails once; the sweep must isolate it.
|
||||||
|
let _fp = ScopedFailPoint::new("cleanup.table_gc", "1*return");
|
||||||
|
let stats = db
|
||||||
|
.cleanup(omnigraph::db::CleanupPolicyOptions {
|
||||||
|
keep_versions: Some(1),
|
||||||
|
older_than: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("a single table's GC failure must not abort cleanup");
|
||||||
|
|
||||||
|
let errored = stats.iter().filter(|s| s.error.is_some()).count();
|
||||||
|
assert_eq!(
|
||||||
|
errored, 1,
|
||||||
|
"exactly one table's GC failure should be surfaced in stats, got {errored}"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
stats.len() >= 4,
|
||||||
|
"every node+edge table should still appear in the stats"
|
||||||
|
);
|
||||||
|
|
||||||
|
// The reconcile pass is independent of the GC failure, so the orphan is gone.
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!ds.list_branches().await.unwrap().contains_key("ghost"),
|
||||||
|
"reconcile should reclaim the orphan despite the GC failure"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Companion to the version-GC isolation test, exercising the OTHER cleanup
|
||||||
|
// loop: a force-delete failure while reconciling one orphaned fork must be
|
||||||
|
// isolated (logged, not propagated) so the sweep continues, and a later
|
||||||
|
// cleanup converges. This is the loop the Devin finding was about.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn cleanup_isolates_reconcile_failure() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let mut db = helpers::init_and_load(&dir).await;
|
||||||
|
|
||||||
|
// Forge an orphaned fork the reconcile pass will try to reclaim.
|
||||||
|
let person_uri = node_table_uri(&uri, "Person");
|
||||||
|
{
|
||||||
|
let mut ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
let base = ds.version().version;
|
||||||
|
ds.create_branch("ghost", base, None).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inject a one-shot failure into the reconcile force-delete. The sweep must
|
||||||
|
// not abort.
|
||||||
|
{
|
||||||
|
let _fp = ScopedFailPoint::new("cleanup.reconcile_fork", "1*return");
|
||||||
|
db.cleanup(omnigraph::db::CleanupPolicyOptions {
|
||||||
|
keep_versions: Some(1),
|
||||||
|
older_than: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("a reconcile force-delete failure must not abort cleanup");
|
||||||
|
}
|
||||||
|
// The blocked orphan is still present (the failure was isolated, not retried).
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
ds.list_branches().await.unwrap().contains_key("ghost"),
|
||||||
|
"the orphan whose reclaim was injected-to-fail should remain"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// A second cleanup with no injected failure converges.
|
||||||
|
db.cleanup(omnigraph::db::CleanupPolicyOptions {
|
||||||
|
keep_versions: Some(1),
|
||||||
|
older_than: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&person_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!ds.list_branches().await.unwrap().contains_key("ghost"),
|
||||||
|
"the second cleanup should reconcile the orphan"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The cleanup reconciler must reclaim orphaned commit-graph branches, not just
|
||||||
|
// per-table forks. A delete whose best-effort commit-graph reclaim fails leaves
|
||||||
|
// a commit-graph orphan; the next cleanup must drop it.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn cleanup_reclaims_orphaned_commit_graph_branch() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let mut db = helpers::init_and_load(&dir).await;
|
||||||
|
|
||||||
|
db.branch_create("feature").await.unwrap();
|
||||||
|
// Delete, failing the commit-graph reclaim → commit-graph "feature" orphan
|
||||||
|
// (manifest branch gone, commit-graph branch left behind).
|
||||||
|
{
|
||||||
|
let _fp = ScopedFailPoint::new("branch_delete.before_commit_graph_reclaim", "return");
|
||||||
|
db.branch_delete("feature").await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let commits_uri = format!("{}/_graph_commits.lance", uri.trim_end_matches('/'));
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&commits_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
ds.list_branches().await.unwrap().contains_key("feature"),
|
||||||
|
"precondition: the commit-graph branch should be orphaned after the failed reclaim"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
db.cleanup(omnigraph::db::CleanupPolicyOptions {
|
||||||
|
keep_versions: Some(1),
|
||||||
|
older_than: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
{
|
||||||
|
let ds = lance::Dataset::open(&commits_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!ds.list_branches().await.unwrap().contains_key("feature"),
|
||||||
|
"cleanup should reclaim the orphaned commit-graph branch"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A branch_delete whose best-effort commit-graph reclaim fails leaves a
|
||||||
|
// commit-graph "zombie" branch. Recreating that name must heal the zombie and
|
||||||
|
// succeed (branch_create force-deletes a stale commit-graph ref since the
|
||||||
|
// manifest branch is created fresh), instead of dying on the leftover ref.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn branch_create_recreates_over_commit_graph_zombie() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let db = Omnigraph::init(dir.path().to_str().unwrap(), helpers::TEST_SCHEMA)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
db.branch_create("feature").await.unwrap();
|
||||||
|
{
|
||||||
|
// Fail the best-effort commit-graph reclaim → commit-graph "feature"
|
||||||
|
// zombie survives the delete (manifest authority still flips).
|
||||||
|
let _fp = ScopedFailPoint::new("branch_delete.before_commit_graph_reclaim", "return");
|
||||||
|
db.branch_delete("feature").await.unwrap();
|
||||||
|
}
|
||||||
|
assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]);
|
||||||
|
|
||||||
|
db.branch_create("feature")
|
||||||
|
.await
|
||||||
|
.expect("branch_create should heal the zombie commit-graph branch and succeed");
|
||||||
|
assert!(
|
||||||
|
db.branch_list()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.contains(&"feature".to_string())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// branch_create is authority-then-derived: if the derived commit-graph branch
|
||||||
|
// cannot be created, the manifest branch (the authority) must be rolled back so
|
||||||
|
// the branch does not half-exist. The existing failpoint fires right after the
|
||||||
|
// manifest create, standing in for any post-authority failure.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn branch_create_rolls_back_manifest_on_commit_graph_failure() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let db = Omnigraph::init(dir.path().to_str().unwrap(), helpers::TEST_SCHEMA)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let err = {
|
||||||
|
let _fp = ScopedFailPoint::new("branch_create.after_manifest_branch_create", "return");
|
||||||
|
db.branch_create("feature").await.unwrap_err()
|
||||||
|
};
|
||||||
|
assert!(
|
||||||
|
!db.branch_list()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.contains(&"feature".to_string()),
|
||||||
|
"branch_create must roll back the manifest branch when the derived \
|
||||||
|
commit-graph branch fails, got error: {err}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// A fork collision must be classified by the manifest authority, not by Lance
|
||||||
|
// branch versions. When a concurrent first-write legitimately wins the fork
|
||||||
|
// race, the loser sees a version mismatch — but that is a stale snapshot, not
|
||||||
|
// an orphan, so it must be a retryable "refresh and retry", never a misleading
|
||||||
|
// "run cleanup".
|
||||||
|
//
|
||||||
|
// Ordering is made deterministic (no sleeps) via a callback at the fork point:
|
||||||
|
// `compare_exchange` lets only the FIRST arrival (writer A) record readiness and
|
||||||
|
// block until released; later arrivals (writer B) fall through. The test waits
|
||||||
|
// on the readiness flag, lets B win and commit the fork, then releases A.
|
||||||
|
static FORK_A_AT_POINT: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
|
||||||
|
static FORK_RELEASE_A: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
|
async fn fork_collision_with_live_concurrent_fork_is_retryable() {
|
||||||
|
use std::sync::atomic::Ordering::SeqCst;
|
||||||
|
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
FORK_A_AT_POINT.store(false, SeqCst);
|
||||||
|
FORK_RELEASE_A.store(false, SeqCst);
|
||||||
|
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let main = helpers::init_and_load(&dir).await;
|
||||||
|
main.branch_create("feature").await.unwrap();
|
||||||
|
|
||||||
|
// First arrival (A) records readiness and blocks until released; the rest
|
||||||
|
// (B) fall through immediately. Bounded spin so a mistake can't hang forever.
|
||||||
|
fail::cfg_callback("fork.before_classify", || {
|
||||||
|
if FORK_A_AT_POINT
|
||||||
|
.compare_exchange(false, true, SeqCst, SeqCst)
|
||||||
|
.is_ok()
|
||||||
|
{
|
||||||
|
for _ in 0..2000 {
|
||||||
|
if FORK_RELEASE_A.load(SeqCst) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(5));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let uri_a = uri.clone();
|
||||||
|
let writer_a = tokio::spawn(async move {
|
||||||
|
let mut a = Omnigraph::open(&uri_a).await.unwrap();
|
||||||
|
helpers::mutate_branch(
|
||||||
|
&mut a,
|
||||||
|
"feature",
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", "Eve")], &[("$age", 22)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait (bounded) until A is parked at the fork point.
|
||||||
|
for _ in 0..600 {
|
||||||
|
if FORK_A_AT_POINT.load(SeqCst) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
|
||||||
|
}
|
||||||
|
assert!(
|
||||||
|
FORK_A_AT_POINT.load(SeqCst),
|
||||||
|
"writer A never reached the fork point"
|
||||||
|
);
|
||||||
|
|
||||||
|
// B wins the fork and commits it.
|
||||||
|
let mut b = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
helpers::mutate_branch(
|
||||||
|
&mut b,
|
||||||
|
"feature",
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", "Frank")], &[("$age", 41)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Release A; it resumes, re-reads the manifest, and sees the fork is live.
|
||||||
|
FORK_RELEASE_A.store(true, SeqCst);
|
||||||
|
let err = writer_a
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.expect_err("A's stale-snapshot fork should be a retryable conflict");
|
||||||
|
fail::remove("fork.before_classify");
|
||||||
|
|
||||||
|
let msg = err.to_string();
|
||||||
|
assert!(
|
||||||
|
!msg.contains("cleanup"),
|
||||||
|
"a live concurrent fork must not be misclassified as an orphan, got: {msg}"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
msg.contains("refresh and retry") || msg.contains("expected manifest table version"),
|
||||||
|
"expected a retryable stale-view error, got: {msg}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread")]
|
#[tokio::test(flavor = "multi_thread")]
|
||||||
async fn graph_publish_failpoint_triggers_before_commit_append() {
|
async fn graph_publish_failpoint_triggers_before_commit_append() {
|
||||||
let _scenario = FailScenario::setup();
|
let _scenario = FailScenario::setup();
|
||||||
|
|
@ -799,7 +1245,7 @@ async fn refresh_defers_rollback_eligible_sidecar_to_next_open() {
|
||||||
// the rollback (will use Dataset::restore safely; no concurrent
|
// the rollback (will use Dataset::restore safely; no concurrent
|
||||||
// writers at open time).
|
// writers at open time).
|
||||||
drop(db);
|
drop(db);
|
||||||
let _db = Omnigraph::open(&uri).await.unwrap();
|
let db = Omnigraph::open(&uri).await.unwrap();
|
||||||
// After full-sweep recovery, the sidecar should be processed
|
// After full-sweep recovery, the sidecar should be processed
|
||||||
// (deleted). Sidecar's tables are eligible for rollback (UnexpectedAtP1):
|
// (deleted). Sidecar's tables are eligible for rollback (UnexpectedAtP1):
|
||||||
// restore happens on Person (HEAD advances by 1).
|
// restore happens on Person (HEAD advances by 1).
|
||||||
|
|
@ -822,6 +1268,19 @@ async fn refresh_defers_rollback_eligible_sidecar_to_next_open() {
|
||||||
"full sweep must run Dataset::restore (head advances); \
|
"full sweep must run Dataset::restore (head advances); \
|
||||||
post_head={post_head}, final_head={final_head}",
|
post_head={post_head}, final_head={final_head}",
|
||||||
);
|
);
|
||||||
|
// Convergence: roll-back published the restored HEAD, so the manifest pin
|
||||||
|
// tracks Lance HEAD afterward (no residual drift).
|
||||||
|
let entry_version = db
|
||||||
|
.snapshot_of(omnigraph::db::ReadTarget::branch("main"))
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.entry("node:Person")
|
||||||
|
.unwrap()
|
||||||
|
.table_version;
|
||||||
|
assert_eq!(
|
||||||
|
entry_version, final_head,
|
||||||
|
"full-sweep roll-back must publish so manifest pin ({entry_version}) == Lance HEAD ({final_head})",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Companion to the above — confirms that a finalize→publisher failure
|
/// Companion to the above — confirms that a finalize→publisher failure
|
||||||
|
|
@ -1015,10 +1474,15 @@ edge WorksAt: Person -> Company
|
||||||
}
|
}
|
||||||
|
|
||||||
let db = Omnigraph::open(&uri).await.unwrap();
|
let db = Omnigraph::open(&uri).await.unwrap();
|
||||||
assert_eq!(
|
// Roll-back now publishes the restored version, so the manifest version
|
||||||
version_main(&db).await.unwrap(),
|
// advances — but to the OLD-schema content: the migration never applied
|
||||||
pre_failure_version,
|
// (asserted by count_rows + the `_schema.pg` checks below), and the sweep
|
||||||
"manifest must remain on the old schema when no schema staging files existed"
|
// converges (`manifest == Lance HEAD`, asserted by
|
||||||
|
// assert_post_recovery_invariants's RolledBack arm).
|
||||||
|
assert!(
|
||||||
|
version_main(&db).await.unwrap() > pre_failure_version,
|
||||||
|
"roll-back publishes the restored (old-schema) version, advancing the manifest; \
|
||||||
|
pre={pre_failure_version}",
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
helpers::count_rows(&db, "node:Person").await,
|
helpers::count_rows(&db, "node:Person").await,
|
||||||
|
|
@ -1191,6 +1655,100 @@ edge WorksAt: Person -> Company
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `optimize` Phase B → Phase C residual: `compact_files` advanced the Lance
|
||||||
|
/// HEAD but the manifest publish hasn't run. The `Optimize` recovery sidecar
|
||||||
|
/// (loose-match, like SchemaApply/EnsureIndices) must roll the compacted version
|
||||||
|
/// forward on next open so the manifest tracks the Lance HEAD — and the healed
|
||||||
|
/// table must then accept a schema apply (the original bug's victim).
|
||||||
|
#[tokio::test]
|
||||||
|
async fn optimize_phase_b_failure_recovered_on_next_open() {
|
||||||
|
let _scenario = FailScenario::setup();
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let operation_id;
|
||||||
|
|
||||||
|
// Seed: several separate Person inserts → multiple fragments, so compaction
|
||||||
|
// has real work and advances the Lance HEAD.
|
||||||
|
{
|
||||||
|
let db = Omnigraph::init(&uri, helpers::TEST_SCHEMA).await.unwrap();
|
||||||
|
for (name, age) in [("alice", 30), ("bob", 31), ("carol", 32), ("dave", 33)] {
|
||||||
|
db.mutate(
|
||||||
|
"main",
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", name)], &[("$age", age)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let pre_failure_version = {
|
||||||
|
let db = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
version_main(&db).await.unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Failpoint fires AFTER compact_files advanced the Lance HEAD but BEFORE the
|
||||||
|
// manifest publish. The Optimize sidecar persists (only node:Person has
|
||||||
|
// compactable fragments, so exactly one sidecar is written).
|
||||||
|
{
|
||||||
|
let db = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
let _failpoint =
|
||||||
|
ScopedFailPoint::new("optimize.post_phase_b_pre_manifest_commit", "return");
|
||||||
|
let err = db.optimize().await.unwrap_err();
|
||||||
|
assert!(
|
||||||
|
err.to_string()
|
||||||
|
.contains("injected failpoint triggered: optimize.post_phase_b_pre_manifest_commit"),
|
||||||
|
"unexpected error: {err}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let recovery_dir = dir.path().join("__recovery");
|
||||||
|
let sidecars: Vec<_> = std::fs::read_dir(&recovery_dir)
|
||||||
|
.unwrap()
|
||||||
|
.filter_map(|e| e.ok())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(
|
||||||
|
sidecars.len(),
|
||||||
|
1,
|
||||||
|
"exactly one Optimize sidecar must persist after optimize failure"
|
||||||
|
);
|
||||||
|
operation_id = single_sidecar_operation_id(dir.path());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recovery: reopen runs the sweep. The Optimize sidecar classifies
|
||||||
|
// RolledPastExpected (loose-match) → RollForward → manifest extends to the
|
||||||
|
// compacted Lance HEAD.
|
||||||
|
let db = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
let post_recovery_version = version_main(&db).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
post_recovery_version > pre_failure_version,
|
||||||
|
"manifest version must advance post-recovery (compaction rolled forward); \
|
||||||
|
pre={pre_failure_version}, post={post_recovery_version}",
|
||||||
|
);
|
||||||
|
drop(db);
|
||||||
|
|
||||||
|
assert_post_recovery_invariants(
|
||||||
|
dir.path(),
|
||||||
|
&operation_id,
|
||||||
|
RecoveryExpectation::RolledForward {
|
||||||
|
tables: vec![TableExpectation::main("node:Person")],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// The healed table accepts an additive schema apply — its HEAD-vs-manifest
|
||||||
|
// precondition is satisfied because recovery published the compacted version.
|
||||||
|
let db = Omnigraph::open(&uri).await.unwrap();
|
||||||
|
let desired = helpers::TEST_SCHEMA.replace(
|
||||||
|
" age: I32?\n}",
|
||||||
|
" age: I32?\n nickname: String?\n}",
|
||||||
|
);
|
||||||
|
db.apply_schema(&desired)
|
||||||
|
.await
|
||||||
|
.expect("schema apply after optimize recovery must succeed");
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn branch_merge_phase_b_failure_recovered_on_next_open() {
|
async fn branch_merge_phase_b_failure_recovered_on_next_open() {
|
||||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||||
|
|
|
||||||
|
|
@ -181,6 +181,9 @@ pub async fn assert_post_recovery_invariants(
|
||||||
"audit row for {operation_id} recorded the wrong recovery_kind",
|
"audit row for {operation_id} recorded the wrong recovery_kind",
|
||||||
);
|
);
|
||||||
assert_rollback_outcomes_record_drift(&audit);
|
assert_rollback_outcomes_record_drift(&audit);
|
||||||
|
// Roll-back now publishes the restored HEAD, so manifest == Lance
|
||||||
|
// HEAD afterward (symmetric with roll-forward) — no residual drift.
|
||||||
|
assert_manifest_pins_match_lance_heads(graph_root, &tables).await?;
|
||||||
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
|
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
|
||||||
assert_non_main_did_not_move_main(graph_root, &tables).await?;
|
assert_non_main_did_not_move_main(graph_root, &tables).await?;
|
||||||
assert_idempotent_reopen(graph_root, operation_id).await?;
|
assert_idempotent_reopen(graph_root, operation_id).await?;
|
||||||
|
|
|
||||||
|
|
@ -242,3 +242,136 @@ async fn _compile_delete_result_field_shape() -> lance::Result<()> {
|
||||||
let _num_deleted: u64 = result.num_deleted_rows;
|
let _num_deleted: u64 = result.num_deleted_rows;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Guard 9: force_delete_branch semantics --------------------------------
|
||||||
|
//
|
||||||
|
// The branch-delete reconciler (`db/omnigraph/optimize.rs::reconcile_orphaned_branches`)
|
||||||
|
// and the eager best-effort reclaim in `cleanup_deleted_branch_tables` call
|
||||||
|
// `force_delete_branch` to drop orphaned branch refs. The single-authority
|
||||||
|
// design relies on three facts pinned here:
|
||||||
|
// 1. plain `delete_branch` errors on a missing ref (so the design uses the
|
||||||
|
// force variant instead);
|
||||||
|
// 2. `force_delete_branch` removes an existing (forked) branch — the orphan
|
||||||
|
// case, where a `tree/{branch}/` exists;
|
||||||
|
// 3. `force_delete_branch` on a *fully-absent* branch (no tree dir) still
|
||||||
|
// errors on the local store, because `remove_dir_all`'s NotFound is not
|
||||||
|
// caught for Lance's native error variant. `TableStore::force_delete_branch`
|
||||||
|
// wraps this to be fully idempotent. Pin the raw quirk so a future Lance
|
||||||
|
// fix (which would let us simplify the wrapper) is noticed.
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn force_delete_branch_semantics() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().join("guard9.lance");
|
||||||
|
let uri = uri.to_str().unwrap();
|
||||||
|
let mut ds = fresh_dataset(uri).await;
|
||||||
|
|
||||||
|
// (1) Plain delete of a never-created branch errors (RefNotFound).
|
||||||
|
assert!(
|
||||||
|
ds.delete_branch("nope").await.is_err(),
|
||||||
|
"Dataset::delete_branch on a missing ref should error; if this is now \
|
||||||
|
Ok, the reconciler could drop the force variant."
|
||||||
|
);
|
||||||
|
|
||||||
|
// (2) force_delete_branch removes an existing (forked) branch.
|
||||||
|
let base = ds.version().version;
|
||||||
|
ds.create_branch("feature", base, None).await.unwrap();
|
||||||
|
ds.force_delete_branch("feature").await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!ds.list_branches().await.unwrap().contains_key("feature"),
|
||||||
|
"force_delete_branch should remove an existing branch ref"
|
||||||
|
);
|
||||||
|
|
||||||
|
// (3) Quirk: force_delete on a fully-absent branch errors on the local
|
||||||
|
// store (worked around by TableStore::force_delete_branch).
|
||||||
|
assert!(
|
||||||
|
ds.force_delete_branch("never").await.is_err(),
|
||||||
|
"force_delete_branch on a fully-absent branch no longer errors — \
|
||||||
|
TableStore::force_delete_branch's NotFound tolerance can be simplified."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Guard 10: blob-column compaction is still broken in this Lance --------
|
||||||
|
//
|
||||||
|
// `db/omnigraph/optimize.rs` skips tables with blob columns while
|
||||||
|
// `LANCE_SUPPORTS_BLOB_COMPACTION = false`: Lance `compact_files` forces
|
||||||
|
// `BlobHandling::AllBinary`, and the blob-v2 struct decoder mis-counts columns
|
||||||
|
// ("more fields in the schema than provided column indices"), failing even a
|
||||||
|
// pristine uniform-V2_2 multi-fragment blob table. Reads are unaffected (they
|
||||||
|
// use descriptor handling).
|
||||||
|
//
|
||||||
|
// WHEN THIS TEST TURNS RED (compact_files no longer errors), the Lance bug is
|
||||||
|
// fixed: flip `LANCE_SUPPORTS_BLOB_COMPACTION` to true in optimize.rs, drop the
|
||||||
|
// blob-skip branch + the `optimize_skips_blob_table_and_reports_skip`
|
||||||
|
// skip assertions in maintenance.rs, and re-pin docs/dev/lance.md.
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn compact_files_still_fails_on_blob_columns() {
|
||||||
|
use arrow_array::{LargeBinaryArray, StructArray};
|
||||||
|
|
||||||
|
fn blob_batch(start: i32, n: i32) -> RecordBatch {
|
||||||
|
let ids: Vec<String> = (start..start + n).map(|i| format!("n{i}")).collect();
|
||||||
|
let data =
|
||||||
|
LargeBinaryArray::from_iter_values((start..start + n).map(|i| format!("blob{i}")));
|
||||||
|
let blob_uri = StringArray::from(vec![None::<&str>; n as usize]);
|
||||||
|
let DataType::Struct(fields) = lance::blob::blob_field("content", true).data_type().clone()
|
||||||
|
else {
|
||||||
|
unreachable!("blob_field is always a Struct");
|
||||||
|
};
|
||||||
|
let content = StructArray::new(
|
||||||
|
fields,
|
||||||
|
vec![Arc::new(data) as _, Arc::new(blob_uri) as _],
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
let schema = Arc::new(Schema::new(vec![
|
||||||
|
Field::new("id", DataType::Utf8, false),
|
||||||
|
lance::blob::blob_field("content", true),
|
||||||
|
]));
|
||||||
|
RecordBatch::try_new(
|
||||||
|
schema,
|
||||||
|
vec![Arc::new(StringArray::from(ids)) as _, Arc::new(content) as _],
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write(uri: &str, batch: RecordBatch, mode: WriteMode) {
|
||||||
|
let schema = batch.schema();
|
||||||
|
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||||
|
// Blob v2 requires file version >= 2.2; without the pin the *write*
|
||||||
|
// would fail with a different error, masking the guard's intent.
|
||||||
|
let params = WriteParams {
|
||||||
|
mode,
|
||||||
|
enable_stable_row_ids: true,
|
||||||
|
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
Dataset::write(reader, uri, Some(params)).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().join("guard10-blob.lance");
|
||||||
|
let uri = uri.to_str().unwrap();
|
||||||
|
|
||||||
|
// Uniform V2_2, two fragments → forces compaction to actually rewrite.
|
||||||
|
write(uri, blob_batch(0, 2), WriteMode::Create).await;
|
||||||
|
write(uri, blob_batch(100, 2), WriteMode::Append).await;
|
||||||
|
|
||||||
|
let mut ds = Dataset::open(uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
ds.get_fragments().len() >= 2,
|
||||||
|
"guard needs a multi-fragment table to trigger a real compaction rewrite"
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = compact_files(&mut ds, CompactionOptions::default(), None).await;
|
||||||
|
let err = result.expect_err(
|
||||||
|
"compact_files unexpectedly SUCCEEDED on a blob table — the Lance blob-v2 \
|
||||||
|
compaction bug is fixed. Flip LANCE_SUPPORTS_BLOB_COMPACTION to true in \
|
||||||
|
db/omnigraph/optimize.rs, remove the blob-skip branch, and re-pin docs/dev/lance.md.",
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
err.to_string()
|
||||||
|
.contains("more fields in the schema than provided column indices"),
|
||||||
|
"blob compaction failed with an unexpected error (Lance internals may have \
|
||||||
|
shifted): {err}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,10 +7,25 @@ mod helpers;
|
||||||
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use omnigraph::db::{CleanupPolicyOptions, Omnigraph};
|
use lance::Dataset;
|
||||||
|
use omnigraph::db::{CleanupPolicyOptions, Omnigraph, ReadTarget, SkipReason};
|
||||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||||
|
|
||||||
use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load};
|
use helpers::{
|
||||||
|
MUTATION_QUERIES, TEST_DATA, TEST_SCHEMA, count_rows, init_and_load, mixed_params, mutate_main,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Filesystem URI of a node sub-table, mirroring the engine's layout
|
||||||
|
/// (FNV-1a of the type name under `nodes/`). Matches the helper in
|
||||||
|
/// `failpoints.rs`; used to inspect/forge Lance branches directly in tests.
|
||||||
|
fn node_table_uri(root: &str, type_name: &str) -> String {
    // 64-bit FNV-1a parameters: offset basis and prime.
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x100_0000_01b3;

    // FNV-1a step per byte: XOR the byte in, then multiply (wrapping) by the prime.
    let digest = type_name
        .bytes()
        .fold(FNV_OFFSET_BASIS, |acc, byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        });

    // Trailing slashes on the root are dropped so the join never doubles them;
    // the hash is rendered as 16 zero-padded lowercase hex digits.
    let base = root.trim_end_matches('/');
    format!("{base}/nodes/{digest:016x}")
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
|
async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
|
||||||
|
|
@ -59,6 +74,215 @@ async fn optimize_after_load_then_again_is_idempotent() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression: `optimize` must not crash on a graph that has a `Blob` table.
|
||||||
|
//
|
||||||
|
// Lance `compact_files` forces `BlobHandling::AllBinary`, which mis-decodes
|
||||||
|
// blob-v2 columns ("more fields in the schema than provided column indices"),
|
||||||
|
// failing even a pristine uniform-V2_2 multi-fragment blob table. `optimize`
|
||||||
|
// must skip blob-bearing tables (and report the skip) rather than aborting the
|
||||||
|
// whole sweep.
|
||||||
|
//
|
||||||
|
// Before the skip fix, `optimize()` returned that Lance error here and aborted
|
||||||
|
// the whole sweep; it now skips the blob table (`doc.skipped == Some(..)`)
|
||||||
|
// while the sibling non-blob `Tag` table still compacts. The skip is gated by
|
||||||
|
// `LANCE_SUPPORTS_BLOB_COMPACTION`; the surface guard
|
||||||
|
// `compact_files_still_fails_on_blob_columns` flags when the upstream Lance fix
|
||||||
|
// makes the skip (and this test's blob arm) removable.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn optimize_skips_blob_table_and_reports_skip() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap();
|
||||||
|
// One Blob node type (`Doc`) + one plain node type (`Tag`): proves the blob
|
||||||
|
// table is skipped while a non-blob table in the same sweep still compacts.
|
||||||
|
let schema = "\
|
||||||
|
node Doc {\n slug: String @key\n content: Blob\n}\n\
|
||||||
|
node Tag {\n slug: String @key\n}\n";
|
||||||
|
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||||
|
|
||||||
|
// Multi-fragment blob table: Overwrite creates fragment 1; each Merge of
|
||||||
|
// new keys appends another. A >=2-fragment blob table is exactly what
|
||||||
|
// crashes `compact_files` today (single fragment would no-op and not crash).
|
||||||
|
load_jsonl(
|
||||||
|
&mut db,
|
||||||
|
"{\"type\":\"Doc\",\"data\":{\"slug\":\"d1\",\"content\":\"base64:aGVsbG8x\"}}\n{\"type\":\"Doc\",\"data\":{\"slug\":\"d2\",\"content\":\"base64:aGVsbG8y\"}}",
|
||||||
|
LoadMode::Overwrite,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
load_jsonl(
|
||||||
|
&mut db,
|
||||||
|
"{\"type\":\"Doc\",\"data\":{\"slug\":\"d3\",\"content\":\"base64:aGVsbG8z\"}}",
|
||||||
|
LoadMode::Merge,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
load_jsonl(
|
||||||
|
&mut db,
|
||||||
|
"{\"type\":\"Doc\",\"data\":{\"slug\":\"d4\",\"content\":\"base64:aGVsbG80\"}}",
|
||||||
|
LoadMode::Merge,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// Plain table, also multi-fragment so it has something to compact.
|
||||||
|
load_jsonl(
|
||||||
|
&mut db,
|
||||||
|
"{\"type\":\"Tag\",\"data\":{\"slug\":\"t1\"}}\n{\"type\":\"Tag\",\"data\":{\"slug\":\"t2\"}}",
|
||||||
|
LoadMode::Merge,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
load_jsonl(
|
||||||
|
&mut db,
|
||||||
|
"{\"type\":\"Tag\",\"data\":{\"slug\":\"t3\"}}",
|
||||||
|
LoadMode::Merge,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let stats = db
|
||||||
|
.optimize()
|
||||||
|
.await
|
||||||
|
.expect("optimize must not crash on a graph with a Blob table");
|
||||||
|
|
||||||
|
let doc = stats
|
||||||
|
.iter()
|
||||||
|
.find(|s| s.table_key == "node:Doc")
|
||||||
|
.expect("Doc stat present");
|
||||||
|
let tag = stats
|
||||||
|
.iter()
|
||||||
|
.find(|s| s.table_key == "node:Tag")
|
||||||
|
.expect("Tag stat present");
|
||||||
|
// The blob table is skipped (and reported), not compacted.
|
||||||
|
assert_eq!(
|
||||||
|
doc.skipped,
|
||||||
|
Some(SkipReason::BlobColumnsUnsupportedByLance),
|
||||||
|
"blob table must be reported as skipped",
|
||||||
|
);
|
||||||
|
assert!(!doc.committed, "skipped blob table is not compacted");
|
||||||
|
assert_eq!(doc.fragments_removed, 0);
|
||||||
|
assert_eq!(doc.fragments_added, 0);
|
||||||
|
// The plain (non-blob) table is unaffected by the skip.
|
||||||
|
assert_eq!(tag.skipped, None, "non-blob table must not be skipped");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regression: `optimize` must publish its compaction to the `__manifest` so the
|
||||||
|
// manifest's recorded `table_version` tracks the compacted Lance HEAD.
|
||||||
|
//
|
||||||
|
// Lance `compact_files` advances the *dataset's* version (reserve-fragments +
|
||||||
|
// rewrite commits) but knows nothing about OmniGraph's `__manifest`. If optimize
|
||||||
|
// does not publish a manifest update, the manifest's `table_version` lags the
|
||||||
|
// Lance HEAD: reads stay pinned to the pre-compaction version (compaction is
|
||||||
|
// invisible to them) and any subsequent schema apply / strict update/delete
|
||||||
|
// fails its HEAD-vs-manifest precondition with
|
||||||
|
// "stale view of '<table>': expected manifest table version X but current is Y".
|
||||||
|
// This pins the fix — optimize publishes the compacted version, so manifest ==
|
||||||
|
// HEAD and migrations after a compaction succeed.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn optimize_publishes_compaction_to_manifest_so_schema_apply_succeeds() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let root = dir.path().to_str().unwrap().trim_end_matches('/').to_string();
|
||||||
|
let mut db = init_and_load(&dir).await;
|
||||||
|
|
||||||
|
// Several separate inserts → multiple Person fragments, so `compact_files`
|
||||||
|
// actually merges and moves the Lance HEAD (a single fragment is a no-op).
|
||||||
|
for (name, age) in [("Eve", 40), ("Frank", 41), ("Grace", 42), ("Heidi", 43)] {
|
||||||
|
mutate_main(
|
||||||
|
&mut db,
|
||||||
|
MUTATION_QUERIES,
|
||||||
|
"insert_person",
|
||||||
|
&mixed_params(&[("$name", name)], &[("$age", age as i64)]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("insert");
|
||||||
|
}
|
||||||
|
|
||||||
|
let stats = db.optimize().await.unwrap();
|
||||||
|
let person = stats
|
||||||
|
.iter()
|
||||||
|
.find(|s| s.table_key == "node:Person")
|
||||||
|
.expect("Person stat present");
|
||||||
|
assert!(
|
||||||
|
person.committed,
|
||||||
|
"Person is multi-fragment, so optimize must have compacted it"
|
||||||
|
);
|
||||||
|
|
||||||
|
// After optimize, the manifest's recorded table_version must equal the actual
|
||||||
|
// Lance HEAD — optimize published its compaction, so there is no drift.
|
||||||
|
let snap = db.snapshot_of(ReadTarget::branch("main")).await.unwrap();
|
||||||
|
let entry = snap.entry("node:Person").unwrap();
|
||||||
|
let manifest_version = entry.table_version;
|
||||||
|
let full = format!("{}/{}", root, entry.table_path);
|
||||||
|
let lance_head = Dataset::open(&full).await.unwrap().version().version;
|
||||||
|
assert_eq!(
|
||||||
|
manifest_version, lance_head,
|
||||||
|
"after optimize, manifest table_version ({manifest_version}) must equal Lance HEAD ({lance_head})",
|
||||||
|
);
|
||||||
|
|
||||||
|
// Reads observe the compacted version with rows preserved (4 seed + 4 inserts).
|
||||||
|
assert_eq!(count_rows(&db, "node:Person").await, 8);
|
||||||
|
|
||||||
|
// The headline: an additive (nullable property) migration touching the
|
||||||
|
// just-compacted table succeeds, where it previously failed with "stale view".
|
||||||
|
let desired = TEST_SCHEMA.replace(
|
||||||
|
" age: I32?\n}",
|
||||||
|
" age: I32?\n nickname: String?\n}",
|
||||||
|
);
|
||||||
|
let result = db
|
||||||
|
.apply_schema(&desired)
|
||||||
|
.await
|
||||||
|
.expect("additive schema apply after optimize must succeed");
|
||||||
|
assert!(result.applied, "schema apply should report applied=true");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regression: `optimize` must REFUSE when an unresolved recovery sidecar is
|
||||||
|
// pending. Operating on an unrecovered graph could publish a partial write that
|
||||||
|
// the all-or-nothing recovery sweep would roll back; the operator must reopen
|
||||||
|
// (run the recovery sweep) first.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn optimize_defers_when_recovery_sidecar_is_pending() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap();
|
||||||
|
let db = init_and_load(&dir).await;
|
||||||
|
|
||||||
|
// Simulate an in-process failed write that left a recovery sidecar on disk.
|
||||||
|
let recovery_dir = dir.path().join("__recovery");
|
||||||
|
std::fs::create_dir_all(&recovery_dir).unwrap();
|
||||||
|
let person_path = node_table_uri(uri, "Person");
|
||||||
|
let sidecar_json = format!(
|
||||||
|
r#"{{
|
||||||
|
"schema_version": 1,
|
||||||
|
"operation_id": "01H000000000000000000DEFR",
|
||||||
|
"started_at": "0",
|
||||||
|
"branch": null,
|
||||||
|
"actor_id": "act-test",
|
||||||
|
"writer_kind": "Mutation",
|
||||||
|
"tables": [
|
||||||
|
{{
|
||||||
|
"table_key": "node:Person",
|
||||||
|
"table_path": "{}",
|
||||||
|
"expected_version": 1,
|
||||||
|
"post_commit_pin": 2
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}"#,
|
||||||
|
person_path
|
||||||
|
);
|
||||||
|
std::fs::write(
|
||||||
|
recovery_dir.join("01H000000000000000000DEFR.json"),
|
||||||
|
sidecar_json,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let err = db
|
||||||
|
.optimize()
|
||||||
|
.await
|
||||||
|
.expect_err("optimize must defer (error) while a recovery sidecar is pending");
|
||||||
|
assert!(
|
||||||
|
err.to_string().to_lowercase().contains("recovery"),
|
||||||
|
"optimize defer error should mention recovery; got: {err}",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn cleanup_without_any_policy_option_errors() {
|
async fn cleanup_without_any_policy_option_errors() {
|
||||||
let dir = tempfile::tempdir().unwrap();
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
|
@ -158,3 +382,59 @@ async fn cleanup_then_optimize_preserves_rows_and_table_remains_writable() {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(count_rows(&db, "node:Person").await, people_before);
|
assert_eq!(count_rows(&db, "node:Person").await, people_before);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn cleanup_reconciles_orphaned_branch_forks() {
|
||||||
|
// An incomplete prior `branch_delete` can leave a per-table Lance branch
|
||||||
|
// that the manifest no longer references (a "zombie" fork). It is
|
||||||
|
// unreachable through any snapshot but pins its `tree/{branch}/` storage.
|
||||||
|
// `cleanup` must reconcile it away: drop every Lance branch absent from the
|
||||||
|
// manifest authority, without touching `main`.
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap().to_string();
|
||||||
|
let mut db = init_and_load(&dir).await;
|
||||||
|
|
||||||
|
let people_before = count_rows(&db, "node:Person").await;
|
||||||
|
assert!(people_before > 0, "fixture should seed Person rows");
|
||||||
|
|
||||||
|
// Forge an orphaned fork the manifest never knew about.
|
||||||
|
let person_uri = node_table_uri(&uri, "Person");
|
||||||
|
{
|
||||||
|
let mut ds = Dataset::open(&person_uri).await.unwrap();
|
||||||
|
let base = ds.version().version;
|
||||||
|
ds.create_branch("ghost", base, None).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
ds.list_branches().await.unwrap().contains_key("ghost"),
|
||||||
|
"precondition: orphaned fork staged"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
db.cleanup(CleanupPolicyOptions {
|
||||||
|
keep_versions: Some(1),
|
||||||
|
older_than: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Orphan reclaimed; main untouched.
|
||||||
|
{
|
||||||
|
let ds = Dataset::open(&person_uri).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!ds.list_branches().await.unwrap().contains_key("ghost"),
|
||||||
|
"cleanup should reconcile the orphaned 'ghost' fork away"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
assert_eq!(
|
||||||
|
count_rows(&db, "node:Person").await,
|
||||||
|
people_before,
|
||||||
|
"cleanup must not disturb main while reconciling orphans"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Idempotent: a second cleanup with the orphan already gone is a no-op.
|
||||||
|
db.cleanup(CleanupPolicyOptions {
|
||||||
|
keep_versions: Some(1),
|
||||||
|
older_than: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -278,6 +278,97 @@ async fn recovery_rolls_back_synthetic_drift_on_open() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression: recovery roll-back must PUBLISH the restored version so
|
||||||
|
/// `manifest == Lance HEAD` afterward (no residual "orphaned drift"). Before the
|
||||||
|
/// fix, roll-back restored via `Dataset::restore` but left the manifest pin
|
||||||
|
/// behind HEAD, so a subsequent strict write / schema apply failed its
|
||||||
|
/// HEAD-vs-manifest precondition ("stale view … refresh and retry") — and a
|
||||||
|
/// failed schema apply's own roll-back leaked +1 each retry (the original bug's
|
||||||
|
/// loop). With convergence, one roll-back leaves `manifest == HEAD` and the
|
||||||
|
/// follow-up succeeds.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn recovery_rollback_converges_manifest_so_schema_apply_succeeds() {
|
||||||
|
use omnigraph::db::ReadTarget;
|
||||||
|
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||||
|
use omnigraph::table_store::TableStore;
|
||||||
|
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let uri = dir.path().to_str().unwrap();
|
||||||
|
|
||||||
|
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||||
|
load_jsonl(
|
||||||
|
&mut db,
|
||||||
|
r#"{"type":"Person","data":{"name":"alice","age":30}}
|
||||||
|
{"type":"Person","data":{"name":"bob","age":25}}
|
||||||
|
"#,
|
||||||
|
LoadMode::Append,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
drop(db);
|
||||||
|
|
||||||
|
// Forge a Phase-B residual: advance Person's Lance HEAD without publishing to
|
||||||
|
// the manifest (the manifest pin stays at the load's committed version).
|
||||||
|
let person_uri = node_table_uri(uri, "Person");
|
||||||
|
let store = TableStore::new(uri);
|
||||||
|
let mut ds = Dataset::open(&person_uri).await.unwrap();
|
||||||
|
let manifest_pin = ds.version().version;
|
||||||
|
let _ = store
|
||||||
|
.delete_where(&person_uri, &mut ds, "1 = 2")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
drop(ds);
|
||||||
|
|
||||||
|
// Roll-back-classified sidecar (post_commit_pin != observed head ⇒
|
||||||
|
// UnexpectedAtP1 ⇒ RollBack).
|
||||||
|
let sidecar_json = format!(
|
||||||
|
r#"{{
|
||||||
|
"schema_version": 1,
|
||||||
|
"operation_id": "01H0000000000000000000CVG",
|
||||||
|
"started_at": "0",
|
||||||
|
"branch": null,
|
||||||
|
"actor_id": "act-test",
|
||||||
|
"writer_kind": "Mutation",
|
||||||
|
"tables": [
|
||||||
|
{{
|
||||||
|
"table_key": "node:Person",
|
||||||
|
"table_path": "{}",
|
||||||
|
"expected_version": {},
|
||||||
|
"post_commit_pin": {}
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}"#,
|
||||||
|
person_uri, manifest_pin, manifest_pin
|
||||||
|
);
|
||||||
|
write_sidecar_file(dir.path(), "01H0000000000000000000CVG", &sidecar_json);
|
||||||
|
|
||||||
|
// Reopen runs the sweep: restore Person to manifest_pin, then PUBLISH so the
|
||||||
|
// manifest tracks the restored Lance HEAD.
|
||||||
|
let db = Omnigraph::open(uri).await.unwrap();
|
||||||
|
|
||||||
|
// Convergence: manifest pin == Lance HEAD. Fails before the fix — the
|
||||||
|
// manifest stays at manifest_pin while HEAD advanced past it.
|
||||||
|
let snap = db.snapshot_of(ReadTarget::branch("main")).await.unwrap();
|
||||||
|
let entry = snap.entry("node:Person").unwrap();
|
||||||
|
let lance_head = Dataset::open(&person_uri).await.unwrap().version().version;
|
||||||
|
assert_eq!(
|
||||||
|
entry.table_version, lance_head,
|
||||||
|
"roll-back must publish so manifest pin ({}) == Lance HEAD ({})",
|
||||||
|
entry.table_version, lance_head,
|
||||||
|
);
|
||||||
|
|
||||||
|
// The +1-loop victim: an additive schema apply must now succeed (its
|
||||||
|
// HEAD-vs-manifest precondition is satisfied). Before the fix this failed
|
||||||
|
// with "stale view … refresh and retry".
|
||||||
|
let desired = TEST_SCHEMA.replace(
|
||||||
|
" age: I32?\n}",
|
||||||
|
" age: I32?\n nickname: String?\n}",
|
||||||
|
);
|
||||||
|
db.apply_schema(&desired)
|
||||||
|
.await
|
||||||
|
.expect("schema apply after a converging roll-back must succeed");
|
||||||
|
}
|
||||||
|
|
||||||
// =====================================================================
|
// =====================================================================
|
||||||
// Phase 4 — roll-forward path + audit row recording
|
// Phase 4 — roll-forward path + audit row recording
|
||||||
// =====================================================================
|
// =====================================================================
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
//! exercise `stage_append`, `stage_merge_insert`, `scan_with_staged`,
|
//! exercise `stage_append`, `stage_merge_insert`, `scan_with_staged`,
|
||||||
//! and `count_rows_with_staged` directly against a Lance dataset — no
|
//! and `count_rows_with_staged` directly against a Lance dataset — no
|
||||||
//! Omnigraph engine involved. The engine-level use of these primitives
|
//! Omnigraph engine involved. The engine-level use of these primitives
|
||||||
//! is exercised by `tests/runs.rs`.
|
//! is exercised by `tests/writes.rs`.
|
||||||
//!
|
//!
|
||||||
//! Test surface here:
|
//! Test surface here:
|
||||||
//! 1. `stage_append` + `scan_with_staged` shows committed + staged data
|
//! 1. `stage_append` + `scan_with_staged` shows committed + staged data
|
||||||
|
|
@ -709,7 +709,7 @@ async fn stage_create_inverted_index_does_not_advance_head_until_commit() {
|
||||||
///
|
///
|
||||||
/// **When Lance #6658 lands**: this test will need to flip — replace
|
/// **When Lance #6658 lands**: this test will need to flip — replace
|
||||||
/// the assertion with a `stage_delete` + `commit_staged` round-trip
|
/// the assertion with a `stage_delete` + `commit_staged` round-trip
|
||||||
/// and remove the residual line in `docs/runs.md`.
|
/// and remove the residual line in `docs/dev/writes.md`.
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn delete_where_advances_head_inline_documents_residual() {
|
async fn delete_where_advances_head_inline_documents_residual() {
|
||||||
let dir = tempfile::tempdir().unwrap();
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
//! Tests for the direct-to-target write path (Run state machine
|
//! Tests for the direct-publish write path: mutations and loads write
|
||||||
//! removed). The Run/`__run__` staging branch / RunRecord state machine no
|
//! directly to target tables and commit once via the publisher's
|
||||||
//! longer exists; mutations and loads write directly to target tables and
|
//! `expected_table_versions` CAS. (History: this replaced the removed Run
|
||||||
//! commit once via the publisher's `expected_table_versions` CAS.
|
//! state machine / `__run__` staging branches / RunRecord — MR-771.)
|
||||||
//!
|
//!
|
||||||
//! What this file covers:
|
//! What this file covers:
|
||||||
//! - No `__run__*` branches are created by load or mutate.
|
//! - No `__run__*` branches are created by load or mutate.
|
||||||
|
|
@ -371,11 +371,10 @@ async fn cancelled_mutation_future_leaves_no_state() {
|
||||||
|
|
||||||
// Cancel-safety property: no graph-level run/staging state remains.
|
// Cancel-safety property: no graph-level run/staging state remains.
|
||||||
//
|
//
|
||||||
// Note: `branch_list()` already filters `__run__*` via
|
// No `__run__` branches can ever be created: the Run state machine
|
||||||
// `is_internal_system_branch`, so a runtime "no `__run__` branches" check
|
// (`begin_run` etc.) was deleted in MR-771 — verified by the build itself,
|
||||||
// would be vacuous. The structural property that no `__run__` branches
|
// those symbols no longer exist. Any legacy `__run__*` branch on an
|
||||||
// can ever be created is enforced by deletion of `begin_run` etc. in
|
// upgraded graph is swept by the v2→v3 manifest migration.
|
||||||
// (verified by the build itself — those symbols no longer exist).
|
|
||||||
//
|
//
|
||||||
// (1) The branch list is unchanged: cancellation/completion cannot
|
// (1) The branch list is unchanged: cancellation/completion cannot
|
||||||
// synthesize new public branches.
|
// synthesize new public branches.
|
||||||
|
|
@ -442,34 +441,40 @@ async fn repeated_loads_do_not_accumulate_branches() {
|
||||||
assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]);
|
assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// User code must not be able to write to internal `__run__*` names.
|
/// After MR-770, `__run__*` is an ordinary branch name — the Run state machine
|
||||||
/// The branch-name guard predicate is kept as defense-in-depth; it
|
/// and its `is_internal_run_branch` guard are gone. The surviving internal-ref
|
||||||
/// will be removed once a future production sweep retires the legacy
|
/// guard still rejects the active `__schema_apply_lock__` branch on the public
|
||||||
/// branches.
|
/// create/merge APIs.
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn public_branch_apis_reject_internal_run_refs() {
|
async fn public_branch_apis_reject_internal_system_refs() {
|
||||||
let dir = tempfile::tempdir().unwrap();
|
let dir = tempfile::tempdir().unwrap();
|
||||||
let mut db = init_and_load(&dir).await;
|
let mut db = init_and_load(&dir).await;
|
||||||
|
|
||||||
let create_err = db.branch_create("__run__synthetic").await.unwrap_err();
|
// `__run__*` is no longer reserved — creating it now succeeds.
|
||||||
|
db.branch_create("__run__formerly_reserved")
|
||||||
|
.await
|
||||||
|
.expect("__run__ prefix is a normal branch name post-MR-770");
|
||||||
|
|
||||||
|
// The schema-apply lock branch is still rejected on public branch APIs.
|
||||||
|
let create_err = db.branch_create("__schema_apply_lock__").await.unwrap_err();
|
||||||
let OmniError::Manifest(err) = create_err else {
|
let OmniError::Manifest(err) = create_err else {
|
||||||
panic!("expected Manifest error");
|
panic!("expected Manifest error");
|
||||||
};
|
};
|
||||||
assert!(
|
assert!(
|
||||||
err.message.contains("internal run ref"),
|
err.message.contains("internal system ref"),
|
||||||
"unexpected error: {}",
|
"unexpected error: {}",
|
||||||
err.message
|
err.message
|
||||||
);
|
);
|
||||||
|
|
||||||
let merge_err = db
|
let merge_err = db
|
||||||
.branch_merge("__run__synthetic", "main")
|
.branch_merge("__schema_apply_lock__", "main")
|
||||||
.await
|
.await
|
||||||
.unwrap_err();
|
.unwrap_err();
|
||||||
let OmniError::Manifest(err) = merge_err else {
|
let OmniError::Manifest(err) = merge_err else {
|
||||||
panic!("expected Manifest error");
|
panic!("expected Manifest error");
|
||||||
};
|
};
|
||||||
assert!(
|
assert!(
|
||||||
err.message.contains("internal run refs"),
|
err.message.contains("internal system refs"),
|
||||||
"unexpected error: {}",
|
"unexpected error: {}",
|
||||||
err.message
|
err.message
|
||||||
);
|
);
|
||||||
|
|
@ -9,8 +9,14 @@ fi
|
||||||
|
|
||||||
bind="${OMNIGRAPH_BIND:-0.0.0.0:8080}"
|
bind="${OMNIGRAPH_BIND:-0.0.0.0:8080}"
|
||||||
|
|
||||||
|
# URI comes from the env var (the positional arg wins over any config
|
||||||
|
# `graphs` block in resolve_target_uri). OMNIGRAPH_CONFIG, when also set,
|
||||||
|
# is forwarded as --config purely to supply a policy file — the two
|
||||||
|
# compose. Without OMNIGRAPH_CONFIG the behavior is unchanged.
|
||||||
if [ -n "${OMNIGRAPH_TARGET_URI:-}" ]; then
|
if [ -n "${OMNIGRAPH_TARGET_URI:-}" ]; then
|
||||||
exec "$SERVER_BIN" "${OMNIGRAPH_TARGET_URI}" --bind "${bind}"
|
exec "$SERVER_BIN" "${OMNIGRAPH_TARGET_URI}" \
|
||||||
|
${OMNIGRAPH_CONFIG:+--config "$OMNIGRAPH_CONFIG"} \
|
||||||
|
--bind "${bind}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "${OMNIGRAPH_CONFIG:-}" ]; then
|
if [ -n "${OMNIGRAPH_CONFIG:-}" ]; then
|
||||||
|
|
@ -28,5 +34,7 @@ omnigraph-server container startup requires one of:
|
||||||
Optional:
|
Optional:
|
||||||
- OMNIGRAPH_BIND (default: 0.0.0.0:8080)
|
- OMNIGRAPH_BIND (default: 0.0.0.0:8080)
|
||||||
- OMNIGRAPH_TARGET (used with OMNIGRAPH_CONFIG)
|
- OMNIGRAPH_TARGET (used with OMNIGRAPH_CONFIG)
|
||||||
|
- OMNIGRAPH_CONFIG (may also accompany OMNIGRAPH_TARGET_URI to add a
|
||||||
|
policy file; the URI still comes from OMNIGRAPH_TARGET_URI)
|
||||||
EOF
|
EOF
|
||||||
exit 64
|
exit 64
|
||||||
|
|
|
||||||
65
docker/entrypoint_test.sh
Executable file
65
docker/entrypoint_test.sh
Executable file
|
|
@ -0,0 +1,65 @@
|
||||||
|
#!/bin/sh
|
||||||
|
# Self-contained test for docker/entrypoint.sh argument composition.
|
||||||
|
# Runs the entrypoint against a stub server that echoes its args, and
|
||||||
|
# asserts the forwarded argv for each startup mode. No Docker required.
|
||||||
|
#
|
||||||
|
# sh docker/entrypoint_test.sh
|
||||||
|
#
|
||||||
|
# Exits 0 on success, 1 on the first mismatch.
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
here=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
|
||||||
|
entrypoint="$here/entrypoint.sh"
|
||||||
|
|
||||||
|
work=$(mktemp -d)
|
||||||
|
trap 'rm -rf "$work"' EXIT
|
||||||
|
mkdir -p "$work/bin"
|
||||||
|
cat > "$work/bin/omnigraph-server" <<'EOF'
|
||||||
|
#!/bin/sh
|
||||||
|
echo "ARGS: $*"
|
||||||
|
EOF
|
||||||
|
chmod +x "$work/bin/omnigraph-server"
|
||||||
|
|
||||||
|
# Run the real entrypoint with SERVER_BIN pointed at the stub.
|
||||||
|
ep="$work/entrypoint.sh"
|
||||||
|
sed "s#SERVER_BIN=\"/usr/local/bin/omnigraph-server\"#SERVER_BIN=\"$work/bin/omnigraph-server\"#" \
|
||||||
|
"$entrypoint" > "$ep"
|
||||||
|
|
||||||
|
fail=0
|
||||||
|
check() {
|
||||||
|
desc=$1; want=$2; got=$3
|
||||||
|
if [ "$got" != "$want" ]; then
|
||||||
|
echo "FAIL: $desc"
|
||||||
|
echo " want: $want"
|
||||||
|
echo " got: $got"
|
||||||
|
fail=1
|
||||||
|
else
|
||||||
|
echo "ok: $desc"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
got=$(OMNIGRAPH_TARGET_URI="s3://b/g" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
|
||||||
|
check "TARGET_URI only (legacy)" \
|
||||||
|
"ARGS: s3://b/g --bind 0.0.0.0:8080" "$got"
|
||||||
|
|
||||||
|
got=$(OMNIGRAPH_TARGET_URI="s3://b/g" OMNIGRAPH_CONFIG="/etc/omnigraph/omnigraph.yaml" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
|
||||||
|
check "TARGET_URI + CONFIG composes (policy)" \
|
||||||
|
"ARGS: s3://b/g --config /etc/omnigraph/omnigraph.yaml --bind 0.0.0.0:8080" "$got"
|
||||||
|
|
||||||
|
got=$(OMNIGRAPH_CONFIG="/etc/omnigraph/omnigraph.yaml" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
|
||||||
|
check "CONFIG only" \
|
||||||
|
"ARGS: --config /etc/omnigraph/omnigraph.yaml --bind 0.0.0.0:8080" "$got"
|
||||||
|
|
||||||
|
got=$(OMNIGRAPH_CONFIG="/etc/omnigraph/omnigraph.yaml" OMNIGRAPH_TARGET="active" OMNIGRAPH_BIND="0.0.0.0:8080" sh "$ep")
|
||||||
|
check "CONFIG + TARGET" \
|
||||||
|
"ARGS: --config /etc/omnigraph/omnigraph.yaml --target active --bind 0.0.0.0:8080" "$got"
|
||||||
|
|
||||||
|
got=$(sh "$ep" some-uri --bind 1.2.3.4:9 --extra)
|
||||||
|
check "explicit args passthrough" \
|
||||||
|
"ARGS: some-uri --bind 1.2.3.4:9 --extra" "$got"
|
||||||
|
|
||||||
|
if [ "$fail" -ne 0 ]; then
|
||||||
|
echo "entrypoint_test: FAILED"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "entrypoint_test: all cases passed"
|
||||||
|
|
@ -207,7 +207,7 @@ contracts:
|
||||||
This pattern realizes read-your-writes within a multi-statement mutation
|
This pattern realizes read-your-writes within a multi-statement mutation
|
||||||
and keeps failure scope bounded for inserts/updates by construction at
|
and keeps failure scope bounded for inserts/updates by construction at
|
||||||
the writer layer. See [docs/dev/invariants.md](invariants.md) and
|
the writer layer. See [docs/dev/invariants.md](invariants.md) and
|
||||||
[docs/dev/runs.md](runs.md) for the publisher CAS contract this builds on.
|
[docs/dev/writes.md](writes.md) for the publisher CAS contract this builds on.
|
||||||
|
|
||||||
### Storage trait — today vs. roadmap
|
### Storage trait — today vs. roadmap
|
||||||
|
|
||||||
|
|
@ -278,7 +278,7 @@ flowchart LR
|
||||||
eng --> wq
|
eng --> wq
|
||||||
```
|
```
|
||||||
|
|
||||||
The server applies Cedar policy at the HTTP boundary today. The roadmap, called out in [docs/dev/invariants.md](invariants.md) as a known gap, is to push policy into the planner as predicates. After Cedar, mutating handlers go through `WorkloadController` (per-actor admission cap + byte budget; PR 2 / MR-686) before reaching the engine. The engine itself holds an `Arc<WriteQueueManager>` so concurrent mutations on the same `(table, branch)` serialize at the queue, while disjoint keys run in parallel — see [docs/user/server.md](../user/server.md) "Per-actor admission control" and [docs/dev/runs.md](runs.md). The CLI bypasses the HTTP layer (and admission) and calls the engine API directly.
|
The server applies Cedar policy at the HTTP boundary today. The roadmap, called out in [docs/dev/invariants.md](invariants.md) as a known gap, is to push policy into the planner as predicates. After Cedar, mutating handlers go through `WorkloadController` (per-actor admission cap + byte budget; PR 2 / MR-686) before reaching the engine. The engine itself holds an `Arc<WriteQueueManager>` so concurrent mutations on the same `(table, branch)` serialize at the queue, while disjoint keys run in parallel — see [docs/user/server.md](../user/server.md) "Per-actor admission control" and [docs/dev/writes.md](writes.md). The CLI bypasses the HTTP layer (and admission) and calls the engine API directly.
|
||||||
|
|
||||||
Code paths:
|
Code paths:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ This page explains what the policy says and how to change it.
|
||||||
|
|
||||||
| Setting | Value | Why |
|
| Setting | Value | Why |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| **Required status checks (strict)** | `Classify Changes`, `Check AGENTS.md Links`, `Test Workspace`, `Test omnigraph-server --features aws`, `CODEOWNERS / drift`, `CODEOWNERS / noedit` | Every PR must pass workspace tests, AGENTS.md link integrity, and the CODEOWNERS hygiene checks. `strict: true` requires the branch to be up-to-date with `main` before merge. |
|
| **Required status checks (strict)** | `Classify Changes`, `Check AGENTS.md Links`, `Test Workspace`, `Test omnigraph-server --features aws`, `CODEOWNERS matches source`, `CODEOWNERS not hand-edited` | Every PR must pass workspace tests, AGENTS.md link integrity, and the CODEOWNERS hygiene checks. The two CODEOWNERS contexts must equal the job `name:` values in `.github/workflows/codeowners.yml` **verbatim** — a context naming a job that never reports (the old `CODEOWNERS / drift` used the job *id*, and the job was path-filtered) leaves every PR permanently pending and forces admin overrides. `strict: true` requires the branch to be up-to-date with `main` before merge. |
|
||||||
| **Required approving reviews** | `1` | At least one reviewer. With a 2-person team, going higher would block all merges when one person is unavailable. |
|
| **Required approving reviews** | `1` | At least one reviewer. With a 2-person team, going higher would block all merges when one person is unavailable. |
|
||||||
| **Require code-owner reviews** | `true` | The reviewer must be a code owner per `.github/CODEOWNERS`. This is what makes the codeowners chassis enforced. |
|
| **Require code-owner reviews** | `true` | The reviewer must be a code owner per `.github/CODEOWNERS`. This is what makes the codeowners chassis enforced. |
|
||||||
| **Dismiss stale reviews on new commits** | `true` | A push after approval invalidates the prior review. Prevents the "approve, then sneak in unreviewed changes" pattern. |
|
| **Dismiss stale reviews on new commits** | `true` | A push after approval invalidates the prior review. Prevents the "approve, then sneak in unreviewed changes" pattern. |
|
||||||
|
|
@ -16,7 +16,7 @@ This page explains what the policy says and how to change it.
|
||||||
| **Disallow force pushes** | `true` | No history rewrites on `main`. |
|
| **Disallow force pushes** | `true` | No history rewrites on `main`. |
|
||||||
| **Disallow branch deletions** | `true` | `main` cannot be deleted. |
|
| **Disallow branch deletions** | `true` | `main` cannot be deleted. |
|
||||||
| **Required conversation resolution** | `true` | All review comment threads must be resolved before merge. |
|
| **Required conversation resolution** | `true` | All review comment threads must be resolved before merge. |
|
||||||
| **Enforce on admins** | `true` | Even repository admins go through the gates. The point is no bypasses. |
|
| **Enforce on admins** | `false` | Admins can override the gates (`enforce_admins: false` in the JSON). This is the intended escape hatch for the 2-person team; tightening to `true` is tracked under hardening below. |
|
||||||
| **Required signed commits** | not yet | Not enabled. Would lock out maintainers until everyone enrolls GPG/SSH commit signing. Tracked as a follow-up. |
|
| **Required signed commits** | not yet | Not enabled. Would lock out maintainers until everyone enrolls GPG/SSH commit signing. Tracked as a follow-up. |
|
||||||
|
|
||||||
## How to apply
|
## How to apply
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,8 @@
|
||||||
|
|
||||||
- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repository PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`).
|
- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repository PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`).
|
||||||
- **AWS feature build job**: `cargo build/test -p omnigraph-server --features aws` on ubuntu-latest.
|
- **AWS feature build job**: `cargo build/test -p omnigraph-server --features aws` on ubuntu-latest.
|
||||||
|
- **Windows binary build job**: `cargo build --release --locked -p omnigraph-cli -p omnigraph-server` on windows-latest with smoke checks for `omnigraph.exe version`, `omnigraph-server.exe --help`, and PowerShell installer syntax.
|
||||||
- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_graph_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`.
|
- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_graph_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`.
|
||||||
- **release-edge.yml**: on every push to main, retags `edge`, builds Linux x86_64 / macOS arm64 archives + sha256, publishes a rolling prerelease.
|
- **release-edge.yml**: on every push to main, retags `edge`, builds Linux x86_64 / macOS arm64 archives and Windows x86_64 zip + sha256, publishes a rolling prerelease, then smoke-tests the Windows PowerShell installer against `edge`.
|
||||||
- **release.yml**: on `v*` tags, builds the Linux x86_64 / macOS arm64 release matrix and updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`.
|
- **release.yml**: on `v*` tags, builds the Linux x86_64 / macOS arm64 archives and Windows x86_64 zip release matrix, updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`, and smoke-tests the Windows PowerShell installer against the tag.
|
||||||
- **package.yml**: manual ECR image build; emits two image tags per commit (`<sha>`, `<sha>-aws`) via CodeBuild.
|
- **package.yml**: manual ECR image build; emits two image tags per commit (`<sha>`, `<sha>-aws`) via CodeBuild.
|
||||||
|
|
|
||||||
|
|
@ -4,24 +4,45 @@
|
||||||
|
|
||||||
This setup gives every role change a reviewable PR and a permanent in-repository audit trail (`git log .github/codeowners-roles.yml`).
|
This setup gives every role change a reviewable PR and a permanent in-repository audit trail (`git log .github/codeowners-roles.yml`).
|
||||||
|
|
||||||
## Current roles
|
## Who owns what
|
||||||
|
|
||||||
| Role | Members | Scope |
|
The tables below are **generated** from `.github/codeowners-roles.yml` by `.github/scripts/render-codeowners.py` (the same render that produces `.github/CODEOWNERS`). They are the always-current "who owns what at this commit" view — don't edit them by hand; edit the yml and re-render.
|
||||||
|
|
||||||
|
<!-- BEGIN GENERATED OWNERSHIP — edit codeowners-roles.yml + run render-codeowners.py -->
|
||||||
|
|
||||||
|
**Path → owners** (GitHub applies *last match wins*; the `*` catch-all is listed first and is overridden by the specific patterns below it):
|
||||||
|
|
||||||
|
| Path | Owners | Role(s) |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `engineering` | `@ragnorc` | All code under `crates/**`, repository infrastructure, default for unmapped paths |
|
| `*` | @ragnorc @aaltshuler | engineering |
|
||||||
| `docs` | `@ragnorc` | `docs/**`, README.md, AGENTS.md, CLAUDE.md, SECURITY.md |
|
| `crates/**` | @ragnorc @aaltshuler | engineering |
|
||||||
|
| `docs/**` | @ragnorc | docs |
|
||||||
|
| `README.md` | @ragnorc | docs |
|
||||||
|
| `AGENTS.md` | @ragnorc | docs |
|
||||||
|
| `CLAUDE.md` | @ragnorc | docs |
|
||||||
|
| `SECURITY.md` | @ragnorc | docs |
|
||||||
|
|
||||||
GitHub treats multiple owners in a CODEOWNERS line as **"any one of them satisfies the review requirement"**. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured).
|
**Roles**:
|
||||||
|
|
||||||
|
| Role | Members | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `engineering` | @ragnorc @aaltshuler | All production code under crates/**. Engine, CLI, server, compiler. |
|
||||||
|
| `docs` | @ragnorc | Documentation under docs/**, plus repo-level docs (README.md, AGENTS.md, CLAUDE.md symlink, SECURITY.md). |
|
||||||
|
|
||||||
|
<!-- END GENERATED OWNERSHIP -->
|
||||||
|
|
||||||
|
GitHub treats multiple owners on a CODEOWNERS line as **"any one of them satisfies the review requirement"**. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured).
|
||||||
|
|
||||||
## How to change role membership or path mappings
|
## How to change role membership or path mappings
|
||||||
|
|
||||||
1. Edit `.github/codeowners-roles.yml`.
|
1. Edit `.github/codeowners-roles.yml`.
|
||||||
2. Run `python3 .github/scripts/render-codeowners.py` (requires PyYAML; `pip install pyyaml`).
|
2. Open a PR. **CI re-renders for you**: the `CODEOWNERS` workflow regenerates `.github/CODEOWNERS` and the ownership tables above and auto-commits them back to your PR branch on same-repository PRs — you don't have to run the script locally (though you can: `python3 .github/scripts/render-codeowners.py`, requires PyYAML).
|
||||||
3. Commit both files in the same PR.
|
|
||||||
|
On a fork (where CI can't push back), the workflow instead fails with the diff so you can run the script and commit it yourself.
|
||||||
|
|
||||||
CI fails the PR if:
|
CI fails the PR if:
|
||||||
- `CODEOWNERS` was edited without a corresponding yml change, or
|
- a fork PR left a generated artifact out of sync, or
|
||||||
- The yml was changed but the rendered `CODEOWNERS` doesn't match.
|
- `CODEOWNERS` was edited without a corresponding yml change (the `CODEOWNERS not hand-edited` check).
|
||||||
|
|
||||||
## How to add a new role
|
## How to add a new role
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -147,7 +147,7 @@ sequenceDiagram
|
||||||
- End-of-query Lance commit: `TableStore::stage_append`, `stage_merge_insert`, `commit_staged` at `crates/omnigraph/src/table_store.rs`
|
- End-of-query Lance commit: `TableStore::stage_append`, `stage_merge_insert`, `commit_staged` at `crates/omnigraph/src/table_store.rs`
|
||||||
- Manifest commit primitive: `commit_updates_on_branch_with_expected` at `crates/omnigraph/src/db/omnigraph/table_ops.rs`
|
- Manifest commit primitive: `commit_updates_on_branch_with_expected` at `crates/omnigraph/src/db/omnigraph/table_ops.rs`
|
||||||
|
|
||||||
Atomicity guarantee for multi-statement mutations: a mid-query failure leaves Lance HEAD untouched on staged tables (no inline commit happened during op execution), so the next mutation proceeds normally with no `ExpectedVersionMismatch`. The publisher CAS at the very end either succeeds (manifest advances atomically across all touched sub-tables) or fails with a typed `ManifestConflictDetails::ExpectedVersionMismatch` (no partial publish). See [docs/dev/invariants.md](invariants.md) and [docs/dev/runs.md](runs.md).
|
Atomicity guarantee for multi-statement mutations: a mid-query failure leaves Lance HEAD untouched on staged tables (no inline commit happened during op execution), so the next mutation proceeds normally with no `ExpectedVersionMismatch`. The publisher CAS at the very end either succeeds (manifest advances atomically across all touched sub-tables) or fails with a typed `ManifestConflictDetails::ExpectedVersionMismatch` (no partial publish). See [docs/dev/invariants.md](invariants.md) and [docs/dev/writes.md](writes.md).
|
||||||
|
|
||||||
## Bulk loader (`loader/mod.rs`)
|
## Bulk loader (`loader/mod.rs`)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ constraints. User-facing behavior should still be documented through
|
||||||
|---|---|
|
|---|---|
|
||||||
| System structure, L1/L2 framing, component diagrams | [architecture.md](architecture.md) |
|
| System structure, L1/L2 framing, component diagrams | [architecture.md](architecture.md) |
|
||||||
| On-disk layout, manifest schema, URI behavior | [storage.md](../user/storage.md) |
|
| On-disk layout, manifest schema, URI behavior | [storage.md](../user/storage.md) |
|
||||||
| Direct-publish writes, D2, staged writes, recovery sidecars | [runs.md](runs.md) |
|
| Direct-publish writes, D2, staged writes, recovery sidecars | [writes.md](writes.md) |
|
||||||
| Query execution, mutation execution, loader flow | [execution.md](execution.md) |
|
| Query execution, mutation execution, loader flow | [execution.md](execution.md) |
|
||||||
| Index lifecycle and graph topology indexes | [indexes.md](../user/indexes.md) |
|
| Index lifecycle and graph topology indexes | [indexes.md](../user/indexes.md) |
|
||||||
| Branch and commit internals | [branches-commits.md](../user/branches-commits.md) |
|
| Branch and commit internals | [branches-commits.md](../user/branches-commits.md) |
|
||||||
|
|
@ -51,6 +51,18 @@ constraints. User-facing behavior should still be documented through
|
||||||
| Install and deployment packaging | [install.md](../user/install.md), [deployment.md](../user/deployment.md) |
|
| Install and deployment packaging | [install.md](../user/install.md), [deployment.md](../user/deployment.md) |
|
||||||
| Release history | [releases/](../releases/) |
|
| Release history | [releases/](../releases/) |
|
||||||
|
|
||||||
|
## Contribution & Governance
|
||||||
|
|
||||||
|
| Area | Read |
|
||||||
|
|---|---|
|
||||||
|
| How to contribute (external) | [CONTRIBUTING.md](../../CONTRIBUTING.md) |
|
||||||
|
| Governance model, roles, decision authority | [GOVERNANCE.md](../../GOVERNANCE.md) |
|
||||||
|
| Public contribution RFC track | [rfcs/](../rfcs/) |
|
||||||
|
|
||||||
|
The `docs/rfcs/` track is the **public, externally-authorable** RFC process. The
|
||||||
|
maintainer/internal RFCs below (`rfc-00N-*.md`) are a separate, team-owned
|
||||||
|
track; don't conflate the two.
|
||||||
|
|
||||||
## Active Implementation Plans
|
## Active Implementation Plans
|
||||||
|
|
||||||
Working documents for in-flight feature work. Removed when the work lands.
|
Working documents for in-flight feature work. Removed when the work lands.
|
||||||
|
|
@ -59,6 +71,8 @@ Working documents for in-flight feature work. Removed when the work lands.
|
||||||
|---|---|
|
|---|---|
|
||||||
| Schema-lint chassis v1 (MR-694) — `--allow-data-loss`, soft/hard drops | [schema-lint-v1-plan.md](schema-lint-v1-plan.md) |
|
| Schema-lint chassis v1 (MR-694) — `--allow-data-loss`, soft/hard drops | [schema-lint-v1-plan.md](schema-lint-v1-plan.md) |
|
||||||
| Inline + stored queries, request/response envelope, MCP (MR-656 / MR-976 / MR-969) | [rfc-001-queries-envelope-mcp.md](rfc-001-queries-envelope-mcp.md) |
|
| Inline + stored queries, request/response envelope, MCP (MR-656 / MR-976 / MR-969) | [rfc-001-queries-envelope-mcp.md](rfc-001-queries-envelope-mcp.md) |
|
||||||
|
| Config & CLI architecture — layered config, client targeting, file naming (MR-973 / MR-974 / MR-981) | [rfc-002-config-cli-architecture.md](rfc-002-config-cli-architecture.md) |
|
||||||
|
| MCP server surface — full tool parity, stored queries, modular auth (MR-969 / MR-956 / MR-974) | [rfc-003-mcp-server-surface.md](rfc-003-mcp-server-surface.md) |
|
||||||
|
|
||||||
## Boundary
|
## Boundary
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ Use it this way:
|
||||||
publishes one manifest update. Do not commit per statement. Delete-only
|
publishes one manifest update. Do not commit per statement. Delete-only
|
||||||
queries are the documented inline residual; the parse-time D2 rule prevents
|
queries are the documented inline residual; the parse-time D2 rule prevents
|
||||||
mixing deletes with insert/update until Lance exposes two-phase delete.
|
mixing deletes with insert/update until Lance exposes two-phase delete.
|
||||||
Read [runs.md](runs.md) and [execution.md](execution.md).
|
Read [writes.md](writes.md) and [execution.md](execution.md).
|
||||||
|
|
||||||
5. **Recovery is part of the commit protocol.** Writers that can advance Lance
|
5. **Recovery is part of the commit protocol.** Writers that can advance Lance
|
||||||
HEAD before manifest publish must write `__recovery/{ulid}.json` sidecars.
|
HEAD before manifest publish must write `__recovery/{ulid}.json` sidecars.
|
||||||
|
|
@ -56,7 +56,7 @@ Use it this way:
|
||||||
branch they read even when index coverage is partial. Expensive index work
|
branch they read even when index coverage is partial. Expensive index work
|
||||||
should converge from manifest state instead of extending the critical write
|
should converge from manifest state instead of extending the critical write
|
||||||
path. Scalar staged index builds and vector inline residuals are documented
|
path. Scalar staged index builds and vector inline residuals are documented
|
||||||
in [runs.md](runs.md) and [indexes.md](../user/indexes.md).
|
in [writes.md](writes.md) and [indexes.md](../user/indexes.md).
|
||||||
|
|
||||||
8. **Schema identity survives renames.** Accepted schema identity must remain
|
8. **Schema identity survives renames.** Accepted schema identity must remain
|
||||||
stable across type and property renames. Rename support belongs in migration
|
stable across type and property renames. Rename support belongs in migration
|
||||||
|
|
@ -96,17 +96,25 @@ Use it this way:
|
||||||
|
|
||||||
| Area | Current state | Source |
|
| Area | Current state | Source |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| Multi-table commit | Manifest CAS plus recovery sidecars; not a single Lance primitive | [runs.md](runs.md), [architecture.md](architecture.md) |
|
| Multi-table commit | Manifest CAS plus recovery sidecars; not a single Lance primitive | [writes.md](writes.md), [architecture.md](architecture.md) |
|
||||||
| Constructive mutations | In-memory `MutationStaging`, one end-of-query table commit per touched table, then one manifest publish | [runs.md](runs.md), [execution.md](execution.md) |
|
| Constructive mutations | In-memory `MutationStaging`, one end-of-query table commit per touched table, then one manifest publish | [writes.md](writes.md), [execution.md](execution.md) |
|
||||||
| Deletes | Inline-commit residual; delete-only queries allowed, mixed insert/update/delete rejected by D2 | [query-language.md](../user/query-language.md), [runs.md](runs.md) |
|
| Deletes | Inline-commit residual; delete-only queries allowed, mixed insert/update/delete rejected by D2 | [query-language.md](../user/query-language.md), [writes.md](writes.md) |
|
||||||
|
| Branch delete | Manifest is the single authority, flipped atomically first; per-table forks + commit-graph branch are derived state, reclaimed best-effort (`force_delete_branch`) with the `cleanup` reconciler as the guaranteed backstop. If the best-effort reclaim failed and `cleanup` has not yet run, reusing the branch name surfaces an actionable error | [branches-commits.md](../user/branches-commits.md), [maintenance.md](../user/maintenance.md) |
|
||||||
| Schema validation | Type checks, required fields, defaults, edge endpoint checks, and edge cardinality are enforced on write paths | [schema-language.md](../user/schema-language.md), [execution.md](execution.md) |
|
| Schema validation | Type checks, required fields, defaults, edge endpoint checks, and edge cardinality are enforced on write paths | [schema-language.md](../user/schema-language.md), [execution.md](execution.md) |
|
||||||
| Unique constraints | Intra-batch and write-path checks exist; full cross-version uniqueness is still a gap | [schema-language.md](../user/schema-language.md) |
|
| Unique constraints | Intra-batch and write-path checks exist; full cross-version uniqueness is still a gap | [schema-language.md](../user/schema-language.md) |
|
||||||
| Storage trait | `TableStorage` exists as the sealed staged-write surface; full call-site migration and capability/stat surfaces are incomplete | [runs.md](runs.md), [architecture.md](architecture.md) |
|
| Storage trait | `TableStorage` exists as the sealed staged-write surface; full call-site migration and capability/stat surfaces are incomplete | [writes.md](writes.md), [architecture.md](architecture.md) |
|
||||||
| Index lifecycle | `ensure_indices` is explicit today; reconciler-based convergence is roadmap | [indexes.md](../user/indexes.md), [maintenance.md](../user/maintenance.md) |
|
| Index lifecycle | `ensure_indices` is explicit today; reconciler-based convergence is roadmap | [indexes.md](../user/indexes.md), [maintenance.md](../user/maintenance.md) |
|
||||||
| Traversal IDs | Runtime still builds `TypeIndex`; Lance stable row-id based graph IDs are roadmap | [architecture.md](architecture.md), [query-language.md](../user/query-language.md) |
|
| Traversal IDs | Runtime still builds `TypeIndex`; Lance stable row-id based graph IDs are roadmap | [architecture.md](architecture.md), [query-language.md](../user/query-language.md) |
|
||||||
| Auth | Bearer token hashing and server-side actor resolution are implemented at the HTTP boundary | [server.md](../user/server.md), [policy.md](../user/policy.md) |
|
| Auth | Bearer token hashing and server-side actor resolution are implemented at the HTTP boundary | [server.md](../user/server.md), [policy.md](../user/policy.md) |
|
||||||
| Tests | Tempdir-backed Lance tests are the current substrate; there is no `MemStorage` test backend | [testing.md](testing.md) |
|
| Tests | Tempdir-backed Lance tests are the current substrate; there is no `MemStorage` test backend | [testing.md](testing.md) |
|
||||||
|
|
||||||
|
The branch-delete reconciler is authority-derived: it reclaims orphaned forks
|
||||||
|
today and degrades to a no-op if Lance ships an atomic multi-dataset branch
|
||||||
|
operation, so the design composes with that future rather than blocking it. This
|
||||||
|
is the same shape as invariant 7 (indexes are derived state); prefer it over a
|
||||||
|
recovery-sidecar-style approach for any new multi-dataset metadata operation,
|
||||||
|
since the sidecar would be scaffolding to remove once the substrate closes the gap.
|
||||||
|
|
||||||
## Known Gaps
|
## Known Gaps
|
||||||
|
|
||||||
Do not hide these behind invariant wording. Either move them forward or keep
|
Do not hide these behind invariant wording. Either move them forward or keep
|
||||||
|
|
@ -122,6 +130,15 @@ them explicit.
|
||||||
- **Deletes and vector indexes:** `delete_where` and vector index creation still
|
- **Deletes and vector indexes:** `delete_where` and vector index creation still
|
||||||
advance Lance HEAD inline because the required public Lance APIs are missing.
|
advance Lance HEAD inline because the required public Lance APIs are missing.
|
||||||
Keep D2 and recovery coverage in place until those residuals are removed.
|
Keep D2 and recovery coverage in place until those residuals are removed.
|
||||||
|
- **Blob-column compaction:** Lance `compact_files` mis-decodes blob-v2 columns
|
||||||
|
under its forced `BlobHandling::AllBinary` read ("more fields in the schema
|
||||||
|
than provided column indices"), so `optimize` skips any table with a `Blob`
|
||||||
|
property — reporting `SkipReason::BlobColumnsUnsupportedByLance` (loud, not a
|
||||||
|
silent drop) behind the `LANCE_SUPPORTS_BLOB_COMPACTION` gate. Reads and writes
|
||||||
|
are unaffected; only space/fragment reclamation on blob tables is deferred.
|
||||||
|
Remove the skip when the upstream Lance fix lands — the
|
||||||
|
`lance_surface_guards.rs::compact_files_still_fails_on_blob_columns` guard
|
||||||
|
turns red on that bump to force it.
|
||||||
- **Planner capability/stat surfaces:** cost-aware planning, complete
|
- **Planner capability/stat surfaces:** cost-aware planning, complete
|
||||||
capability advertisement, and explain-with-cost are roadmap. Do not describe
|
capability advertisement, and explain-with-cost are roadmap. Do not describe
|
||||||
them as implemented.
|
them as implemented.
|
||||||
|
|
|
||||||
|
|
@ -175,7 +175,9 @@ Migration from Lance 4.0.0 → 6.0.1 landed in this cycle (DataFusion 52 → 53,
|
||||||
- **Lance #6658 closed** (2026-05-14) but `DeleteBuilder::execute_uncommitted` did **not** ship in v6.0.1 — binary search across the release stream shows it first appears in `v7.0.0-beta.10` (the closing commits landed on main but didn't backport to the 6.x line). Tracked as MR-A: migrate `delete_where` to staged, retire the parse-time D2 mutation rule, extend recovery sidecar coverage. **Gated on the Lance v7.x bump**, not this PR. v7.0.0-rc.1 dropped 2026-05-21.
|
- **Lance #6658 closed** (2026-05-14) but `DeleteBuilder::execute_uncommitted` did **not** ship in v6.0.1 — binary search across the release stream shows it first appears in `v7.0.0-beta.10` (the closing commits landed on main but didn't backport to the 6.x line). Tracked as MR-A: migrate `delete_where` to staged, retire the parse-time D2 mutation rule, extend recovery sidecar coverage. **Gated on the Lance v7.x bump**, not this PR. v7.0.0-rc.1 dropped 2026-05-21.
|
||||||
- **Lance #6666 still open** (`build_index_metadata_from_segments` public): vector-index two-phase blocked; inline `create_vector_index` residual retained.
|
- **Lance #6666 still open** (`build_index_metadata_from_segments` public): vector-index two-phase blocked; inline `create_vector_index` residual retained.
|
||||||
- **Lance #6877 still open** (`MergeInsertBuilder` dup-rowid): PR #109's `SourceDedupeBehavior::FirstSeen` + `check_batch_unique_by_keys` precondition stay load-bearing.
|
- **Lance #6877 still open** (`MergeInsertBuilder` dup-rowid): PR #109's `SourceDedupeBehavior::FirstSeen` + `check_batch_unique_by_keys` precondition stay load-bearing.
|
||||||
|
- **`Dataset::force_delete_branch`** (`branches().delete(name, force=true)`, dataset.rs:524) tolerates a missing branch-*contents* ref (vs plain `delete_branch`'s `RefNotFound`), but on the local store still errors `NotFound` if the branch `tree/` directory is fully absent (`remove_dir_all`'s NotFound is not caught for Lance's native error variant, refs.rs:526-549). Both variants still refuse a branch with referencing descendants (`RefConflict`). `TableStore::force_delete_branch` wraps this to be fully idempotent (tolerates already-absent). The single-authority branch-delete redesign uses it for orphan reclamation (eager best-effort reclaim + cleanup reconciler). Pinned by `lance_surface_guards.rs::force_delete_branch_semantics`. Branch delete is "flip the ref atomically, then `remove_dir_all(tree/{branch})`"; branch-exclusive data lives under `tree/{branch}/` so a drop reclaims it immediately without touching `main`.
|
||||||
|
- **Lance blob-v2 `compact_files` bug** (no public issue found as of 2026-06): `compact_files` disables binary-copy for blob datasets and forces `BlobHandling::AllBinary` on the read side; the v2.1+ structural decoder then mis-counts column infos for the blob-v2 struct and fails with `Invalid user input: there were more fields in the schema than provided column indices / infos` (`lance-encoding/src/decoder.rs::ColumnInfoIter::expect_next`). This fails even a pristine uniform-V2_2 multi-fragment blob table; vector/list/scalar/ragged columns and mixed file versions all compact fine. Reads/queries use descriptor handling (`BlobHandling::default()`) and are unaffected. `optimize` skips blob-bearing tables behind `LANCE_SUPPORTS_BLOB_COMPACTION = false` (`db/omnigraph/optimize.rs`), reporting `SkipReason::BlobColumnsUnsupportedByLance`. Pinned by `lance_surface_guards.rs::compact_files_still_fails_on_blob_columns`, which turns red when the bug is fixed → flip the gate, remove the skip branch + the `maintenance.rs::optimize_skips_blob_table_and_reports_skip` skip assertions.
|
||||||
|
|
||||||
Surface guards added: `crates/omnigraph/tests/lance_surface_guards.rs` (8 named guards; 3 runtime + 5 compile-only). Future Lance bumps re-run this file first as the smoke check. Two additional guards from the original plan deferred to follow-up (`manifest_cas_returns_row_level_contention_variant` needs full publisher-race harness; `table_version_metadata_byte_compatible_with_v4` needs `pub(crate)` reach extension).
|
Surface guards added: `crates/omnigraph/tests/lance_surface_guards.rs` (10 named guards; 5 runtime + 5 compile-only). Future Lance bumps re-run this file first as the smoke check. Two additional guards from the original plan deferred to follow-up (`manifest_cas_returns_row_level_contention_variant` needs full publisher-race harness; `table_version_metadata_byte_compatible_with_v4` needs `pub(crate)` reach extension).
|
||||||
|
|
||||||
Bump this date stanza on the next alignment pass.
|
Bump this date stanza on the next alignment pass.
|
||||||
|
|
|
||||||
590
docs/dev/rfc-002-config-cli-architecture.md
Normal file
590
docs/dev/rfc-002-config-cli-architecture.md
Normal file
|
|
@ -0,0 +1,590 @@
|
||||||
|
# RFC: Config & CLI Architecture — Layered Config, Client Targeting, File Naming
|
||||||
|
|
||||||
|
**Status:** Proposed
|
||||||
|
**Date:** 2026-05-30
|
||||||
|
**Tickets:** MR-668 (multi-graph server, shipped — the dependency this builds on), MR-969 (stored queries + MCP — supplies the in-repo agent tool surface), MR-973 (quickstart / onboarding), MR-974 (agent setup surface), MR-981 (agent-friendly CLI hardening)
|
||||||
|
**Target release:** v0.8.x (tentative; phased — see Rollout)
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
OmniGraph today has a single config file, `omnigraph.yaml`, read both by the CLI (operating the embedded engine) and by `omnigraph-server` (hosting graphs). There is **no client-side configuration that targets a *running server*** — to talk to a deployed `omnigraph-server` you drop to `curl` or the `omnigraph-ts` client. This is the one real gap in an otherwise coherent design (storage-URI addressing, multi-graph routing, per-graph policy).
|
||||||
|
|
||||||
|
This RFC defines the config and CLI architecture that closes that gap, derived from first principles — *working backwards from what OmniGraph uniquely enables* rather than copying kubeconfig / `helix.toml`. The result:
|
||||||
|
|
||||||
|
1. A **global-first layered config** — user-global (`~/.omnigraph/`) is the **primary, self-sufficient default**; per-project (`./omnigraph.yaml`) is an *optional* override + deployment manifest. One uniform schema, both layers optional; the CLI works from any directory with **no project file** (the `kubectl`/`aws`/`gh` posture), unlike today's project-anchored behavior.
|
||||||
|
2. A single unifying noun — the **target** — that resolves a name to a concrete `(locus, graph, sub-state, credential)` tuple, where the locus is **embedded (storage URI) XOR remote (server endpoint)**.
|
||||||
|
3. A **multi-server × multi-graph** client model (OmniGraph hosts N graphs per server and there are M servers — unlike Helix's one-cluster-one-graph).
|
||||||
|
4. **Credentials by reference, keyed by server name** (the AWS/gh/kube model) — OS keychain `omnigraph:<server>` (preferred) → a `[<server>]` profile in `~/.omnigraph/credentials` → `OMNIGRAPH_TOKEN[_<SERVER>]` env (CI). `servers.<name>` is endpoint-only by default but may carry an explicit, secret-free `auth: { token: { env|file|command|keychain } }` source; no `credentials.yaml`; the shipped `bearer_token_env` + dotenv stay as a legacy compat path. Every committed/GitOps'd surface stays secret-free.
|
||||||
|
5. A **file-naming** decision: project and server config are **the same artifact, same name** (`omnigraph.yaml`); the only differently-named file is the user-global `config.yaml`, justified by **scope, not role**.
|
||||||
|
|
||||||
|
The design optimizes jointly for **DX** (one command surface across embedded and remote; clone-and-go) and **AX** (agent experience: one flat resolved context, secrets structurally unreachable, branch-pinned reproducible reads, and a GitOps'd capability surface).
|
||||||
|
|
||||||
|
## Reconciliation with shipped / planned CLI work
|
||||||
|
|
||||||
|
Verified **against the code**, not ticket statuses (which are unreliable — e.g. MR-581 is marked done but is stale and unbuilt). Findings and the corrections they force:
|
||||||
|
|
||||||
|
- **Noun is `graph`/`graphs`, NOT `target`/`targets`.** The config key is `graphs:` in `config.rs` and the flag is `--graph`. **This RFC uses `graphs:`/`--graph` throughout**; the unifying noun is a **`graphs:` entry** that is *embedded* (`storage:`, formerly `uri:`) XOR *remote* (`server:` + `graph_id:` defaulting to the entry key) — a typed locator (§1.1). Read any lingering `targets:`/`--target` below as `graphs:`/`--graph`.
|
||||||
|
- **`~/.omnigraph/` stands on its own merits** (Helix/aws/kube peer convention), **not** on precedent — there is **no `~/.omnigraph/` usage in the code** today. (MR-581 / MR-531 templates-into-`~/.omnigraph/` are *stale tickets, unbuilt*.)
|
||||||
|
- **Templates do not exist** in the code (no `template` command). The template mechanism is a *design question for this RFC / the init family*, not an existing foothold.
|
||||||
|
- **What actually exists in the CLI** (verified): `init, query(read), mutate(change), load, ingest, branch, schema, lint, snapshot, export, commit, policy, optimize, cleanup, graphs`. **Not built:** `serve, quickstart, template, prune, login`. `omnigraph init` exists (with `scaffold_config_if_missing`, `main.rs:1415`); the rest of the "init family" (`quickstart` MR-973, `serve` MR-970, `prune`/`init --force` MR-972/975, `mcp install`/skills MR-974, agent-mode MR-981) are **unbuilt tickets**, some stale.
|
||||||
|
- **Config still uses `aliases:`** (no `operations:` in code; MR-839 unbuilt). §6's reconciliation talks about `aliases:` as-is, noting `operations:` is a *proposed* rename.
|
||||||
|
- **`bearer_token_env` exists** (per-graph, `config.rs`); MR-971 flags a CLI-parity / server-side gap. The per-`servers.<name>` extension lands on top of that.
|
||||||
|
- **A top-level `omnigraph lint` command exists** (verified). A stored-query *registry* validator must pick a verb that doesn't read as a competing lint/check.
|
||||||
|
|
||||||
|
## Motivation
|
||||||
|
|
||||||
|
Three problems, in priority order:
|
||||||
|
|
||||||
|
- **No client→server targeting config.** The moment an operator stands up `omnigraph-server` — for bearer auth + Cedar at a network boundary + admission control + multi-graph routing — the CLI can't address it. `curl` is the fallback. There is no named, switchable, credential-carrying way to say "run this against `prod` on the team server."
|
||||||
|
- **Multi-server × multi-graph has no first-class expression.** OmniGraph genuinely runs N graphs per server across M servers. The same graph is **multi-homed** — `s3://b/prod` may be `prod` on server A, `production` on server B, and opened directly by the CLI. Today's flat `graphs:` map (name→storage-URI) can't express "graph `production` on server `prod-eu`."
|
||||||
|
- **Solo-first and embedded-first are unserved by the remote story.** A solo developer with no projects should define everything in `~`. A developer iterating locally (embedded, no server) and then pointing at staging (remote) should change *one word*, not learn a second command surface.
|
||||||
|
|
||||||
|
MR-668 shipped the server side (multiple graphs per server). MR-969 ships the in-repo agent tool surface (stored queries / MCP). This RFC supplies the **client and config layer** that lets humans and agents target that surface coherently — the foundation under MR-973 / MR-974 / MR-981.
|
||||||
|
|
||||||
|
## Non-Goals
|
||||||
|
|
||||||
|
- **A control plane / dashboard for config.** Operators edit files and (for servers) restart. No runtime config-mutation API. Matches the MR-668 / MR-969 operational model.
|
||||||
|
- **Hot reload.** Restart-only for server-side config, matching MR-668 and MR-969.
|
||||||
|
- **Embedding secrets in any config file.** Credentials are by-reference; the git-ignored `auth.env_file` dotenv (or, later, the OS keychain) holds tokens. Never a committable `*.yaml`.
|
||||||
|
- **Renaming the project manifest by role.** No `omnigraph.server.yaml` / `omnigraph.client.yaml`. Role lives in sections, not filenames (see Design §3).
|
||||||
|
- **Dropping embedded mode.** Embedded-first is load-bearing for the file-naming decision; this RFC assumes it stays.
|
||||||
|
- **Cross-graph / cross-server tool listing in MCP.** Clients loop over per-graph catalogs (an MR-969 non-goal, restated).
|
||||||
|
|
||||||
|
## Background
|
||||||
|
|
||||||
|
OmniGraph runs on Lance 6.x: typed nodes/edges in per-type Lance datasets, atomic multi-table commits via a `__manifest` table, branchable and time-travelable. The CLI (`omnigraph`) operates the **embedded engine** directly against a storage URI — no HTTP client in its runtime dependencies. `omnigraph-server` (Axum) is a *separate* HTTP front-end over the same engine, with bearer auth + per-graph Cedar (MR-668). The two read the same `omnigraph.yaml` but never connect to each other.
|
||||||
|
|
||||||
|
OmniGraph **already has a credentials-by-reference mechanism**, which this RFC builds on rather than replacing: `TargetConfig.bearer_token_env` names the env var holding a graph's bearer token, and `auth.env_file` points at a git-ignored dotenv (`.env.omni`) that the CLI auto-loads into the process (`load_env_file_into_process`) with real-env-vars-win precedence; `resolve_remote_bearer_token` resolves a token via env var then dotenv named lookup. `.env.omni` is already in `.gitignore`.
|
||||||
|
|
||||||
|
The six **irreducible enablers** that drive the design (referenced as E1–E6 below):
|
||||||
|
|
||||||
|
| # | Enabler | Consequence |
|
||||||
|
|---|---|---|
|
||||||
|
| E1 | A graph is a **self-contained storage URI**; the substrate (object store + manifest CAS) is the source of truth — no server required to read/write. | A graph is addressable **directly (embedded)**, not only via a server. |
|
||||||
|
| E2 | A server hosts **many graphs**; **many servers** exist. | The remote address space is **`{server} × {graph_id}`**. |
|
||||||
|
| E3 | The same graph is **multi-homed** under different per-locus names. | **Name ≠ identity.** Resolution is mandatory. |
|
||||||
|
| E4 | **Branch / commit / snapshot** are first-class addressable sub-state. | An address is *graph @ branch/snapshot*, not just graph. |
|
||||||
|
| E5 | Enforcement is **two-layered**: engine-layer Cedar (`_as` writers, works embedded) + HTTP-boundary bearer+Cedar (server only). | *How* you reach a graph determines *which* enforcement applies. |
|
||||||
|
| E6 | **Stored queries / MCP tools are a per-graph registry defined in the project config** (MR-969). | The **agent tool surface is version-controlled in the repo**. |
|
||||||
|
|
||||||
|
Competitors collapse dimensions OmniGraph keeps live: **Helix** fuses E2+E3 (one cluster = one graph); **namidb** fuses E1+E3 into the URI (`s3://b?ns=prod`) and serves one namespace per process. OmniGraph has all of E1–E6 at once, so its config resolves a richer space — but the richness is *earned* by capability.
|
||||||
|
|
||||||
|
## Design
|
||||||
|
|
||||||
|
### 1. The address space and the `target` abstraction
|
||||||
|
|
||||||
|
Every OmniGraph address is a tuple:
|
||||||
|
|
||||||
|
```
|
||||||
|
(locus, graph, sub-state, credential)
|
||||||
|
locus = embedded(URI) XOR remote(server-endpoint) # E1, E2
|
||||||
|
graph = a URI (embedded) | a graph_id on a server (remote) # E3
|
||||||
|
sub-state = branch | snapshot # E4
|
||||||
|
credential = cloud-storage creds (embedded) | bearer token (remote) # E5
|
||||||
|
```
|
||||||
|
|
||||||
|
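The config's only job is **name → this tuple**. Define one noun — a **target** (spelled `graphs:` in config and `--graph` on the command line; see "Reconciliation with shipped / planned CLI work" above) — that resolves to either shape: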
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
targets:
|
||||||
|
dev: # embedded — substrate-direct (E1)
|
||||||
|
storage: s3://team-bucket/dev.omni
|
||||||
|
branch: main # sub-state (E4)
|
||||||
|
staging: # remote — resolves a server by reference (E2/E3)
|
||||||
|
server: staging # → looked up in `servers`
|
||||||
|
graph_id: prod # the graph's id on that server (defaults to the entry key)
|
||||||
|
branch: review
|
||||||
|
```
|
||||||
|
|
||||||
|
`--target staging` resolves: project `targets.staging` → `{server: staging, graph_id: prod, branch: review}` → `servers.staging` → `{endpoint, token-by-ref}` → final `(remote(https://…), prod, review, $TOKEN)`. Embedded targets skip the server hop and use cloud-storage credentials.
|
||||||
|
|
||||||
|
**Two concepts, not kubeconfig's three.** kube splits cluster / user / context; that 3-way split is its most-cursed UX. A target *bundles* server+graph+branch+defaults under one name; the **only** thing split out is `servers`, because endpoints+credentials are shared across many targets and are secret-bearing (different ownership and rate-of-change; see §2). Result: **2 nouns — `servers` and `targets`.** Embedded `targets` (`storage:`) subsume today's `graphs:` entries.
|
||||||
|
|
||||||
|
### 1.1 The resolved address is a typed *locator*, not a `uri` string
|
||||||
|
|
||||||
|
The shipped config models a graph as a single `uri: String`, and code branches on `is_remote_uri(uri)`. That conflates two structurally different addresses: an **embedded** graph is a *complete, self-contained* address — one storage URI = one graph, opened directly via the embedded engine; a **remote** graph is a *server endpoint + a `graph_id`* — one server hosts N graphs. A bare server URL **is not a graph**; it lacks the `graph_id`. The cost of the string model, in the code today:
|
||||||
|
|
||||||
|
- the CLI re-decides "server or file?" via `is_remote_uri` at ~16 call sites;
|
||||||
|
- `TargetConfig` (one `uri` field) **cannot express** multi-server × multi-graph or a multi-homed graph (E2/E3) — "graph `production` on server `prod-eu`" has no representation;
|
||||||
|
- the CLI **bails on remote URIs** for most operations, precisely because the string can't carry the `graph_id`;
|
||||||
|
- the `omnigraph-ts` SDK had to model `baseUrl` **+** `graphId` *separately* (rewriting `/graphs/{graphId}/…`) — it invented the structure the string lacks.
|
||||||
|
|
||||||
|
So the *resolved* address is a **typed locator**, not a string:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
enum GraphLocator {
|
||||||
|
Embedded { storage: StorageUri }, // file:// , s3:// — a complete graph
|
||||||
|
Remote { server: ServerId, graph_id: GraphId }, // which server + which graph (+ bearer creds)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
A `graphs:` entry resolves into this **once**; downstream code dispatches on the variant (the breadboard's `GraphConn = Embedded(engine) | Remote(http)`) instead of re-sniffing a scheme at each call site. The `uri` string becomes an *input format* for the embedded variant, never the address itself.
|
||||||
|
|
||||||
|
**YAML naming follows the locator — the *key* names the locus**, so neither the value's scheme nor a comment is load-bearing:
|
||||||
|
|
||||||
|
| Locus | Key | Value |
|
||||||
|
|---|---|---|
|
||||||
|
| Embedded | **`storage:`** (shipped `uri:` is a deprecated alias) | a storage URI (`s3://…`, `file://…`) |
|
||||||
|
| Remote | **`server:`** | a name in `servers:` (its `endpoint` + creds resolve by name, §5) |
|
||||||
|
| Remote graph id | **`graph_id:`** | the id on that server — **defaults to the entry key**; set only when the local alias differs |
|
||||||
|
|
||||||
|
An entry has `storage:` **xor** `server:` — the deserializer rejects *both* and *neither* (no silent ambiguity). This removes two prior confusions: `graphs:` (the map) vs `graph:` (the remote id), and `uri:`-might-be-a-server.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
servers:
|
||||||
|
prod-eu: { endpoint: https://og-eu.internal:8080 }
|
||||||
|
graphs:
|
||||||
|
dev: { storage: s3://team-bucket/dev.omni } # embedded
|
||||||
|
production: { server: prod-eu } # remote — graph_id = "production" (the key)
|
||||||
|
staging: { server: prod-eu, graph_id: prod } # remote — alias ≠ server's id
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1.2 Invalid configs are rejected by design
|
||||||
|
|
||||||
|
The DX rule is: **a config field is either honored or rejected, never silently ignored**. The loader therefore has two phases:
|
||||||
|
|
||||||
|
1. Parse YAML into a loose/raw shape that preserves origin (`base_dir`, layer, line/path when available).
|
||||||
|
2. Convert once into a typed, role-aware resolved config. Every command receives the resolved form, not the raw YAML structs.
|
||||||
|
|
||||||
|
The typed graph shape is:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
enum GraphEntry {
|
||||||
|
Embedded(EmbeddedGraphEntry),
|
||||||
|
Remote(RemoteGraphEntry),
|
||||||
|
}
|
||||||
|
|
||||||
|
struct EmbeddedGraphEntry {
|
||||||
|
storage: StorageUri,
|
||||||
|
branch: Option<BranchName>,
|
||||||
|
policy: Option<PolicyFile>,
|
||||||
|
queries: QueryRegistrySpec,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RemoteGraphEntry {
|
||||||
|
server: ServerId,
|
||||||
|
graph_id: GraphId,
|
||||||
|
branch: Option<BranchName>,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
That makes these rules structural rather than advisory:
|
||||||
|
|
||||||
|
- A graph entry must specify **exactly one** locator: `storage:`/legacy `uri:` xor `server:`.
|
||||||
|
- `policy:` and `queries:` are valid only on `Embedded` graph entries, because they define the capability surface of a graph this process opens directly. A `Remote` graph entry points at a server; that server owns policy and stored-query definitions.
|
||||||
|
- `omnigraph-server` may serve only `Embedded` graph entries. A server manifest entry with `server:` is rejected: a server should not "host" a graph by proxying another server.
|
||||||
|
- A named graph uses its own graph entry. Top-level `policy:` / `queries:` are a legacy anonymous-bare-URI compatibility path only; if a named graph is selected while top-level blocks would be ignored, config validation errors with a migration hint.
|
||||||
|
- A client-defined remote graph discovers stored queries from the server (`GET /queries`) and invokes them (`POST /queries/{name}`); it does not define `queries:` locally for that remote graph.
|
||||||
|
|
||||||
|
Examples that must fail fast:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
graphs:
|
||||||
|
prod:
|
||||||
|
storage: s3://team-bucket/prod.omni
|
||||||
|
server: prod-us # invalid: storage xor server
|
||||||
|
```
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
graphs:
|
||||||
|
prod:
|
||||||
|
server: prod-us
|
||||||
|
graph_id: production
|
||||||
|
policy: { file: ./policies/prod.yaml } # invalid: remote graph policy lives on the server
|
||||||
|
queries:
|
||||||
|
find_user: { file: ./queries/find_user.gq } # invalid: remote graph queries are discovered
|
||||||
|
```
|
||||||
|
|
||||||
|
`omnigraph config view --resolved --show-origin` is the user-facing debugger for this boundary: it shows the final `Embedded` or `Remote` graph and where every honored field came from. Fields that cannot be honored never make it into the resolved view; they fail validation first.
|
||||||
|
|
||||||
|
### 2. Layered config — global-first, uniform schema, project-optional
|
||||||
|
|
||||||
|
**Posture: global-first, project-optional.** OmniGraph's CLI is primarily a *client* (it operates against graphs and servers, embedded or remote), so it sits on the **global-first** side of the CLI-config axis — like `kubectl` / `aws` / `gh` / `docker`, and unlike *project-first* tools (`git` / `cargo` / `terraform`) whose primary config is per-repo. The **global user config is the primary, self-sufficient default**; the project file is an *optional* repo-scoped override (and, when present, the deployment manifest). `omnigraph query --target prod` must work from **any directory with no project file**, exactly as `kubectl get pods --context prod` works from anywhere. *(This is a deliberate flip from today, where the CLI reads `./omnigraph.yaml` and does not even walk parent dirs — i.e. today it is project-anchored.)*
|
||||||
|
|
||||||
|
**Rule: the two layers share ONE raw schema, and each is fully self-sufficient** (the git-layering mechanism — same schema at both levels; you never need a repo to have a working config). Do **not** specialize the file format by layer. Instead, run the same role-aware validation everywhere (§1.2): the global and project layers may both define graph locators, defaults, servers, and aliases, but fields that are meaningless for a resolved graph variant are rejected rather than ignored. For example, `queries:` is valid for an embedded graph this config opens directly; it is invalid on a remote graph entry because remote stored queries are server-owned and discovered.
|
||||||
|
|
||||||
|
This makes the **zero-project case the default, not an edge case**: a solo user (or an agent) defines everything needed for client work in `~/.omnigraph/config.yaml` — servers, embedded + remote graph locators, defaults, aliases, and optionally personal embedded-graph query registries — and **never creates a project file**. A team adds `./omnigraph.yaml` only when it wants repo-scoped overrides or a committed, GitOps'd deployment manifest. Global-first does **not** forbid project files; it stops *requiring* them (the kubectl model: `~/.kube/config` is sufficient and default; per-project kubeconfigs are opt-in via `KUBECONFIG`).
|
||||||
|
|
||||||
|
| Layer | Required? | Typical use | Path |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Global | no | **the default** — solo/agent's entire config; shared servers+creds for teams; even a personal server's graphs/queries | `~/.omnigraph/config.yaml` |
|
||||||
|
| Project | no | **opt-in** — repo-scoped overrides + the committed deployment manifest (graphs, queries, policy) | `./omnigraph.yaml` |
|
||||||
|
|
||||||
|
**Precedence (low → high):** built-in defaults < global < project < env vars < CLI flags. With no project file it collapses to **built-in < global < env < flags** — the common global-only path.
|
||||||
|
|
||||||
|
**Merge semantics — "closest layer wins, at the smallest meaningful unit"** (the field consensus: git / kubeconfig / cargo / Helm / VS Code):
|
||||||
|
- **Settings objects** (`defaults`, `auth`, `server`) → **deep-merge per field**: a project sets `defaults.graph` and *inherits* the global `defaults.output_format`. (VS Code / cargo behavior.)
|
||||||
|
- **Named-resource maps** (`servers`, `graphs` / compat `targets`, `queries`, `aliases`) → **union by key; on a collision the higher layer's entry REPLACES the lower wholesale** — *no field-level deep-merge within an entry*. (kubeconfig: union contexts by name.) The footgun this avoids: global `servers.prod = {endpoint, policy}`, project `servers.prod = {endpoint: other}` — deep-merge would silently retain the old fields; replace makes the project's `prod` self-contained and predictable.
|
||||||
|
- **Lists/arrays** → **replace, never append** (Helm convention; appending is order-sensitive and surprising).
|
||||||
|
- **Scalars** → higher layer wins.
|
||||||
|
- **Relative paths carry their origin's base_dir.** A `queries:` entry's `.gq` path, or a `policy.file`, resolves against the directory of the layer it was *defined in* — global entries under `~/.omnigraph/`, project entries under the project dir.
|
||||||
|
- **Inspectable (non-negotiable):** `omnigraph config view --resolved --show-origin` prints each final value *and which layer set it* (the `git config --show-origin` / `kubectl config view` rule). A layered config without origin-tracing is a debugging trap.
|
||||||
|
|
||||||
|
### 3. Roles, and the file-naming decision (same name for project = server)
|
||||||
|
|
||||||
|
`omnigraph.yaml` carries two *roles* that diverge in prod and collapse on a laptop:
|
||||||
|
|
||||||
|
- **Server role** (read by `omnigraph-server`): `graphs:` entries that are **embedded storage locators**, per-graph `policy.file`, **`queries:` — the stored-query/MCP registry lives here**, plus serving knobs. Remote graph locators are rejected in this role.
|
||||||
|
- **Client role** (read by the CLI/agent): `servers:`, embedded or remote `graphs:` locators, `defaults:`, `aliases:`. A remote graph locator points at server-owned capabilities; it cannot define local `policy:` or `queries:`.
|
||||||
|
|
||||||
|
**Project config and server config are the same artifact, hence the same name.** The server *serves the project*: the file that says "these graphs exist, with these stored queries and this policy" is simultaneously the project manifest and the server's deploy config. Role is distinguished by which *sections* are populated, never by filename. Readers skip whole top-level sections that belong to the other role (today's file already does this with `cli:` vs `server:`); this is distinct from the §1.2 rule, which still applies inside every section a reader does own — a field there is honored or rejected, never silently ignored.
|
||||||
|
|
||||||
|
**Why not kube's role-split.** Two coherent models exist: (A) one project file with role-sections (Helix `helix.toml` holds both `[local.dev]` and `[enterprise.production]`; compose; Cargo), and (B) deployment-manifest strictly separate from client config (kubectl — you never put a context in `deployment.yaml`). kube is the sharpest topological analog (multi-server × multi-graph, one client targeting many), so B has a real claim. The tiebreaker is **E1: OmniGraph is embedded-first.** In embedded mode the manifest's `graphs:` *is* the local target list — manifest and local-client-view are the same object, so splitting them (B) fights the grain and forces two files for local work. kube splits because it has **no** embedded mode (client always remote+global). So: take the half kube is right about — *remote* client targeting (`servers:`, endpoints, creds) is a separate concern in a separate **user-global** file (`config.yaml`, like `~/.kube/config`); reject the half it is wrong about for us — do **not** split the *project* layer by role. **The second name (`config.yaml`) is justified by scope (user-global), not role.** *(If OmniGraph ever dropped embedded mode and went pure-remote, model B's strict split would become cleanest.)*
|
||||||
|
|
||||||
|
### 4. File naming
|
||||||
|
|
||||||
|
Principles from the field: **one global dir** `~/.omnigraph/` (like `~/.aws`/`~/.kube`/`~/.helix`), with config/cache/state as **subdirectories** (separation without XDG's three-root scatter); **secrets keyed by server name in the OS keychain or a separate git-ignored profile file** (AWS/gh model, not a new `credentials.yaml`); **project-root manifest keeps the app-named file** (`Cargo.toml`, `package.json`); **`.yaml`, not `.yml`**; keep OmniGraph's established names. The genuinely *new* decisions are the **global** dir's existence and keyed-by-name resolution with an explicit `auth.token` override (MR-971); the shipped `bearer_token_env` + `auth.env_file` mechanism remains as legacy compat.
|
||||||
|
|
||||||
|
| Artifact | Path / name | Why |
|
||||||
|
|---|---|---|
|
||||||
|
| Project = server config (one artifact) | `./omnigraph.yaml` | **Keep.** Root manifest like `Cargo.toml` / `compose.yaml` / `helix.toml`. Same name for both roles because it is one file. In prod the server's deploy repo and an app repo each have their own `omnigraph.yaml` — same name, different repos. |
|
||||||
|
| Global user config | `~/.omnigraph/config.yaml` | **One dir** (`~/.omnigraph/`, like `~/.aws`/`~/.kube`/`~/.helix`). Named `config.yaml` *not* `omnigraph.yaml` — the name signals scope (and `~/.aws/config`, `~/.kube/config`, `~/.helix/config` all do this). Holds the full schema so a solo user needs nothing else. |
|
||||||
|
| Credentials | OS keychain (`omnigraph:<server>`, preferred) → `~/.omnigraph/credentials` profile file (`[<server>]`, `0600`, git-ignored). **Keyed by server name**, inside the one dir. | **Key by name, AWS/gh model** — `~/.aws/credentials [profile]`, `~/.kube/config users:`, `~/.helix/credentials`. *Not* a `credentials.yaml`, and *not* a per-server hand-named env var; the secret lives under the server name (no indirection). Legacy `bearer_token_env` + `.env.omni` dotenv remain as a compat path. See §5. |
|
||||||
|
| Cache / state | `~/.omnigraph/cache/`, `~/.omnigraph/state/` | Subdirs of the one dir (like `~/.aws/sso/cache/`, `~/.kube/cache/`) — cache is `rm -rf`-safe and backup-excludable without scattering across XDG roots. |
|
||||||
|
| Cedar policy | `./policies/<env>.yaml` + `<env>.tests.yaml` | **Keep.** Referenced by `policy.file`. |
|
||||||
|
| Schema | `./*.pg` (e.g. `schema.pg`) | **Keep.** |
|
||||||
|
| Stored queries | `./queries/*.gq` | **Keep.** `.gq` sources referenced by the `queries:` registry. |
|
||||||
|
|
||||||
|
**Global dir: `~/.omnigraph/` — one place, with subdirectories.** Everything OmniGraph keeps for a user lives under a single `~/.omnigraph/` directory, matching the peer group (`~/.aws`, `~/.kube`, `~/.docker`) and the direct competitor (`~/.helix`). This is what DB/cloud-CLI users expect and the lowest-cognitive-load shape.
|
||||||
|
|
||||||
|
*Separation and "one place" are not in conflict* — the decisive realization. The peer tools get config/cache/state separation via **subdirectories inside the one dir**, not via XDG's three scattered roots: `~/.aws/sso/cache/`, `~/.kube/cache/`. So OmniGraph keeps `~/.omnigraph/config.yaml`, `~/.omnigraph/credentials`, `~/.omnigraph/cache/` (catalogs — `rm -rf`-safe, backup-excludable), `~/.omnigraph/state/` (session, logs) — getting cache hygiene **and** a single discoverable location, without the XDG scatter. An earlier draft argued for XDG on a false dichotomy (it assumed single-dir ⇒ mixed); subdirs dissolve it. `~/.omnigraph/` is canonical and documented; `$XDG_CONFIG_HOME` may optionally be honored if a user has set it, but XDG is not part of the mental model.
|
||||||
|
|
||||||
|
**Env / override precedence (the `KUBECONFIG` analog):**
|
||||||
|
- `OMNIGRAPH_CONFIG=/path` — explicit config file, highest precedence.
|
||||||
|
- `OMNIGRAPH_HOME=/path` → the global dir (default `~/.omnigraph/`); `$XDG_CONFIG_HOME` optionally honored if a user has set it, but `~/.omnigraph/` is canonical.
|
||||||
|
- Cache and state are subdirs of the one dir: `~/.omnigraph/cache/` (cached remote catalogs), `~/.omnigraph/state/` (session, logs).
|
||||||
|
- Per-server token resolution: an explicit `auth: { token: {...} }` source (env/file/command/keychain) wins if set; otherwise **keyed by the server name** — `OMNIGRAPH_TOKEN_<NAME>` (or `OMNIGRAPH_TOKEN` for the active server) → OS keychain `omnigraph:<name>` → the `[<name>]` profile in `~/.omnigraph/credentials`; legacy `bearer_token_env` still honored. See §5.
|
||||||
|
|
||||||
|
### 5. Credentials, connection tiers, and bind portability (12-factor)
|
||||||
|
|
||||||
|
**Credentials are by-reference everywhere, never inlined — and keyed by the *server name*, not by a hand-invented env-var name.** This is the one place the design departs from simply reusing the shipped `bearer_token_env` mechanism, because that mechanism is sub-optimal for a multi-server client: it forces the operator to invent and coordinate an env-var name per server (three steps to add a server: pick a var, name it in config, set it in the store). The peer group (AWS profiles, `gh` hosts, kubeconfig users, docker auths) instead keys the secret **by the server's name** — no indirection. OmniGraph should match that.
|
||||||
|
|
||||||
|
**Resolution for server `<name>` (no config field required):**
|
||||||
|
1. **`OMNIGRAPH_TOKEN_<NAME>`** env var (name-derived, upper-snake — e.g. server `prod-us` → `OMNIGRAPH_TOKEN_PROD_US`), else **`OMNIGRAPH_TOKEN`** for the active server — the CI/headless override (12-factor).
|
||||||
|
2. **OS keychain** entry `omnigraph:<name>` — the preferred interactive store (no plaintext on disk); written by `omnigraph login <name>`.
|
||||||
|
3. **`~/.omnigraph/credentials`** — an AWS-style profile file keyed by server name (mode `0600`, git-ignored), the fallback when no keychain is available:
|
||||||
|
```ini
|
||||||
|
[prod-us]
|
||||||
|
token = …
|
||||||
|
[prod-eu]
|
||||||
|
token = …
|
||||||
|
```
|
||||||
|
So a `servers.<name>` with no token field resolves by name — adding a server is one step (`omnigraph login <name>`), and "multiple servers, multiple tokens" falls out for free.
|
||||||
|
|
||||||
|
**But implicit must not be the *only* path — explicit sourcing is a first-class option** (the DX/AX lesson). Pure-convention is invisible (you must *know* `OMNIGRAPH_TOKEN_<NAME>`), can't integrate with a secrets-manager's fixed var name, and can't do dynamic/short-lived tokens. So a server may declare an explicit `auth:` block — a **method-agnostic wrapper** (today only `token:` for bearer; `mtls:`/`oidc:` are the future siblings, so the credential model never has to be re-keyed) holding a tagged token *source*. Secrets are *still* never inlined (every source is a reference):
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
servers:
|
||||||
|
prod-us:
|
||||||
|
endpoint: https://og-us…
|
||||||
|
auth: { token: { env: OG_PROD_US_TOKEN } } # explicit env var — self-documenting (= legacy bearer_token_env)
|
||||||
|
prod-eu:
|
||||||
|
endpoint: https://og-eu…
|
||||||
|
auth: { token: { command: [vault, read, -field=token, secret/og] } } # dynamic / short-lived
|
||||||
|
edge:
|
||||||
|
endpoint: https://og-edge…
|
||||||
|
auth: { token: { file: /run/secrets/og-token } } # k8s/docker mounted secret
|
||||||
|
staging:
|
||||||
|
endpoint: https://og-staging… # no auth: → implicit chain (below)
|
||||||
|
```
|
||||||
|
|
||||||
|
| `auth.token:` source | when | DX/AX value |
|
||||||
|
|---|---|---|
|
||||||
|
| *(auth omitted)* | the common case | zero-config; `omnigraph login` populates keychain `omnigraph:<name>` |
|
||||||
|
| `{ env: VAR }` | secrets-manager / CI injects a fixed var | **self-documenting** — config states the source; = the legacy `bearer_token_env` |
|
||||||
|
| `{ file: PATH }` | k8s/docker secret mounted as a file | no env plumbing |
|
||||||
|
| `{ command: [...] }` | Vault, cloud IAM, `gh auth token` | **dynamic tokens** — first-class exec, the capability pure-env/keychain can't give (kube `exec` / AWS `credential_process`) |
|
||||||
|
| `{ keychain: ENTRY }` | pin a non-default keychain entry | explicit override of the name-derived default |
|
||||||
|
|
||||||
|
**Resolution per server:** if `auth.token:` is set, use that source (no fallthrough). Else the **implicit chain**: `OMNIGRAPH_TOKEN_<NAME>` (or `OMNIGRAPH_TOKEN` for the active server) → keychain `omnigraph:<name>` → `[<name>]` in `~/.omnigraph/credentials` (`0600`, git-ignored). `omnigraph login <server>` writes/rotates only that server's secret; per-server precedence is independent; sharing is opt-in (same env var or source). The `command` source runs locally with the operator's own privileges and is defined only in operator-owned config (never server-supplied), so it adds no remote-execution surface. The `auth:` wrapper is method-agnostic so adding mTLS/OIDC later is a new sibling key, not a breaking re-key (Hyrum's Law: the field name is a contract once shipped). There is **no `credentials.yaml`** and **no inlined secret**. *Convention for the floor, explicit for control — and explicit is legible to agents and never inlines a secret.*
|
||||||
|
|
||||||
|
**Back-compat.** The shipped per-graph `bearer_token_env` + `auth.env_file` dotenv (`resolve_remote_bearer_token`, real-env-wins) keeps working unchanged for existing single-server setups; `bearer_token_env` is just the legacy flat alias for `auth: { token: { env } }`. When an explicit `auth.token:` (or legacy `bearer_token_env`) is set, resolution uses that source exclusively (no fallthrough); only when none is set does it walk the keyed-by-name chain — so nothing breaks, but the zero-config default is the no-boilerplate keyed-by-name path. (MR-971 — the `bearer_token_env` parity gap — is where this resolver work lands.)
|
||||||
|
|
||||||
|
**Three connection tiers** (Supabase/Prisma teach the zero-config floor):
|
||||||
|
1. **Env vars** — `OMNIGRAPH_SERVER=https://…` + `OMNIGRAPH_TOKEN=…`: zero-config remote, no file (the `DATABASE_URL` floor).
|
||||||
|
2. **Global `config.yaml`** — named `servers:` + `graphs:` for multi-server setups (the AWS-profiles convenience).
|
||||||
|
3. **Project `omnigraph.yaml`** — project-pinned targets/graphs, committed.
|
||||||
|
|
||||||
|
**Keep `omnigraph.yaml` a *portable* manifest (12-factor).** Deploy-specific runtime that varies per environment — the **bind host/port**, worker counts — should be supplied by **`--bind` / `OMNIGRAPH_BIND` (flags/env)**, *not* a committed `server.bind:` baked into the manifest. A manifest that hardcodes `0.0.0.0:8080` is not portable across deploys and leaks an environment detail into a version-controlled file. The same-named `omnigraph.yaml` stays portable across deploys precisely because the volatile, per-environment knobs live in env/flags (12-factor config), while the stable, portable definition (graphs, queries, policy) lives in the file. This is the one concrete lesson taken from kube's model-B without adopting its file split: portability via env/flags, not via a second file.
|
||||||
|
|
||||||
|
### 6. Where stored queries live: defined locally, invoked remotely
|
||||||
|
|
||||||
|
A stored query splits across two axes; do not conflate them:
|
||||||
|
- **Definition** (`.gq` source + `queries:` entry) lives next to the **embedded graph entry that owns it**. For a hosted remote graph, that is the **deployment manifest** read by `omnigraph-server`; for a personal embedded graph, it may be the user's own config. It never lives on a client-side `Remote` graph entry.
|
||||||
|
- **Discovery** ("what tools exist for me?") is fetched from the **server** (Cedar-filtered `GET /queries` / MCP catalog) at connect time.
|
||||||
|
- **Invocation** is **remote** (client → server, HTTP/MCP) — or **embedded** (the CLI opens the graph directly and reads the same manifest).
|
||||||
|
|
||||||
|
For remote use, the client carries *pointers to servers*, not query definitions; it **discovers and invokes**, never defines. This is the **capability-as-code guarantee for agents**: an agent can only invoke tools the server's *committed, reviewed* config exposes — it **cannot define a new tool at runtime**. Definition is structurally outside the agent's reach.
|
||||||
|
|
||||||
|
`queries:` (graph-capability registry, Cedar-gated when served remotely, MCP-visible when exposed) and `aliases:` (client CLI shortcut) overlap — both can name `.gq`-backed operations. This RFC keeps them siblings (the MR-969 decision); the clean long-term shape is **one registry, two invocation surfaces** (embedded + remote), with `aliases:` subsumed. That convergence is out of scope here.
|
||||||
|
|
||||||
|
#### Reconciling `aliases:` with the role model
|
||||||
|
|
||||||
|
`aliases:` is the pre-MR-969, **client-role, embedded-only, ungated** ancestor of `queries:`. An alias bundles `command` (read/change), `query` (`.gq` path), `name` (symbol), `args` (positional param names), and `graph`/`branch`/`format` defaults; the CLI runs it embedded. The server never reads it. So:
|
||||||
|
|
||||||
|
- **Role:** `aliases:` is **client-role** (CLI behavior) → it may live in **both** the user-global `config.yaml` and the project manifest, layered. `queries:` is **graph-capability role** → it lives only on an `Embedded` graph entry, and for remote server graphs that means the server deployment manifest. *Who opens the graph determines where query definitions can live.*
|
||||||
|
- **Difference:** `aliases:` = embedded invocation, no gating, explicit `command`, bundles client defaults + positional args. `queries:` = remote (+future embedded), Cedar + `mcp.expose`, **infers** read/mutate, bundles only MCP settings.
|
||||||
|
- **Convergence:** decompose an alias — *definition* (name→.gq+symbol) → `queries:` (the superset: typed, validated, gated, multi-surface, no redundant `command`); *target/branch/format* → client invocation context (`--target`/`--branch`/`--format` or `defaults:`), not baked per-query; *positional `args`* → thin CLI sugar or dropped (agents/services use named JSON params). End-state: one `queries:` registry + the client config model subsumes `aliases:`.
|
||||||
|
- **Validation:** a file-backed alias (`query: ./foo.gq`) may target only an embedded graph. A remote graph shortcut must be explicit that it invokes a server-owned stored query, e.g. `invoke: find_user`, so the client cannot smuggle a new `.gq` definition into a remote capability surface.
|
||||||
|
- **v1:** keep `aliases:` unchanged. Footgun worth a load-time warn: an alias and a query with the same name in one manifest are different namespaces invoked differently (`--alias X` vs `POST /queries/X`).
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
aliases:
|
||||||
|
local_owner:
|
||||||
|
command: query
|
||||||
|
query: ./queries/owner.gq
|
||||||
|
name: owner
|
||||||
|
graph: dev # valid only if `dev` resolves Embedded
|
||||||
|
|
||||||
|
remote_owner:
|
||||||
|
invoke: find_user
|
||||||
|
graph: prod # valid only if `prod` resolves Remote; source lives on the server
|
||||||
|
args: [name]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. CLI surface
|
||||||
|
|
||||||
|
- `omnigraph login <server>` — interactive auth; stores the token keyed by server name in the OS keychain (`omnigraph:<server>`) or the `[<server>]` profile of `~/.omnigraph/credentials` (0600). The `gh auth login` analog.
|
||||||
|
- `omnigraph use <graph>` — set the active graph (writes the appropriate layer). The `kubectl config use-context` analog.
|
||||||
|
- `omnigraph config view [--resolved] [--show-origin] [<graph>]` — print the merged config and, with `--resolved`, the final tuple **plus the origin layer of every field** (the `git config --show-origin` / `kubectl config view` analog). Resolution is never a mystery.
|
||||||
|
- All existing verbs (`query`, `mutate`, `load`, `schema`, `branch`, …) gain `--graph <name>`; resolution decides embedded vs remote transparently.
|
||||||
|
|
||||||
|
### 7.5 Init, login, and bootstrap — three tiers (folds in the Q2 design)
|
||||||
|
|
||||||
|
Scaffolding splits into three tiers by *scope* and *fatness*, mirroring the field (supabase `init` vs `login`; HelixDB thin `init` vs fat `chef`). Most of this lives in sibling tickets; this RFC owns only the **user route**.
|
||||||
|
|
||||||
|
| Tier | Command | Scope | What it does | Model | Status |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| **User route** | `omnigraph login [<server>]` | user (`~/.omnigraph/`) | auth + write `~/.omnigraph/config.yaml` / `credentials`; first-run global setup | gh / supabase `login` | **this RFC** (unbuilt) |
|
||||||
|
| **Thin project init** | `omnigraph init` | project, in-place | create graph + `scaffold_config_if_missing` (`omnigraph.yaml` + minimal `.pg`/`.gq`); refuse-if-exists or `--force` | `cargo init`, `prisma init` | exists; `--force` purge = MR-975 |
|
||||||
|
| **Fat bootstrap** | `omnigraph quickstart [--template <t>] [--auto]` | project, possibly new-dir | scaffold + seed data + `serve start` + agent prompt file | HelixDB `chef`, `create-next-app` | MR-973 (unbuilt) |
|
||||||
|
|
||||||
|
**Design positions** (first-principles, since none of the fat tier is built):
|
||||||
|
- **Split `init` (project) from `login` (user)** — never one command writing to both `$HOME` and the project (the supabase line, not the dbt line). `init`=project scaffold; `login`=user credential + global config.
|
||||||
|
- **`init` is in-place + refuse-if-exists** (cargo/prisma/terraform default): don't clobber; adopt existing files; require `--force` to overwrite (and `--force` purges Lance state per MR-975).
|
||||||
|
- **Interactive for humans, `--auto`/agent-mode for automation** (npm `-y`, create-* `--CI`, MR-981 `--machine`). In `OMNIGRAPH_AGENT_MODE` any prompt → fail with a repair hint.
|
||||||
|
- **Templates are a `--template <name>` flag on the fat tier** (create-vite model), with the *content* (schema + queries + seed) coming from a template source. Mechanism is a design question (bundled-in vs `og template pull` from a repo vs `npm create-*`-style delegation) — **not** an existing foothold (MR-581 stale). Lean: a small set of bundled templates first (generic `Person→Knows`, plus promote `omnigraph-intel-bootstrap`), `--template <github>` later.
|
||||||
|
- **`init`/`quickstart` can scaffold the `graphs:` map with one or more entries**; "init with specific graphs" = the scaffolded `graphs:` block (embedded `storage:` locally; the agent/operator adds remote `server:` entries via `login` + editing).
|
||||||
|
- **Secrets-on-scaffold rule** (prisma/dbt/supabase all do this): anything that writes a token also keeps it out of VCS. `login` prefers the OS keychain (no file); the `~/.omnigraph/credentials` profile fallback is `0600` and git-ignored, and any project-local `.env`-shaped file gets a `.gitignore` entry.
|
||||||
|
|
||||||
|
### 8. Concrete shape
|
||||||
|
|
||||||
|
**Global** `~/.omnigraph/config.yaml` (per-user, secret-free):
|
||||||
|
```yaml
|
||||||
|
servers: # endpoint only — token is keyed by the server name
|
||||||
|
prod-us: { endpoint: https://og-us.internal:8080 }
|
||||||
|
prod-eu: { endpoint: https://og-eu.internal:8080 }
|
||||||
|
staging: { endpoint: https://og-staging.internal:8080 }
|
||||||
|
graphs:
|
||||||
|
personal: { storage: ~/graphs/personal.omni }
|
||||||
|
defaults:
|
||||||
|
graph: personal
|
||||||
|
aliases:
|
||||||
|
my_people:
|
||||||
|
command: query
|
||||||
|
query: ~/queries/people.gq
|
||||||
|
name: list_people
|
||||||
|
graph: personal
|
||||||
|
```
|
||||||
|
|
||||||
|
**Project client** `./omnigraph.yaml` (committed, secret-free, portable — no `server.bind`). Note the shipped noun is `graphs:` (MR-603); an entry is embedded (`storage:`) XOR remote (`server:` + `graph_id:`, §1.1):
|
||||||
|
```yaml
|
||||||
|
graphs:
|
||||||
|
dev: { storage: s3://team-bucket/dev.omni, branch: main } # embedded
|
||||||
|
staging: { server: staging, graph_id: prod, branch: review } # remote → graph `prod` on server `staging`
|
||||||
|
prod-us: { server: prod-us, graph_id: production }
|
||||||
|
prod-eu: { server: prod-eu, graph_id: production } # multi-homed: same graph, another server
|
||||||
|
defaults: { graph: dev, output_format: table }
|
||||||
|
aliases:
|
||||||
|
owner:
|
||||||
|
command: query
|
||||||
|
query: ./queries/owner.gq
|
||||||
|
name: owner
|
||||||
|
args: [name]
|
||||||
|
graph: dev
|
||||||
|
```
|
||||||
|
Select with `--graph <name>` (shipped flag, MR-603).
|
||||||
|
|
||||||
|
**Server deployment** `./omnigraph.yaml` (committed in the deploy repo, read by `omnigraph-server`). Every served graph is an embedded storage locator; server-owned policy and stored-query definitions live here:
|
||||||
|
```yaml
|
||||||
|
graphs:
|
||||||
|
production:
|
||||||
|
storage: s3://team-bucket/prod.omni
|
||||||
|
policy:
|
||||||
|
file: ./policies/prod.yaml
|
||||||
|
queries:
|
||||||
|
find_user:
|
||||||
|
file: ./queries/find_user.gq
|
||||||
|
mcp: { expose: true, tool_name: lookup_user }
|
||||||
|
|
||||||
|
server:
|
||||||
|
policy:
|
||||||
|
file: ./policies/server.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
**Credentials** are keyed by server name — `omnigraph login prod-us` writes the OS keychain entry `omnigraph:prod-us` (or a `[prod-us]` profile in `~/.omnigraph/credentials`, 0600, git-ignored); `OMNIGRAPH_TOKEN_PROD_US` overrides for CI. No token fields in any config file; no committable secrets.
|
||||||
|
|
||||||
|
## DX
|
||||||
|
|
||||||
|
1. **One command surface, two loci.** `query --graph dev` (embedded) and `--graph staging` (remote) are the same command; only resolution differs. Change one word, not a mental model.
|
||||||
|
2. **Clone-and-go.** The project config names servers and graphs; a teammate runs `omnigraph login staging` once and every target resolves. This is the git + `gh auth login` model.
|
||||||
|
3. **Multi-server × multi-graph is the default.** Remote graph entries reference `server` by name; `servers` is a global named map; graphs are per-server. `prod-us` and `prod-eu` both serving `production` is two graph entries — Helix cannot express this.
|
||||||
|
4. **Solo-first.** Everything in `~`, no project required.
|
||||||
|
5. **Laptop-to-fleet on one schema.** Local = one `omnigraph.yaml` (both roles); prod = role-split across repos. No second format to learn.
|
||||||
|
|
||||||
|
## AX (agent experience)
|
||||||
|
|
||||||
|
1. **One flat resolved context, never a config to navigate.** target→server→endpoint→token resolves *before* the agent sees anything. The agent reasons about tools, not topology (the LLM-safe-surface principle extended to config).
|
||||||
|
2. **Secrets are structurally outside the agent's reach.** The repo it operates in has no tokens; they are in the global layer / keychain, outside its view. An agent *cannot* exfiltrate a prod token from project config because it is not there.
|
||||||
|
3. **Branch/snapshot-pinned contexts** (E4) — hand an agent a `branch: review` / `--snapshot v42` target and its reads are reproducible and cannot see uncommitted main-line state. No kubeconfig analog.
|
||||||
|
4. **The agent's capabilities are a GitOps'd artifact** (E6) — which graphs exist, which stored-query tools it may call, and which Cedar rules gate them are all in the version-controlled server config. Powers change only via a reviewed PR, deployed by restart. Infrastructure-as-code for what the AI can do.
|
||||||
|
5. **Config + policy compose.** Config = "where am I pointed + which token"; Cedar = "what may I do there." Orthogonal; no enforcement logic leaks into config.
|
||||||
|
|
||||||
|
## GitOps — three surfaces, secrets in none
|
||||||
|
|
||||||
|
| Surface | Repo | Contents | Deploy | Secrets |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| Server deployment config | infra/deploy repo | `graphs:`, policy, **`queries:` + `.gq` files** | commit → CI → **server restart** (no hot reload) | none — by-reference |
|
||||||
|
| Project client config | app repo | `graphs:` → embedded storage or remote server+graph | committed, read by CLI/agent | none |
|
||||||
|
| Global user config | **not GitOps'd** — machine-local `~` | `servers:` + creds-by-ref | `omnigraph login` writes it | refs only (like `~/.kube/config`) |
|
||||||
|
|
||||||
|
## Comparison
|
||||||
|
|
||||||
|
| Property | kubeconfig | Helix | git | compose | **OmniGraph (this RFC)** |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| Named remote endpoints + creds-by-ref | ✅ | ✅ | partial | partial | ✅ (global `servers`) |
|
||||||
|
| Global + project layering, uniform schema | ✗ | ✗ | ✅ | ✗ | ✅ |
|
||||||
|
| Embedded OR remote under one name | ✗ | ✗ | n/a | ✗ | ✅ (E1) |
|
||||||
|
| Multi-server × multi-graph | ✅ | ✗ | n/a | n/a | ✅ (E2) |
|
||||||
|
| Branch/snapshot in the address | ✗ | ✗ | partial | ✗ | ✅ (E4) |
|
||||||
|
| Agent tool surface in the repo | ✗ | ✗ (separate bundle) | n/a | n/a | ✅ (E6) |
|
||||||
|
| Project manifest renamed by role | — | no | — | no | **no** |
|
||||||
|
| Concept count | 3 | 1 | 2 | 1 | **2 (servers/targets)** |
|
||||||
|
|
||||||
|
## Migration / backwards compatibility
|
||||||
|
|
||||||
|
- **Additive.** Today's `omnigraph.yaml` (`graphs:`, `cli:`, `server:`, `aliases:`, `policy:`) keeps working unchanged. `graphs:` entries are equivalent to embedded `targets:` with a `storage:` (shipped `uri:` is a deprecated alias); both resolve.
|
||||||
|
- **`targets:` is new** and optional. `servers:` is new and optional. Absent → today's behavior.
|
||||||
|
- **Global `~/.omnigraph/config.yaml` is new.** Absent → only project + env + flags, exactly as now. Its addition is the **global-first posture flip**: today the CLI is project-anchored (reads `./omnigraph.yaml`, no parent walk); the global config becomes the new primary discovery path so the CLI works with no project file. Existing project-only workflows are unchanged (project still overrides global); the flip is additive — it adds a fallback layer below the project file, it does not remove the project file.
|
||||||
|
- **`graphs:` → `targets:` is an evolution, not a break.** Both can coexist; `targets:` is the superset (adds remote + branch pinning). A future cleanup may alias `graphs:` to embedded `targets:`.
|
||||||
|
- **`server.bind` stays supported** but documentation steers operators to `--bind` / `OMNIGRAPH_BIND` for portability; no removal.
|
||||||
|
- **Credentials: keyed-by-name is new; `bearer_token_env` is the compat path.** The primary design (keychain / `[<server>]` profile / `OMNIGRAPH_TOKEN_<SERVER>`) is new resolver work (lands on MR-971). The shipped `bearer_token_env` + `auth.env_file` dotenv (`resolve_remote_bearer_token`) is **unchanged and still honored** — existing single-server dotenv setups keep working, and the resolver honors an explicit `auth: { token: {...} }` source (env/file/command/keychain) with `bearer_token_env` as its flat legacy alias. No `credentials.yaml`.
|
||||||
|
- **Validation tightens invalid mixes, not valid legacy use.** Top-level `policy:` / `queries:` remain only for anonymous bare-URI compatibility. Named graphs use per-entry fields. Remote graph entries with local `policy:` / `queries:` and server manifests with `server:` graph locators are rejected because there is no correct way to honor those fields.
|
||||||
|
|
||||||
|
## Open questions
|
||||||
|
|
||||||
|
- **`graphs:` vs `targets:` naming churn.** Do we rename `graphs:` → `targets:` (with a deprecation alias) or keep `graphs:` for embedded and add `targets:` for remote? Leaning: keep both, document `targets:` as the superset.
|
||||||
|
- **Keychain integration scope.** Keychain is now the *primary* credential store (§5), so this is on the critical path, not optional: macOS Keychain first (matches operator practice) with the `0600` `[<server>]` profile file as fallback; Linux Secret Service / `pass` later. Open: which keyring crate, and the exact `OMNIGRAPH_TOKEN_<SERVER>` name-derivation (upper-snake, non-alnum → `_`).
|
||||||
|
- **Project-local `servers:`.** Allowed (e.g. a localhost dev server), merged with global. Confirm creds stay by-reference even for project-local servers (yes).
|
||||||
|
- **`aliases:` ⇄ `queries:` convergence.** Out of scope here; tracked separately. One registry with embedded + remote invocation surfaces is the target end state.
|
||||||
|
- **`KUBECONFIG`-style multi-file list.** Do we support `OMNIGRAPH_CONFIG` pointing at multiple files (colon-joined, as `KUBECONFIG` allows), or a single file only? Start with a single file; revisit if demand appears.
|
||||||
|
|
||||||
|
## Implementation — breadboard + slices (Shape A)
|
||||||
|
|
||||||
|
Shaped via requirements + a fit check (Shape A — global-first layered config + unified `graphs:` entry + three-tier init — selected over a project-first minimal option and a Helix-clone). This section breadboards A and slices it. **Bold** = NEW.
|
||||||
|
|
||||||
|
### Places
|
||||||
|
|
||||||
|
| # | Place | What |
|
||||||
|
|---|---|---|
|
||||||
|
| P1 | Disk | `~/.omnigraph/{config.yaml, credentials, cache/, state/}` + project `omnigraph.yaml` + `.env.omni` |
|
||||||
|
| P2 | Config resolution | runs on every command: load layers → merge → resolve `--graph` |
|
||||||
|
| P3 | Command execution | embedded engine OR remote HTTP client |
|
||||||
|
| P4 | Remote `omnigraph-server` | existing HTTP surface (`/query`, `/mutate`, `/queries/{name}`) |
|
||||||
|
| P5 | Scaffold | `login` / `init` / `quickstart` |
|
||||||
|
|
||||||
|
### Affordances
|
||||||
|
|
||||||
|
| # | Place | Affordance | NEW? | Wires |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| U1 | P1 | `~/.omnigraph/config.yaml` (operator edits) | **N** | → N1 |
|
||||||
|
| U2 | P1 | project `./omnigraph.yaml` | — | → N1 |
|
||||||
|
| U3 | P1 | `~/.omnigraph/credentials` / `.env.omni` dotenv (secrets, git-ignored) | — | → N4 |
|
||||||
|
| U4 | P3 | `omnigraph <verb> --graph <name>` (any command) | — | → N14 |
|
||||||
|
| U5 | P5 | `omnigraph login [<server>]` | **N** | → N11 |
|
||||||
|
| U6 | P5 | `omnigraph init` / `quickstart [--template]` | partly | → N12 / N13 |
|
||||||
|
| U7 | P2 | `omnigraph config view --resolved --show-origin` | **N** | → N10 |
|
||||||
|
| N1 | P2 | `load_layered_config()` — global (N3) + project (cwd), serde each | **N** | → N2 |
|
||||||
|
| N2 | P2 | **merge engine** — deep-merge settings; replace named-resource entries; replace lists; **retain provenance** and raw field origins | **N⚠️** | → N5, → S_merged |
|
||||||
|
| N3 | P2 | global-dir resolver — `OMNIGRAPH_HOME` else `~/.omnigraph/` | **N** | → N1 |
|
||||||
|
| N4 | P2 | `load_env_file_into_process` — dotenv, real-env-wins (existing) | — | → N9 |
|
||||||
|
| N5 | P2 | `resolve_graph(name, merged)` → typed `Embedded`/`Remote` locator; rejects invalid role/field combinations before execution | **N⚠️** | → N6 |
|
||||||
|
| N6 | P3 | `GraphConn` — `Embedded(engine)` \| `Remote(http)` dispatch | **N⚠️** | → N7, → N8 |
|
||||||
|
| N7 | P3 | embedded path — `Omnigraph::open(uri)` (existing) | — | → engine |
|
||||||
|
| N8 | P3 | **HTTP-client path** — POST `/query`/`/mutate`/`/queries/{name}` | **N⚠️** | → P4, → N9 |
|
||||||
|
| N9 | P2 | `resolve_bearer_token(server)` — explicit `auth.token` source if set, else **keyed by name**: `OMNIGRAPH_TOKEN_<NAME>`/`OMNIGRAPH_TOKEN` → keychain `omnigraph:<name>` → `[<name>]` profile; legacy `bearer_token_env`/dotenv (MR-971) | **N⚠️** | → N8 |
|
||||||
|
| N10 | P2 | `config view` handler — merged + per-field origin (needs N2 provenance) | **N** | → U7 |
|
||||||
|
| N11 | P5 | `login` handler — interactive auth → write `config.yaml` + `credentials` (0600) + `.gitignore` | **N⚠️** | → S_global |
|
||||||
|
| N12 | P5 | `init` handler — `scaffold_config_if_missing` + create graph; refuse-if-exists/`--force` purge (MR-975) | partly | → S_project |
|
||||||
|
| N13 | P5 | `quickstart` handler — scaffold + `--template` + seed + `serve start` + agent prompt (MR-973; needs serve MR-970) | **N⚠️** | → S_project |
|
||||||
|
| N14 | P3 | agent-mode wrapper — `--machine`/`OMNIGRAPH_AGENT_MODE`: JSON, structured errors, never-prompt, typed exit codes (MR-981) | **N⚠️** | → N1 |
|
||||||
|
| S_global | P1 | `~/.omnigraph/config.yaml` + `credentials` | **N** | read by N1/N9 |
|
||||||
|
| S_project | P1 | `./omnigraph.yaml` + `.env.omni` | — | read by N1/N4 |
|
||||||
|
| S_merged | P2 | in-memory resolved config (per command, with provenance) | **N** | read by N5/N10 |
|
||||||
|
| S_cache | P1 | `~/.omnigraph/cache/` (remote catalogs) | **N** | read by N8 |
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TB
|
||||||
|
subgraph P1["P1: Disk"]
|
||||||
|
U1["U1: ~/.omnigraph/config.yaml"]
|
||||||
|
U2["U2: ./omnigraph.yaml"]
|
||||||
|
U3["U3: credentials dotenv"]
|
||||||
|
end
|
||||||
|
subgraph P2["P2: Config resolution"]
|
||||||
|
N3["N3: global-dir (OMNIGRAPH_HOME)"]
|
||||||
|
N1["N1: load_layered_config"]
|
||||||
|
N2["N2: merge engine (+provenance)"]
|
||||||
|
N4["N4: dotenv loader"]
|
||||||
|
N5["N5: resolve_graph(--graph)"]
|
||||||
|
N9["N9: resolve_bearer_token"]
|
||||||
|
N10["N10: config view"]
|
||||||
|
end
|
||||||
|
subgraph P3["P3: Command execution"]
|
||||||
|
U4["U4: omnigraph <verb> --graph"]
|
||||||
|
N14["N14: agent-mode wrapper"]
|
||||||
|
N6["N6: GraphConn embedded|remote"]
|
||||||
|
N7["N7: embedded Omnigraph::open"]
|
||||||
|
N8["N8: HTTP-client POST"]
|
||||||
|
end
|
||||||
|
subgraph P5["P5: Scaffold"]
|
||||||
|
U5["U5: login"]; U6["U6: init/quickstart"]
|
||||||
|
N11["N11: login handler"]; N12["N12: init"]; N13["N13: quickstart"]
|
||||||
|
end
|
||||||
|
P4["P4: remote omnigraph-server"]
|
||||||
|
U1-->N1; U2-->N1; N3-->N1; N1-->N2-->N5-->N6
|
||||||
|
U3-->N4-->N9-->N8
|
||||||
|
U4-->N14-->N1
|
||||||
|
N6-->N7; N6-->N8-->P4
|
||||||
|
N2-->N10-->U7["U7: config view --resolved"]
|
||||||
|
U5-->N11; U6-->N12; U6-->N13
|
||||||
|
classDef ui fill:#ffb6c1,stroke:#d87093,color:#000
|
||||||
|
classDef n fill:#d3d3d3,stroke:#808080,color:#000
|
||||||
|
class U1,U2,U3,U4,U5,U6,U7 ui
|
||||||
|
class N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14 n
|
||||||
|
```
|
||||||
|
|
||||||
|
### Slices (vertical, each demo-able)
|
||||||
|
|
||||||
|
| # | Slice | Parts/affordances | Demo |
|
||||||
|
|---|---|---|---|
|
||||||
|
| **V1** | **Global layer + merge + `config view`** | A1–A4 · N1,N2,N3,N10 · U1,U7,S_global,S_merged | Put config in `~/.omnigraph/`, run `omnigraph config view --resolved --show-origin` from any dir → merged result with per-field origin; existing embedded commands work global-first with no project file |
|
||||||
|
| **V2** | **Remote graphs + HTTP client + creds** | A5–A7 · N5,N6,N8,N9 · S_cache | Define a `server:` graph entry; `omnigraph query --graph prod` hits the remote server (`curl`-free); embedded `--graph dev` still local |
|
||||||
|
| **V3** | **`omnigraph login`** | A8 · N11,U5 | `omnigraph login prod` writes `~/.omnigraph/credentials` (0600) + `.gitignore`; V2 remote query now works with no manual env |
|
||||||
|
| **V4** | **Thin-init hardening + quickstart + templates** | A9 · N12,N13,U6 (needs serve MR-970) | `omnigraph quickstart --template person-knows` scaffolds + seeds + serves; `init --force` purges (MR-975) |
|
||||||
|
| **V5** | **Agent-mode** | A10 · N14,U4 (MR-981) | `OMNIGRAPH_AGENT_MODE=1 omnigraph query …` → JSON + structured errors + typed exit codes; never-prompt |
|
||||||
|
|
||||||
|
V1 is the foundation (global-first + merge + view). V2 closes the substantive client→server gap. V3 is credential ergonomics. V4/V5 ride sibling tickets (MR-970/973/981). MR-969 (stored queries) ships independently and is reached by N8's `/queries/{name}` once V2 lands.
|
||||||
|
|
||||||
|
## Rollout
|
||||||
|
|
||||||
|
The slices above are the rollout order: **V1 (global layer + merge) → V2 (remote graphs + HTTP client) → V3 (login) → V4 (quickstart/templates, on MR-970) → V5 (agent-mode, MR-981).** V1–V2 close the substantive gap (global-first config + `curl`-free server access); V3–V5 are ergonomics that ride sibling tickets. Once V2 has landed, evaluate against early-adopter and agent-onboarding signal (MR-973 / MR-974). The spikes (X1 HTTP-client, X2 merge engine, X3 resolver+provenance, X4 login) are each resolved before their owning slice begins.
|
||||||
|
|
||||||
|
## Prior art
|
||||||
|
|
||||||
|
- kubeconfig (clusters / users / contexts; `KUBECONFIG`; `kubectl config view`)
|
||||||
|
- Helix CLI v2 (`helix.toml` local+enterprise instance blocks; `~/.helix/config`; `~/.helix/credentials`)
|
||||||
|
- AWS CLI (`~/.aws/config` + `~/.aws/credentials` split; named profiles; `credential_process`)
|
||||||
|
- git (`~/.gitconfig` + `.git/config`; `--show-origin`)
|
||||||
|
- Cargo (`Cargo.toml` manifest + `~/.cargo/config.toml`)
|
||||||
|
- Supabase / Prisma (one project manifest; connection via `DATABASE_URL` env)
|
||||||
|
- 12-factor app (config that varies by deploy lives in the environment)
|
||||||
270
docs/dev/rfc-003-mcp-server-surface.md
Normal file
270
docs/dev/rfc-003-mcp-server-surface.md
Normal file
|
|
@ -0,0 +1,270 @@
|
||||||
|
# RFC: MCP Server Surface for `omnigraph-server` — Full Tool Parity, Stored Queries, Modular Auth
|
||||||
|
|
||||||
|
**Status:** Proposed
|
||||||
|
**Date:** 2026-06-01
|
||||||
|
**Tickets:** MR-969 (stored queries + MCP exposure — the surface this completes), MR-956 (federated auth / WorkOS OAuth — the auth substrate this consumes), MR-971 (per-server credential resolver), MR-974 (agent setup surface — the installer that wires this), MR-668 (multi-graph server — shipped, the routing this builds on)
|
||||||
|
**Builds on:** [omnigraph#128](https://github.com/ModernRelay/omnigraph/pull/128) (`ragnorc/stored-queries-mcp`) — the shipped stored-query registry, `GET /queries`, `POST /queries/{name}`, and the coarse `invoke_query` gate.
|
||||||
|
**Supersedes:** the MCP-transport portion of [rfc-001-queries-envelope-mcp.md](rfc-001-queries-envelope-mcp.md) (`/mcp/tools` + `/mcp/invoke`). See [Relationship to RFC-001](#relationship-to-rfc-001).
|
||||||
|
**Target release:** v0.8.x (phased — see Rollout)
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Add a first-class **MCP (Model Context Protocol) server surface to `omnigraph-server`**, exposed over **Streamable HTTP**, that projects the server's operations as MCP tools and resources for LLM clients (Claude Code/Desktop/web, Cursor, etc.). Two populations of tools share one projection path:
|
||||||
|
|
||||||
|
1. **Built-in operational tools** — parity with the existing `@modernrelay/omnigraph-mcp` stdio package's **13 tools** (`health`, `snapshot`, `read`, `schema_get`, `branches_list`, `commits_list`, `commits_get`, `change`, `ingest`, `branches_create`, `branches_delete`, `branches_merge`, `schema_apply`) and its **2 resources** (`omnigraph://schema`, `omnigraph://branches`), plus a new server-scoped `graphs_list` tool and an `omnigraph://graphs` resource (multi-graph mode).
|
||||||
|
2. **Dynamic stored-query tools** — one MCP tool per `mcp.expose: true` entry in the `queries:` registry (MR-969 / #128), with parameters typed from the `.gq` declaration via the shipped `query_catalog_entry` / `param_descriptor` projection.
|
||||||
|
|
||||||
|
Every tool is **authorized by the server's existing Cedar policy engine**. The MCP layer never implements its own authentication: it consumes an **already-resolved `ResolvedActor`** from the server's bearer middleware (`require_bearer_auth` today; the `TokenVerifier` seam when MR-956 lands), so the **same MCP endpoint serves on-prem (static or customer-OIDC tokens) and our cloud (WorkOS OAuth) by configuration only**. Cloud OAuth is an additive layer (RFC 9728 protected-resource metadata) that slots in with zero MCP changes.
|
||||||
|
|
||||||
|
The end-state collapses two diverging tool implementations into one: the in-server MCP is the canonical, Cedar-gated, remotely-reachable surface; the stdio package becomes a thin stdio↔HTTP proxy (local on-ramp) over it.
|
||||||
|
|
||||||
|
> **Key caveat, stated up front (see §5.9 below):** the headline "a token scoped via Cedar to a *specific set* of stored queries" requires **per-query `invoke_query` scope**, which is *designed* (rfc-001) but **not yet implemented** — the shipped action is coarse (any stored query on the graph, or none). Per-actor Cedar curation works today for *built-in vs ad-hoc vs admin* tools and for *stored-vs-ad-hoc*; sub-selecting individual stored queries per actor is gated on a prerequisite (PR 0b). Until then, stored-query curation is graph-level (registry membership + `mcp.expose`).
|
||||||
|
|
||||||
|
## Relationship to RFC-001
|
||||||
|
|
||||||
|
[rfc-001-queries-envelope-mcp.md](rfc-001-queries-envelope-mcp.md) (MR-656 / MR-976 / MR-969) is the parent design for stored queries + the response envelope + MCP. This RFC is the **detailed MCP-transport design** that #128 left for a follow-up, and it **revises rfc-001 in three places where the shipped code or the MCP wire protocol diverged from rfc-001's sketch**:
|
||||||
|
|
||||||
|
1. **Transport shape.** rfc-001 sketched `GET /mcp/tools` + `POST /mcp/invoke` (a bespoke REST pair). **That is not the MCP wire protocol — real MCP clients cannot connect to it.** This RFC implements actual MCP JSON-RPC over Streamable HTTP and reuses `query_catalog_entry` as a *projection source*, not a parallel surface. (rfc-001's own Open Question already leaned toward Streamable HTTP.)
|
||||||
|
2. **Exposure config.** rfc-001 specified inline `.gq` pragmas (`@mcp(expose=…)`, default `expose=false`). **#128 shipped a different mechanism:** YAML `queries.<name>.mcp.expose` in `omnigraph.yaml`, **default `true`** (declaring a query in the manifest *is* the opt-in). This RFC builds on the shipped YAML form; the `.gq`-pragma design in rfc-001 is superseded for exposure.
|
||||||
|
3. **Schema introspection.** rfc-001 lists "Schema introspection through MCP" as a **non-goal** ("agents see types through declared return shapes"). This RFC **revises that**: the operational-parity tools include `schema_get` and `omnigraph://schema` — *because the shipped stdio package already exposes both*. The non-goal is achieved by *policy*, not omission: `schema_get`/`omnigraph://schema` are Cedar-gated by `Read`, and the recommended locked-down agent policy denies `Read`, so a curated agent still never sees the schema. (rfc-001's intent is preserved; the mechanism moves from "don't build it" to "build it, gate it.")
|
||||||
|
|
||||||
|
Everything else in rfc-001 (two-paths-one-engine, per-query `invoke_query` *as the intended scope*, the response envelope, multi-graph per-graph endpoints) this RFC consumes unchanged.
|
||||||
|
|
||||||
|
> **Numbering note:** the `TokenVerifier`/WorkOS auth design is referred to in code (`crates/omnigraph-server/src/identity.rs`) as "RFC 0001," which is a *different* document from this repo's `docs/dev/rfc-001-queries-envelope-mcp.md`. To avoid the collision this RFC cites the auth substrate as **MR-956** throughout, never "RFC 0001."
|
||||||
|
|
||||||
|
## Reconciliation with shipped code (verified against `ragnorc/stored-queries-mcp` HEAD)
|
||||||
|
|
||||||
|
Verified against `crates/omnigraph-server/src/{lib.rs,api.rs}` and `crates/omnigraph-policy/src/lib.rs` at the current branch head (not the #128 PR body, and not `api.rs` alone):
|
||||||
|
|
||||||
|
- ✅ `GET /queries` returns the `mcp.expose == true` subset as `QueriesCatalogOutput { queries: [QueryCatalogEntry] }`, each with typed `ParamDescriptor`s, `tool_name`, `description`, `instruction`, and a `mutation` flag. **MCP-ready projection, but exposed as bespoke REST/JSON — not the MCP wire protocol.**
|
||||||
|
- ✅ `POST /queries/{name}` route exists (`server_invoke_query`, `lib.rs`).
|
||||||
|
- ✅ `query_catalog_entry()` / `param_descriptor()` with an exhaustive `ScalarType → ParamKind` map (a new scalar is a compile error).
|
||||||
|
- ✅ `InvokeQuery` Cedar action defined in `omnigraph-policy`.
|
||||||
|
- ✅ **`InvokeQuery` IS enforced** at `POST /queries/{name}`: `server_invoke_query` calls `authorize(PolicyAction::InvokeQuery)` and **masks a denial to a 404 identical to "unknown query"** so the catalog isn't probeable (the denial-masking the previous draft of this RFC reported as missing is shipped — it lives in `lib.rs`, not `api.rs`). The stored-mutation path is already double-gated: `InvokeQuery` outer, then `Change` inside `run_mutate`.
|
||||||
|
- ✅ **Reuse path exists:** `run_query` / `run_mutate` are already decoupled from their HTTP request bodies and take registry-supplied `(source, name, params, branch/snapshot)`. MCP `tools/call` for both stored and ad-hoc tools delegates to these — no new business logic.
|
||||||
|
- ❌ **Per-query (`invoke_query[name]`) scope is NOT implemented.** `PolicyRequest` carries only `{action, branch, target_branch}` — **no query-name dimension** — and the action is documented coarse ("permits *any* stored query on the graph"). rfc-001 *designed* per-name scope; it is unbuilt. This RFC's per-query Cedar filtering (§5.4) and recommended agent policy (§5.9) depend on it → tracked as **PR 0b**.
|
||||||
|
- ❌ No MCP protocol surface (`initialize`/`tools/list`/`tools/call`, JSON-RPC, transport).
|
||||||
|
- ❌ No `TokenVerifier` trait yet — `require_bearer_auth` resolves a `ResolvedActor` inline (static-hash). The trait/`OidcJwtVerifier` are MR-956 (draft). The MCP layer's only requirement — *consume `ResolvedActor`* — is satisfiable today.
|
||||||
|
|
||||||
|
Stack (verified against `Cargo.toml`): Axum + utoipa (OpenAPI) + `omnigraph-policy` (Cedar) + `futures` + `tokio`. **No MCP crate present.** `edition = "2024"`.
|
||||||
|
|
||||||
|
## Motivation
|
||||||
|
|
||||||
|
- **One curated, safe, remotely-reachable tool surface.** MR-969's thesis: hand an LLM a token Cedar-scoped to a set of tools and it sees exactly those typed tools — it cannot construct ad-hoc queries it isn't permitted to run, cannot read a schema it isn't permitted to see, and cannot reach other graphs. Today the only MCP surface is the stdio package: local-only, full surface, ungated.
|
||||||
|
- **Parity, so the in-server MCP can be the single implementation.** Operators/agents already depend on the operational tools. Supporting them server-side behind one Cedar gate lets the stdio package degrade to a proxy and removes two diverging tool sets.
|
||||||
|
- **On-prem and cloud from one endpoint.** A managed cloud (WorkOS OAuth) and an on-prem/air-gapped deploy (static or customer-OIDC tokens) must serve the same MCP without forks or MCP-specific auth.
|
||||||
|
- **Foundation for the agent on-ramp (MR-974).** `omnigraph mcp install --agent <tool>` needs a decided transport + a stable endpoint.
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
|
||||||
|
- Project built-in tools + stored queries as MCP tools through **one** registry abstraction.
|
||||||
|
- `tools/list` and the callable set are **identical for argument-independent authorization**, both driven by Cedar (see §5.4 for the branch-scoped caveat).
|
||||||
|
- The MCP layer is **auth-method-agnostic**: it consumes `ResolvedActor`, never a raw token, never branches on how auth happened.
|
||||||
|
- The same endpoint works on-prem (static/OIDC) and cloud (WorkOS OAuth), switched by config; cloud OAuth is additive (RFC 9728).
|
||||||
|
- No new business logic: MCP tools delegate to the same `run_query`/`run_mutate`/branch/schema functions the HTTP routes call.
|
||||||
|
- Behaviour-neutral when unused: no MCP traffic = no change.
|
||||||
|
|
||||||
|
## Non-Goals
|
||||||
|
|
||||||
|
- **Building/hosting an OAuth authorization server.** The server is a Resource Server; WorkOS AuthKit+Connect is the AS (MR-956). The MCP endpoint validates tokens, never issues them, never holds client secrets.
|
||||||
|
- **OAuth/WorkOS implementation itself** — MR-956's work. This RFC leaves a clean RFC-9728 hook and consumes `ResolvedActor`.
|
||||||
|
- **MCP prompts, elicitation, `tools/list_changed`, resource subscriptions, server-initiated messages.** None needed → enables a stateless POST-only transport (§5.6).
|
||||||
|
- **stdio transport inside the server.** stdio stays in the TS package (now a proxy).
|
||||||
|
- **Cross-graph tool listing.** Per-graph catalogs only (MR-969 + RFC-002 non-goal).
|
||||||
|
- **Hot reload of the query registry.** Restart-only (MR-969).
|
||||||
|
|
||||||
|
## Background
|
||||||
|
|
||||||
|
`omnigraph-server` (Axum) already implements every operation this RFC exposes as an authenticated HTTP route; each authorizes via a `PolicyAction` against the Cedar policy for a server-resolved actor and calls into the engine. The existing stdio MCP package is a *client* of these routes (it owns no business logic). MR-956 will introduce a `TokenVerifier` trait (`StaticHashTokenVerifier` today inline, `OidcJwtVerifier` for OIDC/WorkOS) producing the `ResolvedActor { actor_id, tenant_id: Option, scopes: Vec<Scope>, source }` that already exists in `identity.rs` and is consumed by Cedar — token *validation* is offline (cached JWKS), so on-prem/air-gapped has no request-path dependency on the cloud.
|
||||||
|
|
||||||
|
## Design
|
||||||
|
|
||||||
|
### 5.1 One tool model: a `McpTool` trait, two populators
|
||||||
|
|
||||||
|
Both built-in and stored-query tools implement one trait so `tools/list` / `tools/call` never special-case:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
trait McpTool: Send + Sync {
|
||||||
|
fn name(&self) -> &str; // MCP tool id (stable)
|
||||||
|
fn title(&self) -> Option<&str>;
|
||||||
|
fn description(&self) -> &str;
|
||||||
|
fn input_schema(&self) -> serde_json::Value; // JSON Schema (draft 2020-12)
|
||||||
|
fn annotations(&self) -> ToolAnnotations; // readOnlyHint / destructiveHint / idempotentHint
|
||||||
|
/// The Cedar request(s) this call requires, given parsed args. Used BOTH at
|
||||||
|
/// list-time (dry-run filter, default args) and call-time (enforce, real args).
|
||||||
|
fn authorization(&self, args: &ToolArgs) -> Vec<PolicyRequest>;
|
||||||
|
async fn call(&self, ctx: &GraphCtx, args: ToolArgs) -> Result<ToolOutput, ToolError>;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Built-ins**: ~14 static impls, each delegating to the *same* function its HTTP route calls (`run_query`, `run_mutate`, branch ops, `apply_schema_as`, …). `input_schema` authored once (or derived from each route's existing `utoipa`/`ToSchema` DTO).
|
||||||
|
- **Stored queries**: generated `McpTool` instances, one per `mcp.expose` entry; `input_schema` from `param_descriptor` (§5.3); `authorization` → `InvokeQuery` (coarse today; `InvokeQuery{name}` after PR 0b) then the inner `Read`/`Change`.
|
||||||
|
|
||||||
|
`ToolRegistry` for a graph = the static built-ins + the dynamic stored-query tools resolved from that graph's `GraphHandle` registry.
|
||||||
|
|
||||||
|
### 5.2 Tool catalog (parity) and Cedar mapping
|
||||||
|
|
||||||
|
Each built-in **reuses the exact `PolicyAction` its HTTP route already enforces** — verified against the handlers in `lib.rs`, not invented:
|
||||||
|
|
||||||
|
| MCP tool | Scope | Read/Mutate | Cedar action (verified from route) |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `health` | server | read | none (liveness/version) |
|
||||||
|
| `graphs_list` *(new)* | server | read | `GraphList` |
|
||||||
|
| `snapshot` | graph | read | `Read` |
|
||||||
|
| `schema_get` | graph | read | `Read` |
|
||||||
|
| `branches_list` | graph | read | `Read` |
|
||||||
|
| `commits_list`, `commits_get` | graph | read | `Read` |
|
||||||
|
| `query` (ad-hoc `.gq`) / `read` *(deprecated alias)* | graph | read | `Read` |
|
||||||
|
| `mutate` (ad-hoc `.gq`) / `change` *(deprecated alias)* | graph | mutate | `Change` |
|
||||||
|
| `ingest` (NDJSON) | graph | mutate | `Change` (+ `BranchCreate` when forking a new branch) |
|
||||||
|
| `branches_create` | graph | mutate | `BranchCreate` |
|
||||||
|
| `branches_delete` | graph | mutate | `BranchDelete` |
|
||||||
|
| `branches_merge` | graph | mutate | `BranchMerge` |
|
||||||
|
| `schema_apply` (`allow_data_loss`) | graph | mutate | `SchemaApply` |
|
||||||
|
| **stored query** (`find_user`, …) | graph | inferred | `InvokeQuery` (coarse; `InvokeQuery{name}` after PR 0b) + inner `Read`/`Change` |
|
||||||
|
|
||||||
|
There is **no `Ingest` action and no dedicated `snapshot` action** — `ingest` enforces `Change`, `snapshot` enforces `Read`. (An `Export` action does exist, but it maps to the `/export` route, which this RFC does not expose as a tool.)
|
||||||
|
|
||||||
|
**Tool id parity vs. canonicalization.** The shipped stdio package uses tool ids **`read`/`change`** (and calls the deprecated `/read`,`/change` routes). The server HTTP surface canonicalized to `/query`,`/mutate` with `/read`,`/change` deprecated (MR-656). To keep existing package clients working *and* align with the server, the MCP exposes **`query`/`mutate` as canonical with `read`/`change` retained as deprecated-but-live aliases** (both dispatch to the same handler). Open Q7 asks whether to drop the aliases later.
|
||||||
|
|
||||||
|
Resources (§5.5): `omnigraph://schema`, `omnigraph://branches` (parity), plus `omnigraph://graphs` *(new)* — each gated by the same action as its list/get route (`Read`, `Read`, `GraphList`).
|
||||||
|
|
||||||
|
### 5.3 `ParamDescriptor → JSON Schema` (stored-query tools)
|
||||||
|
|
||||||
|
| `ParamKind` | JSON Schema | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| String | `{"type":"string"}` | |
|
||||||
|
| Bool | `{"type":"boolean"}` | |
|
||||||
|
| Int (i32/u32) | `{"type":"integer"}` | |
|
||||||
|
| BigInt (i64/u64) | `{"type":"string","pattern":"^-?\\d+$"}` | JSON numbers lose precision >2⁵³ → string (matches the shipped `api.rs` rationale). (Open Q1) |
|
||||||
|
| Float (f32/f64) | `{"type":"number"}` | |
|
||||||
|
| Date | `{"type":"string","format":"date"}` | |
|
||||||
|
| DateTime | `{"type":"string","format":"date-time"}` | |
|
||||||
|
| Blob | `{"type":"string","contentEncoding":"base64"}` | |
|
||||||
|
| Vector | `{"type":"array","items":{"type":"number"},"minItems":dim,"maxItems":dim}` | uses `vector_dim` |
|
||||||
|
| List | `{"type":"array","items":<item_kind schema>}` | scalar items only (grammar guarantees) |
|
||||||
|
|
||||||
|
`nullable == false` → param is in `required`. Annotations: `mutation` → `{readOnlyHint:false, destructiveHint:true}`; else `{readOnlyHint:true}`. `description` → tool description; `instruction` → appended to description (or `_meta`). (The shipped `check()` already warns when an `mcp.expose` query declares a `Vector` param an LLM can't supply.)
|
||||||
|
|
||||||
|
For built-in tools the schema is hand-authored from the route DTO; e.g. `query` → `{source: string, branch?: string, params?: object}`; `schema_apply` → `{schema: string, allow_data_loss?: boolean}`; `ingest` → `{ndjson: string, mode?: "merge"|"append"|"overwrite", branch?: string}`.
|
||||||
|
|
||||||
|
### 5.4 `tools/list` (Cedar-filtered) and `tools/call` (dispatch + masking)
|
||||||
|
|
||||||
|
- **`tools/list`**: build the `ToolRegistry`; for each tool evaluate `authorization(default_args)` against the actor's Cedar policy; **emit only tools that authorize**. Authz decisions memoized per request. Stored-query tools additionally require `mcp.expose: true`.
|
||||||
|
- **Exactness caveat (R7 is conditional):** the listed set equals the callable set **only for tools whose authorization is argument-independent** (`health`, `graphs_list`, `snapshot`, `schema_get`, `branches_list`, `commits_*`, ad-hoc `query`/`mutate`, and stored queries under the *coarse* action). For **branch-scoped tools** (`branches_create`/`merge` with `target_branch_scope`, and any branch-scoped `Read`/`Change` rule), list-time uses `default_args` (e.g. branch `main`) and cannot know the real target, so the listed set is a *best-effort approximation* of callability — a call may still be denied (or, rarely, a hidden tool would have been allowed). `tools/call` is always the authoritative gate. The contract is: **list never shows a tool the actor can't ever call; for branch-scoped tools it may show one the actor can call only on some branches.**
|
||||||
|
- **`tools/call`**: resolve `name` → `McpTool` (masked as "unknown tool" if unknown *or* `mcp.expose:false`, §5.10); parse+validate args against `input_schema`; enforce `authorization(args)` (mutations stay double-gated: `InvokeQuery` then `Change`); on success invoke `call`. **Denial masking** lives in one place (the dispatcher): an authz denial is returned identically to "unknown tool" (§5.10), reusing the same deny≡missing principle already shipped as the 404 masking at `POST /queries/{name}`.
|
||||||
|
|
||||||
|
### 5.5 Resources
|
||||||
|
|
||||||
|
Advertise `resources` capability (`subscribe:false, listChanged:false`). `resources/list` → the URIs the actor may read; `resources/read` → schema `.pg` text / branches JSON / (multi-graph) graphs JSON, each gated by the corresponding action (`Read`, `Read`, `GraphList`). A locked-down agent denied `Read` simply never sees `omnigraph://schema` or `omnigraph://branches` — this is how rfc-001's "agents don't introspect schema" intent is met *by policy* (§Relationship-to-RFC-001).
|
||||||
|
|
||||||
|
### 5.6 Transport: Streamable HTTP, stateless, POST-only
|
||||||
|
|
||||||
|
- **Streamable HTTP** (MCP's current standard; we're already an HTTP server). One endpoint per scope (§5.7).
|
||||||
|
- Because the server emits **no** server-initiated messages, implement the **minimal conformant** shape: client `POST`s JSON-RPC, server replies `application/json`. **No SSE channel, no `Mcp-Session-Id`, stateless** — each request authenticated independently via the bearer middleware. Honour the `MCP-Protocol-Version` header. SSE/sessions can be added later if subscriptions land.
|
||||||
|
- **JSON-RPC methods:** `initialize` (advertise `{tools:{listChanged:false}, resources:{listChanged:false, subscribe:false}}` + serverInfo/version), `notifications/initialized` (no-op ack), `ping`, `tools/list`, `tools/call`, `resources/list`, `resources/read`. `prompts/list` returns empty if probed.
|
||||||
|
- **Library decision (Open Q2):** spike `rmcp` (official Rust MCP SDK) for conformance + Streamable-HTTP/Axum on edition 2024; **fall back to a hand-rolled ~150 LOC JSON-RPC-over-POST** (only the methods above) on friction. Given the tiny surface, hand-roll is an acceptable default.
|
||||||
|
|
||||||
|
### 5.7 Endpoint routing (server- vs graph-scoped)
|
||||||
|
|
||||||
|
- **Single-graph mode:** `POST /mcp` — graph tools + server tools (`health`, `graphs_list`).
|
||||||
|
- **Multi-graph mode (MR-668):** `POST /graphs/{graph_id}/mcp` — graph-scoped tools for that graph; plus a server-level `POST /mcp` exposing only server-scoped tools (`health`, `graphs_list`). A per-graph endpoint never lists another graph's tools (isolation, tested). Mirrors the shipped `/graphs/{graph_id}/…` cluster routing. (Open Q5: confirm naming + whether server tools also appear on the per-graph endpoint.)
|
||||||
|
|
||||||
|
### 5.8 Modular / decoupled auth (the cross-cutting requirement)
|
||||||
|
|
||||||
|
**Invariant (load-bearing, satisfiable today):** the MCP handler receives an **already-resolved `ResolvedActor`** and **branches on nothing** about how the token was verified. No token parsing, no method check, no OAuth inside the MCP module. Today that actor comes from `require_bearer_auth`; when MR-956 lands it comes from a `TokenVerifier` — the MCP code is identical either way.
|
||||||
|
|
||||||
|
```
|
||||||
|
request → [auth middleware: ResolvedActor] → [MCP route] → Cedar → McpTool
|
||||||
|
```
|
||||||
|
|
||||||
|
**Server side — auth is config, not code:**
|
||||||
|
|
||||||
|
| Deployment | Verifier | MCP change |
|
||||||
|
|---|---|---|
|
||||||
|
| On-prem, static bearer | `require_bearer_auth` / `StaticHashTokenVerifier` | none |
|
||||||
|
| On-prem, customer IdP | `OidcJwtVerifier` → customer issuer (MR-956) | none |
|
||||||
|
| Our cloud | `OidcJwtVerifier` → WorkOS, `tenant_id = Some(org_id)` (MR-956) | none |
|
||||||
|
|
||||||
|
Token validation is offline (cached JWKS) — on-prem/air-gapped keeps working with no request-path cloud dependency. The MCP endpoint never terminates OAuth and never holds a client secret (Resource Server only).
|
||||||
|
|
||||||
|
**Cloud client negotiation — additive, no MCP changes:** when MR-956 lands, the server publishes RFC 9728 `/.well-known/oauth-protected-resource` and returns `WWW-Authenticate: Bearer ..., resource_metadata="..."` on 401. A compliant MCP client (Claude) then auto-negotiates: static bearer to an on-prem endpoint; on a cloud 401 it discovers the WorkOS AS and runs OAuth/PKCE itself — **same endpoint URL, zero client-side branching.** This RFC only requires that MCP routes flow through the standard 401 path so that hook can be added later without touching MCP.
|
||||||
|
|
||||||
|
**Multi-user identity pass-through (cloud):** the *caller's* token (a WorkOS JWT, audience-bound per-tenant) must reach the server so Cedar enforces per-user/per-tenant policy — never a shared service token. The MCP endpoint validates it offline and maps `org_id → tenant_id`. This is why the **remote path is the in-server HTTP MCP that Claude connects to directly** (its token flows through), not a stdio bridge impersonating a user.
|
||||||
|
|
||||||
|
**Client-side credential acquisition (CLI/SDK/proxy) — pluggable `CredentialSource`** (RFC-002 §5, MR-971), keyed by server name, so OAuth is a future *sibling key*, not a re-key:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
servers:
|
||||||
|
onprem: { endpoint: https://og.internal:8080, auth: { token: { env: OG_TOKEN } } }
|
||||||
|
edge: { endpoint: https://og-edge, auth: { token: { command: [vault, read, -field=token, secret/og] } } }
|
||||||
|
cloud: { endpoint: https://api.omnigraph.cloud, auth: { oauth: { issuer: workos } } } # future sibling
|
||||||
|
```
|
||||||
|
|
||||||
|
Implicit chain when `auth:` omitted: `OMNIGRAPH_TOKEN_<NAME>` → keychain `omnigraph:<name>` → `[<name>]` in `~/.omnigraph/credentials`; legacy `bearer_token_env` honoured. Secrets never inlined.
|
||||||
|
|
||||||
|
### 5.9 Safety model — Cedar is the gate, default-deny is the floor
|
||||||
|
|
||||||
|
With ad-hoc `query`/`mutate`/`schema_apply` present as tools, the **only** thing protecting an untrusted agent is the Cedar policy. Therefore:
|
||||||
|
|
||||||
|
- **Default-deny when tokens are configured** (MR-723, shipped) is the floor — an actor with no grants sees an empty tool list.
|
||||||
|
- **What works today (coarse action):** a policy can hide all ad-hoc tools and admin tools per-actor (`deny Read, Change, SchemaApply, Branch*`) while allowing stored queries (`allow InvokeQuery`). That already reproduces "can't run ad-hoc, can't read schema, can only call stored queries" — the agent sees *every* exposed stored query plus nothing else.
|
||||||
|
- **What needs PR 0b (per-query scope):** selecting *which* stored queries an actor may call (`allow InvokeQuery [find_user, list_orders]`, deny the rest). The shipped `invoke_query` is coarse (all stored queries or none). Until PR 0b adds a query-name dimension to `PolicyRequest` + the Cedar schema (rfc-001's intended design), per-actor sub-selection of stored queries is **not expressible**; curation is graph-level (which `.gq` files are registered + `mcp.expose`).
|
||||||
|
- `schema_apply`, `branches_delete`, ad-hoc `mutate` require an explicit admin-tier grant; never in a default agent policy.
|
||||||
|
- (Open Q3) Optional `mcp.allow_adhoc` server switch defaulting **off** for the ad-hoc `query`/`mutate` tools — defence-in-depth independent of Cedar, and independent of PR 0b.
|
||||||
|
|
||||||
|
### 5.10 Result shaping and error mapping
|
||||||
|
|
||||||
|
- **Success:** `tools/call` returns `content: [{type:"text", text:<json>}]` where `<json>` is the route's existing output envelope (read rows / mutation summary, i.e. `ReadOutput` / `ChangeOutput`). (Open Q4: also emit `structuredContent` + `outputSchema` — defer; text-JSON for v1.)
|
||||||
|
- **Tool execution error** (bad params after schema validation, engine error): result with `isError:true` + a text content block.
|
||||||
|
- **Authorization denial / unknown tool / `mcp.expose:false`:** a single JSON-RPC error (`-32602`, message `"unknown tool"`) — identical for all three so policy isn't probeable (same principle as the shipped `POST /queries/{name}` 404 masking).
|
||||||
|
- **Auth failure** (bad/absent bearer): HTTP 401 from the middleware *before* MCP — carries `WWW-Authenticate` (the RFC 9728 hook), never masked as a tool error. (This is exactly the path the shipped `authorize`/`authorize_request` split preserves: operational failures keep their status; only *denials* are masked.)
|
||||||
|
|
||||||
|
## Relationship to the `@modernrelay/omnigraph-mcp` stdio package
|
||||||
|
|
||||||
|
Verified surface of the package (`omnigraph-ts`, pkg version `0.3.0`, `@modelcontextprotocol/sdk@^1.29.0`, **stdio only**): **13 tools** (`health`, `snapshot`, `read`, `schema_get`, `branches_list`, `commits_list`, `commits_get`, `change`, `ingest`, `branches_create`, `branches_delete`, `branches_merge`, `schema_apply`) and **2 resources** (`omnigraph://schema`, `omnigraph://branches`). It is a thin client over the SDK → HTTP routes and **forwards the caller's bearer verbatim** (no inspection).
|
||||||
|
|
||||||
|
Once parity lands, **collapse to one implementation**: the in-server MCP is canonical (Cedar-gated, remote-capable, the path that becomes a Claude-web connector via MR-956). The stdio package degrades to a **thin stdio↔HTTP proxy** forwarding JSON-RPC (and the incoming `Authorization`) to `/mcp` — staying the local on-ramp for Claude Code/Desktop while sharing one tool set, one Cedar gate. Transition: keep the current independent stdio package on its `0.3.x`/`0.6.x` line; ship proxy mode in a later TS minor once the server endpoint is GA. (Note: the package is currently several minors behind the server — its vendored `spec/openapi.json` predates the stored-query routes — so it needs the standard re-sync regardless of MCP work.)
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
- **Protocol conformance:** `initialize` handshake + advertised capabilities; `tools/list` shape; `tools/call` happy path; JSON-RPC error envelopes (`-32601` unknown method, `-32602` invalid params / unknown tool); `resources/list` + `resources/read`.
|
||||||
|
- **Cedar filtering (coarse, today):** an actor with `allow InvokeQuery` + `deny Read/Change` sees *all* exposed stored queries but **not** `query`/`mutate`/`schema_get`; `tools/call query` returns masked "unknown tool"; an admin sees the full catalog.
|
||||||
|
- **Cedar filtering (per-query, gated on PR 0b):** actor scoped to `InvokeQuery [find_user]` sees *only* `find_user`; `tools/call list_orders` masks. **This test ships with PR 0b**, not PR 1 — it cannot pass against the coarse action.
|
||||||
|
- **Parity per built-in:** each tool round-trips against the same expectations as its HTTP route (reuse route tests); `read`/`change` aliases dispatch identically to `query`/`mutate`.
|
||||||
|
- **Double-gating:** a stored mutation requires both `InvokeQuery` and `Change`; `schema_apply` requires `SchemaApply`.
|
||||||
|
- **`mcp.expose:false`:** absent from `GET /queries` and MCP `tools/list`; still service-callable by name through `POST /queries/{name}` when the actor has `invoke_query`, but not MCP-callable.
|
||||||
|
- **Schema generation:** table-driven over every `ParamKind` incl. nullable / list / vector(dim).
|
||||||
|
- **Branch-scoped list approximation:** assert the documented R7 caveat — a branch-scoped policy lists `branches_create`, and `tools/call` is the authoritative gate (a call against a denied target branch is still rejected, masked as "unknown tool" per §5.10).
|
||||||
|
- **Multi-graph isolation:** `/graphs/a/mcp` never lists graph `b`'s tools; server `/mcp` exposes only server tools.
|
||||||
|
- **Auth decoupling:** the MCP suite is green under the current `require_bearer_auth` and under a mock OIDC `ResolvedActor` source — proving verifier-agnosticism. A 401 carries `WWW-Authenticate`.
|
||||||
|
- **OpenAPI:** the JSON-RPC endpoint is not REST — document only the envelope in utoipa (or exclude); keep `openapi.json` drift test green (`OMNIGRAPH_UPDATE_OPENAPI=1` to regenerate on intentional change).
|
||||||
|
- **Cross-repo smoke (optional):** point `@modelcontextprotocol/sdk` (TS) at the HTTP endpoint in an `omnigraph-ts` integration test.
|
||||||
|
|
||||||
|
## Rollout — phased by risk
|
||||||
|
|
||||||
|
- **PR 0a — extract the reusable invoke path (small).** The coarse `invoke_query` gate + 404 denial-masking are **already shipped** in `server_invoke_query`. Extract the read/mutate dispatch into `invoke_stored_query(handle, name, params, branch/snapshot, actor)` so MCP `tools/call` and the HTTP route share one path. No behaviour change. *(Replaces the previous draft's "PR 0 — wire the gate", which was already done.)*
|
||||||
|
- **PR 0b — per-query `invoke_query` scope (the safety prerequisite).** Add a query-name dimension to `PolicyRequest` + the Cedar schema (rfc-001's intended design), wire it at `POST /queries/{name}` and in the stored-query `McpTool::authorization`. Independently useful (the `allow InvokeQuery [find_user]` policy). **Gates the per-query Cedar-filtering test and §5.9's recommended agent policy.**
|
||||||
|
- **PR 1 — MCP transport + read-only parity + stored-query reads.** Endpoint(s), `initialize`/`tools/list`/`tools/call`/`resources/*`, the `McpTool` registry, Cedar-filtered listing, the read-only built-ins (`health`, `graphs_list`, `snapshot`, `read`/`query`, `schema_get`, `branches_list`, `commits_*`) + resources + stored-query *reads*. All auth-agnostic.
|
||||||
|
- **PR 2 — mutating parity + stored-query mutations.** `change`/`mutate`, `ingest`, `branches_create/delete/merge`, `schema_apply`, stored-query mutations + the `mcp.allow_adhoc` switch.
|
||||||
|
- **PR 3 — docs + agent on-ramp hook.** `docs/user/server.md` MCP section (incl. the recommended agent policy + the coarse-vs-per-query caveat), `openapi.json` sync, the `omnigraph mcp install` config target (MR-974), and the downstream `omnigraph-ts` re-sync/proxy follow-up.
|
||||||
|
- **Later (separate, MR-956):** RFC 9728 protected-resource metadata + WorkOS — slots in with zero MCP changes.
|
||||||
|
- **Later (TS minor):** stdio package → proxy mode.
|
||||||
|
|
||||||
|
## Migration / backwards compatibility
|
||||||
|
|
||||||
|
- **Additive.** No `queries:` and no MCP traffic → today's behaviour unchanged. New endpoints are new routes.
|
||||||
|
- **Cedar default-deny** (when tokens configured) means MCP exposes nothing until an actor is granted — safe by default.
|
||||||
|
- The stdio package keeps working unchanged; proxy mode is opt-in later.
|
||||||
|
- `openapi.json` only gains the documented MCP envelope; existing REST routes untouched.
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. **BigInt/u64 as JSON string** (recommended, precision-safe) vs number.
|
||||||
|
2. **`rmcp` vs hand-rolled** JSON-RPC (spike `rmcp` on edition 2024; default to hand-roll on friction).
|
||||||
|
3. **Default-off `mcp.allow_adhoc`** for ad-hoc `query`/`mutate` (recommended) vs always-on + Cedar-only.
|
||||||
|
4. **`structuredContent` + `outputSchema`** now vs text-JSON v1 (recommend v1 text-JSON).
|
||||||
|
5. **Endpoint paths:** `/mcp` + `/graphs/{id}/mcp` — confirm naming and whether server-scoped tools also appear on the per-graph endpoint.
|
||||||
|
6. **Stateless POST-only** confirmed (no near-term server-initiated messages) — revisit only if subscriptions land.
|
||||||
|
7. **Legacy alias tools** (`read`/`change`): keep for client compat (the shipped package uses them), or drop and rely on `query`/`mutate`?
|
||||||
|
8. **PR 0b shape:** per-query scope as a Cedar *resource* (`StoredQuery::"find_user"`) vs a `query_name` *context attribute* + policy condition — affects how `allow InvokeQuery [list]` is authored.
|
||||||
|
|
@ -20,7 +20,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav
|
||||||
| `end_to_end.rs` | Full init → load → query/mutate flow |
|
| `end_to_end.rs` | Full init → load → query/mutate flow |
|
||||||
| `branching.rs` | Branch create / list / delete, lazy fork |
|
| `branching.rs` | Branch create / list / delete, lazy fork |
|
||||||
| `merge_truth_table.rs` | Merge-pair truth table (MR-786): all 9×9 `(left_op, right_op)` cells from `{noop, addNode, removeNode, addEdge, removeEdge, setProperty, dropProperty, addLabel, removeLabel}`. Adding a new op to `OpVariant` forces a compile error in `build_case` until the new row + column are dispositioned. 36 executable cells run through real `branch_merge` with a structured oracle (`MergeOutcome` / `MergeConflictKind` + graph-state assert); 45 cells involving `dropProperty`/`addLabel`/`removeLabel` are recorded as `Unsupported` until the mutation grammar grows. |
|
| `merge_truth_table.rs` | Merge-pair truth table (MR-786): all 9×9 `(left_op, right_op)` cells from `{noop, addNode, removeNode, addEdge, removeEdge, setProperty, dropProperty, addLabel, removeLabel}`. Adding a new op to `OpVariant` forces a compile error in `build_case` until the new row + column are dispositioned. 36 executable cells run through real `branch_merge` with a structured oracle (`MergeOutcome` / `MergeConflictKind` + graph-state assert); 45 cells involving `dropProperty`/`addLabel`/`removeLabel` are recorded as `Unsupported` until the mutation grammar grows. |
|
||||||
| `runs.rs` | Direct-publish writes: cancellation, concurrent-writer CAS, multi-statement atomicity, MR-794 staged-write rewire (D₂ rejection, insert+update coalesce, multi-append coalesce, partial-failure recovery, load RI/cardinality recovery) |
|
| `writes.rs` | Direct-publish writes: cancellation, concurrent-writer CAS, multi-statement atomicity, MR-794 staged-write rewire (D₂ rejection, insert+update coalesce, multi-append coalesce, partial-failure recovery, load RI/cardinality recovery) |
|
||||||
| `staged_writes.rs` | TableStore staged-write primitives (`stage_append`, `stage_merge_insert`, `commit_staged`, `scan_with_staged`, `count_rows_with_staged`) — primitive-level only; engine code uses the in-memory `MutationStaging` accumulator instead |
|
| `staged_writes.rs` | TableStore staged-write primitives (`stage_append`, `stage_merge_insert`, `commit_staged`, `scan_with_staged`, `count_rows_with_staged`) — primitive-level only; engine code uses the in-memory `MutationStaging` accumulator instead |
|
||||||
| `lifecycle.rs` | Graph lifecycle, schema state |
|
| `lifecycle.rs` | Graph lifecycle, schema state |
|
||||||
| `point_in_time.rs` | Snapshots, time travel (`snapshot_at_version`, `entity_at`) |
|
| `point_in_time.rs` | Snapshots, time travel (`snapshot_at_version`, `entity_at`) |
|
||||||
|
|
@ -34,10 +34,10 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav
|
||||||
| `s3_storage.rs` | S3-backed graph (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) |
|
| `s3_storage.rs` | S3-backed graph (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) |
|
||||||
| `lance_version_columns.rs` | Per-row `_row_last_updated_at_version` behavior |
|
| `lance_version_columns.rs` | Per-row `_row_last_updated_at_version` behavior |
|
||||||
| `validators.rs` | Schema constraint enforcement (enum, range, unique, cardinality) across JSONL, insert, update paths |
|
| `validators.rs` | Schema constraint enforcement (enum, range, unique, cardinality) across JSONL, insert, update paths |
|
||||||
| `maintenance.rs` | `optimize` (compaction) + `cleanup` (version GC): empty/idempotent/no-op edges, policy validation, head preservation |
|
| `maintenance.rs` | `optimize` (compaction) + `cleanup` (version GC): empty/idempotent/no-op edges, policy validation, head preservation; `optimize` publishes the compacted version so the manifest tracks the Lance HEAD and a subsequent schema apply succeeds (`optimize_publishes_compaction_to_manifest_so_schema_apply_succeeds`), and refuses to run while a `__recovery` sidecar is pending so optimize only ever operates on a recovered graph (`optimize_defers_when_recovery_sidecar_is_pending`) |
|
||||||
| `failpoints.rs` | Failure-injection coverage (gated on `failpoints` feature). Includes the four per-writer Phase B → recovery integration tests (`recovery_rolls_forward_after_finalize_publisher_failure`, `schema_apply_phase_b_failure_recovered_on_next_open`, `branch_merge_phase_b_failure_recovered_on_next_open`, `ensure_indices_phase_b_failure_recovered_on_next_open`). |
|
| `failpoints.rs` | Failure-injection coverage (gated on `failpoints` feature). Includes the five per-writer Phase B → recovery integration tests (`recovery_rolls_forward_after_finalize_publisher_failure`, `schema_apply_phase_b_failure_recovered_on_next_open`, `branch_merge_phase_b_failure_recovered_on_next_open`, `ensure_indices_phase_b_failure_recovered_on_next_open`, `optimize_phase_b_failure_recovered_on_next_open`). |
|
||||||
| `recovery.rs` | Open-time recovery sweep — sidecar I/O, classifier dispatch (NoMovement / RolledPastExpected / UnexpectedAtP1 / UnexpectedMultistep / InvariantViolation), all-or-nothing decision, roll-forward via `ManifestBatchPublisher::publish`, roll-back via `Dataset::restore`, audit row in `_graph_commit_recoveries.lance`, `OpenMode::ReadOnly` skip path |
|
| `recovery.rs` | Open-time recovery sweep — sidecar I/O, classifier dispatch (NoMovement / RolledPastExpected / UnexpectedAtP1 / UnexpectedMultistep / InvariantViolation), all-or-nothing decision, roll-forward via `ManifestBatchPublisher::publish`, roll-back via `Dataset::restore`, audit row in `_graph_commit_recoveries.lance`, `OpenMode::ReadOnly` skip path |
|
||||||
| `composite_flow.rs` | Compositional/narrative end-to-end stories — multi-step flows that compose mechanics covered by other test files. Catches integration regressions where individual operations all pass their unit tests but their composition breaks (sequential merges, post-merge main writes, time-travel through merge DAG, reopen consistency over multi-merge histories). |
|
| `composite_flow.rs` | Compositional/narrative end-to-end stories — multi-step flows that compose mechanics covered by other test files. Catches integration regressions where individual operations all pass their unit tests but their composition breaks (sequential merges, post-merge main writes, time-travel through merge DAG, reopen consistency over multi-merge histories, post-optimize and post-cleanup strict writes). |
|
||||||
|
|
||||||
## Fixtures
|
## Fixtures
|
||||||
|
|
||||||
|
|
@ -89,7 +89,7 @@ If introducing coverage tooling is in scope for your task, the natural first ste
|
||||||
|
|
||||||
How to check:
|
How to check:
|
||||||
|
|
||||||
1. **Map the change to an area** — use the engine integration-test table above (`branching.rs`, `runs.rs`, `search.rs`, etc.). The filename usually names the area.
|
1. **Map the change to an area** — use the engine integration-test table above (`branching.rs`, `writes.rs`, `search.rs`, etc.). The filename usually names the area.
|
||||||
2. **Open the file and skim every test fn name.** Test fn names are the index — read them all, not just the first few.
|
2. **Open the file and skim every test fn name.** Test fn names are the index — read them all, not just the first few.
|
||||||
3. **Grep for the symbol or path you're changing.** `rg <FunctionName>` or `rg <enum_variant>` across all `tests/` directories surfaces existing coverage you might miss.
|
3. **Grep for the symbol or path you're changing.** `rg <FunctionName>` or `rg <enum_variant>` across all `tests/` directories surfaces existing coverage you might miss.
|
||||||
4. **Decide one of three outcomes**, in this order of preference:
|
4. **Decide one of three outcomes**, in this order of preference:
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
# Runs — REMOVED (MR-771)
|
# Direct-Publish Write Path
|
||||||
|
|
||||||
The Run state machine and `__run__<id>` staging branches were removed in
|
> History: the Run state machine and `__run__<id>` staging branches were
|
||||||
MR-771. `mutate_as` and `load` now write **directly to the target table**
|
> removed in MR-771 (shipped v0.4.0). Writes now go directly to the target
|
||||||
|
> table; this document specifies that direct-publish path.
|
||||||
|
|
||||||
|
`mutate_as` and `load` write **directly to the target table**
|
||||||
and call `ManifestBatchPublisher::publish` once at the end with
|
and call `ManifestBatchPublisher::publish` once at the end with
|
||||||
`expected_table_versions` (the per-table manifest versions captured before
|
`expected_table_versions` (the per-table manifest versions captured before
|
||||||
the first write). Cross-table OCC is enforced inside the publisher; the
|
the first write). Cross-table OCC is enforced inside the publisher; the
|
||||||
|
|
@ -11,8 +14,11 @@ publisher's row-level CAS on `__manifest` is the single fence.
|
||||||
|
|
||||||
- No `RunRecord`, no `_graph_runs.lance`, no `_graph_run_actors.lance`.
|
- No `RunRecord`, no `_graph_runs.lance`, no `_graph_run_actors.lance`.
|
||||||
- No `omnigraph run *` CLI subcommands and no `/runs/*` HTTP endpoints.
|
- No `omnigraph run *` CLI subcommands and no `/runs/*` HTTP endpoints.
|
||||||
- No `__run__<id>` staging branches. (Legacy on-disk artifacts from
|
- No `__run__<id>` staging branches; `__run__*` is no longer a reserved
|
||||||
pre-MR-771 repos are inert; MR-770 sweeps them in production.)
|
name. The branch-name guard was removed in MR-770, and any stale
|
||||||
|
`__run__*` branch on an upgraded graph is swept off `__manifest` by the
|
||||||
|
v2→v3 internal-schema migration on first read-write open. (The inert
|
||||||
|
`_graph_runs.lance` bytes remain until a `delete_prefix` primitive lands.)
|
||||||
- Cancelled mutation futures leave **no graph-level state** — only orphaned
|
- Cancelled mutation futures leave **no graph-level state** — only orphaned
|
||||||
Lance fragments, which the existing `omnigraph cleanup` pipe reclaims.
|
Lance fragments, which the existing `omnigraph cleanup` pipe reclaims.
|
||||||
|
|
||||||
|
|
@ -151,10 +157,14 @@ are left at `Lance HEAD = manifest_pinned + 1`.
|
||||||
|
|
||||||
**Recovery protocol** (lifecycle of every staged-write writer —
|
**Recovery protocol** (lifecycle of every staged-write writer —
|
||||||
`MutationStaging::finalize`, `schema_apply::apply_schema_with_lock`,
|
`MutationStaging::finalize`, `schema_apply::apply_schema_with_lock`,
|
||||||
`branch_merge_on_current_target`, `ensure_indices_for_branch`):
|
`branch_merge_on_current_target`, `ensure_indices_for_branch`,
|
||||||
|
`optimize_all_tables`):
|
||||||
|
|
||||||
1. **Phase A**: writer writes a sidecar JSON to
|
1. **Phase A**: writer writes a sidecar JSON to
|
||||||
`__recovery/{ulid}.json` BEFORE its first `commit_staged`. The
|
`__recovery/{ulid}.json` BEFORE its first HEAD-advancing commit
|
||||||
|
(`commit_staged`, or `compact_files` for `optimize_all_tables`,
|
||||||
|
which advances the Lance HEAD via a reserve-fragments + rewrite
|
||||||
|
commit rather than a staged write). The
|
||||||
sidecar names every `(table_key, table_path, expected_version,
|
sidecar names every `(table_key, table_path, expected_version,
|
||||||
post_commit_pin)` it intends to commit + the writer kind +
|
post_commit_pin)` it intends to commit + the writer kind +
|
||||||
actor_id.
|
actor_id.
|
||||||
|
|
@ -189,8 +199,13 @@ recovery sweep in `crates/omnigraph/src/db/manifest/recovery.rs`:
|
||||||
otherwise full open-time recovery rolls them back and refresh-time
|
otherwise full open-time recovery rolls them back and refresh-time
|
||||||
recovery leaves them for the next read-write open.
|
recovery leaves them for the next read-write open.
|
||||||
- Otherwise **roll back**: per-table `Dataset::restore` to the
|
- Otherwise **roll back**: per-table `Dataset::restore` to the
|
||||||
manifest-pinned table version for that branch. Rollback records the
|
manifest-pinned table version, then a single `ManifestBatchPublisher::publish`
|
||||||
actual restore target in the audit row's `to_version`.
|
of the restored HEAD — symmetric with roll-forward, so `manifest == HEAD`
|
||||||
|
after recovery (no residual drift). This convergence is what lets a
|
||||||
|
failed-then-retried schema apply succeed instead of failing one version higher
|
||||||
|
each iteration. The audit row's `to_version` records the logical
|
||||||
|
rolled-back-to version (`manifest_pinned`); the manifest is published at the
|
||||||
|
restore commit (`manifest_pinned + 1`, same content).
|
||||||
- After a successful roll-forward or roll-back, an audit row is
|
- After a successful roll-forward or roll-back, an audit row is
|
||||||
recorded — `_graph_commits.lance` carries
|
recorded — `_graph_commits.lance` carries
|
||||||
a commit tagged `actor_id = "omnigraph:recovery"`, and a sibling
|
a commit tagged `actor_id = "omnigraph:recovery"`, and a sibling
|
||||||
|
|
@ -242,9 +257,14 @@ list`.
|
||||||
|
|
||||||
## Migration code
|
## Migration code
|
||||||
|
|
||||||
`db/manifest/migrations.rs` does not change. Active deletion of
|
`db/manifest/migrations.rs` carries the v2→v3 internal-schema step (MR-770):
|
||||||
`_graph_runs.lance` belongs in MR-770 (the production sweep) — this PR
|
a one-time sweep that deletes legacy `__run__*` staging branches off
|
||||||
stops *creating* run state but does not destroy legacy bytes on disk.
|
`__manifest`. It runs in `Omnigraph::open(ReadWrite)` (via
|
||||||
|
`manifest::migrate_on_open`, before the coordinator reads branch state) and
|
||||||
|
again on the publisher's write path; both are idempotent once the stamp is at
|
||||||
|
v3. Deleting the inert `_graph_runs.lance` / `_graph_run_actors.lance` dataset
|
||||||
|
*bytes* is still deferred — it needs a `StorageAdapter::delete_prefix`
|
||||||
|
primitive — but those bytes are invisible to graph-level state.
|
||||||
|
|
||||||
## Mid-query partial failure: closed by MR-794
|
## Mid-query partial failure: closed by MR-794
|
||||||
|
|
||||||
|
|
@ -65,7 +65,7 @@ manifest. The next mutation against that table fails with
|
||||||
`ExpectedVersionMismatch`. Most validation runs before any Lance write,
|
`ExpectedVersionMismatch`. Most validation runs before any Lance write,
|
||||||
so single-statement mutations are unaffected; the narrow path is
|
so single-statement mutations are unaffected; the narrow path is
|
||||||
multi-statement queries with late-op failures. Tracked as a follow-up;
|
multi-statement queries with late-op failures. Tracked as a follow-up;
|
||||||
see [docs/dev/runs.md](../dev/runs.md#known-limitation-mid-query-partial-failure-on-the-same-table)
|
see [docs/dev/writes.md](../dev/writes.md#mid-query-partial-failure-closed-by-mr-794)
|
||||||
for the workaround.
|
for the workaround.
|
||||||
|
|
||||||
## Upgrade notes
|
## Upgrade notes
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ mutation proceeds normally.
|
||||||
HEAD on every staged table is untouched and the next mutation
|
HEAD on every staged table is untouched and the next mutation
|
||||||
proceeds normally. A narrowed residual remains at the
|
proceeds normally. A narrowed residual remains at the
|
||||||
finalize→publisher boundary (multi-table `commit_staged` is not
|
finalize→publisher boundary (multi-table `commit_staged` is not
|
||||||
atomic with the manifest commit) — see [docs/dev/runs.md](../dev/runs.md)
|
atomic with the manifest commit) — see [docs/dev/writes.md](../dev/writes.md)
|
||||||
"Finalize → publisher residual" for details.
|
"Finalize → publisher residual" for details.
|
||||||
- **D₂ parse-time rule**: a single mutation query is either
|
- **D₂ parse-time rule**: a single mutation query is either
|
||||||
insert/update-only or delete-only. Mixed → rejected with a clear
|
insert/update-only or delete-only. Mixed → rejected with a clear
|
||||||
|
|
@ -75,14 +75,14 @@ mutation proceeds normally.
|
||||||
|
|
||||||
## Tests added
|
## Tests added
|
||||||
|
|
||||||
- `tests/runs.rs::partial_failure_leaves_target_queryable_and_unblocks_next_mutation`
|
- `tests/writes.rs::partial_failure_leaves_target_queryable_and_unblocks_next_mutation`
|
||||||
(replaces the old `partial_failure_observably_rolls_back_but_blocks_next_mutation_on_same_table`)
|
(replaces the old `partial_failure_observably_rolls_back_but_blocks_next_mutation_on_same_table`)
|
||||||
- `tests/runs.rs::mutation_rejects_mixed_insert_and_delete_at_parse_time`
|
- `tests/writes.rs::mutation_rejects_mixed_insert_and_delete_at_parse_time`
|
||||||
- `tests/runs.rs::mixed_insert_and_update_on_same_person_coalesces_to_one_merge`
|
- `tests/writes.rs::mixed_insert_and_update_on_same_person_coalesces_to_one_merge`
|
||||||
- `tests/runs.rs::multiple_appends_to_same_edge_coalesce_to_one_append`
|
- `tests/writes.rs::multiple_appends_to_same_edge_coalesce_to_one_append`
|
||||||
- `tests/runs.rs::multi_statement_inserts_publish_exactly_once`
|
- `tests/writes.rs::multi_statement_inserts_publish_exactly_once`
|
||||||
- `tests/runs.rs::load_with_bad_edge_reference_unblocks_next_load`
|
- `tests/writes.rs::load_with_bad_edge_reference_unblocks_next_load`
|
||||||
- `tests/runs.rs::load_with_cardinality_violation_unblocks_next_load`
|
- `tests/writes.rs::load_with_cardinality_violation_unblocks_next_load`
|
||||||
|
|
||||||
## Files changed
|
## Files changed
|
||||||
|
|
||||||
|
|
@ -105,7 +105,7 @@ mutation proceeds normally.
|
||||||
- `Cargo.toml` (workspace) + `crates/omnigraph/Cargo.toml` — added
|
- `Cargo.toml` (workspace) + `crates/omnigraph/Cargo.toml` — added
|
||||||
`datafusion = "52"` direct dep (transitively pulled by Lance
|
`datafusion = "52"` direct dep (transitively pulled by Lance
|
||||||
already; required for `MemTable`).
|
already; required for `MemTable`).
|
||||||
- `docs/dev/runs.md` — removed "Known limitation" section; documented
|
- `docs/dev/writes.md` — removed "Known limitation" section; documented
|
||||||
the new accumulator + D₂ + LoadMode::Overwrite residual.
|
the new accumulator + D₂ + LoadMode::Overwrite residual.
|
||||||
- `docs/dev/invariants.md` — mutation atomicity / read-your-writes status
|
- `docs/dev/invariants.md` — mutation atomicity / read-your-writes status
|
||||||
flipped to `upheld for inserts/updates`.
|
flipped to `upheld for inserts/updates`.
|
||||||
|
|
@ -127,7 +127,7 @@ mutation proceeds normally.
|
||||||
as legacy.
|
as legacy.
|
||||||
- `docs/user/cli.md` — replaced the legacy `omnigraph run *` quickstart
|
- `docs/user/cli.md` — replaced the legacy `omnigraph run *` quickstart
|
||||||
block with `omnigraph commit list/show`.
|
block with `omnigraph commit list/show`.
|
||||||
- `docs/dev/testing.md` — extended the `runs.rs` row to cover the new
|
- `docs/dev/testing.md` — extended the `writes.rs` row to cover the new
|
||||||
staged-write contract tests; added the `staged_writes.rs` row.
|
staged-write contract tests; added the `staged_writes.rs` row.
|
||||||
- `AGENTS.md` (CLAUDE.md symlink) — updated the atomic-per-query
|
- `AGENTS.md` (CLAUDE.md symlink) — updated the atomic-per-query
|
||||||
description and the L2 capability matrix row.
|
description and the L2 capability matrix row.
|
||||||
|
|
|
||||||
28
docs/releases/v0.6.1.md
Normal file
28
docs/releases/v0.6.1.md
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
# Omnigraph v0.6.1
|
||||||
|
|
||||||
|
v0.6.1 focuses on operational polish after v0.6.0: stored-query registries, safer branch cleanup, more complete release artifacts, and a Lance blob-compaction workaround.
|
||||||
|
|
||||||
|
## Highlights
|
||||||
|
|
||||||
|
- **Stored-query registries.** `omnigraph.yaml` can declare curated `queries:` blocks per graph. Servers load and type-check them at startup, `omnigraph queries validate` checks them offline, `omnigraph queries list` shows exposed queries and typed params, `GET /queries` exposes a typed catalog, and `POST /queries/{name}` invokes a stored query without accepting ad hoc `.gq` source from the client.
|
||||||
|
- **Stored-query policy gate.** New Cedar action `invoke_query` gates the stored-query invocation surface. Stored mutations are double-gated: `invoke_query` to reach the stored query and `change` for the actual write.
|
||||||
|
- **Safer branch deletion.** `branch_delete` now treats the manifest as the authority, flips branch visibility atomically, and reclaims per-table/commit-graph forks as derived state. If best-effort reclaim is interrupted, `cleanup` reconciles orphaned forks; reusing a branch name before cleanup reports an actionable error.
|
||||||
|
- **Legacy `__run__` cleanup (MR-770).** Removed the last functional remnant of the Run state machine (retired in v0.4.0): the `__run__` branch-name guard. A new v2→v3 `__manifest` internal-schema migration sweeps any stale `__run__*` staging branches on the first read-write open, so `__run__*` is no longer a reserved branch name. This closes the "unpromoted `__run__` branches block reads" condition behind the zombie-run cascade incident; cleanup of the inert `_graph_runs.lance` dataset bytes is tracked separately (it needs a `delete_prefix` primitive).
|
||||||
|
- **Blob-safe optimize.** `omnigraph optimize` skips tables with `Blob` properties instead of failing the whole sweep on Lance's blob-v2 compaction decode bug. Skips are visible in human output, `--json` as `skipped`, `TableOptimizeStats.skipped`, and logs; non-blob tables still compact normally.
|
||||||
|
- **Deployment improvements.** The container entrypoint now composes `OMNIGRAPH_TARGET_URI` with `OMNIGRAPH_CONFIG`, so operators can keep the graph URI in env while loading policy/query config from a mounted file. The local RustFS bootstrap pins RustFS beta.3 and allows the current insecure local-dev default credentials.
|
||||||
|
- **Windows release support.** Tagged and edge releases now publish Windows x86_64 archives containing `omnigraph.exe` and `omnigraph-server.exe`, with a PowerShell installer and Windows install docs.
|
||||||
|
- **Release tooling.** Homebrew formula generation was tightened to produce audit-clean formulas.
|
||||||
|
|
||||||
|
## Compatibility Notes
|
||||||
|
|
||||||
|
- A graph selected by name (`--target` or `server.graph`) now uses `graphs.<name>.policy` and `graphs.<name>.queries`. Top-level `policy` / `queries` blocks are only for anonymous bare-URI single-graph mode; using them with a named graph now fails loudly with migration guidance.
|
||||||
|
- `mcp.expose` defaults to `true` for stored-query registry entries. Set `mcp: { expose: false }` for service-only queries that should not appear in the catalog.
|
||||||
|
- `invoke_query` is graph-scoped, not branch-scoped. Branch/snapshot access remains enforced by the inner `read` / `change` gate.
|
||||||
|
- **Legacy `__run__` migration.** Graphs created before v0.4.0 are migrated automatically on the first **read-write** open by a v0.6.1 binary (one-time `__manifest` stamp v2→v3 sweep of stale `__run__*` branches). No action required. Two caveats: (1) a graph opened **read-only** still lists any stale `__run__*` branch until its first read-write open, since the migration is write-path-only like all manifest migrations — long-lived read-only deployments should be opened read-write once after upgrading; (2) the inert `_graph_runs.lance` / `_graph_run_actors.lance` dataset bytes are left in place until a future `delete_prefix` primitive (they are invisible to graph-level state).
|
||||||
|
- Blob tables are not compacted until the upstream Lance fix lands, so fragment count and deleted-row space on blob tables are not reclaimed by `optimize`. Reads, writes, and query results are unaffected; no on-disk migration is required.
|
||||||
|
- `TableOptimizeStats` is now `#[non_exhaustive]` and gains a `skipped: Option<SkipReason>` field; the new `SkipReason` enum is also `#[non_exhaustive]`. This is a source-level change only for downstream code that constructed `TableOptimizeStats` with a struct literal — rare, since it is produced by `optimize` and consumed by reading its fields; field access is unaffected, and `#[non_exhaustive]` keeps future additions non-breaking.
|
||||||
|
|
||||||
|
## Docs And Cleanup
|
||||||
|
|
||||||
|
- Public docs were updated for stored queries, policy, server routes, deployment, Windows installation, branch deletion, maintenance, and the rename of the `runs` docs to `writes`.
|
||||||
|
- README copy and release documentation were refreshed; older release notes had small typo/wording fixes.
|
||||||
54
docs/rfcs/0000-template.md
Normal file
54
docs/rfcs/0000-template.md
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
# RFC NNNN: <title>
|
||||||
|
|
||||||
|
| | |
|
||||||
|
|---|---|
|
||||||
|
| **Status** | Proposed |
|
||||||
|
| **Author(s)** | <your name / handle> |
|
||||||
|
| **Discussion** | <link to the originating Discussion, if any> |
|
||||||
|
| **Implementation** | <issue/PR links, filled in as work lands> |
|
||||||
|
|
||||||
|
> Status is maintained by maintainers: `Proposed` while the PR is open,
|
||||||
|
> `Accepted` on merge, `Declined` on close, `Superseded by NNNN` later.
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
One paragraph: what this changes, in plain terms.
|
||||||
|
|
||||||
|
## Motivation
|
||||||
|
|
||||||
|
What problem does this solve, and why is it worth the ongoing cost? Tie it to a
|
||||||
|
concrete need (a Discussion, a recurring issue, a user request). Per the
|
||||||
|
project's first principle, argue the *long-run liability*, not just the
|
||||||
|
short-term convenience.
|
||||||
|
|
||||||
|
## Guide-level explanation
|
||||||
|
|
||||||
|
Explain the change as you'd teach it to a user or contributor: new commands,
|
||||||
|
syntax, API shapes, behavior. Examples first.
|
||||||
|
|
||||||
|
## Reference-level design
|
||||||
|
|
||||||
|
The precise design: data structures, IR/AST/planner changes, storage/format
|
||||||
|
impact, migration path, error behavior. Enough that a reviewer can find the
|
||||||
|
holes.
|
||||||
|
|
||||||
|
## Invariants & deny-list check
|
||||||
|
|
||||||
|
Which Hard Invariants in [../dev/invariants.md](../dev/invariants.md) does this
|
||||||
|
touch? Does it brush against any deny-list item — and if so, why is this the
|
||||||
|
justified exception? State explicitly that no invariant is weakened, or which
|
||||||
|
Known Gap moves.
|
||||||
|
|
||||||
|
## Drawbacks & alternatives
|
||||||
|
|
||||||
|
What does this cost, what did you reject, and why. "Do nothing" is a valid
|
||||||
|
alternative to weigh.
|
||||||
|
|
||||||
|
## Reversibility
|
||||||
|
|
||||||
|
Is this reversible? On-disk/wire/format and substrate choices are near-permanent
|
||||||
|
and demand more evidence; a CLI flag or doc is cheap to undo. Say which this is.
|
||||||
|
|
||||||
|
## Unresolved questions
|
||||||
|
|
||||||
|
What's deliberately left open for review to settle.
|
||||||
66
docs/rfcs/README.md
Normal file
66
docs/rfcs/README.md
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
# RFCs
|
||||||
|
|
||||||
|
Substantial changes to OmniGraph — new user-facing surface, format or protocol
|
||||||
|
changes, anything irreversible or cross-cutting — go through a lightweight RFC
|
||||||
|
so the design is agreed *as reviewable code* before implementation starts. This
|
||||||
|
is the public RFC track, open to **anyone, including external contributors**.
|
||||||
|
|
||||||
|
This complements the always-on review bar in
|
||||||
|
[../dev/invariants.md](../dev/invariants.md): the invariants say *what every
|
||||||
|
change must respect*; an RFC says *why this particular change is worth making and
|
||||||
|
how*.
|
||||||
|
|
||||||
|
> **Two tracks, don't conflate them.** This `docs/rfcs/` directory is the
|
||||||
|
> **public contribution** track (anyone authors; maintainers accept). The
|
||||||
|
> maintainer-internal RFCs under `docs/dev/rfc-00N-*.md` are a separate,
|
||||||
|
> team-owned track for in-flight internal work. If you're an outside
|
||||||
|
> contributor, you're in the right place here.
|
||||||
|
|
||||||
|
## When you need one
|
||||||
|
|
||||||
|
- **RFC required:** new query/schema/CLI/HTTP surface; on-disk or wire-format
|
||||||
|
changes; a new substrate dependency; anything the deny-list in
|
||||||
|
[../dev/invariants.md](../dev/invariants.md) flags; anything irreversible
|
||||||
|
("reversibility shapes evidence demand").
|
||||||
|
- **RFC not required:** bug fixes for an `accepted` issue, and the trivial
|
||||||
|
fast-lane (typos, docs, deps) — see [../../CONTRIBUTING.md](../../CONTRIBUTING.md).
|
||||||
|
|
||||||
|
If you're unsure, start a [Discussion](../../../discussions); a maintainer will
|
||||||
|
tell you whether it needs an RFC.
|
||||||
|
|
||||||
|
## Lifecycle
|
||||||
|
|
||||||
|
```
|
||||||
|
Discussion (incubate, get rough consensus)
|
||||||
|
│ graduate
|
||||||
|
▼
|
||||||
|
RFC pull request → adds docs/rfcs/NNNN-title.md (Status: Proposed)
|
||||||
|
│
|
||||||
|
maintainer review ──▶ changes requested / declined (PR closed, with rationale)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
merged == Accepted (the merged file is the durable decision record)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
Implementation PR(s) reference the accepted RFC
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Author:** anyone. **Acceptance:** a maintainer decision, performed by
|
||||||
|
merging the RFC PR. Declining is closing it with rationale.
|
||||||
|
- The merged RFC *is* the accepted record — there is no separate sign-off step.
|
||||||
|
- Later reversals don't edit history: supersede with a new RFC that links back
|
||||||
|
and flip the old one's `Status` to `Superseded`.
|
||||||
|
|
||||||
|
## Numbering & naming
|
||||||
|
|
||||||
|
- File: `docs/rfcs/NNNN-kebab-title.md`, where `NNNN` is the next free
|
||||||
|
zero-padded integer (`0001`, `0002`, …). `0000-template.md` is reserved.
|
||||||
|
- Pick the number when you open the PR; if it collides with another in-flight
|
||||||
|
RFC, the second to merge bumps theirs.
|
||||||
|
|
||||||
|
## Status values
|
||||||
|
|
||||||
|
`Proposed` (open PR) · `Accepted` (merged) · `Declined` (closed) ·
|
||||||
|
`Superseded by NNNN` · `Implemented` (set once the work lands, optional).
|
||||||
|
|
||||||
|
Copy [0000-template.md](0000-template.md) to start.
|
||||||
|
|
@ -4,4 +4,4 @@
|
||||||
- `_as` variants of every write API let callers override the actor: `mutate_as`, `ingest_as`, `branch_merge_as`, `apply_schema_as`, etc.
|
- `_as` variants of every write API let callers override the actor: `mutate_as`, `ingest_as`, `branch_merge_as`, `apply_schema_as`, etc.
|
||||||
- Actor IDs are persisted on `GraphCommit.actor_id` with split storage in `_graph_commit_actors.lance` (the commit graph is split into `_graph_commits.lance` for the linkage and `_graph_commit_actors.lance` for the actor map).
|
- Actor IDs are persisted on `GraphCommit.actor_id` with split storage in `_graph_commit_actors.lance` (the commit graph is split into `_graph_commits.lance` for the linkage and `_graph_commit_actors.lance` for the actor map).
|
||||||
- HTTP server uses the bearer-token actor automatically; CLI uses the local user / explicit env (no implicit actor).
|
- HTTP server uses the bearer-token actor automatically; CLI uses the local user / explicit env (no implicit actor).
|
||||||
- Pre-v0.4.0 graphs also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. The Run state machine was removed in MR-771; those files are inert post-v0.4.0 and reclaimed by MR-770's production sweep.
|
- Pre-v0.4.0 graphs also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. The Run state machine was removed in MR-771; those files are inert post-v0.4.0. The v2→v3 manifest migration sweeps any stale `__run__*` branches on first write-open (MR-770); the inert dataset bytes remain until a `delete_prefix` primitive lands.
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,10 @@ Lance supports branching at the dataset level: a branch is a named lineage of ve
|
||||||
|
|
||||||
OmniGraph builds *graph branches* on top by branching every sub-table coherently:
|
OmniGraph builds *graph branches* on top by branching every sub-table coherently:
|
||||||
|
|
||||||
- `branch_create(name)` / `branch_create_from(target, name)` — disallowed name `main`; fails if branch exists; ensures the schema-apply lock is idle.
|
- `branch_create(name)` / `branch_create_from(target, name)` — disallowed name `main`; fails if branch exists; ensures the schema-apply lock is idle. Atomic and authority-first like `branch_delete`: it flips the `__manifest` branch (authority), then creates the derived commit-graph branch, force-dropping any orphaned commit-graph ref left by an incomplete prior delete (the manifest branch is fresh, so a same-named commit-graph branch is provably a zombie). If commit-graph creation fails, the manifest branch is rolled back so the name never half-exists.
|
||||||
- `branch_list()` — returns public branches, **filters internal** `__run__…` and `__schema_apply_lock__` prefixes.
|
- `branch_list()` — returns public branches, **filters the internal** `__schema_apply_lock__` branch.
|
||||||
- `branch_delete(name)` — refuses if there are descendants or active runs on the branch; cleans up owned per-branch fragments.
|
- `branch_delete(name)` — refuses if there are descendants on the branch, or if it is the current branch. The manifest is the single authority for branch existence: deletion flips the `__manifest` branch ref first (one atomic op), after which the branch is gone from every snapshot. The owned per-table forks and the commit-graph branch are derived state, reclaimed best-effort with `force_delete_branch` after the flip. A failure during that reclaim (transient object-store error) does not fail the call or block the authority flip; the leftover forks are unreachable orphans that the [`cleanup`](maintenance.md) reconciler converges. One consequence: if a delete's best-effort reclaim fails, reusing that branch name before the next `cleanup` surfaces a clear error pointing at `cleanup` (the stale fork would otherwise collide on first write).
|
||||||
- **Lazy forking**: a branch only forks a sub-table when that sub-table is first mutated on it. Pure-read branches share fragments with their source.
|
- **Lazy forking**: a branch only forks a sub-table when that sub-table is first mutated on it. Pure-read branches share fragments with their source. A fork collision is classified by the manifest authority, not by Lance branch versions: if the live manifest already records the fork on the active branch, a concurrent first-write won and the caller gets a retryable "refresh and retry"; if the manifest does not, a physical branch there is an orphan and the caller is pointed at `cleanup`.
|
||||||
- `sync_branch(branch)` — re-binds the in-memory handle to the latest head of the branch.
|
- `sync_branch(branch)` — re-binds the in-memory handle to the latest head of the branch.
|
||||||
|
|
||||||
## L2 — Commit graph (`db/commit_graph.rs`)
|
## L2 — Commit graph (`db/commit_graph.rs`)
|
||||||
|
|
@ -51,13 +51,13 @@ Notes:
|
||||||
|
|
||||||
## L2 — Internal system branches
|
## L2 — Internal system branches
|
||||||
|
|
||||||
Filtered from `branch_list()` but visible to internals:
|
Internal or legacy branch refs:
|
||||||
|
|
||||||
- `__schema_apply_lock__` — serializes schema migrations.
|
- `__schema_apply_lock__` — serializes schema migrations; filtered from `branch_list()` but visible to internals.
|
||||||
- `__run__<run-id>` — legacy from the pre-v0.4.0 Run state machine (removed in MR-771). The branch-name guard predicate `is_internal_run_branch` is kept as defense-in-depth so users cannot create a branch matching the legacy prefix; the filter will be removed once production legacy branches are swept (MR-770).
|
- `__run__<run-id>` — legacy from the pre-v0.4.0 Run state machine (removed in MR-771). These are swept off `__manifest` on the first read-write open by the v2→v3 internal-schema migration (MR-770), and `__run__*` is no longer a reserved name. Known limitation: a pre-v0.4.0 graph opened **read-only** still surfaces any stale `__run__*` branch in `branch_list()` until its first read-write open (the migration is write-path-only, like all manifest migrations).
|
||||||
|
|
||||||
## L2 — Recovery audit trail
|
## L2 — Recovery audit trail
|
||||||
|
|
||||||
The four migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) protect their multi-table commits with a sidecar at `__recovery/{ulid}.json` written before Phase B and deleted after Phase C. The next `Omnigraph::open` (gated on `OpenMode::ReadWrite`) runs the recovery sweep in `crates/omnigraph/src/db/manifest/recovery.rs`: classify per-table state, decide all-or-nothing per sidecar, roll forward / back, record an audit row.
|
The five migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`, `optimize_all_tables`) protect their multi-table commits with a sidecar at `__recovery/{ulid}.json` written before Phase B and deleted after Phase C. The next `Omnigraph::open` (gated on `OpenMode::ReadWrite`) runs the recovery sweep in `crates/omnigraph/src/db/manifest/recovery.rs`: classify per-table state, decide all-or-nothing per sidecar, roll forward / back, record an audit row.
|
||||||
|
|
||||||
Audit rows live in `_graph_commit_recoveries.lance` (sibling to `_graph_commits.lance`) and reference the commit graph by `graph_commit_id`. The linked recovery commit is identified by that same `graph_commit_id`, and `actor_id="omnigraph:recovery"` is stored in `_graph_commit_actors.lance` (joined by `graph_commit_id`) — `_graph_commits.lance` itself does not carry the `actor_id` column. To find recoveries for a specific original actor: `omnigraph commit list --filter actor=omnigraph:recovery`, then join to `_graph_commit_recoveries.lance` by `graph_commit_id` to read `recovery_for_actor`. Schema: see `crates/omnigraph/src/db/recovery_audit.rs`.
|
Audit rows live in `_graph_commit_recoveries.lance` (sibling to `_graph_commits.lance`) and reference the commit graph by `graph_commit_id`. The linked recovery commit is identified by that same `graph_commit_id`, and `actor_id="omnigraph:recovery"` is stored in `_graph_commit_actors.lance` (joined by `graph_commit_id`) — `_graph_commits.lance` itself does not carry the `actor_id` column. To find recoveries for a specific original actor: `omnigraph commit list --filter actor=omnigraph:recovery`, then join to `_graph_commit_recoveries.lance` by `graph_commit_id` to read `recovery_for_actor`. Schema: see `crates/omnigraph/src/db/recovery_audit.rs`.
|
||||||
|
|
|
||||||
|
|
@ -20,10 +20,11 @@ A reference for the `omnigraph` binary's command surface and `omnigraph.yaml` sc
|
||||||
| `run list \| show \| publish \| abort` | transactional run ops |
|
| `run list \| show \| publish \| abort` | transactional run ops |
|
||||||
| `schema plan \| apply \| show (alias: get)` | migrations |
|
| `schema plan \| apply \| show (alias: get)` | migrations |
|
||||||
| `lint` (alias: `check`) | offline / graph-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` |
|
| `lint` (alias: `check`) | offline / graph-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` |
|
||||||
| `optimize` | non-destructive Lance compaction |
|
| `queries validate \| list` | operate on the server-side stored-query registry (the `queries:` block). `validate` type-checks every stored query against the live schema offline (opens the selected graph; exits non-zero on any breakage), catching schema drift without restarting the server; `list` prints the selected registry's query names, MCP exposure, and typed params. For per-graph registries, pass `--target <graph>` or set `cli.graph`; with no graph selection, `list` shows only top-level `queries:`. Distinct from `lint`, which validates a single `.gq` file |
|
||||||
|
| `optimize` | non-destructive Lance compaction (skips tables with `Blob` columns; `--json` reports a `skipped` field) |
|
||||||
| `cleanup --keep N --older-than 7d --confirm` | destructive version GC |
|
| `cleanup --keep N --older-than 7d --confirm` | destructive version GC |
|
||||||
| `embed` | offline JSONL embedding pipeline |
|
| `embed` | offline JSONL embedding pipeline |
|
||||||
| `policy validate \| test \| explain` | Cedar tooling |
|
| `policy validate \| test \| explain` | Cedar tooling. Selects `cli.graph`, else `server.graph`, else top-level `policy.file` |
|
||||||
| `version` / `-v` | print `omnigraph 0.3.x` |
|
| `version` / `-v` | print `omnigraph 0.3.x` |
|
||||||
|
|
||||||
## `omnigraph.yaml` schema
|
## `omnigraph.yaml` schema
|
||||||
|
|
@ -34,6 +35,13 @@ graphs:
|
||||||
<name>:
|
<name>:
|
||||||
uri: <local|s3://|http(s)://>
|
uri: <local|s3://|http(s)://>
|
||||||
bearer_token_env: <ENV_NAME>
|
bearer_token_env: <ENV_NAME>
|
||||||
|
queries: # per-graph stored-query registry (server-role; multi-graph mode)
|
||||||
|
<query-name>: # key MUST equal the `query <name>` symbol inside the .gq
|
||||||
|
file: <path-to-.gq> # relative to this config's directory
|
||||||
|
mcp:
|
||||||
|
expose: true # default true: listed in the MCP catalog (GET /queries); set false to hide (still HTTP-callable)
|
||||||
|
tool_name: <name> # optional MCP tool-name override (defaults to <query-name>;
|
||||||
|
# must be unique across exposed queries)
|
||||||
server:
|
server:
|
||||||
graph: <name>
|
graph: <name>
|
||||||
bind: <ip:port>
|
bind: <ip:port>
|
||||||
|
|
@ -59,6 +67,8 @@ aliases:
|
||||||
graph: <name>
|
graph: <name>
|
||||||
branch: <name>
|
branch: <name>
|
||||||
format: <output-format>
|
format: <output-format>
|
||||||
|
queries: # top-level registry — applies only to a bare-URI (anonymous) graph; a graph served by name uses its `graphs.<id>.queries`. Mirrors top-level `policy`.
|
||||||
|
<query-name>: { file: <path-to-.gq> } # mcp.expose defaults to true
|
||||||
policy:
|
policy:
|
||||||
file: ./policy.yaml
|
file: ./policy.yaml
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,14 @@
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `MANIFEST_DIR` | `__manifest` | `db/manifest/layout.rs` |
|
| `MANIFEST_DIR` | `__manifest` | `db/manifest/layout.rs` |
|
||||||
| Commit graph dir | `_graph_commits.lance` | `db/commit_graph.rs` |
|
| Commit graph dir | `_graph_commits.lance` | `db/commit_graph.rs` |
|
||||||
| Run registry dir (legacy, removed MR-771) | `_graph_runs.lance` | inert post-v0.4.0; reclaimed by MR-770 |
|
| Run registry dir (legacy, removed MR-771) | `_graph_runs.lance` | inert post-v0.4.0; bytes remain until a `delete_prefix` primitive lands |
|
||||||
| Run branch prefix (legacy, removed MR-771) | `__run__` | filtered by `is_internal_run_branch` defense-in-depth |
|
| Run branch prefix (legacy, removed MR-771/MR-770) | `__run__` | swept off `__manifest` by the v2→v3 migration; no longer a reserved name |
|
||||||
| Schema apply lock | `__schema_apply_lock__` | `db/mod.rs` |
|
| Schema apply lock | `__schema_apply_lock__` | `db/mod.rs` |
|
||||||
| Manifest publisher retry budget | `PUBLISHER_RETRY_BUDGET = 5` | `db/manifest/publisher.rs` |
|
| Manifest publisher retry budget | `PUBLISHER_RETRY_BUDGET = 5` | `db/manifest/publisher.rs` |
|
||||||
| Internal manifest schema version | `INTERNAL_MANIFEST_SCHEMA_VERSION = 2` | `db/manifest/migrations.rs` |
|
| Internal manifest schema version | `INTERNAL_MANIFEST_SCHEMA_VERSION = 3` | `db/manifest/migrations.rs` |
|
||||||
| Merge stage batch | `MERGE_STAGE_BATCH_ROWS = 8192` | `exec/merge.rs` |
|
| Merge stage batch | `MERGE_STAGE_BATCH_ROWS = 8192` | `exec/merge.rs` |
|
||||||
| Maintenance concurrency | `OMNIGRAPH_MAINTENANCE_CONCURRENCY=8` | `db/omnigraph/optimize.rs` |
|
| Maintenance concurrency | `OMNIGRAPH_MAINTENANCE_CONCURRENCY=8` | `db/omnigraph/optimize.rs` |
|
||||||
|
| Lance blob compaction support | `LANCE_SUPPORTS_BLOB_COMPACTION = false` | `db/omnigraph/optimize.rs` |
|
||||||
| Graph index cache size | `8` (LRU) | `runtime_cache.rs` |
|
| Graph index cache size | `8` (LRU) | `runtime_cache.rs` |
|
||||||
| Default body limit | `1 MB` | `omnigraph-server/lib.rs` |
|
| Default body limit | `1 MB` | `omnigraph-server/lib.rs` |
|
||||||
| Ingest body limit | `32 MB` | `omnigraph-server/lib.rs` |
|
| Ingest body limit | `32 MB` | `omnigraph-server/lib.rs` |
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,8 @@ Build or install:
|
||||||
- `omnigraph`
|
- `omnigraph`
|
||||||
- `omnigraph-server`
|
- `omnigraph-server`
|
||||||
|
|
||||||
|
On Windows, the binaries are `omnigraph.exe` and `omnigraph-server.exe`.
|
||||||
|
|
||||||
Run against a local graph:
|
Run against a local graph:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -107,6 +109,35 @@ docker run --rm -p 8080:8080 \
|
||||||
--bind 0.0.0.0:8080
|
--bind 0.0.0.0:8080
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Container entrypoint env vars
|
||||||
|
|
||||||
|
When no positional args are given, the image entrypoint
|
||||||
|
(`docker/entrypoint.sh`) builds the server command from env vars:
|
||||||
|
|
||||||
|
| Var | Effect |
|
||||||
|
|---|---|
|
||||||
|
| `OMNIGRAPH_TARGET_URI` | Graph URI, passed as the positional argument. |
|
||||||
|
| `OMNIGRAPH_CONFIG` | Path to an `omnigraph.yaml`, passed as `--config`. Used to supply a `policy.file` (Cedar authorization). The config file and any relative `policy.file` must be mounted into the container. |
|
||||||
|
| `OMNIGRAPH_TARGET` | Graph name to select from the config's `graphs:` block (with `OMNIGRAPH_CONFIG`, when no `OMNIGRAPH_TARGET_URI`). |
|
||||||
|
| `OMNIGRAPH_BIND` | Listen address (default `0.0.0.0:8080`). |
|
||||||
|
|
||||||
|
`OMNIGRAPH_TARGET_URI` and `OMNIGRAPH_CONFIG` **compose**: set both to keep the
|
||||||
|
graph URI in the env var while loading policy from the config file (the
|
||||||
|
positional URI wins over any `graphs:` entry). To enable Cedar policy on a
|
||||||
|
container otherwise driven by `OMNIGRAPH_TARGET_URI`, mount the config dir and
|
||||||
|
add `OMNIGRAPH_CONFIG`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm -p 8080:8080 \
|
||||||
|
-e OMNIGRAPH_SERVER_BEARER_TOKEN="change-me" \
|
||||||
|
-e OMNIGRAPH_TARGET_URI="s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0" \
|
||||||
|
-e OMNIGRAPH_CONFIG="/etc/omnigraph/omnigraph.yaml" \
|
||||||
|
-v "$PWD/config:/etc/omnigraph:ro" \
|
||||||
|
omnigraph-server:local
|
||||||
|
# /etc/omnigraph/omnigraph.yaml contains `policy: { file: ./policy.yaml }`;
|
||||||
|
# policy.yaml (+ optional policy.tests.yaml) sit beside it in the mount.
|
||||||
|
```
|
||||||
|
|
||||||
## Auth
|
## Auth
|
||||||
|
|
||||||
The server can run unauthenticated for local development only when explicitly
|
The server can run unauthenticated for local development only when explicitly
|
||||||
|
|
@ -141,8 +172,10 @@ The server binary ships in two flavors:
|
||||||
| **AWS** | `cargo build --release --features aws` | Adds AWS Secrets Manager backend for bearer tokens |
|
| **AWS** | `cargo build --release --features aws` | Adds AWS Secrets Manager backend for bearer tokens |
|
||||||
|
|
||||||
Tagged release archives contain the default `omnigraph` and
|
Tagged release archives contain the default `omnigraph` and
|
||||||
`omnigraph-server` binaries. AWS-enabled server binaries are built from source
|
`omnigraph-server` binaries on macOS / Linux, and `omnigraph.exe` plus
|
||||||
with `cargo build --release --features aws -p omnigraph-server` when needed.
|
`omnigraph-server.exe` on Windows. AWS-enabled server binaries are built from
|
||||||
|
source with `cargo build --release --features aws -p omnigraph-server` when
|
||||||
|
needed.
|
||||||
|
|
||||||
The AWS build adds ~150 transitive deps and ~30-60s of first-build compile
|
The AWS build adds ~150 transitive deps and ~30-60s of first-build compile
|
||||||
time. Default builds don't pay that cost.
|
time. Default builds don't pay that cost.
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@
|
||||||
- `Manifest(ManifestError { kind: BadRequest|NotFound|Conflict|Internal, details: Option<ManifestConflictDetails>, … })`
|
- `Manifest(ManifestError { kind: BadRequest|NotFound|Conflict|Internal, details: Option<ManifestConflictDetails>, … })`
|
||||||
- `ManifestConflictDetails::ExpectedVersionMismatch { table_key, expected, actual }` — caller's `expected_table_versions` did not match the manifest's current latest non-tombstoned version (set by `OmniError::manifest_expected_version_mismatch`).
|
- `ManifestConflictDetails::ExpectedVersionMismatch { table_key, expected, actual }` — caller's `expected_table_versions` did not match the manifest's current latest non-tombstoned version (set by `OmniError::manifest_expected_version_mismatch`).
|
||||||
- `ManifestConflictDetails::RowLevelCasContention` — Lance row-level CAS rejected the publish because a concurrent writer landed the same `object_id`. Retried internally by the publisher; only surfaces if the retry budget exhausts.
|
- `ManifestConflictDetails::RowLevelCasContention` — Lance row-level CAS rejected the publish because a concurrent writer landed the same `object_id`. Retried internally by the publisher; only surfaces if the retry budget exhausts.
|
||||||
- **D₂ parse-time rejection** (MR-794): a single mutation query that mixes inserts/updates with deletes errors out *before any I/O* with kind `BadRequest`. Message: `mutation '<name>' on the same query mixes inserts/updates and deletes; split into separate mutations: (1) inserts and updates, then (2) deletes`. See [docs/user/query-language.md](query-language.md) for the rule and [docs/dev/runs.md](../dev/runs.md) for the underlying staged-write rationale.
|
- **D₂ parse-time rejection** (MR-794): a single mutation query that mixes inserts/updates with deletes errors out *before any I/O* with kind `BadRequest`. Message: `mutation '<name>' on the same query mixes inserts/updates and deletes; split into separate mutations: (1) inserts and updates, then (2) deletes`. See [docs/user/query-language.md](query-language.md) for the rule and [docs/dev/writes.md](../dev/writes.md) for the underlying staged-write rationale.
|
||||||
- `MergeConflicts(Vec<MergeConflict>)`
|
- `MergeConflicts(Vec<MergeConflict>)`
|
||||||
|
|
||||||
Compiler-side `NanoError` covers parse / catalog / type / storage / plan / execution / arrow / lance / IO / manifest / unique-constraint, each with structured spans (`SourceSpan { start, end }`) for ariadne-style diagnostics.
|
Compiler-side `NanoError` covers parse / catalog / type / storage / plan / execution / arrow / lance / IO / manifest / unique-constraint, each with structured spans (`SourceSpan { start, end }`) for ariadne-style diagnostics.
|
||||||
|
|
|
||||||
|
|
@ -2,16 +2,29 @@
|
||||||
|
|
||||||
## Quick Install
|
## Quick Install
|
||||||
|
|
||||||
|
macOS / Linux:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | bash
|
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | bash
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Windows PowerShell:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
powershell -NoProfile -ExecutionPolicy Bypass -Command "iwr -UseBasicParsing https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.ps1 | iex"
|
||||||
|
```
|
||||||
|
|
||||||
By default the installer places:
|
By default the installer places:
|
||||||
|
|
||||||
- `omnigraph`
|
- `omnigraph`
|
||||||
- `omnigraph-server`
|
- `omnigraph-server`
|
||||||
|
|
||||||
in `~/.local/bin`.
|
in `~/.local/bin` on macOS / Linux, or:
|
||||||
|
|
||||||
|
- `omnigraph.exe`
|
||||||
|
- `omnigraph-server.exe`
|
||||||
|
|
||||||
|
in `%USERPROFILE%\.local\bin` on Windows.
|
||||||
|
|
||||||
The default installer is binary-only. It downloads a published release asset,
|
The default installer is binary-only. It downloads a published release asset,
|
||||||
verifies the SHA256 checksum, and unpacks it. It does not build from source.
|
verifies the SHA256 checksum, and unpacks it. It does not build from source.
|
||||||
|
|
@ -39,6 +52,13 @@ Rolling edge binaries from `main`:
|
||||||
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | RELEASE_CHANNEL=edge bash
|
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | RELEASE_CHANNEL=edge bash
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Windows rolling edge binaries:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
iwr -UseBasicParsing https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.ps1 -OutFile install.ps1
|
||||||
|
powershell -NoProfile -ExecutionPolicy Bypass -File .\install.ps1 -ReleaseChannel edge
|
||||||
|
```
|
||||||
|
|
||||||
Install from source:
|
Install from source:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -53,12 +73,24 @@ Install to a different directory:
|
||||||
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | INSTALL_DIR="$HOME/bin" bash
|
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | INSTALL_DIR="$HOME/bin" bash
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Windows:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
powershell -NoProfile -ExecutionPolicy Bypass -File .\install.ps1 -InstallDir "$env:USERPROFILE\bin"
|
||||||
|
```
|
||||||
|
|
||||||
Install a specific tag:
|
Install a specific tag:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | VERSION=v0.1.0 bash
|
curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/install.sh | VERSION=v0.1.0 bash
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Windows:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
powershell -NoProfile -ExecutionPolicy Bypass -File .\install.ps1 -Version v0.1.0
|
||||||
|
```
|
||||||
|
|
||||||
Build from a specific git ref:
|
Build from a specific git ref:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -67,27 +99,53 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/
|
||||||
|
|
||||||
## Manual Source Build
|
## Manual Source Build
|
||||||
|
|
||||||
|
macOS / Linux:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo build --release --locked -p omnigraph-cli -p omnigraph-server
|
cargo build --release --locked -p omnigraph-cli -p omnigraph-server
|
||||||
install -m 0755 target/release/omnigraph ~/.local/bin/omnigraph
|
install -m 0755 target/release/omnigraph ~/.local/bin/omnigraph
|
||||||
install -m 0755 target/release/omnigraph-server ~/.local/bin/omnigraph-server
|
install -m 0755 target/release/omnigraph-server ~/.local/bin/omnigraph-server
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Windows:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
cargo build --release --locked -p omnigraph-cli -p omnigraph-server
|
||||||
|
New-Item -ItemType Directory -Force "$env:USERPROFILE\.local\bin" | Out-Null
|
||||||
|
Copy-Item target\release\omnigraph.exe "$env:USERPROFILE\.local\bin\omnigraph.exe"
|
||||||
|
Copy-Item target\release\omnigraph-server.exe "$env:USERPROFILE\.local\bin\omnigraph-server.exe"
|
||||||
|
```
|
||||||
|
|
||||||
## Release Assets
|
## Release Assets
|
||||||
|
|
||||||
Tagged releases are expected to publish:
|
Tagged releases are expected to publish:
|
||||||
|
|
||||||
- `omnigraph-linux-x86_64.tar.gz`
|
- `omnigraph-linux-x86_64.tar.gz`
|
||||||
- `omnigraph-macos-arm64.tar.gz`
|
- `omnigraph-macos-arm64.tar.gz`
|
||||||
|
- `omnigraph-windows-x86_64.zip`
|
||||||
|
|
||||||
Each archive contains both binaries:
|
The macOS / Linux archives contain both binaries:
|
||||||
|
|
||||||
- `omnigraph`
|
- `omnigraph`
|
||||||
- `omnigraph-server`
|
- `omnigraph-server`
|
||||||
|
|
||||||
|
The Windows archive contains:
|
||||||
|
|
||||||
|
- `omnigraph.exe`
|
||||||
|
- `omnigraph-server.exe`
|
||||||
|
|
||||||
## Verify The Install
|
## Verify The Install
|
||||||
|
|
||||||
|
macOS / Linux:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
omnigraph version
|
omnigraph version
|
||||||
omnigraph-server --help
|
omnigraph-server --help
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Windows:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
omnigraph.exe version
|
||||||
|
omnigraph-server.exe --help
|
||||||
|
```
|
||||||
|
|
|
||||||
|
|
@ -4,19 +4,28 @@
|
||||||
|
|
||||||
## `optimize_all_tables(db)` — non-destructive
|
## `optimize_all_tables(db)` — non-destructive
|
||||||
|
|
||||||
- Lance `compact_files()` on every node + edge table on `main`.
|
- Lance `compact_files()` on every node + edge table on `main`, then **publishes the compacted version to the `__manifest`** so the manifest's `table_version` tracks the compacted Lance HEAD. Reads pin the manifest version, so without this publish compaction would be invisible to readers *and* would break the HEAD-vs-manifest precondition of the next schema apply / strict update/delete ("stale view … refresh and retry"). The publish advances the graph version (a system-attributed commit) only for tables that actually compacted.
|
||||||
- Rewrites small fragments into fewer large ones; old fragments remain reachable via older manifests.
|
- Rewrites small fragments into fewer large ones; old fragments remain reachable via older manifests until `cleanup` runs.
|
||||||
|
- Each table's compact→publish runs under its per-`(table, main)` write queue (serializing with concurrent mutations — compaction is a Lance `Rewrite` op that retryable-conflicts with a concurrent merge/update/delete on overlapping fragments). The Lance-HEAD-before-manifest-publish gap is covered by a `SidecarKind::Optimize` recovery sidecar (loose-match): a crash in that window rolls the compacted version forward on the next `Omnigraph::open` (compaction is content-preserving, so roll-forward is always safe).
|
||||||
|
- **Requires a recovered graph.** `optimize` refuses (errors) when an unresolved recovery sidecar is present under `__recovery` — operating on an unrecovered graph could publish a partial write the open-time recovery sweep would roll back. Reopen the graph to run the recovery sweep, then re-run `optimize`. (Recovery roll-back now publishes its restored version, so a recovered graph always satisfies `manifest == Lance HEAD` going in; there is no leftover drift for `optimize` to interpret.)
|
||||||
- Bounded by `OMNIGRAPH_MAINTENANCE_CONCURRENCY` (default 8).
|
- Bounded by `OMNIGRAPH_MAINTENANCE_CONCURRENCY` (default 8).
|
||||||
- Returns `[TableOptimizeStats { table_key, fragments_removed, fragments_added, committed }]`.
|
- Returns `[TableOptimizeStats { table_key, fragments_removed, fragments_added, committed, skipped }]`.
|
||||||
|
- **Blob tables are skipped.** A table that declares any `Blob` property is not compacted: it is reported with `skipped: Some(BlobColumnsUnsupportedByLance)` (and logged via `tracing::warn`) instead of being compacted, and the rest of the sweep proceeds normally. The current Lance `compact_files` mis-decodes blob-v2 columns under its forced `BlobHandling::AllBinary` read; **reads and writes are unaffected** — only compaction is. This skip is gated by `LANCE_SUPPORTS_BLOB_COMPACTION` (`db/omnigraph/optimize.rs`) and will be removed once the upstream Lance fix lands (see [docs/dev/lance.md](../dev/lance.md)). Consequence: fragment count and deleted-row space on blob tables are not reclaimed until then; query results are never affected.
|
||||||
|
|
||||||
## `cleanup_all_tables(db, options)` — destructive
|
## `cleanup_all_tables(db, options)` — destructive
|
||||||
|
|
||||||
- Lance `cleanup_old_versions()` per table.
|
- Lance `cleanup_old_versions()` per table.
|
||||||
- Removes manifests (and their unique fragments) older than the retention policy.
|
- Removes manifests (and their unique fragments) older than the retention policy.
|
||||||
- `CleanupPolicyOptions { keep_versions: Option<u32>, older_than: Option<Duration> }` — at least one is required.
|
- `CleanupPolicyOptions { keep_versions: Option<u32>, older_than: Option<Duration> }` — at least one is required.
|
||||||
- Returns `[TableCleanupStats { table_key, bytes_removed, old_versions_removed }]`.
|
- Returns `[TableCleanupStats { table_key, bytes_removed, old_versions_removed, error }]`.
|
||||||
|
- **Fault-isolated per table.** A single table's transient failure (version GC or
|
||||||
|
orphan reclaim) is recorded on that table's stats row (`error: Some(..)`, logged
|
||||||
|
via `tracing`) and never aborts cleanup of the healthy tables — cleanup is the convergence
|
||||||
|
backstop, so it does as much as it can and converges on re-run. The CLI reports
|
||||||
|
any failed tables; rerun `cleanup` to retry them.
|
||||||
- CLI guards with `--confirm`; without it, prints a preview line.
|
- CLI guards with `--confirm`; without it, prints a preview line.
|
||||||
- **Recovery floor:** `--keep < 3` may garbage-collect Lance versions that the open-time recovery sweep needs as a rollback target (the sweep restores to the branch's manifest-pinned table version, which is HEAD-1 in the typical Phase B → Phase C drift case). Default `--keep 10` is safe.
|
- **Recovery floor:** `--keep < 3` may garbage-collect Lance versions that the open-time recovery sweep needs as a rollback target (the sweep restores to the branch's manifest-pinned table version, which is HEAD-1 in the typical Phase B → Phase C drift case). Default `--keep 10` is safe.
|
||||||
|
- **Orphaned-branch reconciliation:** before the version GC, cleanup runs `reconcile_orphaned_branches`, which `force_delete_branch`es any per-table or commit-graph Lance branch absent from the manifest branch list. These orphans arise when a `branch_delete` flips the manifest authority but a downstream best-effort reclaim does not complete (see [branches-commits.md](branches-commits.md)). The reconciler is authority-derived and idempotent (it no-ops once nothing is orphaned), runs regardless of the `keep_versions` / `older_than` values (those gate version GC only), and never reclaims `main` or system-branch forks. Reclaimed forks are logged via `tracing::info`.
|
||||||
|
|
||||||
## Tombstones
|
## Tombstones
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,11 @@ Per-graph actions (bind to `Omnigraph::Graph::"<graph_id>"`):
|
||||||
6. `branch_delete`
|
6. `branch_delete`
|
||||||
7. `branch_merge`
|
7. `branch_merge`
|
||||||
8. `admin` — reserved for policy-management surfaces (hot reload, audit log, approvals). No call site today; see MR-724 for the reservation rationale.
|
8. `admin` — reserved for policy-management surfaces (hot reload, audit log, approvals). No call site today; see MR-724 for the reservation rationale.
|
||||||
|
9. `invoke_query` — gates invoking a server-side stored query (the `queries:` registry). Graph-scoped (like `admin`) — per-branch access is enforced by the inner `read` / `change` gate, so a rule that sets `branch_scope` on `invoke_query` is rejected. Coarse in this release: an `invoke_query` allow rule permits any stored query on the graph; a future, additive refinement adds an optional per-query-name scope without changing rules written against the coarse action. Enforced at `POST /queries/{name}` (see [server](server.md)). A stored *mutation* is double-gated: `invoke_query` to reach the tool, plus `change` for the write itself (the engine `_as` writers still enforce per the query body).
|
||||||
|
|
||||||
Server-scoped action (v0.6.0+; binds to `Omnigraph::Server::"root"`):
|
Server-scoped action (v0.6.0+; binds to `Omnigraph::Server::"root"`):
|
||||||
|
|
||||||
9. `graph_list` — `GET /graphs` registry enumeration (multi-graph mode)
|
10. `graph_list` — `GET /graphs` registry enumeration (multi-graph mode)
|
||||||
|
|
||||||
Server-scoped actions cannot use `branch_scope` or `target_branch_scope` — they operate on the registry, not on a graph's branches. A rule cannot mix server-scoped and per-graph actions; split into separate rules. (Runtime `graph_create` / `graph_delete` are reserved but not shipped in v0.6.0; operators add/remove graphs by editing `omnigraph.yaml` and restarting.)
|
Server-scoped actions cannot use `branch_scope` or `target_branch_scope` — they operate on the registry, not on a graph's branches. A rule cannot mix server-scoped and per-graph actions; split into separate rules. (Runtime `graph_create` / `graph_delete` are reserved but not shipped in v0.6.0; operators add/remove graphs by editing `omnigraph.yaml` and restarting.)
|
||||||
|
|
||||||
|
|
@ -46,10 +47,15 @@ graphs:
|
||||||
# no per-graph policy → no engine-layer Cedar enforcement on beta
|
# no per-graph policy → no engine-layer Cedar enforcement on beta
|
||||||
```
|
```
|
||||||
|
|
||||||
Top-level `policy.file` is single-graph / CLI-local policy only. Multi-graph
|
**Config follows graph identity, not server mode.** A graph served by **name**
|
||||||
server startup rejects it because applying one graph policy to every configured
|
(`--target <name>` or `server.graph`) uses its own `graphs.<name>.policy.file`,
|
||||||
graph is ambiguous. Move per-graph rules to `graphs.<graph_id>.policy.file` and
|
exactly as in multi-graph mode. Top-level `policy.file` applies only to an
|
||||||
move `graph_list` rules to `server.policy.file`.
|
**anonymous** graph — one served by a bare `<URI>` with no `graphs:` entry.
|
||||||
|
Serving a **named** graph (single- or multi-graph mode) while top-level
|
||||||
|
`policy.file` (or `queries:`) is populated makes the server **refuse to boot**, naming the block,
|
||||||
|
since the top-level value would otherwise be silently shadowed by the per-graph
|
||||||
|
block. Move per-graph rules to `graphs.<graph_id>.policy.file` and `graph_list`
|
||||||
|
rules to `server.policy.file`.
|
||||||
|
|
||||||
Each graph's HTTP request flows through its own per-graph policy. The management endpoint (`GET /graphs`) flows through the server-level policy. When `server.policy.file` is unset, `GET /graphs` is denied in every runtime state, including `--unauthenticated`; with bearer tokens configured, it returns 403 after admission control because `graph_list` is not a `read`-equivalent action. The operator must explicitly authorize via `server-policy.yaml` to expose `/graphs`.
|
Each graph's HTTP request flows through its own per-graph policy. The management endpoint (`GET /graphs`) flows through the server-level policy. When `server.policy.file` is unset, `GET /graphs` is denied in every runtime state, including `--unauthenticated`; with bearer tokens configured, it returns 403 after admission control because `graph_list` is not a `read`-equivalent action. The operator must explicitly authorize via `server-policy.yaml` to expose `/graphs`.
|
||||||
|
|
||||||
|
|
@ -92,6 +98,10 @@ bearer token.
|
||||||
|
|
||||||
## CLI
|
## CLI
|
||||||
|
|
||||||
|
Policy tooling resolves its graph like server single-mode policy: `cli.graph`
|
||||||
|
wins, otherwise `server.graph` is used, otherwise the top-level `policy.file`
|
||||||
|
is validated/tested/explained as the anonymous policy.
|
||||||
|
|
||||||
- `omnigraph policy validate` — parse + count actors, exit 1 on parse error.
|
- `omnigraph policy validate` — parse + count actors, exit 1 on parse error.
|
||||||
- `omnigraph policy test` — run cases in `policy.tests.yaml`, exit 1 on any expectation mismatch.
|
- `omnigraph policy test` — run cases in `policy.tests.yaml`, exit 1 on any expectation mismatch.
|
||||||
- `omnigraph policy explain --actor … --action … [--branch …] [--target-branch …]` — show decision and matched rule.
|
- `omnigraph policy explain --actor … --action … [--branch …] [--target-branch …]` — show decision and matched rule.
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ A single mutation query must be **either insert/update-only or delete-only**. Mi
|
||||||
|
|
||||||
> `mutation '<name>' on the same query mixes inserts/updates and deletes; split into separate mutations: (1) inserts and updates, then (2) deletes. This restriction lifts when Lance exposes a two-phase delete API (tracked: MR-793 / Lance-upstream).`
|
> `mutation '<name>' on the same query mixes inserts/updates and deletes; split into separate mutations: (1) inserts and updates, then (2) deletes. This restriction lifts when Lance exposes a two-phase delete API (tracked: MR-793 / Lance-upstream).`
|
||||||
|
|
||||||
Reason: under the staged-write rewire (MR-794), inserts and updates accumulate in memory and commit at end-of-query, while deletes still inline-commit (Lance 4.0.0 has no public two-phase delete). Mixing creates ordering hazards (same-row insert→delete becomes a no-op because the staged insert isn't visible to delete; cascading deletes of just-inserted edges break referential integrity by silent design). Until Lance exposes `DeleteJob::execute_uncommitted`, the parse-time rejection keeps both paths atomic and correct. See [docs/dev/runs.md](../dev/runs.md) and [docs/dev/invariants.md](../dev/invariants.md).
|
Reason: under the staged-write rewire (MR-794), inserts and updates accumulate in memory and commit at end-of-query, while deletes still inline-commit (Lance 4.0.0 has no public two-phase delete). Mixing creates ordering hazards (same-row insert→delete becomes a no-op because the staged insert isn't visible to delete; cascading deletes of just-inserted edges break referential integrity by silent design). Until Lance exposes `DeleteJob::execute_uncommitted`, the parse-time rejection keeps both paths atomic and correct. See [docs/dev/writes.md](../dev/writes.md) and [docs/dev/invariants.md](../dev/invariants.md).
|
||||||
|
|
||||||
## IR (Intermediate Representation)
|
## IR (Intermediate Representation)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,9 @@ Axum 0.8 + tokio + utoipa-generated OpenAPI. **Two modes** (v0.6.0+): single-gra
|
||||||
|
|
||||||
### Single-graph mode (legacy)
|
### Single-graph mode (legacy)
|
||||||
|
|
||||||
`omnigraph-server <URI>` or `omnigraph-server --target <name> --config omnigraph.yaml`. Routes are flat — `/snapshot`, `/read`, `/branches`, etc. Behavior unchanged from v0.6.0.
|
`omnigraph-server <URI>` or `omnigraph-server --target <name> --config omnigraph.yaml`. Routes are flat — `/snapshot`, `/read`, `/branches`, etc.
|
||||||
|
|
||||||
|
**Config follows graph identity.** A bare `<URI>` is an *anonymous* graph and uses the **top-level** `policy.file` / `queries:`. A graph chosen by **name** (`--target` / `server.graph`) uses its own `graphs.<name>.{policy.file, queries}` — the same block multi-graph mode uses. ⚠️ *Changed from v0.6.0, which always used top-level config in single mode: a named-graph config that puts `policy`/`queries` at top-level now makes the server **refuse to boot** and points you at `graphs.<name>.…` (move the block there). Bare-`<URI>` single mode is unchanged.*
|
||||||
|
|
||||||
### Multi-graph mode (v0.6.0+)
|
### Multi-graph mode (v0.6.0+)
|
||||||
|
|
||||||
|
|
@ -20,6 +22,10 @@ Mode inference (four-rule matrix):
|
||||||
4. `--config` + non-empty `graphs:` + no single-mode selector → **multi**
|
4. `--config` + non-empty `graphs:` + no single-mode selector → **multi**
|
||||||
5. otherwise → error with migration hint
|
5. otherwise → error with migration hint
|
||||||
|
|
||||||
|
### Stored-query validation at startup
|
||||||
|
|
||||||
|
If a graph declares a `queries:` registry (see [cli-reference](cli-reference.md)), the server **loads and type-checks every stored query against that graph's live schema at startup** and **refuses to boot** if any query references a type or property the schema lacks — the same fail-loud posture as a malformed policy file, so schema drift surfaces at the deploy boundary rather than at invocation. Two MCP-exposed queries claiming the same tool name is likewise a boot error. Non-blocking advisories (e.g. an MCP-exposed query with a vector parameter an agent cannot supply) are logged. Validate offline before deploying with `omnigraph queries validate`. Discover the exposed queries as a typed tool catalog with `GET /queries`, and invoke one over HTTP with `POST /queries/{name}` (both below).
|
||||||
|
|
||||||
## Endpoint inventory
|
## Endpoint inventory
|
||||||
|
|
||||||
Per-graph endpoints — same body shape across modes; URLs differ:
|
Per-graph endpoints — same body shape across modes; URLs differ:
|
||||||
|
|
@ -34,6 +40,8 @@ Per-graph endpoints — same body shape across modes; URLs differ:
|
||||||
| POST | `/export` | `/graphs/{id}/export` | bearer + `export` | NDJSON stream | `server_export` |
|
| POST | `/export` | `/graphs/{id}/export` | bearer + `export` | NDJSON stream | `server_export` |
|
||||||
| POST | `/mutate` | `/graphs/{id}/mutate` | bearer + `change` | mutation (canonical; `query`/`name`; accepts legacy `query_source`/`query_name` as serde aliases) | `server_mutate` |
|
| POST | `/mutate` | `/graphs/{id}/mutate` | bearer + `change` | mutation (canonical; `query`/`name`; accepts legacy `query_source`/`query_name` as serde aliases) | `server_mutate` |
|
||||||
| POST | `/change` | `/graphs/{id}/change` | bearer + `change` | **deprecated** alias of `/mutate` (carries `Deprecation: true` + `Link: </mutate>; rel="successor-version"`) | `server_change` |
|
| POST | `/change` | `/graphs/{id}/change` | bearer + `change` | **deprecated** alias of `/mutate` (carries `Deprecation: true` + `Link: </mutate>; rel="successor-version"`) | `server_change` |
|
||||||
|
| GET | `/queries` | `/graphs/{id}/queries` | bearer + `read` | list the `mcp.expose` stored queries as a typed tool catalog | `server_list_queries` |
|
||||||
|
| POST | `/queries/{name}` | `/graphs/{id}/queries/{name}` | bearer + `invoke_query` (+ `change` for a stored mutation) | invoke a named query from the `queries:` registry; deny == 404 | `server_invoke_query` |
|
||||||
| GET | `/schema` | `/graphs/{id}/schema` | bearer + `read` | get current `.pg` source | `server_schema_get` |
|
| GET | `/schema` | `/graphs/{id}/schema` | bearer + `read` | get current `.pg` source | `server_schema_get` |
|
||||||
| POST | `/schema/apply` | `/graphs/{id}/schema/apply` | bearer + `schema_apply` (target=`main`) | migrate | `server_schema_apply` |
|
| POST | `/schema/apply` | `/graphs/{id}/schema/apply` | bearer + `schema_apply` (target=`main`) | migrate | `server_schema_apply` |
|
||||||
| POST | `/ingest` | `/graphs/{id}/ingest` | bearer + `branch_create` (if new) + `change` | bulk load | `server_ingest` (32 MB body limit) |
|
| POST | `/ingest` | `/graphs/{id}/ingest` | bearer + `branch_create` (if new) + `change` | bulk load | `server_ingest` (32 MB body limit) |
|
||||||
|
|
@ -50,6 +58,23 @@ Server-level management endpoints (v0.6.0+):
|
||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
| GET | `/graphs` | bearer + `graph_list` on `Server::"root"` | list registered graphs | `server_graphs_list` (405 in single mode) |
|
| GET | `/graphs` | bearer + `graph_list` on `Server::"root"` | list registered graphs | `server_graphs_list` (405 in single mode) |
|
||||||
|
|
||||||
|
### Stored-query catalog (`GET /queries`)
|
||||||
|
|
||||||
|
List the graph's **`mcp.expose`** stored queries as a typed tool catalog — enough for a client (e.g. an MCP server) to register each as a tool without fetching `.gq` source. Each entry: `{ name, tool_name, description, instruction, mutation, params }`, where each param is `{ name, kind, item_kind?, vector_dim?, nullable }`. `kind` is one of `string | bool | int | bigint | float | date | datetime | blob | vector | list` (decomposed so a consumer maps it with a closed `switch`, never re-parsing GQ type spelling). `bigint` (I64/U64), `date`, `datetime`, and `blob` are carried as JSON **strings** — a 64-bit integer beyond 2^53 loses precision as a JSON number, dates are ISO strings, and a blob is a URI string.
|
||||||
|
|
||||||
|
- **Read-gated** (works in default-deny mode). The catalog is **graph-wide** (branch-independent; `read` is authorized against `main`).
|
||||||
|
- **`mcp.expose` defaults to `true`** — declaring a query in `queries:` lists it; set `mcp: { expose: false }` to keep it HTTP/service-callable but hidden from the catalog.
|
||||||
|
- **Not Cedar-filtered per query (yet).** A caller with `read` but not `invoke_query` can *list* a query they can't *invoke* (which would 404). Closing that gap is future per-query authorization; for now the catalog is a discovery surface and `invoke_query` remains the invocation gate.
|
||||||
|
|
||||||
|
### Stored-query invocation (`POST /queries/{name}`)
|
||||||
|
|
||||||
|
Invoke a curated, server-side stored query by **name** — the source comes from the graph's `queries:` registry, so the client never sends `.gq`. The request body itself is optional; omit it for no-param queries, or send `{ "params": { … }, "branch": "main", "snapshot": null }`, where every field is optional and `params` keys match the query's declared parameters. The response is the **read envelope** (`ReadOutput`) for a stored read or the **mutation envelope** (`ChangeOutput`) for a stored mutation — serialized untagged, so the wire shape is identical to `/query` / `/mutate`.
|
||||||
|
|
||||||
|
- **Gate:** `invoke_query` (per-graph, graph-scoped) at the boundary. A stored *mutation* is **double-gated** — it also passes the engine's `change` gate, so an actor with `invoke_query` but not `change` gets `403`.
|
||||||
|
- **Deny == unknown, for callers without `invoke_query`:** an `invoke_query` denial and an unknown query name return the **same `404`** (identical body), so the catalog can't be probed. A caller that *holds* `invoke_query` may still get the inner gate's `403` for an existing query it can't `read`/`change` (the double-gate, above) — so existence is visible to grant-holders by design.
|
||||||
|
- **Requires an explicit policy grant when auth is on.** In default-deny mode (bearer tokens but no `policy.file`), only `read` is permitted, so *every* `/queries/{name}` call returns `404` until an `invoke_query` rule is configured.
|
||||||
|
- A stored mutation cannot target a `snapshot` (`400`); a parameter type error is a structured `400` naming the parameter.
|
||||||
|
|
||||||
## Adding and removing graphs (multi mode)
|
## Adding and removing graphs (multi mode)
|
||||||
|
|
||||||
Runtime add/remove via API is **not** exposed in v0.6.0 — neither
|
Runtime add/remove via API is **not** exposed in v0.6.0 — neither
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin
|
||||||
- `edges/{fnv1a64-hex(edge_type_name)}` — one Lance dataset per edge type
|
- `edges/{fnv1a64-hex(edge_type_name)}` — one Lance dataset per edge type
|
||||||
- `__manifest/` — the catalog of all sub-tables and their published versions
|
- `__manifest/` — the catalog of all sub-tables and their published versions
|
||||||
- `_graph_commits.lance` / `_graph_commit_actors.lance` — the commit graph and its actor map
|
- `_graph_commits.lance` / `_graph_commit_actors.lance` — the commit graph and its actor map
|
||||||
- (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 graphs are inert; the run state machine was removed in MR-771 and these files are cleaned up via MR-770's production sweep)
|
- (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 graphs are inert; the run state machine was removed in MR-771. The v2→v3 manifest migration sweeps stale `__run__*` branches on first write-open; the inert dataset bytes themselves remain until a `delete_prefix` storage primitive lands)
|
||||||
- **Manifest row schema** (`object_id, object_type, location, metadata, base_objects, table_key, table_version, table_branch, row_count`):
|
- **Manifest row schema** (`object_id, object_type, location, metadata, base_objects, table_key, table_version, table_branch, row_count`):
|
||||||
- `object_type` ∈ `table | table_version | table_tombstone`
|
- `object_type` ∈ `table | table_version | table_tombstone`
|
||||||
- `table_key` ∈ `node:<TypeName> | edge:<EdgeName>`
|
- `table_key` ∈ `node:<TypeName> | edge:<EdgeName>`
|
||||||
|
|
@ -47,6 +47,7 @@ Adding a new on-disk shape change is one constant bump (`INTERNAL_MANIFEST_SCHEM
|
||||||
|---|---|
|
|---|---|
|
||||||
| v1 (implicit, pre-stamp) | `__manifest.object_id` had no PK annotation; publisher had no row-level CAS protection. |
|
| v1 (implicit, pre-stamp) | `__manifest.object_id` had no PK annotation; publisher had no row-level CAS protection. |
|
||||||
| v2 | `__manifest.object_id` carries `lance-schema:unenforced-primary-key=true`; row-level CAS engaged. Stamped as `omnigraph:internal_schema_version=2`. |
|
| v2 | `__manifest.object_id` carries `lance-schema:unenforced-primary-key=true`; row-level CAS engaged. Stamped as `omnigraph:internal_schema_version=2`. |
|
||||||
|
| v3 | One-time sweep of legacy `__run__*` staging branches (pre-v0.4.0 Run state machine, removed MR-771) off `__manifest`. Runs at `Omnigraph::open(ReadWrite)` and on publish. Stamped as `omnigraph:internal_schema_version=3`. |
|
||||||
|
|
||||||
## On-disk layout
|
## On-disk layout
|
||||||
|
|
||||||
|
|
@ -91,9 +92,9 @@ flowchart TB
|
||||||
- **Graph root** is one directory (or S3 prefix). Everything below is part of one OmniGraph graph.
|
- **Graph root** is one directory (or S3 prefix). Everything below is part of one OmniGraph graph.
|
||||||
- **`__manifest/`** is a Lance dataset whose rows describe which sub-table version is published at which graph-branch. Reading a snapshot starts here.
|
- **`__manifest/`** is a Lance dataset whose rows describe which sub-table version is published at which graph-branch. Reading a snapshot starts here.
|
||||||
- **`nodes/`** and **`edges/`** are sibling directories holding one Lance dataset per declared type. Names are `fnv1a64-hex` of the type name to keep paths fixed-length and case-safe.
|
- **`nodes/`** and **`edges/`** are sibling directories holding one Lance dataset per declared type. Names are `fnv1a64-hex` of the type name to keep paths fixed-length and case-safe.
|
||||||
- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 graphs also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; MR-770 sweeps these in production.)
|
- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 graphs also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; the v2→v3 migration sweeps their stale `__run__*` branches, and the dataset bytes are reclaimed once `delete_prefix` lands.)
|
||||||
- **`_graph_commit_recoveries.lance`** — one row per recovery sweep action. Joined to `_graph_commits.lance` by `graph_commit_id`; the linked commit row carries `actor_id=omnigraph:recovery`. Operators correlate recoveries with the original mutations they rolled forward / back via this join. See `crates/omnigraph/src/db/recovery_audit.rs`.
|
- **`_graph_commit_recoveries.lance`** — one row per recovery sweep action. Joined to `_graph_commits.lance` by `graph_commit_id`; the linked commit row carries `actor_id=omnigraph:recovery`. Operators correlate recoveries with the original mutations they rolled forward / back via this join. See `crates/omnigraph/src/db/recovery_audit.rs`.
|
||||||
- **`__recovery/{ulid}.json`** — transient sidecar files written by the four migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) before Phase B begins, deleted after Phase C succeeds. A sidecar persisting after process exit means the writer crashed in the Phase B → Phase C window; the next `Omnigraph::open` recovery sweep processes it. Steady-state directory is empty. See `crates/omnigraph/src/db/manifest/recovery.rs`.
|
- **`__recovery/{ulid}.json`** — transient sidecar files written by the five migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`, `optimize_all_tables`) before Phase B begins, deleted after Phase C succeeds. A sidecar persisting after process exit means the writer crashed in the Phase B → Phase C window; the next `Omnigraph::open` recovery sweep processes it. Steady-state directory is empty. See `crates/omnigraph/src/db/manifest/recovery.rs`.
|
||||||
- **`_refs/branches/{name}.json`** is graph-level branch metadata — pointers from a branch name to the manifest version it heads.
|
- **`_refs/branches/{name}.json`** is graph-level branch metadata — pointers from a branch name to the manifest version it heads.
|
||||||
- **Inside each Lance dataset** (orange): the standard Lance directory layout. `_versions/{n}.manifest` records every commit; `data/` holds the actual Arrow fragments; `_indices/{uuid}/` holds index segments with their own `fragment_bitmap` for partial coverage; `_refs/` holds Lance-native per-dataset branches and tags.
|
- **Inside each Lance dataset** (orange): the standard Lance directory layout. `_versions/{n}.manifest` records every commit; `data/` holds the actual Arrow fragments; `_indices/{uuid}/` holds index segments with their own `fragment_bitmap` for partial coverage; `_refs/` holds Lance-native per-dataset branches and tags.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -164,5 +164,5 @@ This is the workflow MR-797 / agentic loops are designed around: **branches are
|
||||||
- [`docs/user/branches-commits.md`](branches-commits.md) — branch and commit-graph mechanics.
|
- [`docs/user/branches-commits.md`](branches-commits.md) — branch and commit-graph mechanics.
|
||||||
- [`docs/dev/merge.md`](../dev/merge.md) — three-way merge details and conflict kinds.
|
- [`docs/dev/merge.md`](../dev/merge.md) — three-way merge details and conflict kinds.
|
||||||
- [`docs/user/query-language.md`](query-language.md) — `.gq` syntax for the multi-statement queries used above.
|
- [`docs/user/query-language.md`](query-language.md) — `.gq` syntax for the multi-statement queries used above.
|
||||||
- [`docs/dev/runs.md`](../dev/runs.md) — the per-query commit pipeline that gives single-query atomicity.
|
- [`docs/dev/writes.md`](../dev/writes.md) — the per-query commit pipeline that gives single-query atomicity.
|
||||||
- [`docs/dev/invariants.md`](../dev/invariants.md) — the architectural rule.
|
- [`docs/dev/invariants.md`](../dev/invariants.md) — the architectural rule.
|
||||||
|
|
|
||||||
321
openapi.json
321
openapi.json
|
|
@ -7,7 +7,7 @@
|
||||||
"name": "MIT",
|
"name": "MIT",
|
||||||
"identifier": "MIT"
|
"identifier": "MIT"
|
||||||
},
|
},
|
||||||
"version": "0.6.0"
|
"version": "0.6.1"
|
||||||
},
|
},
|
||||||
"paths": {
|
"paths": {
|
||||||
"/branches": {
|
"/branches": {
|
||||||
|
|
@ -829,6 +829,177 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/queries": {
|
||||||
|
"get": {
|
||||||
|
"tags": [
|
||||||
|
"queries"
|
||||||
|
],
|
||||||
|
"summary": "List the graph's exposed stored queries as a typed tool catalog.",
|
||||||
|
"description": "Returns the `mcp.expose == true` subset of the `queries:` registry, each\nwith its MCP tool name, read/mutate flag, description/instruction, and\ntyped parameters — enough for a client to register them as tools without\nfetching `.gq` source. Read-gated; the catalog is graph-wide (branch\nindependent — `read` is authorized against `main`). **Not** Cedar-filtered\nper query yet, so it can list a query whose `invoke_query` the caller\nlacks (a known gap until per-query authorization lands).",
|
||||||
|
"operationId": "list_queries",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Stored-query catalog (the mcp.expose subset, with typed params)",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/QueriesCatalogOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Unauthorized",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"description": "Forbidden",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"bearer_token": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/queries/{name}": {
|
||||||
|
"post": {
|
||||||
|
"tags": [
|
||||||
|
"queries"
|
||||||
|
],
|
||||||
|
"summary": "Invoke a curated, server-side stored query by name.",
|
||||||
|
"description": "The query source comes from the graph's `queries:` registry, not the\nrequest body — callers send only runtime inputs (`params`, `branch`,\n`snapshot`). Gated by the `invoke_query` Cedar action at the boundary;\na stored *mutation* additionally passes the engine's `change` gate\n(double-gated). An actor **without** `invoke_query` cannot tell a denied\nquery from a missing one — both return the same 404, so the catalog\ncan't be probed without the grant. Once `invoke_query` is held, the\ninner `read`/`change` gate may surface a 403 for an existing query the\nactor can't run (the intended double-gate signal).",
|
||||||
|
"operationId": "invoke_query",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "name",
|
||||||
|
"in": "path",
|
||||||
|
"description": "Stored query name (the registry key)",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/InvokeStoredQueryRequest"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Read envelope (ReadOutput) or mutation envelope (ChangeOutput), serialized untagged",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/InvokeStoredQueryResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad request (param type error; snapshot on a stored mutation)",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Unauthorized",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"description": "Forbidden (the inner `change` gate for a stored mutation)",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "Unknown stored query, or `invoke_query` denied — indistinguishable to a caller without the grant",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"409": {
|
||||||
|
"description": "Merge conflict",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"description": "Per-actor admission cap exceeded; honor `Retry-After` header",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Policy evaluation error (a denial is reported as 404, not 500)",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ErrorOutput"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"bearer_token": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
"/query": {
|
"/query": {
|
||||||
"post": {
|
"post": {
|
||||||
"tags": [
|
"tags": [
|
||||||
|
|
@ -1628,6 +1799,40 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"InvokeStoredQueryRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Body for `POST /queries/{name}` — invokes the server-side stored query\nnamed in the path. The query source and name come from the registry,\nnever the body; only the runtime inputs are supplied here.",
|
||||||
|
"properties": {
|
||||||
|
"branch": {
|
||||||
|
"type": [
|
||||||
|
"string",
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"description": "Branch to run against. Defaults to `main`; for a stored mutation the\nwrite targets this branch."
|
||||||
|
},
|
||||||
|
"params": {
|
||||||
|
"description": "JSON object whose keys match the stored query's declared parameters."
|
||||||
|
},
|
||||||
|
"snapshot": {
|
||||||
|
"type": [
|
||||||
|
"string",
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"description": "Snapshot id to read from (read queries only — rejected for a stored\nmutation). Mutually exclusive with `branch`."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"InvokeStoredQueryResponse": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ReadOutput"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ChangeOutput"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Response for `POST /queries/{name}`: the read envelope for a stored\nread, or the mutation envelope for a stored mutation. Serialized\n**untagged**, so the wire shape is exactly [`ReadOutput`] or\n[`ChangeOutput`] — classification follows the stored query, not a\nwrapper field."
|
||||||
|
},
|
||||||
"LoadMode": {
|
"LoadMode": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Shadow enum for documenting [`LoadMode`] in the OpenAPI schema.",
|
"description": "Shadow enum for documenting [`LoadMode`] in the OpenAPI schema.",
|
||||||
|
|
@ -1698,6 +1903,120 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"ParamDescriptor": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "One declared parameter of a stored query, projected for the catalog.",
|
||||||
|
"required": [
|
||||||
|
"name",
|
||||||
|
"kind",
|
||||||
|
"nullable"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"item_kind": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ParamKind",
|
||||||
|
"description": "Element kind when `kind == list` (always a scalar — the grammar\nforbids lists of vectors or nested lists)."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"kind": {
|
||||||
|
"$ref": "#/components/schemas/ParamKind"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"nullable": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "`false` → the caller must supply it; `true` → optional."
|
||||||
|
},
|
||||||
|
"vector_dim": {
|
||||||
|
"type": [
|
||||||
|
"integer",
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"format": "int32",
|
||||||
|
"description": "Dimension when `kind == vector`.",
|
||||||
|
"minimum": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ParamKind": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The kind of a stored-query parameter, decomposed so a client (e.g. an\nMCP server) can build a typed input schema with a closed `match` and\nnever re-parse omnigraph's type spelling. `bigint`/`date`/`datetime`/\n`blob` are carried as JSON strings on the wire: a 64-bit integer past\n2^53 loses precision as a JSON number, and Date/DateTime are ISO\nstrings, Blob a blob-URI string.",
|
||||||
|
"enum": [
|
||||||
|
"string",
|
||||||
|
"bool",
|
||||||
|
"int",
|
||||||
|
"bigint",
|
||||||
|
"float",
|
||||||
|
"date",
|
||||||
|
"datetime",
|
||||||
|
"blob",
|
||||||
|
"vector",
|
||||||
|
"list"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"QueriesCatalogOutput": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Response for `GET /queries`: the `mcp.expose` subset of a graph's\nstored-query registry, each with typed parameters.",
|
||||||
|
"required": [
|
||||||
|
"queries"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"queries": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/QueryCatalogEntry"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"QueryCatalogEntry": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "One entry in the stored-query catalog (`GET /queries`).",
|
||||||
|
"required": [
|
||||||
|
"name",
|
||||||
|
"tool_name",
|
||||||
|
"mutation",
|
||||||
|
"params"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"description": {
|
||||||
|
"type": [
|
||||||
|
"string",
|
||||||
|
"null"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"instruction": {
|
||||||
|
"type": [
|
||||||
|
"string",
|
||||||
|
"null"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mutation": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "`true` for a stored mutation → an MCP read-only hint of `false`."
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Registry key / invoke path segment (`POST /queries/{name}`)."
|
||||||
|
},
|
||||||
|
"params": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/ParamDescriptor"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tool_name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "MCP tool id (the `tool_name` override, else `name`)."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"QueryRequest": {
|
"QueryRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"description": "Inline read-query request for `POST /query`.\n\nFriendlier-named alternative to [`ReadRequest`] for ad-hoc reads and\nAI-agent integration. Mutations are rejected with 400 — use `POST\n/mutate` (or its deprecated alias `POST /change`) for write queries.\nField names are deliberately short (`query`, `name`) to match the GQ\nkeyword and the CLI `-e` flag.",
|
"description": "Inline read-query request for `POST /query`.\n\nFriendlier-named alternative to [`ReadRequest`] for ad-hoc reads and\nAI-agent integration. Mutations are rejected with 400 — use `POST\n/mutate` (or its deprecated alias `POST /change`) for write queries.\nField names are deliberately short (`query`, `name`) to match the GQ\nkeyword and the CLI `-e` flag.",
|
||||||
|
|
|
||||||
|
|
@ -34,10 +34,15 @@ PY
|
||||||
canonical=()
|
canonical=()
|
||||||
while IFS= read -r line; do
|
while IFS= read -r line; do
|
||||||
canonical+=("$line")
|
canonical+=("$line")
|
||||||
done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' ! -path 'docs/internal/*' | sort)
|
done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' ! -path 'docs/internal/*' ! -path 'docs/rfcs/*' | sort)
|
||||||
if [[ -d docs/releases ]]; then
|
if [[ -d docs/releases ]]; then
|
||||||
canonical+=("docs/releases/")
|
canonical+=("docs/releases/")
|
||||||
fi
|
fi
|
||||||
|
# RFCs are a growing collection (like releases): represent the directory, not
|
||||||
|
# every per-RFC file. The dir must be linked from an audience index.
|
||||||
|
if [[ -d docs/rfcs ]]; then
|
||||||
|
canonical+=("docs/rfcs/")
|
||||||
|
fi
|
||||||
|
|
||||||
linked=()
|
linked=()
|
||||||
for index_file in "${index_files[@]}"; do
|
for index_file in "${index_files[@]}"; do
|
||||||
|
|
|
||||||
151
scripts/install.ps1
Normal file
151
scripts/install.ps1
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
# Script parameters for the Windows installer.
#   RepoSlug       - GitHub "owner/repo" used to build release download URLs.
#   InstallDir     - directory the omnigraph executables are copied into.
#   ReleaseChannel - "stable" or "edge"; selects the release URL when no Version is given.
#   Version        - when non-empty, used as the release path segment and takes
#                    precedence over ReleaseChannel.
param(
|
||||||
|
[string]$RepoSlug = "ModernRelay/omnigraph",
|
||||||
|
[string]$InstallDir = "$env:USERPROFILE\.local\bin",
|
||||||
|
[ValidateSet("stable", "edge")]
|
||||||
|
[string]$ReleaseChannel = "stable",
|
||||||
|
[string]$Version = ""
|
||||||
|
)
|
||||||
|
|
||||||
|
# Treat every error as terminating so a failed step aborts the install.
$ErrorActionPreference = "Stop"
|
||||||
|
|
||||||
|
# Release asset file name, and the stem used for its "<stem>.sha256" checksum file.
$assetName = "omnigraph-windows-x86_64.zip"
|
||||||
|
$assetStem = "omnigraph-windows-x86_64"
|
||||||
|
# Per-run scratch directory under the system temp path (GUID-suffixed, so unique per run).
$workDir = Join-Path ([System.IO.Path]::GetTempPath()) ("omnigraph-install-" + [System.Guid]::NewGuid().ToString("N"))
|
||||||
|
# Filled in by Install-FromRelease with the version or channel actually downloaded from.
$selectedChannel = ""
|
||||||
|
|
||||||
|
# Print one installer progress line to the host, prefixed with "==> ".
function Write-Log {
    param(
        [string]$Message
    )

    Write-Host -Object "==> $Message"
}
|
||||||
|
|
||||||
|
# Resolve the GitHub release download base URL (no trailing slash).
#
# Reads the script-level $RepoSlug and $Version. A non-empty $Version always
# wins and $Channel is then not inspected; otherwise $Channel must be
# "stable" or "edge", and anything else throws.
function Get-ReleaseBaseUrl {
    param(
        [string]$Channel
    )

    # A pinned version short-circuits channel selection entirely.
    if ($Version -ne "") {
        return "https://github.com/$RepoSlug/releases/download/$Version"
    }

    # switch compares strings case-insensitively, same as the -eq operator.
    switch ($Channel) {
        "stable" { return "https://github.com/$RepoSlug/releases/latest/download" }
        "edge" { return "https://github.com/$RepoSlug/releases/download/edge" }
        default { throw "unsupported ReleaseChannel '$Channel' (expected stable or edge)" }
    }
}
|
||||||
|
|
||||||
|
# Download the release archive and its SHA-256 checksum file from $BaseUrl.
#
# Fetches "$BaseUrl/$assetName" into $ArchivePath and
# "$BaseUrl/$assetStem.sha256" into $ChecksumPath ($assetName / $assetStem are
# script-level variables).
#
# Returns $true when both downloads succeed and $false when either fails, so
# the caller can fall back to another channel. The failure reason is written
# to the verbose stream (visible with $VerbosePreference = "Continue") instead
# of being discarded.
function Download-ReleaseFiles {
    param(
        [string]$BaseUrl,
        [string]$ArchivePath,
        [string]$ChecksumPath
    )

    # Function-local override: rendering the progress bar is known to slow
    # Invoke-WebRequest downloads considerably on Windows PowerShell.
    $ProgressPreference = "SilentlyContinue"

    try {
        # -ErrorAction Stop makes every failure catchable here regardless of the
        # caller's $ErrorActionPreference.
        Invoke-WebRequest -UseBasicParsing -Uri "$BaseUrl/$assetName" -OutFile $ArchivePath -ErrorAction Stop
        Invoke-WebRequest -UseBasicParsing -Uri "$BaseUrl/$assetStem.sha256" -OutFile $ChecksumPath -ErrorAction Stop
        return $true
    } catch {
        # Write-Verbose does not touch the success stream, so the boolean return
        # value stays clean.
        Write-Verbose "download from $BaseUrl failed: $($_.Exception.Message)"
        return $false
    }
}
|
||||||
|
|
||||||
|
# Verify that the archive's SHA-256 matches the digest in the checksum file.
#
# The checksum file's first whitespace-separated token is taken as the expected
# digest (so both "<hash>" and "<hash>  <filename>" layouts work). Comparison is
# done on lowercased hex.
#
# Throws "checksum file did not contain a SHA256 digest" when the file is empty
# or its first token is not 64 hex characters, and
# "checksum verification failed for $assetName" on a mismatch.
function Verify-Checksum {
    param(
        [string]$ArchivePath,
        [string]$ChecksumPath
    )

    # Get-Content -Raw can yield $null for an empty file; the [string] cast turns
    # that into "" so the digest check below reports the problem instead of a
    # null-method-call error from .Trim().
    $checksumText = ([string](Get-Content -Path $ChecksumPath -Raw)).Trim()
    $expected = ($checksumText -split "\s+")[0].ToLowerInvariant()

    # Rejects the empty case and anything that is not a SHA-256 hex digest
    # (for example an HTML error page saved in place of the checksum).
    if ($expected -notmatch '\A[0-9a-f]{64}\z') {
        throw "checksum file did not contain a SHA256 digest"
    }

    $actual = (Get-FileHash -Path $ArchivePath -Algorithm SHA256).Hash.ToLowerInvariant()
    if ($actual -ne $expected) {
        throw "checksum verification failed for $assetName"
    }
}
|
||||||
|
|
||||||
|
# Copy the two omnigraph executables from $SourceDir into the script-level
# $InstallDir, creating $InstallDir if needed and overwriting existing files.
function Install-FromDirectory {
    param(
        [string]$SourceDir
    )

    New-Item -ItemType Directory -Force -Path $InstallDir | Out-Null

    # Same order as before: CLI first, then the server binary.
    foreach ($binary in "omnigraph.exe", "omnigraph-server.exe") {
        $from = Join-Path $SourceDir $binary
        $to = Join-Path $InstallDir $binary
        Copy-Item -Path $from -Destination $to -Force
    }
}
|
||||||
|
|
||||||
|
function Install-FromRelease {
    # Downloads the release archive into $workDir, verifies its checksum,
    # extracts it and installs the binaries from the extracted directory.
    #
    # Uses $workDir, $assetName, $assetStem, $Version and $ReleaseChannel from
    # the enclosing scope and records the chosen source in
    # $script:selectedChannel. Throws when no download succeeds or when the
    # checksum check fails.
    New-Item -ItemType Directory -Force -Path $workDir | Out-Null

    $archivePath = Join-Path $workDir $assetName
    $checksumPath = Join-Path $workDir "$assetStem.sha256"
    # Splatted into every Download-ReleaseFiles call below.
    $targets = @{ ArchivePath = $archivePath; ChecksumPath = $checksumPath }

    if ($Version -ne "") {
        # Explicit version requested: a failed download is fatal, no fallback.
        $script:selectedChannel = $Version
        # NOTE(review): the base URL is resolved from $ReleaseChannel, not
        # $Version; Get-ReleaseBaseUrl starts outside this excerpt - confirm
        # it accounts for a pinned version.
        $baseUrl = Get-ReleaseBaseUrl -Channel $ReleaseChannel
        Write-Log "Downloading $assetName from $Version"

        $fetched = Download-ReleaseFiles -BaseUrl $baseUrl @targets
        if (-not $fetched) {
            throw "no published binary found for $assetName at release $Version"
        }
    } else {
        $script:selectedChannel = $ReleaseChannel
        $baseUrl = Get-ReleaseBaseUrl -Channel $selectedChannel
        Write-Log "Downloading $assetName from $selectedChannel"

        $fetched = Download-ReleaseFiles -BaseUrl $baseUrl @targets
        if (-not $fetched) {
            # Only the stable channel is allowed to fall back to edge.
            if ($ReleaseChannel -ne "stable") {
                throw "no published binary found for $assetName on channel $ReleaseChannel"
            }

            Write-Log "Stable release binaries are not published yet; falling back to edge"
            $script:selectedChannel = "edge"
            $baseUrl = Get-ReleaseBaseUrl -Channel $selectedChannel

            $fetched = Download-ReleaseFiles -BaseUrl $baseUrl @targets
            if (-not $fetched) {
                throw "no published binary found for $assetName on stable or edge; build from source"
            }
        }
    }

    # Nothing is extracted unless the digest matches.
    Verify-Checksum -ArchivePath $archivePath -ChecksumPath $checksumPath

    $extractDir = Join-Path $workDir "extract"
    New-Item -ItemType Directory -Force -Path $extractDir | Out-Null
    Expand-Archive -Path $archivePath -DestinationPath $extractDir -Force
    Install-FromDirectory -SourceDir $extractDir
}
|
||||||
|
|
||||||
|
function Print-Summary {
    # Prints the installed binary paths, the commands to verify them, the
    # release channel recorded in $selectedChannel (when non-empty), and a
    # PATH hint when $InstallDir is not one of the entries in $env:Path.
    $omnigraphPath = Join-Path $InstallDir "omnigraph.exe"
    $serverPath = Join-Path $InstallDir "omnigraph-server.exe"

    # One Write-Host call per entry, in this order.
    $report = @(
        "",
        "Installed:",
        " $omnigraphPath",
        " $serverPath",
        "",
        "Verify:",
        " $omnigraphPath version",
        " $serverPath --help",
        ""
    )
    foreach ($line in $report) {
        Write-Host $line
    }

    if ($selectedChannel -ne "") {
        Write-Host "Installed from release channel: $selectedChannel"
    }

    $pathEntries = $env:Path -split [System.IO.Path]::PathSeparator
    if (-not ($pathEntries -contains $InstallDir)) {
        Write-Host "Add $InstallDir to PATH if needed."
    }
}
|
||||||
|
|
||||||
|
# Script entry point: install from a release and report the result. The
# scratch directory is removed afterwards whether or not the install succeeded.
try {
    Install-FromRelease
    Print-Summary
} finally {
    $scratchExists = Test-Path $workDir
    if ($scratchExists) {
        Remove-Item -Path $workDir -Recurse -Force
    }
}
|
||||||
|
|
@ -6,7 +6,14 @@ SOURCE_REF="${SOURCE_REF:-main}"
|
||||||
RELEASE_CHANNEL="${RELEASE_CHANNEL:-edge}"
|
RELEASE_CHANNEL="${RELEASE_CHANNEL:-edge}"
|
||||||
WORKDIR="${WORKDIR:-$PWD/.omnigraph-rustfs-demo}"
|
WORKDIR="${WORKDIR:-$PWD/.omnigraph-rustfs-demo}"
|
||||||
RUSTFS_CONTAINER_NAME="${RUSTFS_CONTAINER_NAME:-omnigraph-rustfs-demo}"
|
RUSTFS_CONTAINER_NAME="${RUSTFS_CONTAINER_NAME:-omnigraph-rustfs-demo}"
|
||||||
RUSTFS_IMAGE="${RUSTFS_IMAGE:-rustfs/rustfs:latest}"
|
# Pinned to 1.0.0-beta.3 (2026-05-14) — the last known-good tag, matching CI
|
||||||
|
# (.github/workflows/ci.yml). `rustfs/rustfs:latest` (1.0.0-beta.4, 2026-05-21)
|
||||||
|
# added a credentials-policy check that refuses to start when the access/secret
|
||||||
|
# keys are values it considers "default" (rustfsadmin/rustfsadmin here). This
|
||||||
|
# script still works on beta.4+ because it passes
|
||||||
|
# RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true below — so overriding
|
||||||
|
# RUSTFS_IMAGE to a newer tag is safe.
|
||||||
|
RUSTFS_IMAGE="${RUSTFS_IMAGE:-rustfs/rustfs:1.0.0-beta.3}"
|
||||||
RUSTFS_DATA_DIR="${RUSTFS_DATA_DIR:-$WORKDIR/rustfs-data}"
|
RUSTFS_DATA_DIR="${RUSTFS_DATA_DIR:-$WORKDIR/rustfs-data}"
|
||||||
BUCKET="${BUCKET:-omnigraph-local}"
|
BUCKET="${BUCKET:-omnigraph-local}"
|
||||||
PREFIX="${PREFIX:-repos/context}"
|
PREFIX="${PREFIX:-repos/context}"
|
||||||
|
|
@ -74,9 +81,6 @@ platform_asset_name() {
|
||||||
Linux/x86_64)
|
Linux/x86_64)
|
||||||
printf 'omnigraph-linux-x86_64.tar.gz\n'
|
printf 'omnigraph-linux-x86_64.tar.gz\n'
|
||||||
;;
|
;;
|
||||||
Darwin/x86_64)
|
|
||||||
printf 'omnigraph-macos-x86_64.tar.gz\n'
|
|
||||||
;;
|
|
||||||
Darwin/arm64)
|
Darwin/arm64)
|
||||||
printf 'omnigraph-macos-arm64.tar.gz\n'
|
printf 'omnigraph-macos-arm64.tar.gz\n'
|
||||||
;;
|
;;
|
||||||
|
|
@ -268,6 +272,7 @@ start_rustfs() {
|
||||||
-v "$RUSTFS_DATA_DIR:/data" \
|
-v "$RUSTFS_DATA_DIR:/data" \
|
||||||
-e RUSTFS_ACCESS_KEY="$AWS_ACCESS_KEY_ID" \
|
-e RUSTFS_ACCESS_KEY="$AWS_ACCESS_KEY_ID" \
|
||||||
-e RUSTFS_SECRET_KEY="$AWS_SECRET_ACCESS_KEY" \
|
-e RUSTFS_SECRET_KEY="$AWS_SECRET_ACCESS_KEY" \
|
||||||
|
-e RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true \
|
||||||
"$RUSTFS_IMAGE" \
|
"$RUSTFS_IMAGE" \
|
||||||
/data >/dev/null
|
/data >/dev/null
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -64,20 +64,8 @@ cat >"$FORMULA_PATH" <<EOF
|
||||||
class Omnigraph < Formula
|
class Omnigraph < Formula
|
||||||
desc "Typed property graph database with Git-style workflows"
|
desc "Typed property graph database with Git-style workflows"
|
||||||
homepage "https://github.com/${REPO_SLUG}"
|
homepage "https://github.com/${REPO_SLUG}"
|
||||||
license "MIT"
|
|
||||||
version "${VERSION}"
|
version "${VERSION}"
|
||||||
|
license "MIT"
|
||||||
on_macos do
|
|
||||||
depends_on arch: :arm64
|
|
||||||
url "${MACOS_ARM_URL}"
|
|
||||||
sha256 "${MACOS_ARM_SHA}"
|
|
||||||
end
|
|
||||||
|
|
||||||
on_linux do
|
|
||||||
url "${LINUX_X86_URL}"
|
|
||||||
sha256 "${LINUX_X86_SHA}"
|
|
||||||
end
|
|
||||||
|
|
||||||
head "https://github.com/${REPO_SLUG}.git", branch: "main"
|
head "https://github.com/${REPO_SLUG}.git", branch: "main"
|
||||||
|
|
||||||
livecheck do
|
livecheck do
|
||||||
|
|
@ -85,6 +73,21 @@ class Omnigraph < Formula
|
||||||
regex(/^v?(\\d+(?:\\.\\d+)+)$/i)
|
regex(/^v?(\\d+(?:\\.\\d+)+)$/i)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
on_macos do
|
||||||
|
depends_on arch: :arm64
|
||||||
|
on_arm do
|
||||||
|
url "${MACOS_ARM_URL}"
|
||||||
|
sha256 "${MACOS_ARM_SHA}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
on_linux do
|
||||||
|
on_intel do
|
||||||
|
url "${LINUX_X86_URL}"
|
||||||
|
sha256 "${LINUX_X86_SHA}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def install
|
def install
|
||||||
bin.install "omnigraph", "omnigraph-server"
|
bin.install "omnigraph", "omnigraph-server"
|
||||||
end
|
end
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue