mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-12 01:45:14 +02:00
Merge branch 'main' into devin/1779464281-mr-656-inline-query-strings
Resolve conflicts: keep query/mutate canonical CLI subcommands and top-level lint command (this branch) alongside the repo→graph terminology rename from main. Update test helpers (repo_path → graph_path, init_repo → init_graph, app_for_loaded_repo → app_for_loaded_graph) and align tempdir variable names so the merged tests compile. Drop the now-unused QueryCommand enum (Lint was promoted to a top-level Command). Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>
This commit is contained in:
commit
9ff4af47fb
79 changed files with 2780 additions and 1894 deletions
10
.github/workflows/ci.yml
vendored
10
.github/workflows/ci.yml
vendored
|
|
@ -291,6 +291,14 @@ jobs:
|
|||
. -> target
|
||||
|
||||
- name: Start RustFS
|
||||
# Pinned to 1.0.0-beta.3 (2026-05-14) — the last known-good tag.
|
||||
# `rustfs/rustfs:latest` (1.0.0-beta.4, 2026-05-21) added a
|
||||
# credentials-policy check that refuses to start when
|
||||
# AWS_ACCESS_KEY_ID/SECRET_ACCESS_KEY are values it considers
|
||||
# "default" (rustfsadmin/rustfsadmin in our case). Bumping to
|
||||
# beta.4+ requires either rotating those creds to less-default
|
||||
# values or setting RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true
|
||||
# — deliberate work, not an emergency. Pin first; upgrade later.
|
||||
run: |
|
||||
docker rm -f rustfs >/dev/null 2>&1 || true
|
||||
docker run -d \
|
||||
|
|
@ -299,7 +307,7 @@ jobs:
|
|||
-p 9001:9001 \
|
||||
-e RUSTFS_ACCESS_KEY="${AWS_ACCESS_KEY_ID}" \
|
||||
-e RUSTFS_SECRET_KEY="${AWS_SECRET_ACCESS_KEY}" \
|
||||
rustfs/rustfs:latest \
|
||||
rustfs/rustfs:1.0.0-beta.3 \
|
||||
/data
|
||||
|
||||
- name: Install AWS CLI
|
||||
|
|
|
|||
29
.github/workflows/publish-crates.yml
vendored
29
.github/workflows/publish-crates.yml
vendored
|
|
@ -80,8 +80,15 @@ jobs:
|
|||
version=$(cargo metadata --format-version=1 --no-deps \
|
||||
| jq -r --arg c "$crate" '.packages[] | select(.name==$c) | .version')
|
||||
|
||||
# crates.io API requires a User-Agent header — without it the
|
||||
# API responds 403 and the skip check below would silently
|
||||
# fall through to a real publish attempt that errors with
|
||||
# "already exists on crates.io index" when re-running after a
|
||||
# partial publish. Send a UA naming the workflow.
|
||||
local current
|
||||
current=$(curl -fsSL "https://crates.io/api/v1/crates/${crate}" \
|
||||
current=$(curl -fsSL \
|
||||
-A 'ModernRelay-omnigraph-ci (https://github.com/ModernRelay/omnigraph)' \
|
||||
"https://crates.io/api/v1/crates/${crate}" \
|
||||
| jq -r '.crate.max_version' || echo "")
|
||||
|
||||
if [[ "$current" == "$version" ]]; then
|
||||
|
|
@ -90,10 +97,28 @@ jobs:
|
|||
fi
|
||||
|
||||
echo "==> publishing ${crate} ${version} (current crates.io: ${current:-none})"
|
||||
cargo publish -p "$crate" --locked
|
||||
# Defense in depth: if the skip check missed an existing
|
||||
# version (e.g. crates.io API hiccup), cargo publish errors
|
||||
# with "already exists on crates.io index". Treat that as
|
||||
# success so the workflow can be re-run idempotently.
|
||||
local output
|
||||
if ! output=$(cargo publish -p "$crate" --locked 2>&1); then
|
||||
echo "$output"
|
||||
if echo "$output" | grep -q "already exists on crates.io"; then
|
||||
echo "==> ${crate} ${version} was already published; treating as success"
|
||||
return 0
|
||||
fi
|
||||
return 1
|
||||
fi
|
||||
echo "$output"
|
||||
}
|
||||
|
||||
# Order matters: each crate must precede anything that depends on it.
|
||||
# omnigraph-compiler and omnigraph-policy have no internal deps;
|
||||
# omnigraph-engine depends on both; server depends on engine + the
|
||||
# two leaf crates; cli depends on everything.
|
||||
publish_if_new omnigraph-compiler
|
||||
publish_if_new omnigraph-policy
|
||||
publish_if_new omnigraph-engine
|
||||
publish_if_new omnigraph-server
|
||||
publish_if_new omnigraph-cli
|
||||
|
|
|
|||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -16,6 +16,7 @@ __pycache__/
|
|||
*.pyc
|
||||
demo/*.omni/
|
||||
.omnigraph-rustfs-demo/
|
||||
/docs/internal
|
||||
|
||||
# Local-only working files (not for the public repo)
|
||||
.claude/
|
||||
|
|
|
|||
38
AGENTS.md
38
AGENTS.md
|
|
@ -1,6 +1,6 @@
|
|||
# OmniGraph — Agent Guide
|
||||
|
||||
This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this repo. It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer.
|
||||
This file is the always-on map for AI coding agents (Claude Code, Codex, Cursor, Cline) working in this codebase. It is loaded into context on every turn, so it stays as a **map plus the rules and invariants that need to be in scope at all times** — the encyclopedia content lives under [`docs/`](docs/). When you need depth, follow a pointer.
|
||||
|
||||
**Required reading every session, every change:**
|
||||
|
||||
|
|
@ -16,9 +16,9 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th
|
|||
|
||||
`CLAUDE.md` is a symlink to this file — there is exactly one source of truth. Edit `AGENTS.md`.
|
||||
|
||||
**Version surveyed:** 0.4.2
|
||||
**Version surveyed:** 0.6.0
|
||||
**Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-cli`, `omnigraph-server`
|
||||
**Storage substrate:** Lance 4.x (columnar, versioned, branchable)
|
||||
**Storage substrate:** Lance 6.x (columnar, versioned, branchable)
|
||||
**License:** MIT
|
||||
**Toolchain:** Rust stable, edition 2024
|
||||
|
||||
|
|
@ -50,10 +50,10 @@ CLI (omnigraph) HTTP Server (omnigraph-server, Axum)
|
|||
omnigraph-compiler ── Pest grammars, catalog, IR, lowering, lint, migration plan
|
||||
│
|
||||
▼
|
||||
omnigraph (engine) ── ManifestRepo, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec
|
||||
omnigraph (engine) ── ManifestCoordinator, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec
|
||||
│
|
||||
▼
|
||||
Lance 4.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes
|
||||
Lance 6.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes
|
||||
│
|
||||
▼
|
||||
Object store (file / s3 / RustFS / MinIO / S3-compat)
|
||||
|
|
@ -167,35 +167,35 @@ If a proposal fits one of these, the burden is on the proposer to justify why th
|
|||
## Quick-reference flows
|
||||
|
||||
```bash
|
||||
# Initialize an S3-backed repo
|
||||
omnigraph init --schema ./schema.pg s3://my-bucket/repo.omni
|
||||
# Initialize an S3-backed graph
|
||||
omnigraph init --schema ./schema.pg s3://my-bucket/graph.omni
|
||||
|
||||
# Bulk load
|
||||
omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/repo.omni
|
||||
omnigraph load --data ./seed.jsonl --mode overwrite s3://my-bucket/graph.omni
|
||||
|
||||
# Branch + ingest a review batch
|
||||
omnigraph branch create --from main review/2026-04-25 s3://my-bucket/repo.omni
|
||||
omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/repo.omni
|
||||
omnigraph branch create --from main review/2026-04-25 s3://my-bucket/graph.omni
|
||||
omnigraph ingest --branch review/2026-04-25 --data ./batch.jsonl s3://my-bucket/graph.omni
|
||||
|
||||
# Run a hybrid (vector + BM25) query
|
||||
omnigraph read --query ./queries.gq --name find_similar \
|
||||
--params '{"q":"trends in AI safety"}' --format table s3://my-bucket/repo.omni
|
||||
--params '{"q":"trends in AI safety"}' --format table s3://my-bucket/graph.omni
|
||||
|
||||
# Plan + apply schema migration
|
||||
omnigraph schema plan --schema ./next.pg s3://my-bucket/repo.omni
|
||||
omnigraph schema apply --schema ./next.pg s3://my-bucket/repo.omni --json
|
||||
omnigraph schema plan --schema ./next.pg s3://my-bucket/graph.omni
|
||||
omnigraph schema apply --schema ./next.pg s3://my-bucket/graph.omni --json
|
||||
|
||||
# Merge review branch back
|
||||
omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/repo.omni
|
||||
omnigraph branch merge review/2026-04-25 --into main s3://my-bucket/graph.omni
|
||||
|
||||
# Compact + GC (preview, then confirm)
|
||||
omnigraph optimize s3://my-bucket/repo.omni
|
||||
omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/repo.omni
|
||||
omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/repo.omni
|
||||
omnigraph optimize s3://my-bucket/graph.omni
|
||||
omnigraph cleanup --keep 10 --older-than 7d s3://my-bucket/graph.omni
|
||||
omnigraph cleanup --keep 10 --older-than 7d --confirm s3://my-bucket/graph.omni
|
||||
|
||||
# Stand up the HTTP server (token from env)
|
||||
OMNIGRAPH_SERVER_BEARER_TOKEN=xxxx \
|
||||
omnigraph-server s3://my-bucket/repo.omni --bind 0.0.0.0:8080
|
||||
omnigraph-server s3://my-bucket/graph.omni --bind 0.0.0.0:8080
|
||||
|
||||
# Cedar policy explain
|
||||
omnigraph policy explain --actor act-alice --action change --branch main
|
||||
|
|
@ -222,7 +222,7 @@ omnigraph policy explain --actor act-alice --action change --branch main
|
|||
| Schema language | — | `.pg` + Pest grammar + catalog + interfaces + constraints + annotations |
|
||||
| Query language | — | `.gq` + Pest grammar + IR + lowering + linter |
|
||||
| Schema migration planning | — | `plan_schema_migration` + `apply_schema` step types + `__schema_apply_lock__` |
|
||||
| Commit graph (DAG) across whole repo | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map |
|
||||
| Commit graph (DAG) across whole graph | — | `_graph_commits.lance` with linear + merge parents, ULID ids, actor map |
|
||||
| Per-query atomic writes | — | In-memory `MutationStaging.pending` accumulator + `stage_*` / `commit_staged` per touched table at end-of-query + publisher CAS via `commit_with_expected` (single manifest commit per `mutate_as` / `load`); D₂ parse-time rule keeps inserts/updates and deletes from mixing |
|
||||
| Three-way row-level merge | — | `OrderedTableCursor` + `StagedTableWriter`, structured `MergeConflictKind` |
|
||||
| Change feeds | — | `diff_between` / `diff_commits` with manifest fast path + ID streaming |
|
||||
|
|
|
|||
829
Cargo.lock
generated
829
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
42
Cargo.toml
42
Cargo.toml
|
|
@ -14,29 +14,29 @@ default-members = [
|
|||
]
|
||||
|
||||
[workspace.dependencies]
|
||||
arrow-array = "57"
|
||||
arrow-ipc = "57"
|
||||
arrow-schema = "57"
|
||||
arrow-select = "57"
|
||||
arrow-cast = { version = "57", features = ["prettyprint"] }
|
||||
arrow-ord = "57"
|
||||
arrow-array = "58"
|
||||
arrow-ipc = "58"
|
||||
arrow-schema = "58"
|
||||
arrow-select = "58"
|
||||
arrow-cast = { version = "58", features = ["prettyprint"] }
|
||||
arrow-ord = "58"
|
||||
|
||||
datafusion = { version = "52", default-features = false }
|
||||
datafusion-physical-plan = "52"
|
||||
datafusion-physical-expr = "52"
|
||||
datafusion-execution = "52"
|
||||
datafusion-common = "52"
|
||||
datafusion-expr = "52"
|
||||
datafusion-functions-aggregate = "52"
|
||||
datafusion = { version = "53", default-features = false, features = ["nested_expressions"] }
|
||||
datafusion-physical-plan = "53"
|
||||
datafusion-physical-expr = "53"
|
||||
datafusion-execution = "53"
|
||||
datafusion-common = "53"
|
||||
datafusion-expr = "53"
|
||||
datafusion-functions-aggregate = "53"
|
||||
|
||||
lance = { version = "4.0.0", default-features = false, features = ["aws"] }
|
||||
lance-datafusion = "4.0.0"
|
||||
lance-file = "4.0.0"
|
||||
lance-index = "4.0.0"
|
||||
lance-linalg = "4.0.0"
|
||||
lance-namespace = "4.0.0"
|
||||
lance-namespace-impls = "4.0.0"
|
||||
lance-table = "4.0.0"
|
||||
lance = { version = "6.0.1", default-features = false, features = ["aws"] }
|
||||
lance-datafusion = "6.0.1"
|
||||
lance-file = "6.0.1"
|
||||
lance-index = "6.0.1"
|
||||
lance-linalg = "6.0.1"
|
||||
lance-namespace = "6.0.1"
|
||||
lance-namespace-impls = "6.0.1"
|
||||
lance-table = "6.0.1"
|
||||
|
||||
ulid = "1"
|
||||
futures = "0.3"
|
||||
|
|
|
|||
18
README.md
18
README.md
|
|
@ -60,7 +60,7 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/
|
|||
That bootstrap:
|
||||
|
||||
- starts RustFS on `127.0.0.1:9000`
|
||||
- creates a bucket and S3-backed repo
|
||||
- creates a bucket and S3-backed graph
|
||||
- loads the checked-in context fixture
|
||||
- launches `omnigraph-server` on `127.0.0.1:8080`
|
||||
|
||||
|
|
@ -69,8 +69,8 @@ Docker must be installed and running first.
|
|||
The RustFS bootstrap prefers the rolling `edge` binaries and only falls back to
|
||||
source builds when release assets are unavailable.
|
||||
|
||||
If a previous run left objects under the same repo prefix but did not finish
|
||||
initializing the repo, rerun with `RESET_REPO=1` or set `PREFIX` to a new
|
||||
If a previous run left objects under the same graph prefix but did not finish
|
||||
initializing the graph, rerun with `RESET_REPO=1` or set `PREFIX` to a new
|
||||
value.
|
||||
|
||||
## Common Commands
|
||||
|
|
@ -78,12 +78,12 @@ value.
|
|||
The same URI works for local paths, `s3://…`, or `http://host:port`.
|
||||
|
||||
```bash
|
||||
omnigraph init --schema ./schema.pg ./repo.omni
|
||||
omnigraph load --data ./data.jsonl ./repo.omni
|
||||
omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./repo.omni
|
||||
omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./repo.omni
|
||||
omnigraph branch create --from main feature-x ./repo.omni
|
||||
omnigraph branch merge feature-x --into main ./repo.omni
|
||||
omnigraph init --schema ./schema.pg ./graph.omni
|
||||
omnigraph load --data ./data.jsonl ./graph.omni
|
||||
omnigraph read --query ./queries.gq --name get_person --params '{"name":"Alice"}' ./graph.omni
|
||||
omnigraph change --query ./queries.gq --name insert_person --params '{"name":"Mina"}' ./graph.omni
|
||||
omnigraph branch create --from main feature-x ./graph.omni
|
||||
omnigraph branch merge feature-x --into main ./graph.omni
|
||||
```
|
||||
|
||||
See [docs/user/cli.md](docs/user/cli.md) for schema apply, snapshots, ingest, runs, and policy commands.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-cli"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "CLI for the Omnigraph graph database."
|
||||
license = "MIT"
|
||||
|
|
@ -13,10 +13,10 @@ name = "omnigraph"
|
|||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
|
||||
omnigraph-server = { path = "../omnigraph-server", version = "0.4.2" }
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
||||
omnigraph-server = { path = "../omnigraph-server", version = "0.6.0" }
|
||||
clap = { workspace = true }
|
||||
color-eyre = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
|
|
@ -30,4 +30,5 @@ assert_cmd = "2"
|
|||
predicates = "3"
|
||||
serde_json = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
lance = { workspace = true }
|
||||
lance-index = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -67,16 +67,16 @@ enum Command {
|
|||
Version,
|
||||
/// Generate, clean, or refresh explicit seed embeddings
|
||||
Embed(EmbedArgs),
|
||||
/// Initialize a new repo from a schema
|
||||
/// Initialize a new graph from a schema
|
||||
Init {
|
||||
#[arg(long)]
|
||||
schema: PathBuf,
|
||||
/// Repo URI (local path or s3://)
|
||||
/// Graph URI (local path or s3://)
|
||||
uri: String,
|
||||
},
|
||||
/// Load data into a repo
|
||||
/// Load data into a graph
|
||||
Load {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -93,7 +93,7 @@ enum Command {
|
|||
},
|
||||
/// Ingest data into a reviewable named branch
|
||||
Ingest {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -127,7 +127,7 @@ enum Command {
|
|||
/// printed and the invocation is rewritten to `omnigraph lint`).
|
||||
#[command(visible_alias = "check")]
|
||||
Lint {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -140,9 +140,9 @@ enum Command {
|
|||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Show repo snapshot
|
||||
/// Show graph snapshot
|
||||
Snapshot {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -155,7 +155,7 @@ enum Command {
|
|||
},
|
||||
/// Export a full graph snapshot as JSONL
|
||||
Export {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -182,7 +182,7 @@ enum Command {
|
|||
/// when used. Pairs with `omnigraph mutate` on the write side.
|
||||
#[command(visible_alias = "read")]
|
||||
Query {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(hide = true)]
|
||||
|
|
@ -220,7 +220,7 @@ enum Command {
|
|||
/// warning when used. Pairs with `omnigraph query` on the read side.
|
||||
#[command(visible_alias = "change")]
|
||||
Mutate {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(hide = true)]
|
||||
|
|
@ -252,9 +252,9 @@ enum Command {
|
|||
#[command(subcommand)]
|
||||
command: PolicyCommand,
|
||||
},
|
||||
/// Compact small Lance fragments in every table of the repo
|
||||
/// Compact small Lance fragments in every table of the graph
|
||||
Optimize {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -263,9 +263,9 @@ enum Command {
|
|||
#[arg(long)]
|
||||
json: bool,
|
||||
},
|
||||
/// Remove old Lance versions from every table of the repo (destructive)
|
||||
/// Remove old Lance versions from every table of the graph (destructive)
|
||||
Cleanup {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -291,7 +291,7 @@ enum Command {
|
|||
enum BranchCommand {
|
||||
/// Create a new branch
|
||||
Create {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -306,7 +306,7 @@ enum BranchCommand {
|
|||
},
|
||||
/// List branches
|
||||
List {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -318,7 +318,7 @@ enum BranchCommand {
|
|||
},
|
||||
/// Delete a branch
|
||||
Delete {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -331,7 +331,7 @@ enum BranchCommand {
|
|||
},
|
||||
/// Merge a source branch into a target branch
|
||||
Merge {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -350,7 +350,7 @@ enum BranchCommand {
|
|||
enum SchemaCommand {
|
||||
/// Plan a schema migration against the accepted persisted schema
|
||||
Plan {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -368,7 +368,7 @@ enum SchemaCommand {
|
|||
},
|
||||
/// Apply a supported schema migration
|
||||
Apply {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -393,7 +393,7 @@ enum SchemaCommand {
|
|||
/// Show the current accepted schema source
|
||||
#[command(alias = "get")]
|
||||
Show {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -405,10 +405,11 @@ enum SchemaCommand {
|
|||
}
|
||||
|
||||
#[derive(Debug, Subcommand)]
|
||||
|
||||
enum CommitCommand {
|
||||
/// List graph commits
|
||||
List {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
@ -421,7 +422,7 @@ enum CommitCommand {
|
|||
},
|
||||
/// Show a graph commit
|
||||
Show {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
#[arg(long)]
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
|
|
@ -594,7 +595,7 @@ fn finish_query_lint(output: &QueryLintOutput, json: bool) -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_local_repo_parent(uri: &str) -> Result<()> {
|
||||
fn ensure_local_graph_parent(uri: &str) -> Result<()> {
|
||||
if !uri.contains("://") {
|
||||
fs::create_dir_all(uri)?;
|
||||
}
|
||||
|
|
@ -706,10 +707,10 @@ fn resolve_policy_engine(config: &OmnigraphConfig) -> Result<PolicyEngine> {
|
|||
let policy_file = config
|
||||
.resolve_policy_file()
|
||||
.ok_or_else(|| color_eyre::eyre::eyre!("policy.file must be set in omnigraph.yaml"))?;
|
||||
PolicyEngine::load(&policy_file, &policy_repo_id(config))
|
||||
PolicyEngine::load(&policy_file, &policy_graph_id(config))
|
||||
}
|
||||
|
||||
/// Open a local-URI repo and, when `policy.file` is configured in
|
||||
/// Open a local-URI graph and, when `policy.file` is configured in
|
||||
/// `omnigraph.yaml`, install the resolved `PolicyEngine` on the engine
|
||||
/// handle so every direct-engine write goes through
|
||||
/// `Omnigraph::enforce(...)` (MR-722). Without a configured policy this
|
||||
|
|
@ -733,10 +734,7 @@ async fn open_local_db_with_policy(uri: &str, config: &OmnigraphConfig) -> Resul
|
|||
/// policy is configured and this returns `None`, the engine-layer
|
||||
/// footgun guard intentionally denies — silent bypass via "I forgot the
|
||||
/// actor" is what the guard prevents.
|
||||
fn resolve_cli_actor<'a>(
|
||||
cli_as: Option<&'a str>,
|
||||
config: &'a OmnigraphConfig,
|
||||
) -> Option<&'a str> {
|
||||
fn resolve_cli_actor<'a>(cli_as: Option<&'a str>, config: &'a OmnigraphConfig) -> Option<&'a str> {
|
||||
cli_as.or(config.cli.actor.as_deref())
|
||||
}
|
||||
|
||||
|
|
@ -748,7 +746,7 @@ fn resolve_policy_tests_path(config: &OmnigraphConfig) -> Result<PathBuf> {
|
|||
})
|
||||
}
|
||||
|
||||
fn policy_repo_id(config: &OmnigraphConfig) -> String {
|
||||
fn policy_graph_id(config: &OmnigraphConfig) -> String {
|
||||
if let Some(name) = &config.project.name {
|
||||
return name.clone();
|
||||
}
|
||||
|
|
@ -846,8 +844,15 @@ fn parse_duration_arg(s: &str) -> Result<std::time::Duration> {
|
|||
if s.is_empty() {
|
||||
bail!("duration is empty");
|
||||
}
|
||||
let (num_part, unit) = match s.char_indices().rev().find(|(_, c)| c.is_ascii_alphabetic()) {
|
||||
Some((i, _)) => (&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()], &s[i..]),
|
||||
let (num_part, unit) = match s
|
||||
.char_indices()
|
||||
.rev()
|
||||
.find(|(_, c)| c.is_ascii_alphabetic())
|
||||
{
|
||||
Some((i, _)) => (
|
||||
&s[..i + 1 - s[i..].chars().next().unwrap().len_utf8()],
|
||||
&s[i..],
|
||||
),
|
||||
None => (s, ""),
|
||||
};
|
||||
let n: u64 = num_part
|
||||
|
|
@ -873,7 +878,7 @@ fn resolve_local_uri(
|
|||
let uri = resolve_uri(config, cli_uri, cli_target)?;
|
||||
if is_remote_uri(&uri) {
|
||||
bail!(
|
||||
"{} is only supported against local repo URIs in this milestone",
|
||||
"{} is only supported against local graph URIs in this milestone",
|
||||
operation
|
||||
);
|
||||
}
|
||||
|
|
@ -1138,9 +1143,7 @@ fn render_schema_plan_step(step: &SchemaMigrationStep) -> String {
|
|||
type_name,
|
||||
drop_mode_label(*mode),
|
||||
),
|
||||
SchemaMigrationStep::UnsupportedChange {
|
||||
entity, reason, ..
|
||||
} => {
|
||||
SchemaMigrationStep::UnsupportedChange { entity, reason, .. } => {
|
||||
// When a schema-lint code is attached, render code + tier
|
||||
// so operators see at-a-glance the kind of risk (destructive
|
||||
// / validated / safe) — not just the rule identifier.
|
||||
|
|
@ -1550,10 +1553,10 @@ async fn execute_query_lint(
|
|||
));
|
||||
}
|
||||
|
||||
let has_repo_target =
|
||||
let has_graph_target =
|
||||
cli_uri.is_some() || cli_target.is_some() || config.cli_graph_name().is_some();
|
||||
if !has_repo_target {
|
||||
bail!("query lint requires --schema <schema.pg> or a resolvable repo target");
|
||||
if !has_graph_target {
|
||||
bail!("query lint requires --schema <schema.pg> or a resolvable graph target");
|
||||
}
|
||||
|
||||
let uri = resolve_local_uri(config, cli_uri, cli_target, "query lint")?;
|
||||
|
|
@ -1562,7 +1565,7 @@ async fn execute_query_lint(
|
|||
&db.catalog(),
|
||||
&query_source,
|
||||
query_path,
|
||||
QueryLintSchemaSource::repo(uri),
|
||||
QueryLintSchemaSource::graph(uri),
|
||||
))
|
||||
}
|
||||
|
||||
|
|
@ -1806,7 +1809,7 @@ async fn main() -> Result<()> {
|
|||
}
|
||||
Command::Init { schema, uri } => {
|
||||
let schema_source = fs::read_to_string(&schema)?;
|
||||
ensure_local_repo_parent(&uri)?;
|
||||
ensure_local_graph_parent(&uri)?;
|
||||
Omnigraph::init(&uri, &schema_source).await?;
|
||||
scaffold_config_if_missing(&uri)?;
|
||||
println!("initialized {}", uri);
|
||||
|
|
@ -2589,17 +2592,16 @@ async fn main() -> Result<()> {
|
|||
let config = load_cli_config(config.as_ref())?;
|
||||
let uri = resolve_uri(&config, uri, target.as_deref())?;
|
||||
|
||||
let older_than_dur = older_than
|
||||
.as_deref()
|
||||
.map(parse_duration_arg)
|
||||
.transpose()?;
|
||||
let older_than_dur = older_than.as_deref().map(parse_duration_arg).transpose()?;
|
||||
|
||||
if keep.is_none() && older_than_dur.is_none() {
|
||||
bail!("cleanup requires at least one of --keep or --older-than");
|
||||
}
|
||||
|
||||
let policy_desc = match (keep, older_than_dur) {
|
||||
(Some(k), Some(d)) => format!("keep {} versions, remove anything older than {:?}", k, d),
|
||||
(Some(k), Some(d)) => {
|
||||
format!("keep {} versions, remove anything older than {:?}", k, d)
|
||||
}
|
||||
(Some(k), None) => format!("keep {} versions", k),
|
||||
(None, Some(d)) => format!("remove anything older than {:?}", d),
|
||||
_ => unreachable!(),
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -52,7 +52,7 @@ pub fn fixture(name: &str) -> PathBuf {
|
|||
.join(name)
|
||||
}
|
||||
|
||||
pub fn repo_path(root: &Path) -> PathBuf {
|
||||
pub fn graph_path(root: &Path) -> PathBuf {
|
||||
root.join("demo.omni")
|
||||
}
|
||||
|
||||
|
|
@ -86,14 +86,14 @@ pub fn parse_stdout_json(output: &Output) -> Value {
|
|||
serde_json::from_slice(&output.stdout).unwrap()
|
||||
}
|
||||
|
||||
pub fn init_repo(repo: &Path) {
|
||||
pub fn init_graph(graph: &Path) {
|
||||
let schema = fixture("test.pg");
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(repo));
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(graph));
|
||||
}
|
||||
|
||||
pub fn load_fixture(repo: &Path) {
|
||||
pub fn load_fixture(graph: &Path) {
|
||||
let data = fixture("test.jsonl");
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(repo));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(graph));
|
||||
}
|
||||
|
||||
pub fn write_jsonl(path: &Path, rows: &str) {
|
||||
|
|
@ -116,7 +116,7 @@ fn yaml_string(value: &str) -> String {
|
|||
format!("'{}'", value.replace('\'', "''"))
|
||||
}
|
||||
|
||||
pub fn local_yaml_config(repo: &Path) -> String {
|
||||
pub fn local_yaml_config(graph: &Path) -> String {
|
||||
format!(
|
||||
"\
|
||||
graphs:
|
||||
|
|
@ -130,7 +130,7 @@ query:
|
|||
- .
|
||||
policy: {{}}
|
||||
",
|
||||
yaml_string(&repo.to_string_lossy())
|
||||
yaml_string(&graph.to_string_lossy())
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -200,9 +200,9 @@ fn spawn_server_process(mut command: StdCommand) -> TestServer {
|
|||
panic!("server did not become healthy");
|
||||
}
|
||||
|
||||
pub fn spawn_server(repo: &Path) -> TestServer {
|
||||
pub fn spawn_server(graph: &Path) -> TestServer {
|
||||
let mut command = server_process();
|
||||
command.arg(repo);
|
||||
command.arg(graph);
|
||||
spawn_server_process(command)
|
||||
}
|
||||
|
||||
|
|
@ -221,58 +221,57 @@ pub fn spawn_server_with_config_env(config: &Path, envs: &[(&str, &str)]) -> Tes
|
|||
spawn_server_process(command)
|
||||
}
|
||||
|
||||
|
||||
pub struct SystemRepo {
|
||||
pub struct SystemGraph {
|
||||
_temp: TempDir,
|
||||
repo: PathBuf,
|
||||
graph: PathBuf,
|
||||
}
|
||||
|
||||
impl SystemRepo {
|
||||
impl SystemGraph {
|
||||
pub fn initialized() -> Self {
|
||||
let temp = tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
init_repo(&repo);
|
||||
Self { _temp: temp, repo }
|
||||
let graph = graph_path(temp.path());
|
||||
init_graph(&graph);
|
||||
Self { _temp: temp, graph }
|
||||
}
|
||||
|
||||
pub fn loaded() -> Self {
|
||||
let temp = tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
init_repo(&repo);
|
||||
load_fixture(&repo);
|
||||
Self { _temp: temp, repo }
|
||||
let graph = graph_path(temp.path());
|
||||
init_graph(&graph);
|
||||
load_fixture(&graph);
|
||||
Self { _temp: temp, graph }
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.repo
|
||||
&self.graph
|
||||
}
|
||||
|
||||
pub fn write_query(&self, name: &str, source: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_query_file(&path, source);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn write_jsonl(&self, name: &str, rows: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_jsonl(&path, rows);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn write_config(&self, name: &str, source: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_config(&path, source);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn write_file(&self, name: &str, source: &str) -> PathBuf {
|
||||
let path = self.repo.parent().unwrap().join(name);
|
||||
let path = self.graph.parent().unwrap().join(name);
|
||||
write_file(&path, source);
|
||||
path
|
||||
}
|
||||
|
||||
pub fn spawn_server(&self) -> TestServer {
|
||||
spawn_server(&self.repo)
|
||||
spawn_server(&self.graph)
|
||||
}
|
||||
|
||||
pub fn spawn_server_with_config(&self, config: &Path) -> TestServer {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ fn yaml_string(value: &str) -> String {
|
|||
format!("'{}'", value.replace('\'', "''"))
|
||||
}
|
||||
|
||||
fn local_policy_config(repo: &SystemRepo) -> String {
|
||||
fn local_policy_config(graph: &SystemGraph) -> String {
|
||||
format!(
|
||||
"\
|
||||
project:
|
||||
|
|
@ -83,12 +83,12 @@ query:
|
|||
policy:
|
||||
file: ./policy.yaml
|
||||
",
|
||||
yaml_string(&repo.path().to_string_lossy())
|
||||
yaml_string(&graph.path().to_string_lossy())
|
||||
)
|
||||
}
|
||||
|
||||
fn insert_person_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
|
||||
repo.write_query(
|
||||
fn insert_person_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
|
||||
graph.write_query(
|
||||
name,
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -98,8 +98,8 @@ query insert_person($name: String, $age: I32) {
|
|||
)
|
||||
}
|
||||
|
||||
fn add_friend_query(repo: &SystemRepo, name: &str) -> std::path::PathBuf {
|
||||
repo.write_query(
|
||||
fn add_friend_query(graph: &SystemGraph, name: &str) -> std::path::PathBuf {
|
||||
graph.write_query(
|
||||
name,
|
||||
r#"
|
||||
query add_friend($from: String, $to: String) {
|
||||
|
|
@ -109,13 +109,13 @@ query add_friend($from: String, $to: String) {
|
|||
)
|
||||
}
|
||||
|
||||
fn snapshot_table_row_count(repo: &SystemRepo, table_key: &str) -> u64 {
|
||||
snapshot_table_row_count_at(repo.path(), table_key)
|
||||
fn snapshot_table_row_count(graph: &SystemGraph, table_key: &str) -> u64 {
|
||||
snapshot_table_row_count_at(graph.path(), table_key)
|
||||
}
|
||||
|
||||
fn snapshot_table_row_count_at(repo: &std::path::Path, table_key: &str) -> u64 {
|
||||
fn snapshot_table_row_count_at(graph: &std::path::Path, table_key: &str) -> u64 {
|
||||
let payload = parse_stdout_json(&output_success(
|
||||
cli().arg("snapshot").arg(repo).arg("--json"),
|
||||
cli().arg("snapshot").arg(graph).arg("--json"),
|
||||
));
|
||||
payload["tables"]
|
||||
.as_array()
|
||||
|
|
@ -178,7 +178,7 @@ fn format_vector(values: &[f32]) -> String {
|
|||
.join(", ")
|
||||
}
|
||||
|
||||
fn s3_test_repo_uri(suite: &str) -> Option<String> {
|
||||
fn s3_test_graph_uri(suite: &str) -> Option<String> {
|
||||
let bucket = env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
|
||||
let prefix = env::var("OMNIGRAPH_S3_TEST_PREFIX")
|
||||
.ok()
|
||||
|
|
@ -193,21 +193,21 @@ fn s3_test_repo_uri(suite: &str) -> Option<String> {
|
|||
|
||||
#[test]
|
||||
fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let mutation_file = insert_person_query(&repo, "system-local-init-change.gq");
|
||||
let graph = SystemGraph::initialized();
|
||||
let mutation_file = insert_person_query(&graph, "system-local-init-change.gq");
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(fixture("test.jsonl"))
|
||||
.arg(repo.path()),
|
||||
.arg(graph.path()),
|
||||
);
|
||||
|
||||
let read_before = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -222,7 +222,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let change_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
.arg("--params")
|
||||
|
|
@ -235,7 +235,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let read_after = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -253,7 +253,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let inline_change = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("-e")
|
||||
.arg("query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }")
|
||||
.arg("--params")
|
||||
|
|
@ -267,7 +267,7 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
let inline_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query-string")
|
||||
.arg("query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }")
|
||||
.arg("--params")
|
||||
|
|
@ -281,15 +281,15 @@ fn local_cli_end_to_end_init_load_read_change_read_flow() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_end_to_end_branch_change_merge_flow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let mutation_file = insert_person_query(&repo, "system-local-change.gq");
|
||||
let graph = SystemGraph::loaded();
|
||||
let mutation_file = insert_person_query(&graph, "system-local-change.gq");
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("branch")
|
||||
.arg("create")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--from")
|
||||
.arg("main")
|
||||
.arg("feature"),
|
||||
|
|
@ -298,7 +298,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
let change_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
.arg("--branch")
|
||||
|
|
@ -313,7 +313,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
let feature_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -332,7 +332,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
.arg("branch")
|
||||
.arg("merge")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("feature")
|
||||
.arg("--json"),
|
||||
));
|
||||
|
|
@ -341,7 +341,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
let main_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -358,7 +358,7 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
cli()
|
||||
.arg("commit")
|
||||
.arg("list")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--branch")
|
||||
.arg("main")
|
||||
.arg("--json"),
|
||||
|
|
@ -368,8 +368,8 @@ fn local_cli_end_to_end_branch_change_merge_flow() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let ingest_data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let ingest_data = graph.write_jsonl(
|
||||
"system-local-ingest.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
|
||||
{"type":"Person","data":{"name":"Bob","age":26}}"#,
|
||||
|
|
@ -382,7 +382,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
.arg(&ingest_data)
|
||||
.arg("--branch")
|
||||
.arg("feature-ingest")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--json"),
|
||||
));
|
||||
assert_eq!(ingest_payload["branch"], "feature-ingest");
|
||||
|
|
@ -395,7 +395,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
let feature_snapshot = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("snapshot")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--branch")
|
||||
.arg("feature-ingest")
|
||||
.arg("--json"),
|
||||
|
|
@ -405,7 +405,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
let zoe = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -422,7 +422,7 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
let bob = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -439,20 +439,20 @@ fn local_cli_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_export_round_trips_full_branch_graph() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let graph = SystemGraph::loaded();
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("branch")
|
||||
.arg("create")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--from")
|
||||
.arg("main")
|
||||
.arg("feature"),
|
||||
);
|
||||
|
||||
let feature_data = repo.write_jsonl(
|
||||
let feature_data = graph.write_jsonl(
|
||||
"system-local-export-feature.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Eve","age":29}}
|
||||
{"edge":"Knows","from":"Alice","to":"Eve"}"#,
|
||||
|
|
@ -466,53 +466,56 @@ fn local_cli_export_round_trips_full_branch_graph() {
|
|||
.arg("feature")
|
||||
.arg("--mode")
|
||||
.arg("append")
|
||||
.arg(repo.path()),
|
||||
.arg(graph.path()),
|
||||
);
|
||||
|
||||
let exported = stdout_string(&output_success(
|
||||
cli()
|
||||
.arg("export")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--branch")
|
||||
.arg("feature")
|
||||
.arg("--jsonl"),
|
||||
));
|
||||
let export_path = repo.write_jsonl("system-local-exported.jsonl", &exported);
|
||||
let imported_repo = repo.path().parent().unwrap().join("imported-export.omni");
|
||||
let export_path = graph.write_jsonl("system-local-exported.jsonl", &exported);
|
||||
let imported_graph = graph.path().parent().unwrap().join("imported-export.omni");
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
.arg("init")
|
||||
.arg("--schema")
|
||||
.arg(fixture("test.pg"))
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(&export_path)
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_repo, "node:Person"),
|
||||
snapshot_table_row_count_at(&imported_graph, "node:Person"),
|
||||
5
|
||||
);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_repo, "node:Company"),
|
||||
snapshot_table_row_count_at(&imported_graph, "node:Company"),
|
||||
2
|
||||
);
|
||||
assert_eq!(snapshot_table_row_count_at(&imported_repo, "edge:Knows"), 4);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_repo, "edge:WorksAt"),
|
||||
snapshot_table_row_count_at(&imported_graph, "edge:Knows"),
|
||||
4
|
||||
);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count_at(&imported_graph, "edge:WorksAt"),
|
||||
2
|
||||
);
|
||||
|
||||
let eve = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&imported_repo)
|
||||
.arg(&imported_graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -527,7 +530,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
|
|||
let friends = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&imported_repo)
|
||||
.arg(&imported_graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -541,7 +544,7 @@ fn local_cli_export_round_trips_full_branch_graph() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_s3_end_to_end_init_load_read_flow() {
|
||||
let Some(repo_uri) = s3_test_repo_uri("cli-local") else {
|
||||
let Some(graph_uri) = s3_test_graph_uri("cli-local") else {
|
||||
eprintln!("skipping s3 cli test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
@ -566,7 +569,7 @@ query:
|
|||
- .
|
||||
policy: {{}}
|
||||
",
|
||||
repo_uri
|
||||
graph_uri
|
||||
),
|
||||
);
|
||||
|
||||
|
|
@ -575,14 +578,14 @@ policy: {{}}
|
|||
.arg("init")
|
||||
.arg("--schema")
|
||||
.arg(fixture("test.pg"))
|
||||
.arg(&repo_uri),
|
||||
.arg(&graph_uri),
|
||||
);
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(fixture("test.jsonl"))
|
||||
.arg(&repo_uri),
|
||||
.arg(&graph_uri),
|
||||
);
|
||||
|
||||
let read = parse_stdout_json(&output_success(
|
||||
|
|
@ -615,13 +618,13 @@ policy: {{}}
|
|||
|
||||
#[test]
|
||||
fn local_cli_failed_load_keeps_target_state_unchanged() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let bad_data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let bad_data = graph.write_jsonl(
|
||||
"system-bad-load.jsonl",
|
||||
r#"{"edge":"Knows","from":"Alice","to":"Missing"}"#,
|
||||
);
|
||||
let person_rows_before = snapshot_table_row_count(&repo, "node:Person");
|
||||
let knows_rows_before = snapshot_table_row_count(&repo, "edge:Knows");
|
||||
let person_rows_before = snapshot_table_row_count(&graph, "node:Person");
|
||||
let knows_rows_before = snapshot_table_row_count(&graph, "edge:Knows");
|
||||
|
||||
let output = output_failure(
|
||||
cli()
|
||||
|
|
@ -630,17 +633,17 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
|
|||
.arg(&bad_data)
|
||||
.arg("--mode")
|
||||
.arg("append")
|
||||
.arg(repo.path()),
|
||||
.arg(graph.path()),
|
||||
);
|
||||
let stderr = String::from_utf8(output.stderr).unwrap();
|
||||
assert!(stderr.contains("not found") || stderr.contains("Missing"));
|
||||
|
||||
assert_eq!(
|
||||
snapshot_table_row_count(&repo, "node:Person"),
|
||||
snapshot_table_row_count(&graph, "node:Person"),
|
||||
person_rows_before
|
||||
);
|
||||
assert_eq!(
|
||||
snapshot_table_row_count(&repo, "edge:Knows"),
|
||||
snapshot_table_row_count(&graph, "edge:Knows"),
|
||||
knows_rows_before
|
||||
);
|
||||
// Failed loads leave no run record (the run lifecycle has been
|
||||
|
|
@ -649,13 +652,13 @@ fn local_cli_failed_load_keeps_target_state_unchanged() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_failed_change_keeps_target_state_unchanged() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let mutation_file = add_friend_query(&repo, "system-invalid-change.gq");
|
||||
let graph = SystemGraph::loaded();
|
||||
let mutation_file = add_friend_query(&graph, "system-invalid-change.gq");
|
||||
|
||||
let output = output_failure(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(&mutation_file)
|
||||
.arg("--params")
|
||||
|
|
@ -667,7 +670,7 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
|
|||
let friends_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -683,8 +686,8 @@ fn local_cli_failed_change_keeps_target_state_unchanged() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_resolves_relative_query_against_config_base_dir() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let root = repo.path().parent().unwrap();
|
||||
let graph = SystemGraph::loaded();
|
||||
let root = graph.path().parent().unwrap();
|
||||
let config_dir = root.join("config");
|
||||
let query_dir = config_dir.join("queries");
|
||||
let ambient_dir = root.join("ambient");
|
||||
|
|
@ -707,7 +710,7 @@ query:
|
|||
- queries
|
||||
policy: {{}}
|
||||
",
|
||||
repo.path().display()
|
||||
graph.path().display()
|
||||
),
|
||||
);
|
||||
write_query_file(
|
||||
|
|
@ -761,7 +764,7 @@ query get_person($name: String) {
|
|||
#[test]
|
||||
fn local_cli_datetime_and_list_types_round_trip_through_load_read_and_change() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
let graph = graph_path(temp.path());
|
||||
let schema = temp.path().join("datatypes.pg");
|
||||
let data = temp.path().join("datatypes.jsonl");
|
||||
let queries = temp.path().join("datatypes.gq");
|
||||
|
|
@ -836,13 +839,13 @@ query get_task($slug: String) {
|
|||
"#,
|
||||
);
|
||||
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
|
||||
|
||||
let filtered = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -867,7 +870,7 @@ query get_task($slug: String) {
|
|||
let insert_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -883,7 +886,7 @@ query get_task($slug: String) {
|
|||
let update_payload = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("change")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -897,7 +900,7 @@ query get_task($slug: String) {
|
|||
let gamma = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -924,7 +927,7 @@ query get_task($slug: String) {
|
|||
#[ignore = "requires GEMINI_API_KEY and network access"]
|
||||
fn local_cli_real_gemini_string_nearest_query_returns_expected_match() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
let graph = graph_path(temp.path());
|
||||
let schema = temp.path().join("gemini.pg");
|
||||
let data = temp.path().join("gemini.jsonl");
|
||||
let queries = temp.path().join("gemini.gq");
|
||||
|
|
@ -966,13 +969,13 @@ query vector_search($q: String) {
|
|||
"#,
|
||||
);
|
||||
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&repo));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&repo));
|
||||
output_success(cli().arg("init").arg("--schema").arg(&schema).arg(&graph));
|
||||
output_success(cli().arg("load").arg("--data").arg(&data).arg(&graph));
|
||||
|
||||
let result = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&repo)
|
||||
.arg(&graph)
|
||||
.arg("--query")
|
||||
.arg(&queries)
|
||||
.arg("--name")
|
||||
|
|
@ -999,10 +1002,10 @@ fn local_cli_policy_tooling_is_end_to_end() {
|
|||
// Sanity check for the read-only policy CLI surfaces. These don't
|
||||
// mutate the graph — they just parse and evaluate the policy file —
|
||||
// so they don't depend on PR #4's engine-side enforcement.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
repo.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
graph.write_config("policy.tests.yaml", POLICY_E2E_TESTS_YAML);
|
||||
|
||||
let validate = output_success(
|
||||
cli()
|
||||
|
|
@ -1053,10 +1056,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
|
|||
// 3. Policy installed, `--as act-ragnor`, change on main →
|
||||
// Cedar permits (admins-write rule). Write succeeds and the
|
||||
// inserted row is readable.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&repo, "system-local-policy-change.gq");
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&graph, "system-local-policy-change.gq");
|
||||
|
||||
// Case 1: policy configured, no actor threaded → footgun guard.
|
||||
let no_actor = output_failure(
|
||||
|
|
@ -1119,7 +1122,7 @@ fn local_cli_change_enforces_engine_layer_policy() {
|
|||
let verify = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -1145,10 +1148,10 @@ fn local_cli_change_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_load_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = graph.write_jsonl(
|
||||
"system-local-policy-load.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"LoadPolicy","age":11}}"#,
|
||||
);
|
||||
|
|
@ -1189,10 +1192,10 @@ fn local_cli_load_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_ingest_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let data = graph.write_jsonl(
|
||||
"system-local-policy-ingest.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"IngestPolicy","age":12}}"#,
|
||||
);
|
||||
|
|
@ -1242,16 +1245,19 @@ fn local_cli_ingest_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_schema_apply_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
// Additive: add a nullable property; SDK-compatible with the fixture
|
||||
// schema. Uses the schema-apply scope (TargetBranch("main")).
|
||||
let new_schema = std::fs::read_to_string(fixture("test.pg"))
|
||||
.unwrap()
|
||||
.replace(" age: I32?\n}", " age: I32?\n nickname: String?\n}");
|
||||
let schema_path = repo.path().join("policy-additive.pg");
|
||||
.replace(
|
||||
" age: I32?\n}",
|
||||
" age: I32?\n nickname: String?\n}",
|
||||
);
|
||||
let schema_path = graph.path().join("policy-additive.pg");
|
||||
std::fs::write(&schema_path, &new_schema).unwrap();
|
||||
|
||||
let denied = output_failure(
|
||||
|
|
@ -1289,9 +1295,9 @@ fn local_cli_schema_apply_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_branch_create_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
let denied = output_failure(
|
||||
cli()
|
||||
|
|
@ -1327,9 +1333,9 @@ fn local_cli_branch_create_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_branch_delete_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
// Pre-create the branch as ragnor so there's something to delete.
|
||||
output_success(
|
||||
|
|
@ -1375,9 +1381,9 @@ fn local_cli_branch_delete_enforces_engine_layer_policy() {
|
|||
|
||||
#[test]
|
||||
fn local_cli_branch_merge_enforces_engine_layer_policy() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config("omnigraph-policy.yaml", &local_policy_config(&repo));
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config("omnigraph-policy.yaml", &local_policy_config(&graph));
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
|
||||
// Pre-create a feature branch as ragnor (admins-branch-ops covers it).
|
||||
output_success(
|
||||
|
|
@ -1431,7 +1437,7 @@ fn local_cli_branch_merge_enforces_engine_layer_policy() {
|
|||
// pin the precedence rule that `main.rs::resolve_cli_actor` implements:
|
||||
// `--as` flag > `cli.actor` from `omnigraph.yaml` > None.
|
||||
|
||||
fn local_policy_config_with_actor(repo: &SystemRepo, actor: &str) -> String {
|
||||
fn local_policy_config_with_actor(graph: &SystemGraph, actor: &str) -> String {
|
||||
// Mirrors `local_policy_config` but adds `cli.actor` so the
|
||||
// config-only precedence path is exercised. The `cli:` block
|
||||
// already has `graph` and `branch`; appending `actor` here.
|
||||
|
|
@ -1452,7 +1458,7 @@ query:
|
|||
policy:
|
||||
file: ./policy.yaml
|
||||
",
|
||||
yaml_string(&repo.path().to_string_lossy()),
|
||||
yaml_string(&graph.path().to_string_lossy()),
|
||||
actor,
|
||||
)
|
||||
}
|
||||
|
|
@ -1462,13 +1468,13 @@ fn local_cli_actor_from_config_used_when_no_flag() {
|
|||
// cli.actor: act-ragnor in omnigraph.yaml, no --as flag → change
|
||||
// permitted via admins-write rule. Proves the config-only path
|
||||
// works; previously the only proof was structural.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config(
|
||||
"omnigraph-policy.yaml",
|
||||
&local_policy_config_with_actor(&repo, "act-ragnor"),
|
||||
&local_policy_config_with_actor(&graph, "act-ragnor"),
|
||||
);
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&repo, "system-local-cli-actor.gq");
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&graph, "system-local-cli-actor.gq");
|
||||
|
||||
let allowed = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
|
|
@ -1490,13 +1496,13 @@ fn local_cli_actor_flag_overrides_config_actor() {
|
|||
// cli.actor: act-ragnor in config + --as act-bruno on CLI → change
|
||||
// denied. Flag wins per the precedence rule. Without this test, a
|
||||
// future change that reverses precedence would ride through silently.
|
||||
let repo = SystemRepo::loaded();
|
||||
let config = repo.write_config(
|
||||
let graph = SystemGraph::loaded();
|
||||
let config = graph.write_config(
|
||||
"omnigraph-policy.yaml",
|
||||
&local_policy_config_with_actor(&repo, "act-ragnor"),
|
||||
&local_policy_config_with_actor(&graph, "act-ragnor"),
|
||||
);
|
||||
repo.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&repo, "system-local-cli-actor-override.gq");
|
||||
graph.write_config("policy.yaml", POLICY_E2E_YAML);
|
||||
let mutation_file = insert_person_query(&graph, "system-local-cli-actor-override.gq");
|
||||
|
||||
let denied = output_failure(
|
||||
cli()
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ fn yaml_string(value: &str) -> String {
|
|||
format!("'{}'", value.replace('\'', "''"))
|
||||
}
|
||||
|
||||
fn remote_policy_server_config(repo: &SystemRepo) -> String {
|
||||
fn remote_policy_server_config(graph: &SystemGraph) -> String {
|
||||
format!(
|
||||
"\
|
||||
project:
|
||||
|
|
@ -54,7 +54,7 @@ server:
|
|||
policy:
|
||||
file: ./policy.yaml
|
||||
",
|
||||
yaml_string(&repo.path().to_string_lossy())
|
||||
yaml_string(&graph.path().to_string_lossy())
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -81,10 +81,10 @@ auth:
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_server_and_cli_end_to_end_flow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -105,7 +105,7 @@ query insert_person($name: String, $age: I32) {
|
|||
assert_eq!(health["status"], "ok");
|
||||
|
||||
let local_snapshot = parse_stdout_json(&output_success(
|
||||
cli().arg("snapshot").arg(repo.path()).arg("--json"),
|
||||
cli().arg("snapshot").arg(graph.path()).arg("--json"),
|
||||
));
|
||||
let snapshot = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
|
|
@ -120,7 +120,7 @@ query insert_person($name: String, $age: I32) {
|
|||
let local_read = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -180,7 +180,7 @@ query insert_person($name: String, $age: I32) {
|
|||
let local_verify = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -260,11 +260,11 @@ query insert_person($name: String, $age: I32) {
|
|||
|
||||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_schema_apply_via_cli_updates_repo() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = repo.write_file(
|
||||
fn remote_schema_apply_via_cli_updates_graph() {
|
||||
let graph = SystemGraph::initialized();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = graph.write_file(
|
||||
"next.pg",
|
||||
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
|
||||
" age: I32?\n}",
|
||||
|
|
@ -286,7 +286,7 @@ fn remote_schema_apply_via_cli_updates_repo() {
|
|||
|
||||
let db = tokio::runtime::Runtime::new()
|
||||
.unwrap()
|
||||
.block_on(Omnigraph::open(repo.path().to_string_lossy().as_ref()))
|
||||
.block_on(Omnigraph::open(graph.path().to_string_lossy().as_ref()))
|
||||
.unwrap();
|
||||
assert!(
|
||||
db.catalog().node_types["Person"]
|
||||
|
|
@ -298,10 +298,10 @@ fn remote_schema_apply_via_cli_updates_repo() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_schema_apply_rejects_unsupported_plan() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let breaking_schema = repo.write_file(
|
||||
let graph = SystemGraph::initialized();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let breaking_schema = graph.write_file(
|
||||
"breaking.pg",
|
||||
&fs::read_to_string(fixture("test.pg"))
|
||||
.unwrap()
|
||||
|
|
@ -324,7 +324,7 @@ fn remote_schema_apply_rejects_unsupported_plan() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_schema_apply_rejects_when_non_main_branch_exists() {
|
||||
let repo = SystemRepo::initialized();
|
||||
let graph = SystemGraph::initialized();
|
||||
output_success(
|
||||
cli()
|
||||
.arg("branch")
|
||||
|
|
@ -332,12 +332,12 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
|
|||
.arg("--from")
|
||||
.arg("main")
|
||||
.arg("--uri")
|
||||
.arg(repo.path())
|
||||
.arg(graph.path())
|
||||
.arg("feature"),
|
||||
);
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = repo.write_file(
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let next_schema = graph.write_file(
|
||||
"next.pg",
|
||||
&fs::read_to_string(fixture("test.pg")).unwrap().replace(
|
||||
" age: I32?\n}",
|
||||
|
|
@ -355,16 +355,16 @@ fn remote_schema_apply_rejects_when_non_main_branch_exists() {
|
|||
.arg(&next_schema),
|
||||
);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
assert!(stderr.contains("schema apply requires a repo with only main"));
|
||||
assert!(stderr.contains("schema apply requires a graph with only main"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_read_preserves_projection_order_in_json_and_csv() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ordered_query = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ordered_query = graph.write_query(
|
||||
"ordered-remote.gq",
|
||||
r#"
|
||||
query ordered_person($name: String) {
|
||||
|
|
@ -419,10 +419,10 @@ query ordered_person($name: String) {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_branch_create_list_merge_flow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-branch-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -516,9 +516,9 @@ query insert_person($name: String, $age: I32) {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_branch_delete_removes_branch() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
|
||||
parse_stdout_json(&output_success(
|
||||
cli()
|
||||
|
|
@ -557,10 +557,10 @@ fn remote_branch_delete_removes_branch() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_export_round_trips_full_branch_graph() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = repo.write_query(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-export-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
@ -624,8 +624,8 @@ query add_friend($from: String, $to: String) {
|
|||
.arg("feature")
|
||||
.arg("--jsonl"),
|
||||
));
|
||||
let export_path = repo.write_jsonl("system-remote-exported.jsonl", &exported);
|
||||
let imported_repo = repo
|
||||
let export_path = graph.write_jsonl("system-remote-exported.jsonl", &exported);
|
||||
let imported_graph = graph
|
||||
.path()
|
||||
.parent()
|
||||
.unwrap()
|
||||
|
|
@ -636,18 +636,18 @@ query add_friend($from: String, $to: String) {
|
|||
.arg("init")
|
||||
.arg("--schema")
|
||||
.arg(fixture("test.pg"))
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
output_success(
|
||||
cli()
|
||||
.arg("load")
|
||||
.arg("--data")
|
||||
.arg(&export_path)
|
||||
.arg(&imported_repo),
|
||||
.arg(&imported_graph),
|
||||
);
|
||||
|
||||
let snapshot = parse_stdout_json(&output_success(
|
||||
cli().arg("snapshot").arg(&imported_repo).arg("--json"),
|
||||
cli().arg("snapshot").arg(&imported_graph).arg("--json"),
|
||||
));
|
||||
assert_eq!(
|
||||
snapshot["tables"]
|
||||
|
|
@ -671,7 +671,7 @@ query add_friend($from: String, $to: String) {
|
|||
let eve = parse_stdout_json(&output_success(
|
||||
cli()
|
||||
.arg("read")
|
||||
.arg(&imported_repo)
|
||||
.arg(&imported_graph)
|
||||
.arg("--query")
|
||||
.arg(fixture("test.gq"))
|
||||
.arg("--name")
|
||||
|
|
@ -687,10 +687,10 @@ query add_friend($from: String, $to: String) {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ingest_data = repo.write_jsonl(
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let ingest_data = graph.write_jsonl(
|
||||
"system-remote-ingest.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Zoe","age":33}}
|
||||
{"type":"Person","data":{"name":"Bob","age":26}}"#,
|
||||
|
|
@ -747,9 +747,9 @@ fn remote_ingest_creates_review_branch_and_keeps_it_readable() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_ingest_reuses_existing_branch_and_merges_updates() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let server = repo.spawn_server();
|
||||
let config = repo.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
let graph = SystemGraph::loaded();
|
||||
let server = graph.spawn_server();
|
||||
let config = graph.write_config("omnigraph.yaml", &remote_yaml_config(&server.base_url));
|
||||
|
||||
output_success(
|
||||
cli()
|
||||
|
|
@ -762,7 +762,7 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
|
|||
.arg("feature-ingest"),
|
||||
);
|
||||
|
||||
let ingest_data = repo.write_jsonl(
|
||||
let ingest_data = graph.write_jsonl(
|
||||
"system-remote-ingest-merge.jsonl",
|
||||
r#"{"type":"Person","data":{"name":"Bob","age":26}}
|
||||
{"type":"Person","data":{"name":"Zoe","age":33}}"#,
|
||||
|
|
@ -828,23 +828,23 @@ fn remote_ingest_reuses_existing_branch_and_merges_updates() {
|
|||
#[test]
|
||||
#[ignore = "requires loopback socket permissions in sandboxed runners"]
|
||||
fn remote_policy_enforces_branch_first_cli_workflow() {
|
||||
let repo = SystemRepo::loaded();
|
||||
let graph = SystemGraph::loaded();
|
||||
let server_config =
|
||||
repo.write_config("server-policy.yaml", &remote_policy_server_config(&repo));
|
||||
repo.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
|
||||
let server = repo.spawn_server_with_config_env(
|
||||
graph.write_config("server-policy.yaml", &remote_policy_server_config(&graph));
|
||||
graph.write_config("policy.yaml", REMOTE_POLICY_E2E_YAML);
|
||||
let server = graph.spawn_server_with_config_env(
|
||||
&server_config,
|
||||
&[(
|
||||
"OMNIGRAPH_SERVER_BEARER_TOKENS_JSON",
|
||||
r#"{"act-bruno":"team-token","act-ragnor":"admin-token"}"#,
|
||||
)],
|
||||
);
|
||||
let client_config = repo.write_config(
|
||||
let client_config = graph.write_config(
|
||||
"omnigraph-policy.yaml",
|
||||
&remote_policy_client_config(&server.base_url),
|
||||
);
|
||||
repo.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
|
||||
let mutation_file = repo.write_query(
|
||||
graph.write_config(".env.omni", "POLICY_TEST_TOKEN=team-token\n");
|
||||
let mutation_file = graph.write_query(
|
||||
"system-remote-policy-change.gq",
|
||||
r#"
|
||||
query insert_person($name: String, $age: I32) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-compiler"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "Schema/query compiler for Omnigraph. Zero Lance dependency."
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ pub enum QueryLintQueryKind {
|
|||
#[serde(rename_all = "lowercase")]
|
||||
pub enum QueryLintSchemaSourceKind {
|
||||
File,
|
||||
Repo,
|
||||
Graph,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
|
|
@ -59,9 +59,9 @@ impl QueryLintSchemaSource {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn repo(uri: impl Into<String>) -> Self {
|
||||
pub fn graph(uri: impl Into<String>) -> Self {
|
||||
Self {
|
||||
kind: QueryLintSchemaSourceKind::Repo,
|
||||
kind: QueryLintSchemaSourceKind::Graph,
|
||||
path: None,
|
||||
uri: Some(uri.into()),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-policy"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "Policy / authorization layer for Omnigraph — Cedar-backed PolicyEngine, PolicyChecker trait, ResourceScope enum."
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ pub struct PolicyCompiler;
|
|||
|
||||
#[derive(Clone)]
|
||||
pub struct PolicyEngine {
|
||||
repo_id: String,
|
||||
graph_id: String,
|
||||
protected_branches: BTreeSet<String>,
|
||||
known_actors: BTreeSet<String>,
|
||||
schema: Schema,
|
||||
|
|
@ -291,7 +291,7 @@ impl PolicyTestConfig {
|
|||
}
|
||||
|
||||
impl PolicyCompiler {
|
||||
pub fn compile(config: &PolicyConfig, repo_id: &str) -> Result<PolicyEngine> {
|
||||
pub fn compile(config: &PolicyConfig, graph_id: &str) -> Result<PolicyEngine> {
|
||||
config.validate()?;
|
||||
let (schema, schema_warnings) = Schema::from_cedarschema_str(policy_schema_source())?;
|
||||
let schema_warnings = schema_warnings
|
||||
|
|
@ -300,8 +300,8 @@ impl PolicyCompiler {
|
|||
if !schema_warnings.is_empty() {
|
||||
bail!("policy schema warnings:\n{}", schema_warnings.join("\n"));
|
||||
}
|
||||
let entities = compile_entities(config, repo_id, &schema)?;
|
||||
let (policies, policy_to_rule) = compile_policies(config, repo_id)?;
|
||||
let entities = compile_entities(config, graph_id, &schema)?;
|
||||
let (policies, policy_to_rule) = compile_policies(config, graph_id)?;
|
||||
let validator = Validator::new(schema.clone());
|
||||
let validation = validator.validate(&policies, ValidationMode::Strict);
|
||||
let errors = validation
|
||||
|
|
@ -318,7 +318,7 @@ impl PolicyCompiler {
|
|||
.flat_map(|members| members.iter().cloned())
|
||||
.collect();
|
||||
Ok(PolicyEngine {
|
||||
repo_id: repo_id.to_string(),
|
||||
graph_id: graph_id.to_string(),
|
||||
protected_branches: config.protected_branches.iter().cloned().collect(),
|
||||
known_actors,
|
||||
schema,
|
||||
|
|
@ -330,9 +330,9 @@ impl PolicyCompiler {
|
|||
}
|
||||
|
||||
impl PolicyEngine {
|
||||
pub fn load(path: &Path, repo_id: &str) -> Result<Self> {
|
||||
pub fn load(path: &Path, graph_id: &str) -> Result<Self> {
|
||||
let config = PolicyConfig::load(path)?;
|
||||
PolicyCompiler::compile(&config, repo_id)
|
||||
PolicyCompiler::compile(&config, graph_id)
|
||||
}
|
||||
|
||||
pub fn authorize(&self, request: &PolicyRequest) -> Result<PolicyDecision> {
|
||||
|
|
@ -349,7 +349,7 @@ impl PolicyEngine {
|
|||
|
||||
let principal = entity_uid("Actor", &request.actor_id)?;
|
||||
let action = entity_uid("Action", request.action.as_str())?;
|
||||
let resource = entity_uid("Repo", &self.repo_id)?;
|
||||
let resource = entity_uid("Graph", &self.graph_id)?;
|
||||
let context_value = json!({
|
||||
"has_branch": request.branch.is_some(),
|
||||
"branch": request.branch.clone().unwrap_or_default(),
|
||||
|
|
@ -462,7 +462,7 @@ impl PolicyEngine {
|
|||
}
|
||||
}
|
||||
|
||||
fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Result<Entities> {
|
||||
fn compile_entities(config: &PolicyConfig, graph_id: &str, schema: &Schema) -> Result<Entities> {
|
||||
let mut group_entities = Vec::new();
|
||||
for group in config.groups.keys() {
|
||||
group_entities.push(Entity::new(
|
||||
|
|
@ -495,8 +495,8 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re
|
|||
)?);
|
||||
}
|
||||
|
||||
let repo_entity = Entity::new(
|
||||
entity_uid("Repo", repo_id)?,
|
||||
let graph_entity = Entity::new(
|
||||
entity_uid("Graph", graph_id)?,
|
||||
HashMap::new(),
|
||||
HashSet::<EntityUid>::new(),
|
||||
)?;
|
||||
|
|
@ -504,13 +504,13 @@ fn compile_entities(config: &PolicyConfig, repo_id: &str, schema: &Schema) -> Re
|
|||
let mut entities = Vec::new();
|
||||
entities.extend(group_entities);
|
||||
entities.extend(actor_entities);
|
||||
entities.push(repo_entity);
|
||||
entities.push(graph_entity);
|
||||
Ok(Entities::from_entities(entities, Some(schema))?)
|
||||
}
|
||||
|
||||
fn compile_policies(
|
||||
config: &PolicyConfig,
|
||||
repo_id: &str,
|
||||
graph_id: &str,
|
||||
) -> Result<(PolicySet, HashMap<String, String>)> {
|
||||
let mut policies = Vec::new();
|
||||
let mut policy_to_rule = HashMap::new();
|
||||
|
|
@ -518,7 +518,7 @@ fn compile_policies(
|
|||
for rule in &config.rules {
|
||||
for action in &rule.allow.actions {
|
||||
let policy_id = PolicyId::new(format!("{}:{}", rule.id, action.as_str()));
|
||||
let source = compile_policy_source(rule, action, repo_id);
|
||||
let source = compile_policy_source(rule, action, graph_id);
|
||||
let policy = Policy::parse(Some(policy_id.clone()), source.as_str())?;
|
||||
policy_to_rule.insert(policy_id.to_string(), rule.id.clone());
|
||||
policies.push(policy);
|
||||
|
|
@ -528,7 +528,7 @@ fn compile_policies(
|
|||
Ok((PolicySet::from_policies(policies)?, policy_to_rule))
|
||||
}
|
||||
|
||||
fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str) -> String {
|
||||
fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, graph_id: &str) -> String {
|
||||
let mut conditions = Vec::new();
|
||||
if let Some(scope) = rule.allow.branch_scope {
|
||||
conditions.push(branch_scope_condition(scope));
|
||||
|
|
@ -547,11 +547,11 @@ fn compile_policy_source(rule: &PolicyRule, action: &PolicyAction, repo_id: &str
|
|||
r#"permit (
|
||||
principal in Omnigraph::Group::{group},
|
||||
action == Omnigraph::Action::{action},
|
||||
resource == Omnigraph::Repo::{repo}
|
||||
resource == Omnigraph::Graph::{graph}
|
||||
){when};"#,
|
||||
group = cedar_literal(&rule.allow.actors.group),
|
||||
action = cedar_literal(action.as_str()),
|
||||
repo = cedar_literal(repo_id),
|
||||
graph = cedar_literal(graph_id),
|
||||
when = when,
|
||||
)
|
||||
}
|
||||
|
|
@ -594,16 +594,16 @@ namespace Omnigraph {
|
|||
|
||||
entity Actor in [Group];
|
||||
entity Group;
|
||||
entity Repo;
|
||||
entity Graph;
|
||||
|
||||
action "read" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "export" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "change" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "schema_apply" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "branch_create" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "branch_delete" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "branch_merge" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "admin" appliesTo { principal: Actor, resource: Repo, context: RequestContext };
|
||||
action "read" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "export" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "change" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "schema_apply" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "branch_create" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "branch_delete" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "branch_merge" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
action "admin" appliesTo { principal: Actor, resource: Graph, context: RequestContext };
|
||||
}
|
||||
"#
|
||||
}
|
||||
|
|
@ -881,7 +881,7 @@ rules:
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||
let allow = engine
|
||||
.authorize(&PolicyRequest {
|
||||
actor_id: "act-bruno".to_string(),
|
||||
|
|
@ -932,7 +932,7 @@ rules:
|
|||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||
let tests = PolicyTestConfig {
|
||||
version: 1,
|
||||
cases: vec![
|
||||
|
|
@ -976,7 +976,7 @@ rules:
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
let engine = PolicyCompiler::compile(&policy, "repo").unwrap();
|
||||
let engine = PolicyCompiler::compile(&policy, "graph").unwrap();
|
||||
let allow = engine
|
||||
.authorize(&PolicyRequest {
|
||||
actor_id: "act-ragnor".to_string(),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-server"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "HTTP server for the Omnigraph graph database."
|
||||
license = "MIT"
|
||||
|
|
@ -19,9 +19,9 @@ default = []
|
|||
aws = ["dep:aws-config", "dep:aws-sdk-secretsmanager"]
|
||||
|
||||
[dependencies]
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
|
||||
omnigraph = { package = "omnigraph-engine", path = "../omnigraph", version = "0.6.0" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
||||
axum = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
color-eyre = { workspace = true }
|
||||
|
|
@ -45,4 +45,5 @@ aws-sdk-secretsmanager = { version = "1", optional = true, default-features = fa
|
|||
tempfile = { workspace = true }
|
||||
tower = { workspace = true }
|
||||
serial_test = "3"
|
||||
lance = { workspace = true }
|
||||
lance-index = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -259,10 +259,10 @@ async fn main() {
|
|||
}
|
||||
|
||||
let temp = tempfile::tempdir().expect("tempdir");
|
||||
let repo = temp.path().join("bench.omni");
|
||||
Omnigraph::init(repo.to_str().unwrap(), SCHEMA)
|
||||
let graph = temp.path().join("bench.omni");
|
||||
Omnigraph::init(graph.to_str().unwrap(), SCHEMA)
|
||||
.await
|
||||
.expect("init repo");
|
||||
.expect("init graph");
|
||||
|
||||
// Build bearer tokens: one for the heavy actor + one per light actor.
|
||||
let mut tokens: Vec<(String, String)> =
|
||||
|
|
@ -270,21 +270,17 @@ async fn main() {
|
|||
for i in 0..args.light_actors {
|
||||
tokens.push((format!("act-light-{i}"), format!("light-token-{i}")));
|
||||
}
|
||||
let db = Omnigraph::open(repo.to_str().unwrap())
|
||||
let db = Omnigraph::open(graph.to_str().unwrap())
|
||||
.await
|
||||
.expect("open repo");
|
||||
.expect("open graph");
|
||||
// Construct a custom WorkloadController with the requested caps and
|
||||
// pass it through `AppState::new_with_workload`. Avoids the
|
||||
// `unsafe { std::env::set_var(...) }` antipattern that violates
|
||||
// `setenv`'s thread-safety precondition once the multi-thread tokio
|
||||
// runtime is up.
|
||||
let workload = WorkloadController::new(args.inflight_cap, args.byte_cap);
|
||||
let state = AppState::new_with_workload(
|
||||
repo.to_string_lossy().to_string(),
|
||||
db,
|
||||
tokens,
|
||||
workload,
|
||||
);
|
||||
let state =
|
||||
AppState::new_with_workload(graph.to_string_lossy().to_string(), db, tokens, workload);
|
||||
let app = build_app(state);
|
||||
|
||||
eprintln!(
|
||||
|
|
|
|||
|
|
@ -152,7 +152,9 @@ async fn drive_actor(
|
|||
errors += 1;
|
||||
// Drain body for logging on the first few failures.
|
||||
if errors <= 3 {
|
||||
let body = to_bytes(response.into_body(), 64 * 1024).await.unwrap_or_default();
|
||||
let body = to_bytes(response.into_body(), 64 * 1024)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
eprintln!(
|
||||
"actor {actor_idx} op {op_idx} status {status} body {}",
|
||||
String::from_utf8_lossy(&body)
|
||||
|
|
@ -173,13 +175,13 @@ async fn main() {
|
|||
}
|
||||
|
||||
let temp = tempfile::tempdir().expect("tempdir");
|
||||
let repo = temp.path().join("bench.omni");
|
||||
let graph = temp.path().join("bench.omni");
|
||||
let schema = build_schema(args.tables);
|
||||
Omnigraph::init(repo.to_str().unwrap(), &schema)
|
||||
Omnigraph::init(graph.to_str().unwrap(), &schema)
|
||||
.await
|
||||
.expect("init repo");
|
||||
.expect("init graph");
|
||||
|
||||
let state = AppState::open(repo.to_string_lossy().to_string())
|
||||
let state = AppState::open(graph.to_string_lossy().to_string())
|
||||
.await
|
||||
.expect("open AppState");
|
||||
let app = build_app(state);
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ use api::{
|
|||
IngestRequest, QueryRequest, ReadOutput, ReadRequest, SchemaApplyOutput, SchemaApplyRequest,
|
||||
SchemaOutput, SnapshotQuery, ingest_output, schema_apply_output, snapshot_payload,
|
||||
};
|
||||
pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source};
|
||||
use axum::body::{Body, Bytes};
|
||||
use axum::extract::DefaultBodyLimit;
|
||||
use axum::extract::{Extension, Path, Query, Request, State};
|
||||
|
|
@ -39,7 +40,6 @@ use omnigraph::error::{ManifestConflictDetails, ManifestErrorKind, OmniError};
|
|||
use omnigraph_compiler::json_params_to_param_map;
|
||||
use omnigraph_compiler::query::parser::parse_query;
|
||||
use omnigraph_compiler::{JsonParamMode, ParamMap};
|
||||
pub use auth::{AWS_SECRET_ENV, EnvOrFileTokenSource, TokenSource, resolve_token_source};
|
||||
pub use policy::{
|
||||
PolicyAction, PolicyCompiler, PolicyConfig, PolicyDecision, PolicyEngine, PolicyExpectation,
|
||||
PolicyRequest, PolicyTestConfig,
|
||||
|
|
@ -439,10 +439,7 @@ impl ApiError {
|
|||
}
|
||||
}
|
||||
|
||||
fn manifest_version_conflict(
|
||||
message: String,
|
||||
details: api::ManifestConflictOutput,
|
||||
) -> Self {
|
||||
fn manifest_version_conflict(message: String, details: api::ManifestConflictOutput) -> Self {
|
||||
Self {
|
||||
status: StatusCode::CONFLICT,
|
||||
code: ErrorCode::Conflict,
|
||||
|
|
@ -2112,12 +2109,12 @@ server:
|
|||
("OMNIGRAPH_UNAUTHENTICATED", None),
|
||||
]);
|
||||
let temp = tempdir().unwrap();
|
||||
// Repo path doesn't need to exist — classifier fires before
|
||||
// Graph path doesn't need to exist — classifier fires before
|
||||
// `AppState::open_with_bearer_tokens_and_policy`.
|
||||
let config = ServerConfig {
|
||||
uri: temp
|
||||
.path()
|
||||
.join("repo.omni")
|
||||
.join("graph.omni")
|
||||
.to_string_lossy()
|
||||
.into_owned(),
|
||||
bind: "127.0.0.1:0".to_string(),
|
||||
|
|
@ -2125,7 +2122,8 @@ server:
|
|||
allow_unauthenticated: false,
|
||||
};
|
||||
let result = serve(config).await;
|
||||
let err = result.expect_err("serve should refuse to start in State 1 without --unauthenticated");
|
||||
let err =
|
||||
result.expect_err("serve should refuse to start in State 1 without --unauthenticated");
|
||||
let msg = format!("{:?}", err);
|
||||
assert!(
|
||||
msg.contains("no bearer tokens") || msg.contains("policy file"),
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use omnigraph_server::{ServerConfig, init_tracing, load_server_settings, serve};
|
|||
#[command(name = "omnigraph-server")]
|
||||
#[command(about = "HTTP server for the Omnigraph graph database")]
|
||||
struct Cli {
|
||||
/// Repo URI
|
||||
/// Graph URI
|
||||
uri: Option<String>,
|
||||
#[arg(long)]
|
||||
target: Option<String>,
|
||||
|
|
|
|||
|
|
@ -19,42 +19,42 @@ fn fixture(name: &str) -> PathBuf {
|
|||
.join(name)
|
||||
}
|
||||
|
||||
fn repo_path(root: &Path) -> PathBuf {
|
||||
fn graph_path(root: &Path) -> PathBuf {
|
||||
root.join("openapi_test.omni")
|
||||
}
|
||||
|
||||
async fn init_loaded_repo() -> tempfile::TempDir {
|
||||
async fn init_loaded_graph() -> tempfile::TempDir {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let repo = repo_path(temp.path());
|
||||
fs::create_dir_all(&repo).unwrap();
|
||||
let graph = graph_path(temp.path());
|
||||
fs::create_dir_all(&graph).unwrap();
|
||||
let schema = fs::read_to_string(fixture("test.pg")).unwrap();
|
||||
let data = fs::read_to_string(fixture("test.jsonl")).unwrap();
|
||||
Omnigraph::init(repo.to_str().unwrap(), &schema)
|
||||
Omnigraph::init(graph.to_str().unwrap(), &schema)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
|
||||
let mut db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
load_jsonl(&mut db, &data, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
temp
|
||||
}
|
||||
|
||||
async fn app_for_loaded_repo() -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_repo().await;
|
||||
let repo = repo_path(temp.path());
|
||||
let state = AppState::open(repo.to_string_lossy().to_string())
|
||||
async fn app_for_loaded_graph() -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let state = AppState::open(graph.to_string_lossy().to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
let app = build_app(state);
|
||||
(temp, app)
|
||||
}
|
||||
|
||||
async fn app_for_loaded_repo_with_auth(token: &str) -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_repo().await;
|
||||
let repo = repo_path(temp.path());
|
||||
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
|
||||
async fn app_for_loaded_graph_with_auth(token: &str) -> (tempfile::TempDir, Router) {
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
let state = AppState::new_with_bearer_token(
|
||||
repo.to_string_lossy().to_string(),
|
||||
graph.to_string_lossy().to_string(),
|
||||
db,
|
||||
Some(token.to_string()),
|
||||
);
|
||||
|
|
@ -84,7 +84,7 @@ fn openapi_json() -> Value {
|
|||
|
||||
#[tokio::test]
|
||||
async fn openapi_endpoint_returns_200_with_valid_json() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -97,7 +97,7 @@ async fn openapi_endpoint_returns_200_with_valid_json() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn openapi_endpoint_returns_openapi_31_version() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -113,11 +113,11 @@ async fn openapi_endpoint_returns_openapi_31_version() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn openapi_endpoint_does_not_require_auth() {
|
||||
let temp = init_loaded_repo().await;
|
||||
let repo = repo_path(temp.path());
|
||||
let db = Omnigraph::open(repo.to_str().unwrap()).await.unwrap();
|
||||
let temp = init_loaded_graph().await;
|
||||
let graph = graph_path(temp.path());
|
||||
let db = Omnigraph::open(graph.to_str().unwrap()).await.unwrap();
|
||||
let state = AppState::new_with_bearer_token(
|
||||
repo.to_string_lossy().to_string(),
|
||||
graph.to_string_lossy().to_string(),
|
||||
db,
|
||||
Some("secret-token".to_string()),
|
||||
);
|
||||
|
|
@ -129,7 +129,11 @@ async fn openapi_endpoint_does_not_require_auth() {
|
|||
.body(Body::empty())
|
||||
.unwrap();
|
||||
let (status, _) = json_response(&app, request).await;
|
||||
assert_eq!(status, StatusCode::OK, "/openapi.json should not require auth");
|
||||
assert_eq!(
|
||||
status,
|
||||
StatusCode::OK,
|
||||
"/openapi.json should not require auth"
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -739,10 +743,13 @@ fn branch_delete_has_branch_path_parameter() {
|
|||
let params = doc["paths"]["/branches/{branch}"]["delete"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path")
|
||||
});
|
||||
assert!(has_branch, "DELETE /branches/{{branch}} must have 'branch' path parameter");
|
||||
let has_branch = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("path"));
|
||||
assert!(
|
||||
has_branch,
|
||||
"DELETE /branches/{{branch}} must have 'branch' path parameter"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -751,10 +758,13 @@ fn commit_show_has_commit_id_path_parameter() {
|
|||
let params = doc["paths"]["/commits/{commit_id}"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_commit_id = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path")
|
||||
});
|
||||
assert!(has_commit_id, "GET /commits/{{commit_id}} must have 'commit_id' path parameter");
|
||||
let has_commit_id = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("commit_id") && p["in"].as_str() == Some("path"));
|
||||
assert!(
|
||||
has_commit_id,
|
||||
"GET /commits/{{commit_id}} must have 'commit_id' path parameter"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -763,10 +773,13 @@ fn snapshot_has_branch_query_parameter() {
|
|||
let params = doc["paths"]["/snapshot"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
|
||||
});
|
||||
assert!(has_branch, "GET /snapshot must have 'branch' query parameter");
|
||||
let has_branch = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
|
||||
assert!(
|
||||
has_branch,
|
||||
"GET /snapshot must have 'branch' query parameter"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -775,10 +788,13 @@ fn commits_has_branch_query_parameter() {
|
|||
let params = doc["paths"]["/commits"]["get"]["parameters"]
|
||||
.as_array()
|
||||
.unwrap();
|
||||
let has_branch = params.iter().any(|p| {
|
||||
p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query")
|
||||
});
|
||||
assert!(has_branch, "GET /commits must have 'branch' query parameter");
|
||||
let has_branch = params
|
||||
.iter()
|
||||
.any(|p| p["name"].as_str() == Some("branch") && p["in"].as_str() == Some("query"));
|
||||
assert!(
|
||||
has_branch,
|
||||
"GET /commits must have 'branch' query parameter"
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -854,8 +870,7 @@ fn error_responses_reference_error_output_schema() {
|
|||
];
|
||||
|
||||
for (path, method, status) in paths_with_errors {
|
||||
let content =
|
||||
&doc["paths"][path][method]["responses"][status]["content"];
|
||||
let content = &doc["paths"][path][method]["responses"][status]["content"];
|
||||
let schema = &content["application/json"]["schema"];
|
||||
let ref_path = schema["$ref"].as_str().unwrap();
|
||||
assert!(
|
||||
|
|
@ -917,7 +932,7 @@ fn openapi_spec_round_trips_through_json() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn open_mode_spec_has_no_security_schemes() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -933,7 +948,7 @@ async fn open_mode_spec_has_no_security_schemes() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn open_mode_spec_has_no_operation_security() {
|
||||
let (_temp, app) = app_for_loaded_repo().await;
|
||||
let (_temp, app) = app_for_loaded_graph().await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -954,7 +969,7 @@ async fn open_mode_spec_has_no_operation_security() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_spec_includes_bearer_token_security_scheme() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -968,7 +983,7 @@ async fn auth_mode_spec_includes_bearer_token_security_scheme() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_spec_has_security_on_protected_operations() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -999,7 +1014,7 @@ async fn auth_mode_spec_has_security_on_protected_operations() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_spec_matches_static_generation() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -1015,7 +1030,7 @@ async fn auth_mode_spec_matches_static_generation() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn auth_mode_healthz_still_has_no_security() {
|
||||
let (_temp, app) = app_for_loaded_repo_with_auth("secret").await;
|
||||
let (_temp, app) = app_for_loaded_graph_with_auth("secret").await;
|
||||
let request = Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri("/openapi.json")
|
||||
|
|
@ -1031,8 +1046,7 @@ async fn auth_mode_healthz_still_has_no_security() {
|
|||
|
||||
#[test]
|
||||
fn openapi_spec_is_up_to_date() {
|
||||
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("../../openapi.json");
|
||||
let spec_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../openapi.json");
|
||||
|
||||
let generated = serde_json::to_string_pretty(&openapi_doc()).unwrap() + "\n";
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "omnigraph-engine"
|
||||
version = "0.4.2"
|
||||
version = "0.6.0"
|
||||
edition = "2024"
|
||||
description = "Runtime engine for the Omnigraph graph database."
|
||||
license = "MIT"
|
||||
|
|
@ -16,8 +16,8 @@ default = []
|
|||
failpoints = ["dep:fail", "fail/failpoints"]
|
||||
|
||||
[dependencies]
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
omnigraph-policy = { path = "../omnigraph-policy", version = "0.6.0" }
|
||||
lance = { workspace = true }
|
||||
lance-datafusion = { workspace = true }
|
||||
datafusion = { workspace = true }
|
||||
|
|
@ -51,7 +51,7 @@ chrono = { workspace = true }
|
|||
arc-swap = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.4.2" }
|
||||
omnigraph-compiler = { path = "../omnigraph-compiler", version = "0.6.0" }
|
||||
tokio = { workspace = true }
|
||||
lance-namespace-impls = { workspace = true }
|
||||
serial_test = "3"
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ use lance::Dataset;
|
|||
use lance_namespace::models::CreateTableVersionRequest;
|
||||
use omnigraph_compiler::catalog::Catalog;
|
||||
|
||||
#[path = "manifest/graph.rs"]
|
||||
mod graph;
|
||||
#[path = "manifest/layout.rs"]
|
||||
mod layout;
|
||||
#[path = "manifest/metadata.rs"]
|
||||
|
|
@ -18,11 +20,10 @@ mod namespace;
|
|||
mod publisher;
|
||||
#[path = "manifest/recovery.rs"]
|
||||
mod recovery;
|
||||
#[path = "manifest/repo.rs"]
|
||||
mod repo;
|
||||
#[path = "manifest/state.rs"]
|
||||
mod state;
|
||||
|
||||
use graph::{init_manifest_graph, open_manifest_graph, snapshot_state_at};
|
||||
use layout::{manifest_uri, open_manifest_dataset, type_name_hash};
|
||||
pub(crate) use metadata::TableVersionMetadata;
|
||||
#[cfg(test)]
|
||||
|
|
@ -33,11 +34,10 @@ pub(crate) use namespace::open_table_head_for_write;
|
|||
use namespace::{branch_manifest_namespace, staged_table_namespace};
|
||||
use publisher::{GraphNamespacePublisher, ManifestBatchPublisher};
|
||||
pub(crate) use recovery::{
|
||||
delete_sidecar, has_schema_apply_sidecar, new_sidecar, recover_manifest_drift, write_sidecar,
|
||||
RecoveryMode, RecoverySidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
||||
SidecarTableRegistration, SidecarTombstone,
|
||||
SidecarTableRegistration, SidecarTombstone, delete_sidecar, has_schema_apply_sidecar,
|
||||
new_sidecar, recover_manifest_drift, write_sidecar,
|
||||
};
|
||||
use repo::{init_manifest_repo, open_manifest_repo, snapshot_state_at};
|
||||
pub use state::SubTableEntry;
|
||||
#[cfg(test)]
|
||||
use state::string_column;
|
||||
|
|
@ -215,12 +215,12 @@ impl ManifestCoordinator {
|
|||
self
|
||||
}
|
||||
|
||||
/// Create a new repo at `root_uri` from a catalog.
|
||||
/// Create a new graph at `root_uri` from a catalog.
|
||||
///
|
||||
/// Creates per-type Lance datasets and the namespace `__manifest` table.
|
||||
pub async fn init(root_uri: &str, catalog: &Catalog) -> Result<Self> {
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let (dataset, known_state) = init_manifest_repo(root, catalog).await?;
|
||||
let (dataset, known_state) = init_manifest_graph(root, catalog).await?;
|
||||
|
||||
Ok(Self::from_parts_with_default_publisher(
|
||||
root,
|
||||
|
|
@ -230,10 +230,10 @@ impl ManifestCoordinator {
|
|||
))
|
||||
}
|
||||
|
||||
/// Open an existing repo's manifest.
|
||||
/// Open an existing graph's manifest.
|
||||
pub async fn open(root_uri: &str) -> Result<Self> {
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let (dataset, known_state) = open_manifest_repo(root, None).await?;
|
||||
let (dataset, known_state) = open_manifest_graph(root, None).await?;
|
||||
Ok(Self::from_parts_with_default_publisher(
|
||||
root,
|
||||
dataset,
|
||||
|
|
@ -242,14 +242,14 @@ impl ManifestCoordinator {
|
|||
))
|
||||
}
|
||||
|
||||
/// Open an existing repo's manifest at a specific branch.
|
||||
/// Open an existing graph's manifest at a specific branch.
|
||||
pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result<Self> {
|
||||
if branch == "main" {
|
||||
return Self::open(root_uri).await;
|
||||
}
|
||||
|
||||
let root = root_uri.trim_end_matches('/');
|
||||
let (dataset, known_state) = open_manifest_repo(root, Some(branch)).await?;
|
||||
let (dataset, known_state) = open_manifest_graph(root, Some(branch)).await?;
|
||||
Ok(Self::from_parts_with_default_publisher(
|
||||
root,
|
||||
dataset,
|
||||
|
|
@ -410,7 +410,7 @@ impl ManifestCoordinator {
|
|||
Ok(descendants)
|
||||
}
|
||||
|
||||
/// Root URI of the repo.
|
||||
/// Root URI of the graph.
|
||||
pub fn root_uri(&self) -> &str {
|
||||
&self.root_uri
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ use super::state::{
|
|||
ManifestState, SubTableEntry, entries_to_batch, manifest_schema, read_manifest_state,
|
||||
};
|
||||
|
||||
pub(super) async fn init_manifest_repo(
|
||||
pub(super) async fn init_manifest_graph(
|
||||
root_uri: &str,
|
||||
catalog: &Catalog,
|
||||
) -> Result<(Dataset, ManifestState)> {
|
||||
|
|
@ -47,7 +47,7 @@ pub(super) async fn init_manifest_repo(
|
|||
Ok((dataset, known_state))
|
||||
}
|
||||
|
||||
pub(super) async fn open_manifest_repo(
|
||||
pub(super) async fn open_manifest_graph(
|
||||
root_uri: &str,
|
||||
branch: Option<&str>,
|
||||
) -> Result<(Dataset, ManifestState)> {
|
||||
|
|
@ -24,8 +24,8 @@
|
|||
//! Only on open-for-write paths (the publisher's `load_publish_state`).
|
||||
//! Reads are side-effect-free by contract; an old-shape `__manifest` reads
|
||||
//! fine, it just lacks the protections introduced by later versions.
|
||||
//! `init_manifest_repo` stamps the current version at creation, so newly
|
||||
//! initialized repos never need migration.
|
||||
//! `init_manifest_graph` stamps the current version at creation, so newly
|
||||
//! initialized graphs never need migration.
|
||||
//!
|
||||
//! ## Forward-version protection
|
||||
//!
|
||||
|
|
@ -78,7 +78,7 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
|
|||
if current > INTERNAL_MANIFEST_SCHEMA_VERSION {
|
||||
return Err(OmniError::manifest(format!(
|
||||
"__manifest is stamped at internal schema v{} but this binary expects v{} \
|
||||
— upgrade omnigraph before opening this repo for writes",
|
||||
— upgrade omnigraph before opening this graph for writes",
|
||||
current, INTERNAL_MANIFEST_SCHEMA_VERSION,
|
||||
)));
|
||||
}
|
||||
|
|
@ -112,7 +112,10 @@ pub(super) async fn migrate_internal_schema(dataset: &mut Dataset) -> Result<()>
|
|||
async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
|
||||
dataset
|
||||
.update_field_metadata()
|
||||
.update("object_id", [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())])
|
||||
.update(
|
||||
"object_id",
|
||||
[(OBJECT_ID_PK_KEY.to_string(), "true".to_string())],
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
|
|
@ -121,10 +124,7 @@ async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> {
|
|||
|
||||
async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> {
|
||||
dataset
|
||||
.update_schema_metadata([(
|
||||
INTERNAL_SCHEMA_VERSION_KEY.to_string(),
|
||||
version.to_string(),
|
||||
)])
|
||||
.update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())])
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -230,6 +230,11 @@ impl LanceNamespace for BranchManifestNamespace {
|
|||
metadata: None,
|
||||
properties: None,
|
||||
managed_versioning: Some(true),
|
||||
// Every table we return from describe_table is physically
|
||||
// materialized (open_manifest_dataset succeeds), never just
|
||||
// "declared." See lance-namespace 6.0.1 DescribeTableResponse
|
||||
// field docs.
|
||||
is_only_declared: Some(false),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -373,6 +378,11 @@ impl LanceNamespace for StagedTableNamespace {
|
|||
metadata: None,
|
||||
properties: None,
|
||||
managed_versioning: Some(true),
|
||||
// Every table we return from describe_table is physically
|
||||
// materialized (open_manifest_dataset succeeds), never just
|
||||
// "declared." See lance-namespace 6.0.1 DescribeTableResponse
|
||||
// field docs.
|
||||
is_only_declared: Some(false),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ use super::{ManifestChange, SubTableUpdate, TableRegistration, TableTombstone};
|
|||
/// into the audit row's `recovery_for_actor` field.
|
||||
pub(crate) const RECOVERY_ACTOR: &str = "omnigraph:recovery";
|
||||
|
||||
/// Subdirectory under the repo root holding sidecar files.
|
||||
/// Subdirectory under the graph root holding sidecar files.
|
||||
pub(crate) const RECOVERY_DIR_NAME: &str = "__recovery";
|
||||
|
||||
/// Current sidecar JSON shape version. Bumping this is a breaking change:
|
||||
|
|
@ -142,7 +142,7 @@ pub(crate) struct SidecarTablePin {
|
|||
pub(crate) struct SidecarTableRegistration {
|
||||
/// Stable identifier (`node:Tag`, `edge:WorksAt`, etc.).
|
||||
pub table_key: String,
|
||||
/// Repo-relative path the manifest will register
|
||||
/// Graph-relative path the manifest will register
|
||||
/// (e.g. `nodes/{fnv1a64-hex}`); recovery joins this with `root_uri`
|
||||
/// to open the dataset Lance HEAD when constructing the
|
||||
/// accompanying `Update`.
|
||||
|
|
@ -295,7 +295,7 @@ pub(crate) enum SidecarDecision {
|
|||
Abort,
|
||||
}
|
||||
|
||||
/// Build the `__recovery/` directory URI under a repo root.
|
||||
/// Build the `__recovery/` directory URI under a graph root.
|
||||
pub(crate) fn recovery_dir_uri(root_uri: &str) -> String {
|
||||
let trimmed = root_uri.trim_end_matches('/');
|
||||
format!("{}/{}", trimmed, RECOVERY_DIR_NAME)
|
||||
|
|
@ -1122,7 +1122,7 @@ async fn record_audit(
|
|||
/// the rename so the recovery sweep's roll-forward step sees the new
|
||||
/// catalog. Without this, the disambiguation logic deletes the staging
|
||||
/// files (since manifest still pins the old table set) and leaves the
|
||||
/// repo with new-schema data on disk but the old `_schema.pg` live —
|
||||
/// graph with new-schema data on disk but the old `_schema.pg` live —
|
||||
/// real corruption.
|
||||
pub(crate) async fn has_schema_apply_sidecar(
|
||||
root_uri: &str,
|
||||
|
|
|
|||
|
|
@ -1393,7 +1393,10 @@ async fn test_concurrent_publish_with_overlapping_expected_versions_one_succeeds
|
|||
// version (no duplicate version rows).
|
||||
let mc = ManifestCoordinator::open(uri).await.unwrap();
|
||||
let entry = mc.snapshot().entry("node:Person").unwrap().clone();
|
||||
assert!(entry.table_version > 1, "Person should have advanced past v=1");
|
||||
assert!(
|
||||
entry.table_version > 1,
|
||||
"Person should have advanced past v=1"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -1418,7 +1421,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
|
|||
let catalog = build_test_catalog();
|
||||
let mc = ManifestCoordinator::init(uri, &catalog).await.unwrap();
|
||||
|
||||
// Simulate a v1 (pre-stamp) repo by removing the schema-level stamp on disk.
|
||||
// Simulate a v1 (pre-stamp) graph by removing the schema-level stamp on disk.
|
||||
{
|
||||
let mut ds = open_manifest_dataset(uri, None).await.unwrap();
|
||||
ds.update_schema_metadata([(
|
||||
|
|
@ -1449,7 +1452,7 @@ async fn test_publish_migrates_pre_stamp_manifest_to_current_version() {
|
|||
assert_eq!(
|
||||
super::migrations::read_stamp(&post),
|
||||
super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION,
|
||||
"publish on a v1 repo should leave the manifest stamped at the current version",
|
||||
"publish on a v1 graph should leave the manifest stamped at the current version",
|
||||
);
|
||||
|
||||
// Manifest should still serve correctly post-migration.
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ pub enum OpenMode {
|
|||
}
|
||||
|
||||
impl Omnigraph {
|
||||
/// Create a new repo at `uri` from schema source.
|
||||
/// Create a new graph at `uri` from schema source.
|
||||
///
|
||||
/// Creates `_schema.pg`, per-type Lance datasets, and `__manifest`.
|
||||
pub async fn init(uri: &str, schema_source: &str) -> Result<Self> {
|
||||
|
|
@ -205,7 +205,7 @@ impl Omnigraph {
|
|||
})
|
||||
}
|
||||
|
||||
/// Open an existing repo (read-write).
|
||||
/// Open an existing graph (read-write).
|
||||
///
|
||||
/// Reads `_schema.pg`, parses it, builds the catalog, and opens `__manifest`.
|
||||
/// Runs the open-time recovery sweep before returning — see [`OpenMode`].
|
||||
|
|
@ -213,7 +213,7 @@ impl Omnigraph {
|
|||
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadWrite).await
|
||||
}
|
||||
|
||||
/// Open an existing repo for read-only consumers (NDJSON export,
|
||||
/// Open an existing graph for read-only consumers (NDJSON export,
|
||||
/// `commit list`, etc.). Skips the recovery sweep — see [`OpenMode`].
|
||||
pub async fn open_read_only(uri: &str) -> Result<Self> {
|
||||
Self::open_with_storage_and_mode(uri, storage_for_uri(uri)?, OpenMode::ReadOnly).await
|
||||
|
|
@ -397,7 +397,8 @@ impl Omnigraph {
|
|||
desired_schema_source: &str,
|
||||
options: SchemaApplyOptions,
|
||||
) -> Result<SchemaApplyResult> {
|
||||
self.apply_schema_as(desired_schema_source, options, None).await
|
||||
self.apply_schema_as(desired_schema_source, options, None)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Apply a schema migration with an explicit actor for engine-layer
|
||||
|
|
@ -470,7 +471,7 @@ impl Omnigraph {
|
|||
Arc::clone(&self.merge_exclusive)
|
||||
}
|
||||
|
||||
/// Engine-level access to the repo's normalized root URI. Used by
|
||||
/// Engine-level access to the graph's normalized root URI. Used by
|
||||
/// the recovery sidecar protocol to compute `__recovery/` paths.
|
||||
pub(crate) fn root_uri(&self) -> &str {
|
||||
&self.root_uri
|
||||
|
|
@ -510,9 +511,10 @@ impl Omnigraph {
|
|||
let normalized = normalize_branch_name(branch.unwrap_or("main"))?;
|
||||
let coord = self.coordinator.read().await;
|
||||
if normalized.as_deref() == coord.current_branch() {
|
||||
let snapshot_id = coord.head_commit_id().await?.unwrap_or_else(|| {
|
||||
SnapshotId::synthetic(coord.current_branch(), coord.version())
|
||||
});
|
||||
let snapshot_id = coord
|
||||
.head_commit_id()
|
||||
.await?
|
||||
.unwrap_or_else(|| SnapshotId::synthetic(coord.current_branch(), coord.version()));
|
||||
return Ok(ResolvedTarget {
|
||||
requested,
|
||||
branch: coord.current_branch().map(str::to_string),
|
||||
|
|
@ -587,7 +589,7 @@ impl Omnigraph {
|
|||
/// exist. Required BEFORE manifest-drift recovery so a
|
||||
/// SchemaApply roll-forward doesn't publish the manifest while
|
||||
/// the staging files remain unrenamed (which would corrupt the
|
||||
/// repo: data on new schema, catalog on old).
|
||||
/// graph: data on new schema, catalog on old).
|
||||
/// 3. `recover_manifest_drift(... RollForwardOnly)` — close the
|
||||
/// finalize→publisher residual via roll-forward; defer rollback
|
||||
/// work to next ReadWrite open.
|
||||
|
|
@ -668,7 +670,11 @@ impl Omnigraph {
|
|||
|
||||
pub async fn resolve_snapshot(&self, branch: &str) -> Result<SnapshotId> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await.resolve_snapshot_id(branch).await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.resolve_snapshot_id(branch)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn resolved_target(
|
||||
|
|
@ -676,7 +682,11 @@ impl Omnigraph {
|
|||
target: impl Into<ReadTarget>,
|
||||
) -> Result<ResolvedTarget> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await.resolve_target(&target.into()).await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.resolve_target(&target.into())
|
||||
.await
|
||||
}
|
||||
|
||||
// ─── Change detection ────────────────────────────────────────────────
|
||||
|
|
@ -708,7 +718,9 @@ impl Omnigraph {
|
|||
filter: &crate::changes::ChangeFilter,
|
||||
) -> Result<crate::changes::ChangeSet> {
|
||||
let coord = self.coordinator.read().await;
|
||||
let from_commit = coord.resolve_commit(&SnapshotId::new(from_commit_id)).await?;
|
||||
let from_commit = coord
|
||||
.resolve_commit(&SnapshotId::new(from_commit_id))
|
||||
.await?;
|
||||
let to_commit = coord.resolve_commit(&SnapshotId::new(to_commit_id)).await?;
|
||||
let from_snap = coord
|
||||
.resolve_target(&ReadTarget::Snapshot(SnapshotId::new(
|
||||
|
|
@ -753,7 +765,11 @@ impl Omnigraph {
|
|||
/// Create a Snapshot at any historical manifest version.
|
||||
pub async fn snapshot_at_version(&self, version: u64) -> Result<Snapshot> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await.snapshot_at_version(version).await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.snapshot_at_version(version)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn export_jsonl(
|
||||
|
|
@ -894,11 +910,20 @@ impl Omnigraph {
|
|||
}
|
||||
|
||||
pub(crate) async fn active_branch(&self) -> Option<String> {
|
||||
self.coordinator.read().await.current_branch().map(str::to_string)
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.current_branch()
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
async fn ensure_branch_delete_safe(&self, branch: &str, branches: &[String]) -> Result<()> {
|
||||
let descendants = self.coordinator.read().await.branch_descendants(branch).await?;
|
||||
let descendants = self
|
||||
.coordinator
|
||||
.read()
|
||||
.await
|
||||
.branch_descendants(branch)
|
||||
.await?;
|
||||
if let Some(descendant) = descendants.first() {
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"cannot delete branch '{}' because descendant branch '{}' still depends on it",
|
||||
|
|
@ -954,7 +979,12 @@ impl Omnigraph {
|
|||
}
|
||||
|
||||
async fn delete_branch_storage_only(&self, branch: &str) -> Result<()> {
|
||||
let active = self.coordinator.read().await.current_branch().map(str::to_string);
|
||||
let active = self
|
||||
.coordinator
|
||||
.read()
|
||||
.await
|
||||
.current_branch()
|
||||
.map(str::to_string);
|
||||
if active.as_deref() == Some(branch) {
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"cannot delete currently active branch '{}'",
|
||||
|
|
@ -1013,11 +1043,7 @@ impl Omnigraph {
|
|||
self.coordinator.write().await.branch_create(name).await
|
||||
}
|
||||
|
||||
pub async fn branch_create_from(
|
||||
&self,
|
||||
from: impl Into<ReadTarget>,
|
||||
name: &str,
|
||||
) -> Result<()> {
|
||||
pub async fn branch_create_from(&self, from: impl Into<ReadTarget>, name: &str) -> Result<()> {
|
||||
self.branch_create_from_as(from, name, None).await
|
||||
}
|
||||
|
||||
|
|
@ -1134,7 +1160,9 @@ impl Omnigraph {
|
|||
|
||||
pub async fn get_commit(&self, commit_id: &str) -> Result<GraphCommit> {
|
||||
self.ensure_schema_state_valid().await?;
|
||||
self.coordinator.read().await
|
||||
self.coordinator
|
||||
.read()
|
||||
.await
|
||||
.resolve_commit(&SnapshotId::new(commit_id))
|
||||
.await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
//! Lance compaction + version cleanup exposed at the graph level.
|
||||
//!
|
||||
//! Lance accumulates many small `.lance` fragment files per table over the
|
||||
//! life of a repo: each `write`, `load`, and `change` op appends one or more
|
||||
//! life of a graph: each `write`, `load`, and `change` op appends one or more
|
||||
//! fragments and a new manifest. Over long timescales this hurts open times
|
||||
//! and S3 object counts without improving anything.
|
||||
//!
|
||||
|
|
@ -176,10 +176,9 @@ pub async fn cleanup_all_tables(
|
|||
clean_referenced_branches: false,
|
||||
delete_rate_limit: None,
|
||||
};
|
||||
let removed: RemovalStats =
|
||||
lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let removed: RemovalStats = lance::dataset::cleanup::cleanup_old_versions(&ds, policy)
|
||||
.await
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
Ok(TableCleanupStats {
|
||||
table_key,
|
||||
bytes_removed: removed.bytes_removed,
|
||||
|
|
@ -198,12 +197,7 @@ fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec<String>
|
|||
.node_types
|
||||
.keys()
|
||||
.map(|n| format!("node:{}", n))
|
||||
.chain(
|
||||
catalog
|
||||
.edge_types
|
||||
.keys()
|
||||
.map(|n| format!("edge:{}", n)),
|
||||
)
|
||||
.chain(catalog.edge_types.keys().map(|n| format!("edge:{}", n)))
|
||||
.collect();
|
||||
keys.sort();
|
||||
keys
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ pub(super) async fn apply_schema_with_lock(
|
|||
// Skip `main` and internal system branches. The schema-apply lock branch
|
||||
// is excluded because it is the cluster-wide schema-apply serializer.
|
||||
// `__run__*` branches are no longer created; the filter remains as
|
||||
// defense-in-depth for legacy repos with leftover staging branches.
|
||||
// defense-in-depth for legacy graphs with leftover staging branches.
|
||||
// A future production sweep will let this guard go.
|
||||
let blocking_branches = branches
|
||||
.into_iter()
|
||||
|
|
@ -105,7 +105,7 @@ pub(super) async fn apply_schema_with_lock(
|
|||
.collect::<Vec<_>>();
|
||||
if !blocking_branches.is_empty() {
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"schema apply requires a repo with only main; found non-main branches: {}",
|
||||
"schema apply requires a graph with only main; found non-main branches: {}",
|
||||
blocking_branches.join(", ")
|
||||
)));
|
||||
}
|
||||
|
|
@ -780,7 +780,7 @@ pub(super) async fn acquire_schema_apply_lock(db: &Omnigraph) -> Result<()> {
|
|||
if !blocking_branches.is_empty() {
|
||||
let _ = release_schema_apply_lock(db).await;
|
||||
return Err(OmniError::manifest_conflict(format!(
|
||||
"schema apply requires a repo with only main; found non-main branches: {}",
|
||||
"schema apply requires a graph with only main; found non-main branches: {}",
|
||||
blocking_branches.join(", ")
|
||||
)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ pub(crate) struct RecoveryAudit {
|
|||
}
|
||||
|
||||
impl RecoveryAudit {
|
||||
/// Open the recovery-audit dataset for the repo, or return a handle
|
||||
/// Open the recovery-audit dataset for the graph, or return a handle
|
||||
/// with no dataset yet (created on first append). Mirrors the
|
||||
/// optional-dataset pattern from `_graph_commit_actors.lance`.
|
||||
pub(crate) async fn open(root_uri: &str) -> Result<Self> {
|
||||
|
|
@ -205,9 +205,7 @@ fn recovery_record_to_batch(record: &RecoveryAuditRecord) -> Result<RecordBatch>
|
|||
vec![
|
||||
Arc::new(StringArray::from(vec![record.graph_commit_id.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.recovery_kind.as_str()])),
|
||||
Arc::new(StringArray::from(vec![record
|
||||
.recovery_for_actor
|
||||
.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.recovery_for_actor.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.operation_id.clone()])),
|
||||
Arc::new(StringArray::from(vec![record.sidecar_writer_kind.clone()])),
|
||||
Arc::new(StringArray::from(vec![outcomes_json])),
|
||||
|
|
@ -221,10 +219,14 @@ fn decode_row(batch: &RecordBatch, row: usize) -> Result<RecoveryAuditRecord> {
|
|||
let str_col = |name: &str| -> Result<&StringArray> {
|
||||
batch
|
||||
.column_by_name(name)
|
||||
.ok_or_else(|| OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name)))?
|
||||
.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!("missing column '{}' in recovery audit", name))
|
||||
})?
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.ok_or_else(|| OmniError::manifest_internal(format!("column '{}' has wrong type", name)))
|
||||
.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!("column '{}' has wrong type", name))
|
||||
})
|
||||
};
|
||||
let ts_col = batch
|
||||
.column_by_name("created_at")
|
||||
|
|
@ -269,9 +271,7 @@ pub(crate) fn now_micros() -> Result<i64> {
|
|||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_micros() as i64)
|
||||
.map_err(|e| {
|
||||
OmniError::manifest_internal(format!("system clock before unix epoch: {}", e))
|
||||
})
|
||||
.map_err(|e| OmniError::manifest_internal(format!("system clock before unix epoch: {}", e)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -307,7 +307,7 @@ mod tests {
|
|||
let root = dir.path().to_str().unwrap();
|
||||
|
||||
let mut audit = RecoveryAudit::open(root).await.unwrap();
|
||||
// Empty repo: list returns empty.
|
||||
// Empty graph: list returns empty.
|
||||
assert!(audit.list().await.unwrap().is_empty());
|
||||
|
||||
// Append + list.
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
|
|||
.collect::<Vec<_>>();
|
||||
if !public_non_main.is_empty() {
|
||||
return Err(schema_lock_conflict(format!(
|
||||
"repo is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
|
||||
"graph is missing persisted schema state and has public branches ({}); public branches block schema evolution entirely",
|
||||
public_non_main.join(", ")
|
||||
)));
|
||||
}
|
||||
|
|
@ -70,7 +70,7 @@ pub(crate) async fn load_or_bootstrap_schema_contract(
|
|||
Ok((current_source_ir.clone(), state))
|
||||
}
|
||||
SchemaContractRead::PartialMissing => Err(schema_lock_conflict(
|
||||
"repo schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
|
||||
"graph schema state is incomplete (_schema.ir.json and __schema_state.json must either both exist or both be absent)",
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
|
@ -84,7 +84,7 @@ pub(crate) async fn validate_schema_contract(
|
|||
SchemaContractRead::Present { ir, state } => (ir, state),
|
||||
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
|
||||
return Err(schema_lock_conflict(
|
||||
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
));
|
||||
}
|
||||
};
|
||||
|
|
@ -163,7 +163,7 @@ pub(crate) async fn read_accepted_schema_ir(
|
|||
}
|
||||
SchemaContractRead::MissingAll | SchemaContractRead::PartialMissing => {
|
||||
Err(schema_lock_conflict(
|
||||
"repo is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
"graph is missing persisted schema state; manual coordination is required before schema changes are allowed",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
|
@ -221,7 +221,7 @@ async fn read_schema_contract(
|
|||
})?;
|
||||
let state = serde_json::from_str::<SchemaState>(&state_json).map_err(|err| {
|
||||
schema_lock_conflict(format!(
|
||||
"repo schema state in {} is invalid: {}",
|
||||
"graph schema state in {} is invalid: {}",
|
||||
SCHEMA_STATE_FILENAME, err
|
||||
))
|
||||
})?;
|
||||
|
|
@ -234,7 +234,7 @@ async fn read_schema_contract(
|
|||
fn validate_persisted_schema_contract(ir: &SchemaIR, state: &SchemaState) -> Result<()> {
|
||||
if state.format_version != SCHEMA_STATE_FORMAT_VERSION {
|
||||
return Err(schema_lock_conflict(format!(
|
||||
"repo schema state format {} is unsupported",
|
||||
"graph schema state format {} is unsupported",
|
||||
state.format_version
|
||||
)));
|
||||
}
|
||||
|
|
@ -344,7 +344,7 @@ pub(crate) async fn recover_schema_state_files(
|
|||
// to the new Lance HEADs; we MUST also rename the staging files
|
||||
// forward so the catalog matches. Without this, the disambiguation
|
||||
// logic below sees actual_keys == live_keys (manifest didn't move)
|
||||
// and deletes the staging files, leaving the repo with new-schema
|
||||
// and deletes the staging files, leaving the graph with new-schema
|
||||
// data on disk but the old `_schema.pg` live — corruption.
|
||||
if crate::db::manifest::has_schema_apply_sidecar(root_uri, storage.as_ref()).await? {
|
||||
warn!(
|
||||
|
|
|
|||
|
|
@ -1037,8 +1037,16 @@ async fn execute_node_scan(
|
|||
let table_key = format!("node:{}", type_name);
|
||||
let ds = snapshot.open(&table_key).await?;
|
||||
|
||||
// Build Lance SQL filter string from non-search IR filters
|
||||
let filter_sql = build_lance_filter(filters, params);
|
||||
// Lower the IR filters to a DataFusion `Expr` and apply via
|
||||
// `Scanner::filter_expr` inside the configure closure. The string
|
||||
// pushdown path (`build_lance_filter` → `scanner.filter(&str)`) is
|
||||
// gone for node scans — structured Expr unlocks `CompOp::Contains`
|
||||
// pushdown (via `array_has`) and lets DF 53's optimizer rules
|
||||
// (vectorized IN-list, PhysicalExprSimplifier, CASE-NULL shortcut)
|
||||
// reach our predicates. Other call sites that still take string SQL
|
||||
// (hydrate_nodes for the Expand pushdown, count_rows, the mutation
|
||||
// delete path) migrate in follow-up MRs.
|
||||
let filter_expr = build_lance_filter_expr(filters, params);
|
||||
|
||||
// Blob columns must be excluded from scan when a filter is present
|
||||
// (Lance bug: BlobsDescriptions + filter triggers a projection assertion).
|
||||
|
|
@ -1056,10 +1064,15 @@ async fn execute_node_scan(
|
|||
let batches = crate::table_store::TableStore::scan_stream_with(
|
||||
&ds,
|
||||
projection,
|
||||
filter_sql.as_deref(),
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
|scanner| {
|
||||
// Apply the structured IR filter via Lance's Expr pushdown.
|
||||
if let Some(ref expr) = filter_expr {
|
||||
scanner.filter_expr(expr.clone());
|
||||
}
|
||||
|
||||
// Apply FTS queries from hoisted search filters (search/fuzzy/match_text in match clause)
|
||||
for filter in filters {
|
||||
if is_search_filter(filter) {
|
||||
|
|
@ -1288,6 +1301,125 @@ pub(super) fn literal_to_sql(lit: &Literal) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Structured DataFusion-Expr pushdown
|
||||
//
|
||||
// Parallel to the `ir_*_to_sql` family above, these helpers lower the same
|
||||
// IR filter shapes to `datafusion::prelude::Expr` so we can call
|
||||
// `Scanner::filter_expr(Expr)` instead of `Scanner::filter(&str)`. The
|
||||
// structured form unlocks two things the string path could not express:
|
||||
//
|
||||
// 1. `CompOp::Contains` against list-typed columns (lowered to
|
||||
// `array_has(col, value)` — requires the `nested_expressions`
|
||||
// feature on the `datafusion` crate, enabled in the workspace).
|
||||
// 2. Optimizer rules in DataFusion 53 that act on `Expr` shapes
|
||||
// (vectorized `IN`-list eq kernel, `PhysicalExprSimplifier`, the
|
||||
// `CASE WHEN x THEN y ELSE NULL` shortcut, etc.).
|
||||
//
|
||||
// Search predicates (`is_search_filter`) are still handled separately via
|
||||
// `scanner.full_text_search(...)`, not via filter_expr — they stay None
|
||||
// here just like in `ir_filter_to_sql`. The `literal_to_sql` path remains
|
||||
// because the mutation/update layer (`exec/mutation.rs`) still produces
|
||||
// SQL strings for `Dataset::delete(&str)`; that migration is MR-A's
|
||||
// territory (Lance #6658 + delete two-phase).
|
||||
|
||||
/// Convert IR filters to a single DataFusion `Expr` (AND-joined), or
|
||||
/// `None` if no filter is pushable.
|
||||
pub(super) fn build_lance_filter_expr(
|
||||
filters: &[IRFilter],
|
||||
params: &ParamMap,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::logical_expr::Operator;
|
||||
use datafusion::prelude::Expr;
|
||||
|
||||
let mut acc: Option<Expr> = None;
|
||||
for f in filters {
|
||||
let Some(e) = ir_filter_to_expr(f, params) else {
|
||||
continue;
|
||||
};
|
||||
acc = Some(match acc {
|
||||
None => e,
|
||||
Some(prev) => Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr::new(
|
||||
Box::new(prev),
|
||||
Operator::And,
|
||||
Box::new(e),
|
||||
)),
|
||||
});
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
/// Convert a single IR filter to a DataFusion `Expr`. Returns `None` for
|
||||
/// search-mode filters (handled via `scanner.full_text_search`) or any
|
||||
/// expression shape we can't pushdown.
|
||||
pub(super) fn ir_filter_to_expr(
|
||||
filter: &IRFilter,
|
||||
params: &ParamMap,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::functions_nested::expr_fn::array_has;
|
||||
|
||||
if is_search_filter(filter) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// List-contains: `prop CONTAINS value` lowers to `array_has(prop, value)`.
|
||||
// This is the case `ir_filter_to_sql` had to return None for ("Can't
|
||||
// pushdown list contains"); with structured Expr it pushes down fine.
|
||||
if matches!(filter.op, CompOp::Contains) {
|
||||
let left = ir_expr_to_expr(&filter.left, params)?;
|
||||
let right = ir_expr_to_expr(&filter.right, params)?;
|
||||
return Some(array_has(left, right));
|
||||
}
|
||||
|
||||
let left = ir_expr_to_expr(&filter.left, params)?;
|
||||
let right = ir_expr_to_expr(&filter.right, params)?;
|
||||
Some(match filter.op {
|
||||
CompOp::Eq => left.eq(right),
|
||||
CompOp::Ne => left.not_eq(right),
|
||||
CompOp::Gt => left.gt(right),
|
||||
CompOp::Lt => left.lt(right),
|
||||
CompOp::Ge => left.gt_eq(right),
|
||||
CompOp::Le => left.lt_eq(right),
|
||||
CompOp::Contains => unreachable!("handled above"),
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert an IR expression to a DataFusion `Expr`. Returns `None` for
|
||||
/// shapes we don't support in pushdown (search funcs, RRF, aggregates,
|
||||
/// variable refs that aren't a property access).
|
||||
pub(super) fn ir_expr_to_expr(
|
||||
expr: &IRExpr,
|
||||
params: &ParamMap,
|
||||
) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::{col, lit};
|
||||
match expr {
|
||||
IRExpr::PropAccess { property, .. } => Some(col(property)),
|
||||
IRExpr::Literal(l) => literal_to_expr(l),
|
||||
IRExpr::Param(name) => params.get(name).and_then(literal_to_expr),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a Literal to a DataFusion `Expr`. Returns `None` for List
|
||||
/// (which the existing SQL path also can't pushdown — falls through to
|
||||
/// post-scan in-memory application).
|
||||
fn literal_to_expr(lit: &Literal) -> Option<datafusion::prelude::Expr> {
|
||||
use datafusion::prelude::lit as df_lit;
|
||||
Some(match lit {
|
||||
Literal::Null => df_lit(datafusion::scalar::ScalarValue::Null),
|
||||
Literal::String(s) => df_lit(s.clone()),
|
||||
Literal::Integer(n) => df_lit(*n),
|
||||
Literal::Float(f) => df_lit(*f),
|
||||
Literal::Bool(b) => df_lit(*b),
|
||||
// Date/DateTime stored as strings; pass through as string literals
|
||||
// — Lance/DataFusion handles the comparison against typed columns
|
||||
// via implicit cast, matching the existing string-SQL behavior.
|
||||
Literal::Date(s) => df_lit(s.clone()),
|
||||
Literal::DateTime(s) => df_lit(s.clone()),
|
||||
Literal::List(_) => return None,
|
||||
})
|
||||
}
|
||||
|
||||
fn prefix_batch(batch: &RecordBatch, variable: &str) -> Result<RecordBatch> {
|
||||
let fields: Vec<Field> = batch.schema().fields().iter().map(|f| {
|
||||
Field::new(format!("{}.{}", variable, f.name()), f.data_type().clone(), f.is_nullable())
|
||||
|
|
|
|||
|
|
@ -26,10 +26,10 @@ use arrow_schema::SchemaRef;
|
|||
use lance::Dataset;
|
||||
use omnigraph_compiler::catalog::EdgeType;
|
||||
|
||||
use crate::db::{MutationOpKind, SubTableUpdate};
|
||||
use crate::db::manifest::{
|
||||
new_sidecar, write_sidecar, RecoverySidecarHandle, SidecarKind, SidecarTablePin,
|
||||
RecoverySidecarHandle, SidecarKind, SidecarTablePin, new_sidecar, write_sidecar,
|
||||
};
|
||||
use crate::db::{MutationOpKind, SubTableUpdate};
|
||||
use crate::error::{OmniError, Result};
|
||||
|
||||
/// Whether the per-table accumulator should commit via `stage_append`
|
||||
|
|
@ -119,10 +119,12 @@ impl MutationStaging {
|
|||
expected_version: u64,
|
||||
op_kind: MutationOpKind,
|
||||
) {
|
||||
self.paths.entry(table_key.to_string()).or_insert(StagedTablePath {
|
||||
full_path,
|
||||
table_branch,
|
||||
});
|
||||
self.paths
|
||||
.entry(table_key.to_string())
|
||||
.or_insert(StagedTablePath {
|
||||
full_path,
|
||||
table_branch,
|
||||
});
|
||||
self.expected_versions
|
||||
.entry(table_key.to_string())
|
||||
.or_insert(expected_version);
|
||||
|
|
@ -202,7 +204,8 @@ impl MutationStaging {
|
|||
|
||||
/// Record a delete that already inline-committed at the Lance layer.
|
||||
pub(crate) fn record_inline(&mut self, update: SubTableUpdate) {
|
||||
self.inline_committed.insert(update.table_key.clone(), update);
|
||||
self.inline_committed
|
||||
.insert(update.table_key.clone(), update);
|
||||
}
|
||||
|
||||
/// Read-your-writes accessor: the accumulated pending batches for
|
||||
|
|
@ -308,18 +311,13 @@ impl MutationStaging {
|
|||
// mode is exempt because no-key node and edge inserts use
|
||||
// ULID-generated ids that are unique within a query.
|
||||
let combined = match table.mode {
|
||||
PendingMode::Merge => {
|
||||
dedupe_merge_batches_by_id(&table.schema, table.batches)?
|
||||
}
|
||||
PendingMode::Merge => dedupe_merge_batches_by_id(&table.schema, table.batches)?,
|
||||
PendingMode::Append => {
|
||||
if table.batches.len() == 1 {
|
||||
table.batches.into_iter().next().unwrap()
|
||||
} else {
|
||||
arrow_select::concat::concat_batches(
|
||||
&table.schema,
|
||||
&table.batches,
|
||||
)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
arrow_select::concat::concat_batches(&table.schema, &table.batches)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
@ -327,9 +325,7 @@ impl MutationStaging {
|
|||
// Stage produces uncommitted fragments + transaction. No
|
||||
// Lance HEAD advance until `commit_all` runs `commit_staged`.
|
||||
let staged = match table.mode {
|
||||
PendingMode::Append => {
|
||||
db.table_store().stage_append(&ds, combined, &[]).await?
|
||||
}
|
||||
PendingMode::Append => db.table_store().stage_append(&ds, combined, &[]).await?,
|
||||
PendingMode::Merge => {
|
||||
db.table_store()
|
||||
.stage_merge_insert(
|
||||
|
|
@ -420,7 +416,7 @@ impl StagedMutation {
|
|||
///
|
||||
/// Revalidation: between `stage_all` and `commit_all`, another
|
||||
/// writer (in the same process or another process sharing the
|
||||
/// repo) may have committed to one of our touched tables, advancing
|
||||
/// graph) may have committed to one of our touched tables, advancing
|
||||
/// the manifest pin past our `expected_version`. We revalidate
|
||||
/// under the queue and fail-fast with `manifest_conflict` before
|
||||
/// any `commit_staged` so the orphaned uncommitted fragments stay
|
||||
|
|
@ -462,9 +458,8 @@ impl StagedMutation {
|
|||
// from interleaving between our delete and our publish, which
|
||||
// would otherwise leave a Lance-HEAD-ahead residual the
|
||||
// delete-only sidecar (added below) would have to recover.
|
||||
let mut queue_keys: Vec<(String, Option<String>)> = Vec::with_capacity(
|
||||
staged.len() + inline_committed.len(),
|
||||
);
|
||||
let mut queue_keys: Vec<(String, Option<String>)> =
|
||||
Vec::with_capacity(staged.len() + inline_committed.len());
|
||||
for entry in &staged {
|
||||
queue_keys.push((entry.table_key.clone(), entry.path.table_branch.clone()));
|
||||
}
|
||||
|
|
@ -565,9 +560,8 @@ impl StagedMutation {
|
|||
// Finding 3 hazard: delete-only mutations would otherwise skip
|
||||
// the sidecar, leaving any commit→publish residual unreachable
|
||||
// by recovery.
|
||||
let mut pins: Vec<SidecarTablePin> = Vec::with_capacity(
|
||||
staged.len() + inline_committed.len(),
|
||||
);
|
||||
let mut pins: Vec<SidecarTablePin> =
|
||||
Vec::with_capacity(staged.len() + inline_committed.len());
|
||||
for entry in &staged {
|
||||
pins.push(SidecarTablePin {
|
||||
table_key: entry.table_key.clone(),
|
||||
|
|
@ -899,10 +893,7 @@ pub(crate) async fn count_src_per_edge(
|
|||
/// Count pending edges per `src` with NO dedup. Correct when caller
|
||||
/// guarantees pending rows have unique primary keys (engine inserts via
|
||||
/// fresh ULID; loader Append mode).
|
||||
fn count_pending_src_naive(
|
||||
pending_batches: &[RecordBatch],
|
||||
counts: &mut HashMap<String, u32>,
|
||||
) {
|
||||
fn count_pending_src_naive(pending_batches: &[RecordBatch], counts: &mut HashMap<String, u32>) {
|
||||
for batch in pending_batches {
|
||||
let Some(col) = batch.column_by_name("src") else {
|
||||
continue;
|
||||
|
|
@ -947,12 +938,15 @@ fn count_pending_src_with_dedupe(
|
|||
dedupe_key_column
|
||||
)));
|
||||
};
|
||||
let key_arr = key_col.as_any().downcast_ref::<StringArray>().ok_or_else(|| {
|
||||
OmniError::Lance(format!(
|
||||
"count_src_per_edge: pending '{}' column is not Utf8",
|
||||
dedupe_key_column
|
||||
))
|
||||
})?;
|
||||
let key_arr = key_col
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.ok_or_else(|| {
|
||||
OmniError::Lance(format!(
|
||||
"count_src_per_edge: pending '{}' column is not Utf8",
|
||||
dedupe_key_column
|
||||
))
|
||||
})?;
|
||||
let src_arr = batch
|
||||
.column_by_name("src")
|
||||
.and_then(|c| c.as_any().downcast_ref::<StringArray>());
|
||||
|
|
|
|||
|
|
@ -1,3 +1,12 @@
|
|||
// Lance 6's trait surface (heavier futures/streams nesting around the
|
||||
// staged-write API in `storage_layer.rs`) pushes us past the default
|
||||
// trait-resolution recursion limit of 128 on Linux builds. Raising to
|
||||
// 256 here is the upstream-suggested fix from rustc itself
|
||||
// ("consider increasing the recursion limit"). macOS happens to short-
|
||||
// circuit before tripping the limit; CI on Linux does not. Revisit if
|
||||
// future Lance bumps stop needing this.
|
||||
#![recursion_limit = "256"]
|
||||
|
||||
pub mod changes;
|
||||
pub mod db;
|
||||
pub mod embedding;
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ impl StorageAdapter for LocalStorageAdapter {
|
|||
// Ensure parent directory exists. S3 has no equivalent (PutObject
|
||||
// is path-agnostic). For local fs, callers like the recovery
|
||||
// sidecar protocol expect transparent directory creation under
|
||||
// the repo root (the `__recovery/` directory doesn't pre-exist;
|
||||
// the graph root (the `__recovery/` directory doesn't pre-exist;
|
||||
// first sidecar write creates it).
|
||||
if let Some(parent) = path.parent() {
|
||||
if !parent.as_os_str().is_empty() {
|
||||
|
|
@ -398,10 +398,13 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn storage_backend_selection_is_scheme_aware() {
|
||||
assert_eq!(storage_kind_for_uri("/tmp/repo"), StorageKind::Local);
|
||||
assert_eq!(storage_kind_for_uri("file:///tmp/repo"), StorageKind::Local);
|
||||
assert_eq!(storage_kind_for_uri("/tmp/graph"), StorageKind::Local);
|
||||
assert_eq!(
|
||||
storage_kind_for_uri("s3://omnigraph-preview/repo"),
|
||||
storage_kind_for_uri("file:///tmp/graph"),
|
||||
StorageKind::Local
|
||||
);
|
||||
assert_eq!(
|
||||
storage_kind_for_uri("s3://omnigraph-preview/graph"),
|
||||
StorageKind::S3
|
||||
);
|
||||
}
|
||||
|
|
@ -440,8 +443,8 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn parse_s3_uri_splits_bucket_and_key() {
|
||||
let location = parse_s3_uri("s3://bucket/repo/_schema.pg").unwrap();
|
||||
let location = parse_s3_uri("s3://bucket/graph/_schema.pg").unwrap();
|
||||
assert_eq!(location.bucket, "bucket");
|
||||
assert_eq!(location.key, "repo/_schema.pg");
|
||||
assert_eq!(location.key, "graph/_schema.pg");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,11 +10,15 @@
|
|||
//! ## Transitional residuals on the trait
|
||||
//!
|
||||
//! Several inline-commit methods remain on the trait surface as
|
||||
//! documented residuals: `delete_where` (Lance 4.0.0's `DeleteJob` is
|
||||
//! `pub(crate)` — see [#6658](https://github.com/lance-format/lance/issues/6658)),
|
||||
//! documented residuals: `delete_where`
|
||||
//! ([#6658](https://github.com/lance-format/lance/issues/6658) closed
|
||||
//! 2026-05-14, but the public `DeleteBuilder::execute_uncommitted` API
|
||||
//! did not backport to the 6.x release line — it first ships in
|
||||
//! `v7.0.0-beta.10`. Migration to staged two-phase delete is tracked as
|
||||
//! MR-A and is gated on the Lance v7.x bump, not the current v6.0.1 pin),
|
||||
//! `create_vector_index` (segment-commit-path requires
|
||||
//! `build_index_metadata_from_segments` which is `pub(crate)` — see
|
||||
//! [#6666](https://github.com/lance-format/lance/issues/6666)), and the
|
||||
//! [#6666](https://github.com/lance-format/lance/issues/6666), still open), and the
|
||||
//! legacy `append_batch` / `merge_insert_batches` / `overwrite_batch` /
|
||||
//! `create_btree_index` / `create_inverted_index` paths kept while
|
||||
//! engine call sites finish migrating off of them (Phase 1b / Phase 9
|
||||
|
|
|
|||
|
|
@ -8,15 +8,17 @@ use lance::Dataset;
|
|||
use lance::blob::BlobArrayBuilder;
|
||||
use lance::dataset::scanner::{ColumnOrdering, DatasetRecordBatchStream, Scanner};
|
||||
use lance::dataset::transaction::{Operation, Transaction, TransactionBuilder};
|
||||
use lance::dataset::write::merge_insert::SourceDedupeBehavior;
|
||||
use lance::dataset::{
|
||||
CommitBuilder, InsertBuilder, MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode,
|
||||
WriteParams,
|
||||
};
|
||||
use lance::datatypes::BlobKind;
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance::index::scalar::IndexDetails;
|
||||
use lance_file::version::LanceFileVersion;
|
||||
use lance_index::scalar::{InvertedIndexParams, ScalarIndexParams};
|
||||
use lance_index::{DatasetIndexExt, IndexType, is_system_index};
|
||||
use lance_index::{IndexType, is_system_index};
|
||||
use lance_linalg::distance::MetricType;
|
||||
use lance_table::format::{Fragment, IndexMetadata, RowIdMeta};
|
||||
use lance_table::rowids::{RowIdSequence, write_row_ids};
|
||||
|
|
@ -651,15 +653,58 @@ impl TableStore {
|
|||
return self.table_state(dataset_uri, &ds).await;
|
||||
}
|
||||
|
||||
// Precondition for the FirstSeen workaround below: every caller of
|
||||
// this primitive must hand in a source batch that is unique by
|
||||
// `key_columns`. Without this check, `SourceDedupeBehavior::FirstSeen`
|
||||
// would silently collapse genuine duplicates instead of erroring.
|
||||
check_batch_unique_by_keys(&batch, &key_columns, "merge_insert_batch")?;
|
||||
|
||||
// TODO(lance-upstream): MergeInsertBuilder does not accept WriteParams,
|
||||
// so allow_external_blob_outside_bases cannot be set here. External URI
|
||||
// blobs via merge_insert (LoadMode::Merge, mutations) are unsupported
|
||||
// until Lance exposes WriteParams on MergeInsertBuilder.
|
||||
let ds = Arc::new(ds);
|
||||
let job = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.when_matched(when_matched)
|
||||
.when_not_matched(when_not_matched)
|
||||
let mut builder = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
builder.when_matched(when_matched);
|
||||
builder.when_not_matched(when_not_matched);
|
||||
// Workaround for a Lance 4.0.x bug class where sequential
|
||||
// merge_insert calls against rows previously rewritten by
|
||||
// merge_insert produce a spurious "Ambiguous merge inserts:
|
||||
// multiple source rows match the same target row on (id = ...)"
|
||||
// error. Lance's `processed_row_ids: Mutex<HashSet<u64>>`
|
||||
// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`)
|
||||
// double-processes the same source/target match against
|
||||
// datasets previously rewritten by merge_insert, and the default
|
||||
// `SourceDedupeBehavior::Fail` errors on the second insertion.
|
||||
// `FirstSeen` makes Lance skip the duplicate match instead.
|
||||
//
|
||||
// Covers both observed surfaces:
|
||||
// - PR #98 (sequential `load --mode merge` against same keys).
|
||||
// - MR-920 (sequential `update T set {f} where x=y` on same row).
|
||||
//
|
||||
// Correctness-preserving for OmniGraph because every call path
|
||||
// that reaches this primitive either pre-dedupes the source batch
|
||||
// by id, or surfaces a real source dup via the
|
||||
// `check_batch_unique_by_keys` precondition above (which fires
|
||||
// before the FirstSeen setter has a chance to silently collapse
|
||||
// anything):
|
||||
// - Load path: `enforce_unique_constraints_intra_batch`
|
||||
// (`loader/mod.rs:1453`) errors on intra-batch `@key` dups.
|
||||
// - Mutate path: `MutationStaging::finalize` (`exec/staging.rs`)
|
||||
// accumulates and dedupes by `id`.
|
||||
// - Branch-merge path: `compute_source_delta` /
|
||||
// `compute_three_way_delta` (`exec/merge.rs`) walk via
|
||||
// `OrderedTableCursor` and `push_row` each id at most once.
|
||||
// So FirstSeen only suppresses the spurious Lance behavior, never
|
||||
// user data. Pinned by `loader_rejects_intra_batch_duplicate_keys`
|
||||
// in `tests/consistency.rs` plus the
|
||||
// `check_batch_unique_by_keys` precondition.
|
||||
//
|
||||
// Retire when upstream Lance fixes the bug class. Tracked at
|
||||
// MR-957; upstream: lance-format/lance#6877.
|
||||
builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen);
|
||||
let job = builder
|
||||
.try_build()
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
|
||||
|
|
@ -870,11 +915,26 @@ impl TableStore {
|
|||
"stage_merge_insert called with empty batch".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Precondition for FirstSeen below. See the comment on
|
||||
// `merge_insert_batch` for why this check is here, not on the caller:
|
||||
// every call path that reaches stage_merge_insert (load,
|
||||
// MutationStaging::finalize, branch_merge::publish_rewritten_merge_table)
|
||||
// must hand in a source batch that is unique by `key_columns`.
|
||||
check_batch_unique_by_keys(&batch, &key_columns, "stage_merge_insert")?;
|
||||
|
||||
let ds = Arc::new(ds);
|
||||
let job = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?
|
||||
.when_matched(when_matched)
|
||||
.when_not_matched(when_not_matched)
|
||||
let mut builder = MergeInsertBuilder::try_new(ds, key_columns)
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
builder.when_matched(when_matched);
|
||||
builder.when_not_matched(when_not_matched);
|
||||
// See `merge_insert_batch` for the FirstSeen rationale. Workaround
|
||||
// for the Lance 4.0.x bug class where sequential merge_insert /
|
||||
// update against rows previously rewritten by merge_insert trips
|
||||
// Lance's `processed_row_ids` HashSet and errors under the default
|
||||
// `SourceDedupeBehavior::Fail`. Retire when upstream Lance is fixed.
|
||||
builder.source_dedupe_behavior(SourceDedupeBehavior::FirstSeen);
|
||||
let job = builder
|
||||
.try_build()
|
||||
.map_err(|e| OmniError::Lance(e.to_string()))?;
|
||||
let schema = batch.schema();
|
||||
|
|
@ -1651,3 +1711,107 @@ fn combine_committed_with_staged(ds: &Dataset, staged: &[StagedWrite]) -> Vec<Fr
|
|||
}
|
||||
combined
|
||||
}
|
||||
|
||||
/// Precondition guard for `merge_insert_batch` and `stage_merge_insert`.
|
||||
/// Both opt into `SourceDedupeBehavior::FirstSeen` to suppress the Lance
|
||||
/// `processed_row_ids` bug (MR-957). FirstSeen would *also* silently
|
||||
/// collapse genuine duplicate source keys; this check restores fail-fast
|
||||
/// behavior on real dups by erroring before the builder gets a chance to
|
||||
/// silently skip them.
|
||||
///
|
||||
/// Today only single-column string keys are used at the call sites
|
||||
/// (`vec!["id".to_string()]`). The check restricts itself to that shape
|
||||
/// and surfaces an internal error if a future caller passes anything
|
||||
/// else — keeping the assumption explicit instead of silently degrading.
|
||||
fn check_batch_unique_by_keys(
|
||||
batch: &RecordBatch,
|
||||
key_columns: &[String],
|
||||
context: &'static str,
|
||||
) -> Result<()> {
|
||||
if key_columns.len() != 1 {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"{}: check_batch_unique_by_keys currently supports single-column keys only, got {:?}",
|
||||
context, key_columns
|
||||
)));
|
||||
}
|
||||
let key_col_name = &key_columns[0];
|
||||
let column = batch.column_by_name(key_col_name).ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
"{}: source batch missing key column '{}'",
|
||||
context, key_col_name
|
||||
))
|
||||
})?;
|
||||
let strs = column
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
"{}: key column '{}' is not a StringArray (got {:?})",
|
||||
context,
|
||||
key_col_name,
|
||||
column.data_type()
|
||||
))
|
||||
})?;
|
||||
|
||||
let mut seen: std::collections::HashSet<&str> =
|
||||
std::collections::HashSet::with_capacity(batch.num_rows());
|
||||
for i in 0..strs.len() {
|
||||
if !strs.is_valid(i) {
|
||||
continue;
|
||||
}
|
||||
let v = strs.value(i);
|
||||
if !seen.insert(v) {
|
||||
return Err(OmniError::manifest(format!(
|
||||
"{}: duplicate source row for key '{}' (column '{}'); \
|
||||
callers must hand in a batch unique by `key_columns` \
|
||||
— see MR-957",
|
||||
context, v, key_col_name
|
||||
)));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_array::StringArray;
    use arrow_schema::{DataType, Field, Schema};

    /// Build a one-column batch whose non-nullable Utf8 `id` column holds `ids`.
    fn batch_with_ids(ids: &[&str]) -> RecordBatch {
        let id_field = Field::new("id", DataType::Utf8, false);
        let id_column: ArrayRef = Arc::new(StringArray::from(ids.to_vec()));
        RecordBatch::try_new(Arc::new(Schema::new(vec![id_field])), vec![id_column]).unwrap()
    }

    /// The single-column key list used by the production call sites.
    fn id_key() -> Vec<String> {
        vec!["id".to_string()]
    }

    /// Distinct ids pass the guard.
    #[test]
    fn check_batch_unique_by_keys_passes_when_all_unique() {
        let unique = batch_with_ids(&["a", "b", "c"]);
        check_batch_unique_by_keys(&unique, &id_key(), "test").unwrap();
    }

    /// A repeated id is rejected, and the message names both the key and MR-957.
    #[test]
    fn check_batch_unique_by_keys_errors_on_duplicate_id() {
        let with_dup = batch_with_ids(&["a", "b", "a"]);
        let msg = check_batch_unique_by_keys(&with_dup, &id_key(), "test")
            .unwrap_err()
            .to_string();
        assert!(
            msg.contains("duplicate source row for key 'a'"),
            "unexpected error: {msg}"
        );
        assert!(msg.contains("MR-957"), "error should reference MR-957: {msg}");
    }

    /// More than one key column is reported as unsupported.
    #[test]
    fn check_batch_unique_by_keys_rejects_multi_column_keys() {
        let single_row = batch_with_ids(&["a"]);
        let two_keys = ["id".to_string(), "other".to_string()];
        let err = check_batch_unique_by_keys(&single_row, &two_keys, "test").unwrap_err();
        assert!(err.to_string().contains("single-column keys only"));
    }
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ use std::fs;
|
|||
|
||||
use arrow_array::{Array, Int32Array, UInt64Array};
|
||||
use futures::TryStreamExt;
|
||||
use lance_index::{DatasetIndexExt, is_system_index};
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance_index::is_system_index;
|
||||
|
||||
use omnigraph::db::commit_graph::CommitGraph;
|
||||
use omnigraph::db::{MergeOutcome, Omnigraph, ReadTarget};
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Step 1: init a fresh repo with the standard test schema.
|
||||
// Step 1: init a fresh graph with the standard test schema.
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
let v_init = version_branch(&db, "main").await.unwrap();
|
||||
|
|
@ -70,7 +70,9 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
// Step 2: load JSONL seed data (Person + Company nodes,
|
||||
// Knows + WorksAt edges).
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
|
||||
.await
|
||||
.unwrap();
|
||||
let v_after_load = version_branch(&db, "main").await.unwrap();
|
||||
assert!(
|
||||
v_after_load > v_init,
|
||||
|
|
@ -119,19 +121,13 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
"feature",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person_and_friend",
|
||||
&mixed_params(
|
||||
&[("$name", "Frank"), ("$friend", "Eve")],
|
||||
&[("$age", 33)],
|
||||
),
|
||||
&mixed_params(&[("$name", "Frank"), ("$friend", "Eve")], &[("$age", 33)]),
|
||||
)
|
||||
.await
|
||||
.expect("multi-statement insert+edge on feature");
|
||||
|
||||
// After: feature has 4 + Eve + Frank = 6 Persons.
|
||||
let snap = db
|
||||
.snapshot_of(ReadTarget::branch("feature"))
|
||||
.await
|
||||
.unwrap();
|
||||
let snap = db.snapshot_of(ReadTarget::branch("feature")).await.unwrap();
|
||||
let person_ds = snap.open("node:Person").await.unwrap();
|
||||
assert_eq!(
|
||||
person_ds.count_rows(None).await.unwrap(),
|
||||
|
|
@ -321,14 +317,10 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
);
|
||||
|
||||
// Re-run a query to verify post-optimize correctness.
|
||||
let post_optimize_total = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let post_optimize_total =
|
||||
query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
!post_optimize_total.batches().is_empty(),
|
||||
"queries must still work after optimize"
|
||||
|
|
@ -385,14 +377,9 @@ async fn composite_flow_canonical_lifecycle() {
|
|||
// post-cleanup. Post-cleanup mutation is omitted here pending
|
||||
// resolution of the optimize-vs-manifest-pin interaction documented
|
||||
// in Step 10.
|
||||
let final_total = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let final_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!final_total.batches().is_empty());
|
||||
}
|
||||
|
||||
|
|
@ -431,10 +418,12 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
|
|||
|
||||
// Step 1: init + load on handle A.
|
||||
let mut db_a = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append).await.unwrap();
|
||||
load_jsonl(&mut db_a, TEST_DATA, LoadMode::Append)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count_rows(&db_a, "node:Person").await, 4);
|
||||
|
||||
// Step 2: open handle B on the same repo. B's in-memory schema_source
|
||||
// Step 2: open handle B on the same graph. B's in-memory schema_source
|
||||
// cache is now a snapshot of `_schema.pg` at open time.
|
||||
let db_b = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
|
|
@ -444,7 +433,7 @@ async fn composite_flow_schema_apply_then_branch_ops_no_deadlock_in_refresh() {
|
|||
// to disk.
|
||||
const TEST_SCHEMA_V2: &str = "node Person {\n name: String @key\n age: I32?\n nickname: String?\n}\n\nnode Company {\n name: String @key\n}\n\nedge Knows: Person -> Person {\n since: Date?\n}\n\nedge WorksAt: Person -> Company\n";
|
||||
let plan = db_a.apply_schema(TEST_SCHEMA_V2).await.unwrap();
|
||||
assert!(plan.applied, "apply_schema must succeed on a clean repo");
|
||||
assert!(plan.applied, "apply_schema must succeed on a clean graph");
|
||||
assert!(
|
||||
!plan.steps.is_empty(),
|
||||
"apply_schema must record the AddProperty step"
|
||||
|
|
@ -561,7 +550,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
// edges from test.jsonl).
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Append)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count_rows(&db, "node:Person").await, 4);
|
||||
assert_eq!(count_rows(&db, "edge:Knows").await, 3);
|
||||
|
||||
|
|
@ -687,10 +678,7 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
"feat-a",
|
||||
MUTATION_QUERIES,
|
||||
"insert_person_and_friend",
|
||||
&mixed_params(
|
||||
&[("$name", "Grace"), ("$friend", "Eve")],
|
||||
&[("$age", 28)],
|
||||
),
|
||||
&mixed_params(&[("$name", "Grace"), ("$friend", "Eve")], &[("$age", 28)]),
|
||||
)
|
||||
.await
|
||||
.expect("insert Grace + Knows(Grace → Eve) on feat-a");
|
||||
|
|
@ -821,15 +809,14 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
// `total_people` returns count(Person) = 10. Catches regressions in
|
||||
// group-by/count execution against a multi-fragment table whose
|
||||
// current shape was produced by two sequential merges.
|
||||
let total_post_merges = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_total(&total_post_merges, 10, "post both merges, main must total 10 Persons");
|
||||
let total_post_merges = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_total(
|
||||
&total_post_merges,
|
||||
10,
|
||||
"post both merges, main must total 10 Persons",
|
||||
);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Step 14: time-travel to pre-merge-a-version. Reads must return
|
||||
|
|
@ -1021,14 +1008,9 @@ async fn composite_flow_multi_branch_sequential_merges() {
|
|||
// correctly to disk but the reopened catalog can't bind them.
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
let mut db = db;
|
||||
let post_reopen_total = query_main(
|
||||
&mut db,
|
||||
TEST_QUERIES,
|
||||
"total_people",
|
||||
&ParamMap::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let post_reopen_total = query_main(&mut db, TEST_QUERIES, "total_people", &ParamMap::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_total(
|
||||
&post_reopen_total,
|
||||
10,
|
||||
|
|
|
|||
|
|
@ -119,6 +119,187 @@ async fn load_merge_upserts_existing_and_inserts_new() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Regression: two sequential `LoadMode::Merge` invocations against the
|
||||
/// same set of keys must both succeed. Pre-fix, the second one failed
|
||||
/// with `Ambiguous merge inserts are prohibited: multiple source rows
|
||||
/// match the same target row on (id = "TEST-1")` even though every
|
||||
/// source batch had one row per key.
|
||||
///
|
||||
/// Triggered by Lance's `processed_row_ids: Mutex<HashSet<u64>>`
|
||||
/// (lance-4.0.0 `src/dataset/write/merge_insert.rs:2099`) double-
|
||||
/// processing the same source/target match against datasets previously
|
||||
/// rewritten by merge_insert. Worked around by opting
|
||||
/// `MergeInsertBuilder` into `SourceDedupeBehavior::FirstSeen` in
|
||||
/// `crates/omnigraph/src/table_store.rs` — see that file for the full
|
||||
/// rationale and the safety pin (`loader_rejects_intra_batch_duplicate_keys`).
|
||||
/// Tracked at MR-957; upstream: lance-format/lance#6877.
|
||||
#[tokio::test]
|
||||
async fn load_merge_repeated_against_overlapping_keys_succeeds() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let schema = r#"
|
||||
node Thing {
|
||||
key: String @key
|
||||
required_val: String
|
||||
optional_val: String?
|
||||
}
|
||||
"#;
|
||||
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||
|
||||
// Seed with 50 fully-populated rows (id + required + optional).
|
||||
let mut seed = String::new();
|
||||
for i in 1..=50 {
|
||||
seed.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
load_jsonl(&mut db, &seed, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Partial-schema delta — mirrors the bug report exactly: omits
|
||||
// `optional_val`. 25 existing keys + 5 new keys, one row per key.
|
||||
let mut delta = String::new();
|
||||
for i in (1..=25).chain(51..=55) {
|
||||
delta.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge)
|
||||
.await
|
||||
.expect("first merge must succeed");
|
||||
assert_eq!(count_rows(&db, "node:Thing").await, 55);
|
||||
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge)
|
||||
.await
|
||||
.expect("second merge against same keys must succeed");
|
||||
assert_eq!(count_rows(&db, "node:Thing").await, 55);
|
||||
}
|
||||
|
||||
/// Safety pin for the `SourceDedupeBehavior::FirstSeen` workaround in
|
||||
/// `crates/omnigraph/src/table_store.rs`. FirstSeen tells Lance to
|
||||
/// silently skip a duplicate source row instead of erroring. Our use of
|
||||
/// it depends on user-provided duplicates being rejected *before* the
|
||||
/// batch reaches Lance — otherwise FirstSeen could silently drop user
|
||||
/// data.
|
||||
///
|
||||
/// Defense in depth:
|
||||
/// 1. The loader's `enforce_unique_constraints_intra_batch`
|
||||
/// (`loader/mod.rs:1453`), invoked unconditionally on any node type
|
||||
/// with a `@key`, errors on intra-batch duplicate `@key` values at
|
||||
/// intake — pinned by this test across every `LoadMode`.
|
||||
/// 2. The `check_batch_unique_by_keys` precondition at the top of
|
||||
/// `merge_insert_batch` and `stage_merge_insert` is the final
|
||||
/// fail-fast guard: even if a future caller bypasses the loader path
|
||||
/// (e.g. branch-merge's `publish_rewritten_merge_table` builds its
|
||||
/// own source batch directly), a real duplicate id reaches Lance
|
||||
/// only after surfacing as an `OmniError::Manifest`, never silently
|
||||
/// via FirstSeen. Pinned by the unit tests in `table_store::tests`.
|
||||
#[tokio::test]
|
||||
async fn loader_rejects_intra_batch_duplicate_keys() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let schema = r#"
|
||||
node Thing {
|
||||
key: String @key
|
||||
value: String
|
||||
}
|
||||
"#;
|
||||
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||
|
||||
let dupes = r#"{"type":"Thing","data":{"key":"DUP","value":"first"}}
|
||||
{"type":"Thing","data":{"key":"DUP","value":"second"}}
|
||||
"#;
|
||||
|
||||
for mode in [LoadMode::Overwrite, LoadMode::Append, LoadMode::Merge] {
|
||||
let err = load_jsonl(&mut db, dupes, mode).await.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("@unique violation") && msg.contains("DUP"),
|
||||
"load mode {mode:?} must reject intra-batch duplicate @key (got: {msg})"
|
||||
);
|
||||
assert_eq!(
|
||||
count_rows(&db, "node:Thing").await,
|
||||
0,
|
||||
"load mode {mode:?} must not persist any rows when the batch is rejected"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Canary for the upstream Lance gap that the `FirstSeen` workaround
|
||||
/// in `table_store.rs` masks. The bug class is "Window 2": load →
|
||||
/// indices built explicitly → merge → merge. Even with the engine
|
||||
/// fully aligned to the "indexes are derived state" invariant
|
||||
/// (MR-848), as long as an `id` index has been built between the
|
||||
/// first and second merge_insert, the Lance internal that triggers
|
||||
/// the bug remains reachable.
|
||||
///
|
||||
/// This test runs the Window-2 sequence under the FirstSeen workaround.
|
||||
/// It is expected to pass today. If a future Lance upgrade or local
|
||||
/// change makes it START failing, the workaround has lost effectiveness
|
||||
/// (upstream Lance changed something, or the FirstSeen setter was
|
||||
/// dropped from `table_store.rs`). If a future Lance upgrade fixes the
|
||||
/// bug class, this test continues to pass and the FirstSeen setter can
|
||||
/// be retired.
|
||||
///
|
||||
/// Tracked at MR-957; upstream: lance-format/lance#6877.
|
||||
#[tokio::test]
|
||||
async fn load_merge_window_2_documents_upstream_lance_gap() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let schema = r#"
|
||||
node Thing {
|
||||
key: String @key
|
||||
required_val: String
|
||||
optional_val: String?
|
||||
}
|
||||
"#;
|
||||
let mut db = Omnigraph::init(uri, schema).await.unwrap();
|
||||
|
||||
let mut seed = String::new();
|
||||
for i in 1..=50 {
|
||||
seed.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i}","optional_val":"optional {i}"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
load_jsonl(&mut db, &seed, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Explicit ensure_indices between seed and the merges — the Window
|
||||
// 2 trigger. The eager-build behavior (MR-583) means the BTREE on
|
||||
// `id` is already present here, but calling explicitly pins the
|
||||
// invariant for the post-MR-848 future where the eager build is
|
||||
// gone.
|
||||
db.ensure_indices().await.unwrap();
|
||||
|
||||
let mut delta = String::new();
|
||||
for i in (1..=25).chain(51..=55) {
|
||||
delta.push_str(&format!(
|
||||
r#"{{"type":"Thing","data":{{"key":"TEST-{i}","required_val":"required {i} UPDATED"}}}}
|
||||
"#,
|
||||
));
|
||||
}
|
||||
|
||||
// Both merges must succeed under the FirstSeen workaround.
|
||||
// `processed_row_ids` re-processes the same target row_id under
|
||||
// the default `SourceDedupeBehavior::Fail`; FirstSeen tolerates it.
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge)
|
||||
.await
|
||||
.expect("first merge after ensure_indices must succeed");
|
||||
db.ensure_indices().await.unwrap();
|
||||
load_jsonl(&mut db, &delta, LoadMode::Merge).await.expect(
|
||||
"second merge after ensure_indices must succeed \
|
||||
(Window 2 canary: drop the FirstSeen setter in table_store.rs \
|
||||
only when this stays green WITHOUT it)",
|
||||
);
|
||||
assert_eq!(count_rows(&db, "node:Thing").await, 55);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn cross_type_traversal_deduplicates_duplicate_edges() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
|
@ -163,7 +344,7 @@ async fn explicit_target_query_sees_other_writer_commits_without_refresh() {
|
|||
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
// Two independent handles to the same repo
|
||||
// Two independent handles to the same graph
|
||||
let mut db1 = Omnigraph::open(uri).await.unwrap();
|
||||
let mut db2 = Omnigraph::open(uri).await.unwrap();
|
||||
|
||||
|
|
|
|||
|
|
@ -1866,3 +1866,65 @@ async fn ensure_indices_does_not_error_on_repeated_call() {
|
|||
let ds = snap.open("node:Person").await.unwrap();
|
||||
assert_eq!(ds.count_rows(None).await.unwrap(), 4);
|
||||
}
|
||||
|
||||
// ─── DataFusion-Expr filter pushdown (Tier-1 follow-up to the Lance v6 bump) ──
|
||||
|
||||
/// Regression for `CompOp::Contains` pushdown via `array_has` in
|
||||
/// `ir_filter_to_expr`. Before the Expr-pushdown refactor, the
|
||||
/// `ir_filter_to_sql` family returned `None` for list-contains (the
|
||||
/// comment said *"Can't pushdown list contains"*) and the predicate was
|
||||
/// applied post-scan in memory. With `Scanner::filter_expr(Expr)` and
|
||||
/// DF's `array_has` builtin, the contains predicate now pushes down to
|
||||
/// Lance — the test confirms results are correct AND the pushdown path
|
||||
/// is exercised (a regression on the pushdown would land all rows in
|
||||
/// the scan, then be filtered post-hoc; that still produces the right
|
||||
/// count so this test pins correctness, while `lance_surface_guards.rs`
|
||||
/// is the structural pin for the surface itself).
|
||||
#[tokio::test]
|
||||
async fn ir_filter_with_list_contains_pushes_down() {
|
||||
let schema = r#"
|
||||
node Doc {
|
||||
slug: String @key
|
||||
tags: [String]
|
||||
}
|
||||
"#;
|
||||
let data = r#"{"type":"Doc","data":{"slug":"alpha","tags":["red","blue"]}}
|
||||
{"type":"Doc","data":{"slug":"bravo","tags":["green"]}}
|
||||
{"type":"Doc","data":{"slug":"charlie","tags":["red","green"]}}
|
||||
{"type":"Doc","data":{"slug":"delta","tags":[]}}"#;
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = Omnigraph::init(dir.path().to_str().unwrap(), schema)
|
||||
.await
|
||||
.unwrap();
|
||||
load_jsonl(&mut db, data, LoadMode::Overwrite)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let queries = r#"
|
||||
query docs_with_tag($tag: String) {
|
||||
match {
|
||||
$d: Doc
|
||||
$d.tags contains $tag
|
||||
}
|
||||
return { $d.slug }
|
||||
}
|
||||
"#;
|
||||
    let result = query_main(&mut db, queries, "docs_with_tag", &params(&[("$tag", "red")]))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batch = result.concat_batches().unwrap();
|
||||
let slugs = batch
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.unwrap();
|
||||
let mut got: Vec<&str> = (0..slugs.len()).map(|i| slugs.value(i)).collect();
|
||||
got.sort();
|
||||
assert_eq!(
|
||||
got,
|
||||
vec!["alpha", "charlie"],
|
||||
"contains-pushdown should return exactly the rows whose tags list contains 'red'"
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ async fn graph_publish_failpoint_triggers_before_commit_append() {
|
|||
|
||||
// Atomic schema apply: schema apply writes staging files first, then commits
|
||||
// the manifest, then renames staging → final. Tests below inject crashes at
|
||||
// the two boundaries and assert that reopening the repo yields a consistent
|
||||
// the two boundaries and assert that reopening the graph yields a consistent
|
||||
// state.
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -303,14 +303,10 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
|
|||
let person_uri = node_table_uri(&uri, "Person");
|
||||
|
||||
{
|
||||
let _pause_delete = ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
|
||||
let _pause_delete =
|
||||
ScopedFailPoint::new("mutation.delete_node_pre_primary_delete", "pause");
|
||||
let delete_params = helpers::params(&[("$name", "Alice")]);
|
||||
let delete = db.mutate(
|
||||
"main",
|
||||
MUTATION_QUERIES,
|
||||
"remove_person",
|
||||
&delete_params,
|
||||
);
|
||||
let delete = db.mutate("main", MUTATION_QUERIES, "remove_person", &delete_params);
|
||||
tokio::pin!(delete);
|
||||
|
||||
let mut concurrent_update_succeeded = false;
|
||||
|
|
@ -325,15 +321,18 @@ async fn inline_delete_conflict_writes_sidecar_before_rejecting() {
|
|||
"set_age",
|
||||
&mixed_params(&[("$name", "Bob")], &[("$age", 26)]),
|
||||
)
|
||||
.await
|
||||
.is_ok()
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
concurrent_update_succeeded = true;
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
||||
}
|
||||
assert!(concurrent_update_succeeded, "concurrent update must land while delete is paused");
|
||||
assert!(
|
||||
concurrent_update_succeeded,
|
||||
"concurrent update must land while delete is paused"
|
||||
);
|
||||
fail::remove("mutation.delete_node_pre_primary_delete");
|
||||
|
||||
let err = delete.await.unwrap_err();
|
||||
|
|
@ -464,7 +463,7 @@ async fn recovery_rolls_forward_load_on_feature_branch() {
|
|||
|
||||
#[tokio::test]
|
||||
async fn recovery_rolls_forward_ensure_indices_on_feature_branch() {
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance::index::DatasetIndexExt;
|
||||
use omnigraph::loader::{LoadMode, load_jsonl};
|
||||
use omnigraph::table_store::TableStore;
|
||||
|
||||
|
|
@ -925,13 +924,13 @@ async fn ensure_indices_stage_btree_failure_leaves_existing_tables_writable() {
|
|||
.expect("Person mutation must succeed after the failed schema apply — existing tables are not drifted");
|
||||
}
|
||||
|
||||
fn assert_no_staging_files(repo: &std::path::Path) {
|
||||
fn assert_no_staging_files(graph: &std::path::Path) {
|
||||
for name in [
|
||||
"_schema.pg.staging",
|
||||
"_schema.ir.json.staging",
|
||||
"__schema_state.json.staging",
|
||||
] {
|
||||
let path = repo.join(name);
|
||||
let path = graph.join(name);
|
||||
assert!(
|
||||
!path.exists(),
|
||||
"staging file {} still exists after recovery",
|
||||
|
|
@ -1164,7 +1163,7 @@ edge WorksAt: Person -> Company
|
|||
// NEW schema (city column on Person, Tag node type) — not the old.
|
||||
// Without the schema-staging coordination, the schema-state
|
||||
// recovery would have deleted the staging files (because manifest
|
||||
// hadn't advanced when it ran), leaving a corrupt repo with new-
|
||||
// hadn't advanced when it ran), leaving a corrupt graph with new-
|
||||
// schema data on disk but old-schema catalog.
|
||||
let live_schema = std::fs::read_to_string(dir.path().join("_schema.pg")).unwrap();
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ query insert_person_and_friend($name: String, $age: I32, $friend: String) {
|
|||
}
|
||||
"#;
|
||||
|
||||
/// Init a repo and load the standard test data.
|
||||
/// Init a graph and load the standard test data.
|
||||
pub async fn init_and_load(dir: &tempfile::TempDir) -> Omnigraph {
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -249,7 +249,7 @@ pub fn vector_and_string_params(
|
|||
map
|
||||
}
|
||||
|
||||
pub fn s3_test_repo_uri(suite: &str) -> Option<String> {
|
||||
pub fn s3_test_graph_uri(suite: &str) -> Option<String> {
|
||||
let bucket = std::env::var("OMNIGRAPH_S3_TEST_BUCKET").ok()?;
|
||||
let prefix = std::env::var("OMNIGRAPH_S3_TEST_PREFIX")
|
||||
.ok()
|
||||
|
|
|
|||
|
|
@ -110,8 +110,8 @@ impl FollowUpMutation {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
|
||||
let ids = sidecar_operation_ids(repo_root);
|
||||
pub fn single_sidecar_operation_id(graph_root: &Path) -> String {
|
||||
let ids = sidecar_operation_ids(graph_root);
|
||||
assert_eq!(
|
||||
ids.len(),
|
||||
1,
|
||||
|
|
@ -121,8 +121,8 @@ pub fn single_sidecar_operation_id(repo_root: &Path) -> String {
|
|||
ids.into_iter().next().unwrap()
|
||||
}
|
||||
|
||||
pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
|
||||
let dir = repo_root.join("__recovery");
|
||||
pub fn sidecar_operation_ids(graph_root: &Path) -> Vec<String> {
|
||||
let dir = graph_root.join("__recovery");
|
||||
if !dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -143,10 +143,10 @@ pub fn sidecar_operation_ids(repo_root: &Path) -> Vec<String> {
|
|||
ids
|
||||
}
|
||||
|
||||
pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<String> {
|
||||
pub async fn branch_head_commit_id(graph_root: &Path, branch: &str) -> Result<String> {
|
||||
let graph = match branch {
|
||||
"main" => CommitGraph::open(&repo_uri(repo_root)).await?,
|
||||
branch => CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?,
|
||||
"main" => CommitGraph::open(&graph_uri(graph_root)).await?,
|
||||
branch => CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?,
|
||||
};
|
||||
graph.head_commit_id().await?.ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!("commit graph for branch {branch} has no head"))
|
||||
|
|
@ -154,52 +154,52 @@ pub async fn branch_head_commit_id(repo_root: &Path, branch: &str) -> Result<Str
|
|||
}
|
||||
|
||||
pub async fn assert_post_recovery_invariants(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
operation_id: &str,
|
||||
expectation: RecoveryExpectation,
|
||||
) -> Result<()> {
|
||||
match expectation {
|
||||
RecoveryExpectation::RolledForward { tables } => {
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
let audit = read_audit_row(repo_root, operation_id).await?;
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
let audit = read_audit_row(graph_root, operation_id).await?;
|
||||
assert_eq!(
|
||||
audit.recovery_kind, "RolledForward",
|
||||
"audit row for {operation_id} recorded the wrong recovery_kind",
|
||||
);
|
||||
assert_manifest_pins_match_lance_heads(repo_root, &tables).await?;
|
||||
assert_audit_to_versions_match_lance_heads(repo_root, &audit, &tables).await?;
|
||||
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(repo_root, &tables).await?;
|
||||
assert_idempotent_reopen(repo_root, operation_id).await?;
|
||||
run_follow_up_mutations(repo_root, tables).await?;
|
||||
assert_manifest_pins_match_lance_heads(graph_root, &tables).await?;
|
||||
assert_audit_to_versions_match_lance_heads(graph_root, &audit, &tables).await?;
|
||||
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(graph_root, &tables).await?;
|
||||
assert_idempotent_reopen(graph_root, operation_id).await?;
|
||||
run_follow_up_mutations(graph_root, tables).await?;
|
||||
}
|
||||
RecoveryExpectation::RolledBack { tables } => {
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
let audit = read_audit_row(repo_root, operation_id).await?;
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
let audit = read_audit_row(graph_root, operation_id).await?;
|
||||
assert_eq!(
|
||||
audit.recovery_kind, "RolledBack",
|
||||
"audit row for {operation_id} recorded the wrong recovery_kind",
|
||||
);
|
||||
assert_rollback_outcomes_record_drift(&audit);
|
||||
assert_recovery_commit_shape(repo_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(repo_root, &tables).await?;
|
||||
assert_idempotent_reopen(repo_root, operation_id).await?;
|
||||
run_follow_up_mutations(repo_root, tables).await?;
|
||||
assert_recovery_commit_shape(graph_root, &audit, &tables).await?;
|
||||
assert_non_main_did_not_move_main(graph_root, &tables).await?;
|
||||
assert_idempotent_reopen(graph_root, operation_id).await?;
|
||||
run_follow_up_mutations(graph_root, tables).await?;
|
||||
}
|
||||
RecoveryExpectation::Deferred => {
|
||||
assert!(
|
||||
sidecar_path(repo_root, operation_id).exists(),
|
||||
sidecar_path(graph_root, operation_id).exists(),
|
||||
"deferred recovery must leave sidecar {operation_id} on disk",
|
||||
);
|
||||
assert!(
|
||||
read_audit_row(repo_root, operation_id).await.is_err(),
|
||||
read_audit_row(graph_root, operation_id).await.is_err(),
|
||||
"deferred recovery must not record an audit row for {operation_id}",
|
||||
);
|
||||
}
|
||||
RecoveryExpectation::NoOp => {
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
assert!(
|
||||
read_audit_row(repo_root, operation_id).await.is_err(),
|
||||
read_audit_row(graph_root, operation_id).await.is_err(),
|
||||
"no-op recovery must not record an audit row for {operation_id}",
|
||||
);
|
||||
}
|
||||
|
|
@ -216,24 +216,24 @@ fn branch_context(tables: &[TableExpectation]) -> Option<String> {
|
|||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn sidecar_path(repo_root: &Path, operation_id: &str) -> PathBuf {
|
||||
repo_root
|
||||
fn sidecar_path(graph_root: &Path, operation_id: &str) -> PathBuf {
|
||||
graph_root
|
||||
.join("__recovery")
|
||||
.join(format!("{operation_id}.json"))
|
||||
}
|
||||
|
||||
fn assert_sidecar_absent(repo_root: &Path, operation_id: &str) {
|
||||
fn assert_sidecar_absent(graph_root: &Path, operation_id: &str) {
|
||||
assert!(
|
||||
!sidecar_path(repo_root, operation_id).exists(),
|
||||
!sidecar_path(graph_root, operation_id).exists(),
|
||||
"recovery sidecar {operation_id} must be deleted after successful recovery",
|
||||
);
|
||||
}
|
||||
|
||||
async fn assert_manifest_pins_match_lance_heads(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let db = Omnigraph::open(&uri).await?;
|
||||
for table in tables {
|
||||
let (entry, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
|
||||
|
|
@ -254,11 +254,11 @@ async fn assert_manifest_pins_match_lance_heads(
|
|||
}
|
||||
|
||||
async fn assert_audit_to_versions_match_lance_heads(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
audit: &RecoveryAuditRow,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let db = Omnigraph::open(&uri).await?;
|
||||
for table in tables {
|
||||
let (_, lance_head) = entry_and_lance_head(&db, &uri, table).await?;
|
||||
|
|
@ -301,10 +301,10 @@ fn assert_rollback_outcomes_record_drift(audit: &RecoveryAuditRow) {
|
|||
}
|
||||
|
||||
async fn assert_non_main_did_not_move_main(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let db = Omnigraph::open(&uri).await?;
|
||||
let main = db.snapshot_of(ReadTarget::branch("main")).await?;
|
||||
for table in tables {
|
||||
|
|
@ -327,14 +327,14 @@ async fn assert_non_main_did_not_move_main(
|
|||
}
|
||||
|
||||
async fn assert_recovery_commit_shape(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
audit: &RecoveryAuditRow,
|
||||
tables: &[TableExpectation],
|
||||
) -> Result<()> {
|
||||
let branch = branch_context(tables);
|
||||
let expected_parent = expected_recovery_parent(tables)?;
|
||||
let branch = branch.as_deref();
|
||||
let commit = read_recovery_commit(repo_root, audit, branch).await?;
|
||||
let commit = read_recovery_commit(graph_root, audit, branch).await?;
|
||||
|
||||
assert_eq!(
|
||||
commit.actor_id.as_deref(),
|
||||
|
|
@ -362,7 +362,7 @@ async fn assert_recovery_commit_shape(
|
|||
);
|
||||
|
||||
if let Some(branch) = branch {
|
||||
let graph = CommitGraph::open_at_branch(&repo_uri(repo_root), branch).await?;
|
||||
let graph = CommitGraph::open_at_branch(&graph_uri(graph_root), branch).await?;
|
||||
let commits = graph.load_commits().await?;
|
||||
let parent = commit.parent_commit_id.as_deref().ok_or_else(|| {
|
||||
OmniError::manifest_internal(format!(
|
||||
|
|
@ -403,12 +403,12 @@ fn expected_recovery_parent(tables: &[TableExpectation]) -> Result<Option<String
|
|||
Ok(expected)
|
||||
}
|
||||
|
||||
async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Result<()> {
|
||||
let before = matching_audit_rows(repo_root, operation_id).await?;
|
||||
let uri = repo_uri(repo_root);
|
||||
async fn assert_idempotent_reopen(graph_root: &Path, operation_id: &str) -> Result<()> {
|
||||
let before = matching_audit_rows(graph_root, operation_id).await?;
|
||||
let uri = graph_uri(graph_root);
|
||||
let _db = Omnigraph::open(&uri).await?;
|
||||
assert_sidecar_absent(repo_root, operation_id);
|
||||
let after = matching_audit_rows(repo_root, operation_id).await?;
|
||||
assert_sidecar_absent(graph_root, operation_id);
|
||||
let after = matching_audit_rows(graph_root, operation_id).await?;
|
||||
assert_eq!(
|
||||
after.len(),
|
||||
before.len(),
|
||||
|
|
@ -417,14 +417,14 @@ async fn assert_idempotent_reopen(repo_root: &Path, operation_id: &str) -> Resul
|
|||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_follow_up_mutations(repo_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
|
||||
async fn run_follow_up_mutations(graph_root: &Path, tables: Vec<TableExpectation>) -> Result<()> {
|
||||
let mut db: Option<Omnigraph> = None;
|
||||
for table in tables {
|
||||
let Some(mutation) = table.follow_up_mutation else {
|
||||
continue;
|
||||
};
|
||||
if db.is_none() {
|
||||
db = Some(Omnigraph::open(&repo_uri(repo_root)).await?);
|
||||
db = Some(Omnigraph::open(&graph_uri(graph_root)).await?);
|
||||
}
|
||||
let db = db.as_mut().unwrap();
|
||||
db.mutate(
|
||||
|
|
@ -480,11 +480,11 @@ async fn lance_head_for_entry(root_uri: &str, entry: &SubTableEntry) -> Result<u
|
|||
}
|
||||
|
||||
async fn read_recovery_commit(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
audit: &RecoveryAuditRow,
|
||||
branch: Option<&str>,
|
||||
) -> Result<GraphCommit> {
|
||||
let uri = repo_uri(repo_root);
|
||||
let uri = graph_uri(graph_root);
|
||||
let graph = match branch {
|
||||
Some(branch) => CommitGraph::open_at_branch(&uri, branch).await?,
|
||||
None => CommitGraph::open(&uri).await?,
|
||||
|
|
@ -502,8 +502,8 @@ async fn read_recovery_commit(
|
|||
})
|
||||
}
|
||||
|
||||
async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
|
||||
let mut rows = matching_audit_rows(repo_root, operation_id).await?;
|
||||
async fn read_audit_row(graph_root: &Path, operation_id: &str) -> Result<RecoveryAuditRow> {
|
||||
let mut rows = matching_audit_rows(graph_root, operation_id).await?;
|
||||
if rows.len() != 1 {
|
||||
return Err(OmniError::manifest_internal(format!(
|
||||
"expected exactly one recovery audit row for {operation_id}, got {}",
|
||||
|
|
@ -514,10 +514,10 @@ async fn read_audit_row(repo_root: &Path, operation_id: &str) -> Result<Recovery
|
|||
}
|
||||
|
||||
async fn matching_audit_rows(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
operation_id: &str,
|
||||
) -> Result<Vec<RecoveryAuditRow>> {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
|
@ -575,6 +575,6 @@ fn string_column<'a>(batch: &'a RecordBatch, name: &str) -> Result<&'a StringArr
|
|||
})
|
||||
}
|
||||
|
||||
fn repo_uri(repo_root: &Path) -> String {
|
||||
repo_root.to_str().unwrap().to_string()
|
||||
fn graph_uri(graph_root: &Path) -> String {
|
||||
graph_root.to_str().unwrap().to_string()
|
||||
}
|
||||
|
|
|
|||
244
crates/omnigraph/tests/lance_surface_guards.rs
Normal file
244
crates/omnigraph/tests/lance_surface_guards.rs
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
//! Lance API surface guards.
|
||||
//!
|
||||
//! Each guard pins a Lance API surface that OmniGraph relies on. If a future
|
||||
//! Lance bump silently renames a variant, restructures a public struct, or
|
||||
//! flips a method to async, the corresponding guard either fails to compile
|
||||
//! (compile-time guards) or fails at runtime (runtime guards). The purpose
|
||||
//! is to turn silent-break risks into red CI bars on the *next* Lance bump,
|
||||
//! rather than into wrong-state recovery in production.
|
||||
//!
|
||||
//! Pair this file with `docs/dev/lance.md`'s alignment audit stanza: any
|
||||
//! Lance bump runs `cargo test -p omnigraph-engine --test lance_surface_guards`
|
||||
//! first as the smoke check.
|
||||
//!
|
||||
//! ## Compile-only guards
|
||||
//!
|
||||
//! Functions prefixed with `_compile_` are gated with a broad `#[allow(...)]`
|
||||
//! and never called. They exist to make `cargo build -p omnigraph-engine --tests`
|
||||
//! enforce the API shape. Using `unimplemented!()` as a placeholder lets type
|
||||
//! inference proceed without running anything.
|
||||
//!
|
||||
//! ## Runtime guards
|
||||
//!
|
||||
//! Functions decorated `#[tokio::test]` actually run; they construct real
|
||||
//! values and assert field shapes / types.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use lance::Dataset;
|
||||
use lance::dataset::builder::DatasetBuilder;
|
||||
use lance::dataset::optimize::{CompactionOptions, compact_files};
|
||||
use lance::dataset::write::delete::DeleteResult;
|
||||
use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams};
|
||||
use lance_file::version::LanceFileVersion;
|
||||
use lance_namespace::LanceNamespace;
|
||||
use lance_table::io::commit::ManifestNamingScheme;
|
||||
|
||||
/// Helper: build a small fresh dataset in a tempdir. Pinned at V2_2 to match
|
||||
/// production write paths (blob v2 requires V2_2; see `docs/dev/lance.md`).
|
||||
async fn fresh_dataset(uri: &str) -> Dataset {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("value", DataType::Int32, false),
|
||||
]));
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(StringArray::from(vec!["alice", "bob"])),
|
||||
Arc::new(Int32Array::from(vec![1, 2])),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
||||
let params = WriteParams {
|
||||
mode: WriteMode::Create,
|
||||
enable_stable_row_ids: true,
|
||||
data_storage_version: Some(LanceFileVersion::V2_2),
|
||||
..Default::default()
|
||||
};
|
||||
Dataset::write(reader, uri, Some(params)).await.unwrap()
|
||||
}
|
||||
|
||||
// --- Guard 1: LanceError::TooMuchWriteContention variant exists ------------
|
||||
//
|
||||
// `db/manifest/publisher.rs::map_lance_publish_error` pattern-matches on this
|
||||
// variant to surface typed `OmniError::ManifestRowLevelCasContention`. If
|
||||
// Lance renames the variant or removes the builder, this guard fails.
|
||||
|
||||
/// Runtime guard: `lance::Error::too_much_write_contention` must still build
/// a value that matches the `lance::Error::TooMuchWriteContention { .. }`
/// variant.
///
/// The body contains no `.await`, so this is a plain synchronous `#[test]`
/// rather than a `#[tokio::test]`: no async runtime is spun up just to
/// construct and pattern-match an error value.
#[test]
fn lance_error_too_much_write_contention_variant_exists() {
    let err = lance::Error::too_much_write_contention("guard");
    assert!(
        matches!(err, lance::Error::TooMuchWriteContention { .. }),
        "Lance::Error::TooMuchWriteContention variant missing or renamed; \
         update db/manifest/publisher.rs::map_lance_publish_error and \
         this guard, then re-pin docs/dev/lance.md."
    );
}
|
||||
|
||||
// --- Guard 2: ManifestLocation field shape ---------------------------------
|
||||
//
|
||||
// `db/manifest/metadata.rs:84-88` reads `.path`, `.size`, `.e_tag`,
|
||||
// `.naming_scheme` off `dataset.manifest_location()`. If any field renames
|
||||
// or changes type, this guard fails to compile.
|
||||
|
||||
#[tokio::test]
|
||||
async fn manifest_location_field_shape() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().join("guard.lance");
|
||||
let ds = fresh_dataset(uri.to_str().unwrap()).await;
|
||||
|
||||
let loc = ds.manifest_location();
|
||||
// Explicit type bindings — these are the load-bearing assertions. If a
|
||||
// type drifts (e.g. .size: Option<u64> → .size: u64), this fails to
|
||||
// compile.
|
||||
let _path: &object_store::path::Path = &loc.path;
|
||||
let _size: Option<u64> = loc.size;
|
||||
let _e_tag: Option<String> = loc.e_tag.clone();
|
||||
let _scheme: ManifestNamingScheme = loc.naming_scheme;
|
||||
// Runtime sanity — naming_scheme should produce a Debug string we use
|
||||
// verbatim in `TableVersionMetadata::naming_scheme`.
|
||||
assert!(!format!("{:?}", loc.naming_scheme).is_empty());
|
||||
}
|
||||
|
||||
// --- Guard 3: checkout_version + restore async chain -----------------------
|
||||
//
|
||||
// `db/manifest/recovery.rs:505-522` chains `Dataset::open(...).await?
|
||||
// .checkout_version(N).await?.restore().await?` as the recovery rollback
|
||||
// hammer. Compile-only — never runs.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_checkout_version_then_restore_signature() -> lance::Result<()> {
|
||||
let ds: Dataset = unimplemented!();
|
||||
let mut ds: Dataset = ds.checkout_version(1u64).await?;
|
||||
// `restore()` takes `&mut self` and returns `Result<()>`; the dataset
|
||||
// mutates in place. If Lance flips this to return a fresh `Dataset`
|
||||
// (consuming `self`), this guard fails to compile.
|
||||
let _: () = ds.restore().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 4: DatasetBuilder::from_namespace fluent chain ------------------
|
||||
//
|
||||
// `db/manifest/namespace.rs:162-174` chains
|
||||
// `DatasetBuilder::from_namespace(ns, vec![id]).await?.with_branch(...).with_version(...).load().await?`.
|
||||
// Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_dataset_builder_from_namespace_signature(
|
||||
ns: Arc<dyn LanceNamespace>,
|
||||
) -> lance::Result<()> {
|
||||
let builder: DatasetBuilder =
|
||||
DatasetBuilder::from_namespace(ns, vec!["table".to_string()]).await?;
|
||||
let builder: DatasetBuilder = builder.with_branch("b", None);
|
||||
let builder: DatasetBuilder = builder.with_version(1u64);
|
||||
let _ds: Dataset = builder.load().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 5: MergeInsertBuilder fluent chain ------------------------------
|
||||
//
|
||||
// `db/manifest/publisher.rs:370-391` is the manifest CAS. If any method on
|
||||
// the builder renames or changes signature, the publisher silently breaks.
|
||||
// Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_merge_insert_builder_method_chain() -> lance::Result<()> {
|
||||
use lance::dataset::MergeStats;
|
||||
|
||||
let ds: Arc<Dataset> = unimplemented!();
|
||||
let job = MergeInsertBuilder::try_new(ds, vec!["object_id".to_string()])?
|
||||
.when_matched(WhenMatched::UpdateAll)
|
||||
.when_not_matched(WhenNotMatched::InsertAll)
|
||||
.conflict_retries(0)
|
||||
.use_index(false)
|
||||
.try_build()?;
|
||||
|
||||
// execute_reader takes `impl StreamingWriteSource` (lance trait), which
|
||||
// RecordBatchIterator implements. Pin the return shape
|
||||
// `(Arc<Dataset>, MergeStats)` — the publisher's CAS loop depends on
|
||||
// both: the new Dataset to advance HEAD, the stats for the audit row.
|
||||
let source: RecordBatchIterator<Vec<Result<RecordBatch, arrow_schema::ArrowError>>> =
|
||||
unimplemented!();
|
||||
let result: (Arc<Dataset>, MergeStats) = job.execute_reader(source).await?;
|
||||
let _ds: Arc<Dataset> = result.0;
|
||||
let _stats: MergeStats = result.1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 6: WriteParams::default() leaves data_storage_version = None ----
|
||||
//
|
||||
// Our V2_2 pin is load-bearing for blob v2 (verified during development,
|
||||
// when V2_1 produced "Blob v2 requires file version >= 2.2" on 13 blob
|
||||
// tests). If Lance changes the default to pin some version itself, audit
|
||||
// every `data_storage_version: Some(LanceFileVersion::V2_2)` site.
|
||||
|
||||
/// Runtime guard: `WriteParams::default()` must leave `data_storage_version`
/// unset (`None`). If Lance starts choosing a version in the default, the
/// explicit `LanceFileVersion::V2_2` pins need to be re-audited.
#[test]
fn write_params_default_does_not_set_storage_version() {
    let default_version = WriteParams::default().data_storage_version;
    assert_eq!(
        default_version, None,
        "WriteParams::default().data_storage_version is no longer None; \
         audit every explicit V2_2 pin (see rg 'LanceFileVersion::V2_2')."
    );
}
|
||||
|
||||
// --- Guard 7: compact_files signature --------------------------------------
|
||||
//
|
||||
// `db/omnigraph/optimize.rs:107` calls `compact_files(&mut ds, options, None)`.
|
||||
// Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_compact_files_signature() -> lance::Result<()> {
|
||||
let mut ds: Dataset = unimplemented!();
|
||||
let options: CompactionOptions = CompactionOptions::default();
|
||||
let _metrics = compact_files(&mut ds, options, None).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Guard 8: Dataset::delete returns DeleteResult { new_dataset, num_deleted_rows } ---
|
||||
//
|
||||
// `table_store.rs::delete_where` consumes both fields. When MR-A migrates
|
||||
// `delete_where` to two-phase via `DeleteBuilder::execute_uncommitted`, this
|
||||
// guard updates to pin the staged path. Compile-only.
|
||||
|
||||
#[allow(
|
||||
dead_code,
|
||||
unreachable_code,
|
||||
unused_variables,
|
||||
unused_mut,
|
||||
clippy::diverging_sub_expression
|
||||
)]
|
||||
async fn _compile_delete_result_field_shape() -> lance::Result<()> {
|
||||
let mut ds: Dataset = unimplemented!();
|
||||
let result: DeleteResult = ds.delete("x = 1").await?;
|
||||
let _new_dataset: Arc<Dataset> = result.new_dataset;
|
||||
let _num_deleted: u64 = result.num_deleted_rows;
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -3,13 +3,13 @@ mod helpers;
|
|||
use std::fs;
|
||||
|
||||
use omnigraph::db::{Omnigraph, ReadTarget};
|
||||
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
|
||||
use omnigraph_compiler::schema::parser::parse_schema;
|
||||
use omnigraph_compiler::{build_schema_ir, schema_ir_pretty_json};
|
||||
|
||||
use helpers::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn init_creates_repo() {
|
||||
async fn init_creates_graph() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
|
|
@ -34,7 +34,7 @@ async fn init_creates_repo() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn open_reads_existing_repo() {
|
||||
async fn open_reads_existing_graph() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ async fn open_reads_existing_repo() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
|
||||
async fn open_bootstraps_legacy_schema_state_for_main_only_graph() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -64,7 +64,7 @@ async fn open_bootstraps_legacy_schema_state_for_main_only_repo() {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn open_rejects_legacy_repo_with_public_branch() {
|
||||
async fn open_rejects_legacy_graph_with_public_branch() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -74,7 +74,7 @@ async fn open_rejects_legacy_repo_with_public_branch() {
|
|||
fs::remove_file(dir.path().join("__schema_state.json")).unwrap();
|
||||
|
||||
let err = match Omnigraph::open(uri).await {
|
||||
Ok(_) => panic!("expected legacy repo with public branch to fail schema bootstrap"),
|
||||
Ok(_) => panic!("expected legacy graph with public branch to fail schema bootstrap"),
|
||||
Err(err) => err,
|
||||
};
|
||||
assert!(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
// Maintenance tests: `optimize` (Lance compact_files) and `cleanup`
|
||||
// (Lance cleanup_old_versions) at the graph level. Covers no-op edges
|
||||
// (empty repo, already-optimized repo), the policy-validation contract on
|
||||
// (empty graph, already-optimized graph), the policy-validation contract on
|
||||
// `cleanup`, and the keep-versions cap that protects head.
|
||||
|
||||
mod helpers;
|
||||
|
|
@ -13,7 +13,7 @@ use omnigraph::loader::{LoadMode, load_jsonl};
|
|||
use helpers::{TEST_DATA, TEST_SCHEMA, count_rows, init_and_load};
|
||||
|
||||
#[tokio::test]
|
||||
async fn optimize_on_empty_repo_returns_stats_per_table_with_no_changes() {
|
||||
async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap();
|
||||
|
|
@ -37,7 +37,7 @@ async fn optimize_after_load_then_again_is_idempotent() {
|
|||
// First pass may compact (load wrote real fragments).
|
||||
let _first = db.optimize().await.unwrap();
|
||||
|
||||
// Second pass should be a no-op: already-compacted repo produces no
|
||||
// Second pass should be a no-op: already-compacted graph produces no
|
||||
// fragments_removed / fragments_added.
|
||||
let second = db.optimize().await.unwrap();
|
||||
for s in &second {
|
||||
|
|
@ -119,7 +119,9 @@ async fn cleanup_older_than_zero_preserves_head() {
|
|||
|
||||
// Smoke test: after aggressive cleanup, we can still read and write the
|
||||
// graph — head wasn't pruned.
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -151,6 +153,8 @@ async fn cleanup_then_optimize_preserves_rows_and_table_remains_writable() {
|
|||
assert_eq!(count_rows(&db, "node:Company").await, companies_before);
|
||||
|
||||
// Table is still writable after the cleanup+optimize sequence.
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge).await.unwrap();
|
||||
load_jsonl(&mut db, TEST_DATA, LoadMode::Merge)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count_rows(&db, "node:Person").await, people_before);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,16 +22,16 @@ use helpers::recovery::{RecoveryExpectation, TableExpectation, assert_post_recov
|
|||
|
||||
const TEST_SCHEMA: &str = include_str!("fixtures/test.pg");
|
||||
|
||||
fn write_sidecar_file(repo_root: &Path, operation_id: &str, json: &str) {
|
||||
let dir = repo_root.join("__recovery");
|
||||
fn write_sidecar_file(graph_root: &Path, operation_id: &str, json: &str) {
|
||||
let dir = graph_root.join("__recovery");
|
||||
if !dir.exists() {
|
||||
std::fs::create_dir(&dir).unwrap();
|
||||
}
|
||||
std::fs::write(dir.join(format!("{}.json", operation_id)), json).unwrap();
|
||||
}
|
||||
|
||||
fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
|
||||
let dir = repo_root.join("__recovery");
|
||||
fn list_recovery_dir(graph_root: &Path) -> Vec<String> {
|
||||
let dir = graph_root.join("__recovery");
|
||||
if !dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -41,7 +41,7 @@ fn list_recovery_dir(repo_root: &Path) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Full URI of a node-type Lance dataset under a fresh Omnigraph repo.
|
||||
/// Full URI of a node-type Lance dataset under a fresh Omnigraph graph.
|
||||
/// Mirrors the `nodes/{fnv1a64-hex(type_name)}` layout in `db/manifest/layout.rs`.
|
||||
fn node_table_uri(root: &str, type_name: &str) -> String {
|
||||
let h: u64 = fnv1a64(type_name.as_bytes());
|
||||
|
|
@ -283,8 +283,8 @@ async fn recovery_rolls_back_synthetic_drift_on_open() {
|
|||
// =====================================================================
|
||||
|
||||
/// Helper: count rows in `_graph_commit_recoveries.lance` at the given root.
|
||||
async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
async fn count_recovery_audit_rows(graph_root: &Path) -> usize {
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -306,9 +306,9 @@ async fn count_recovery_audit_rows(repo_root: &Path) -> usize {
|
|||
/// Helper: read the most recent recovery audit row's `recovery_kind`,
|
||||
/// `recovery_for_actor`, and `operation_id`. Returns `None` if no rows.
|
||||
async fn read_latest_recovery_audit(
|
||||
repo_root: &Path,
|
||||
graph_root: &Path,
|
||||
) -> Option<(String, Option<String>, String, String)> {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -357,8 +357,8 @@ async fn read_latest_recovery_audit(
|
|||
/// storage order (multiple batches concatenated). Used by the
|
||||
/// multi-sidecar fresh-snapshot test as a diagnostic alongside the
|
||||
/// post-recovery Lance HEAD assertion.
|
||||
async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
|
||||
let recoveries_dir = repo_root.join("_graph_commit_recoveries.lance");
|
||||
async fn list_recovery_audit_kinds(graph_root: &Path) -> Vec<String> {
|
||||
let recoveries_dir = graph_root.join("_graph_commit_recoveries.lance");
|
||||
if !recoveries_dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
|
@ -391,8 +391,8 @@ async fn list_recovery_audit_kinds(repo_root: &Path) -> Vec<String> {
|
|||
}
|
||||
|
||||
/// Helper: count `_graph_commits.lance` rows tagged with the recovery actor.
|
||||
async fn count_recovery_actor_commits(repo_root: &Path) -> usize {
|
||||
let actors_dir = repo_root.join("_graph_commit_actors.lance");
|
||||
async fn count_recovery_actor_commits(graph_root: &Path) -> usize {
|
||||
let actors_dir = graph_root.join("_graph_commit_actors.lance");
|
||||
if !actors_dir.exists() {
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -908,7 +908,7 @@ async fn recovery_ensure_indices_steady_state_no_sidecar() {
|
|||
/// ran) and rolls back any sibling table's legitimate index work.
|
||||
///
|
||||
/// Integration verification: after a real init + ensure_indices on a
|
||||
/// repo where every table is empty, the recovery sweep must complete
|
||||
/// graph where every table is empty, the recovery sweep must complete
|
||||
/// cleanly (no leftover sidecar) AND the next ensure_indices must also
|
||||
/// leave no sidecar — proving the empty-table-scoping behavior lets
|
||||
/// steady-state runs incur zero sidecar I/O. The
|
||||
|
|
@ -930,7 +930,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
|
|||
db.ensure_indices().await.unwrap();
|
||||
assert!(
|
||||
list_recovery_dir(dir.path()).is_empty(),
|
||||
"ensure_indices on an all-empty repo must not leave a sidecar"
|
||||
"ensure_indices on an all-empty graph must not leave a sidecar"
|
||||
);
|
||||
// Reopen + ensure_indices — still steady state, still no sidecar.
|
||||
drop(db);
|
||||
|
|
@ -938,7 +938,7 @@ async fn recovery_ensure_indices_handles_empty_tables() {
|
|||
db.ensure_indices().await.unwrap();
|
||||
assert!(
|
||||
list_recovery_dir(dir.path()).is_empty(),
|
||||
"second ensure_indices on an all-empty repo must also not leave a sidecar"
|
||||
"second ensure_indices on an all-empty graph must also not leave a sidecar"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -521,6 +521,10 @@ query delete_two_persons($first: String, $second: String) {
|
|||
delete Person where name = $first
|
||||
delete Person where name = $second
|
||||
}
|
||||
|
||||
query update_age_by_name($name: String, $age: I32) {
|
||||
update Person set { age: $age } where name = $name
|
||||
}
|
||||
"#;
|
||||
|
||||
/// D₂: a query mixing inserts/updates with deletes is rejected at parse
|
||||
|
|
@ -1362,3 +1366,85 @@ query insert_then_update_note(
|
|||
.unwrap();
|
||||
assert_eq!(qr.num_rows(), 0, "letter must not be visible after early error");
|
||||
}
|
||||
|
||||
/// MR-920 regression: two sequential `update T set {f:v} where x=y`
|
||||
/// invocations against the same row must both succeed. Pre-fix, the
|
||||
/// second one failed with `Ambiguous merge inserts are prohibited:
|
||||
/// multiple source rows match the same target row on (id = "Alice")`
|
||||
/// even though the scan returned exactly one row.
|
||||
///
|
||||
/// Root cause hypothesis (per MR-920): Lance's
|
||||
/// `processed_row_ids: Mutex<HashSet<u64>>`
|
||||
/// (`src/dataset/write/merge_insert.rs:2099`) double-processes the
|
||||
/// same target row_id against datasets previously rewritten by
|
||||
/// merge_insert. `SourceDedupeBehavior::FirstSeen` makes Lance skip
|
||||
/// rather than error.
|
||||
///
|
||||
/// Companion to `consistency.rs::load_merge_repeated_against_overlapping_keys_succeeds`
|
||||
/// (PR #98 / Window 1 of the bug class via the load surface).
|
||||
#[tokio::test]
|
||||
async fn second_sequential_update_on_same_row_succeeds() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_and_load(&dir).await;
|
||||
|
||||
db.mutate(
|
||||
"main",
|
||||
STAGED_QUERIES,
|
||||
"update_age_by_name",
|
||||
&mixed_params(&[("$name", "Alice")], &[("$age", 99)]),
|
||||
)
|
||||
.await
|
||||
.expect("first sequential update on Alice must succeed");
|
||||
|
||||
let batches = read_table(&db, "node:Person").await;
|
||||
let alice_count: usize = batches
|
||||
.iter()
|
||||
.map(|b| {
|
||||
let names = b
|
||||
.column_by_name("name")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<arrow_array::StringArray>()
|
||||
.unwrap();
|
||||
(0..b.num_rows())
|
||||
.filter(|i| names.is_valid(*i) && names.value(*i) == "Alice")
|
||||
.count()
|
||||
})
|
||||
.sum();
|
||||
assert_eq!(
|
||||
alice_count, 1,
|
||||
"after first update, exactly one Alice row should be visible"
|
||||
);
|
||||
|
||||
db.mutate(
|
||||
"main",
|
||||
STAGED_QUERIES,
|
||||
"update_age_by_name",
|
||||
&mixed_params(&[("$name", "Alice")], &[("$age", 42)]),
|
||||
)
|
||||
.await
|
||||
.expect("second sequential update on Alice must succeed");
|
||||
|
||||
let batches = read_table(&db, "node:Person").await;
|
||||
let mut alice_age: Option<i32> = None;
|
||||
for batch in &batches {
|
||||
let names = batch
|
||||
.column_by_name("name")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<arrow_array::StringArray>()
|
||||
.unwrap();
|
||||
let ages = batch
|
||||
.column_by_name("age")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<arrow_array::Int32Array>()
|
||||
.unwrap();
|
||||
for i in 0..batch.num_rows() {
|
||||
if names.is_valid(i) && names.value(i) == "Alice" && ages.is_valid(i) {
|
||||
alice_age = Some(ages.value(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
assert_eq!(alice_age, Some(42), "Alice's age must reflect the second update");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ use omnigraph::loader::{LoadMode, load_jsonl};
|
|||
use helpers::*;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn s3_compatible_repo_lifecycle_works() {
|
||||
let Some(uri) = s3_test_repo_uri("omnigraph-runtime") else {
|
||||
async fn s3_compatible_graph_lifecycle_works() {
|
||||
let Some(uri) = s3_test_graph_uri("omnigraph-runtime") else {
|
||||
eprintln!("skipping s3 runtime test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
@ -81,7 +81,7 @@ async fn s3_compatible_repo_lifecycle_works() {
|
|||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn s3_branch_change_merge_flow_works() {
|
||||
let Some(uri) = s3_test_repo_uri("omnigraph-branching") else {
|
||||
let Some(uri) = s3_test_graph_uri("omnigraph-branching") else {
|
||||
eprintln!("skipping s3 branch test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
@ -135,7 +135,7 @@ async fn s3_branch_change_merge_flow_works() {
|
|||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn s3_public_load_uses_hidden_run_and_publishes() {
|
||||
let Some(uri) = s3_test_repo_uri("omnigraph-public-load") else {
|
||||
let Some(uri) = s3_test_graph_uri("omnigraph-public-load") else {
|
||||
eprintln!("skipping s3 public load test: OMNIGRAPH_S3_TEST_BUCKET is not set");
|
||||
return;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ async fn apply_schema_rejects_when_non_main_branch_exists() {
|
|||
let err = db.apply_schema(&desired).await.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("schema apply requires a repo with only main")
|
||||
.contains("schema apply requires a graph with only main")
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -402,10 +402,7 @@ async fn apply_schema_rejects_adding_a_required_property_without_backfill() {
|
|||
|
||||
// Add `email: String` (required, non-nullable, no @rename_from). Existing
|
||||
// rows have no value to fill in, so this is unsupported in v1.
|
||||
let desired = TEST_SCHEMA.replace(
|
||||
" age: I32?\n}",
|
||||
" age: I32?\n email: String\n}",
|
||||
);
|
||||
let desired = TEST_SCHEMA.replace(" age: I32?\n}", " age: I32?\n email: String\n}");
|
||||
let err = db.apply_schema(&desired).await.unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
|
|
@ -437,7 +434,10 @@ async fn plan_schema_for_property_type_narrowing_is_not_supported() {
|
|||
.unwrap();
|
||||
|
||||
let plan = db.plan_schema(TEST_SCHEMA).await.unwrap();
|
||||
assert!(!plan.supported, "narrowing I64 -> I32 must not be supported");
|
||||
assert!(
|
||||
!plan.supported,
|
||||
"narrowing I64 -> I32 must not be supported"
|
||||
);
|
||||
assert!(plan.steps.iter().any(|step| matches!(
|
||||
step,
|
||||
SchemaMigrationStep::UnsupportedChange { code, .. }
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@ mod helpers;
|
|||
use std::env;
|
||||
|
||||
use arrow_array::{Array, StringArray};
|
||||
use lance_index::{DatasetIndexExt, is_system_index};
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance_index::is_system_index;
|
||||
use serial_test::serial;
|
||||
|
||||
use omnigraph::db::Omnigraph;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ Three views, increasing zoom:
|
|||
2. **Layer view** — the eight-layer stack inside one OmniGraph process.
|
||||
3. **Component zoom-ins** — what's inside each layer.
|
||||
|
||||
For runtime flows (read query, mutation), see [`docs/dev/execution.md`](execution.md). For the on-disk layout of a repo, see [`docs/user/storage.md`](../user/storage.md).
|
||||
For runtime flows (read query, mutation), see [`docs/dev/execution.md`](execution.md). For the on-disk layout of a graph, see [`docs/user/storage.md`](../user/storage.md).
|
||||
|
||||
L1 (orange in the diagrams) is what we inherit from Lance; L2 (blue) is what OmniGraph adds. The L1/L2 framing is also called out in prose at the bottom of this doc.
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ flowchart TB
|
|||
subgraph engine[omnigraph engine]
|
||||
plan[exec query and mutation]:::l2
|
||||
gi[graph index CSR/CSC<br/>RuntimeCache LRU 8]:::l2
|
||||
coord[coordinator<br/>ManifestRepo · CommitGraph]:::l2
|
||||
coord[coordinator<br/>ManifestCoordinator · CommitGraph]:::l2
|
||||
end
|
||||
|
||||
subgraph storage[storage trait — wraps Lance]
|
||||
|
|
@ -132,7 +132,7 @@ flowchart TB
|
|||
|
||||
subgraph state[graph state]
|
||||
coord[GraphCoordinator]:::l2
|
||||
mr[ManifestRepo<br/>db/manifest.rs]:::l2
|
||||
mr[ManifestCoordinator<br/>db/manifest.rs]:::l2
|
||||
cg[CommitGraph<br/>_graph_commits.lance]:::l2
|
||||
stg[MutationStaging<br/>per-query in-memory accumulator<br/>exec/staging.rs]:::l2
|
||||
end
|
||||
|
|
@ -166,7 +166,7 @@ Code paths:
|
|||
|
||||
- Read entry: `Omnigraph::query` at `crates/omnigraph/src/exec/query.rs:7`
|
||||
- Mutation entry: `Omnigraph::mutate` at `crates/omnigraph/src/exec/mutation.rs:511`
|
||||
- Manifest commit: `ManifestRepo::commit` at `crates/omnigraph/src/db/manifest.rs:280`
|
||||
- Manifest commit: `ManifestCoordinator::commit` at `crates/omnigraph/src/db/manifest.rs:280`
|
||||
- Graph index: `crates/omnigraph/src/graph_index/`
|
||||
- Loader: `Omnigraph::ingest` at `crates/omnigraph/src/loader/mod.rs:74`
|
||||
|
||||
|
|
|
|||
|
|
@ -16,12 +16,12 @@ This page explains what the policy says and how to change it.
|
|||
| **Disallow force pushes** | `true` | No history rewrites on `main`. |
|
||||
| **Disallow branch deletions** | `true` | `main` cannot be deleted. |
|
||||
| **Required conversation resolution** | `true` | All review comment threads must be resolved before merge. |
|
||||
| **Enforce on admins** | `true` | Even repo admins go through the gates. The point is no bypasses. |
|
||||
| **Enforce on admins** | `true` | Even repository admins go through the gates. The point is no bypasses. |
|
||||
| **Required signed commits** | not yet | Not enabled. Would lock out maintainers until everyone enrolls GPG/SSH commit signing. Tracked as a follow-up. |
|
||||
|
||||
## How to apply
|
||||
|
||||
Run from the repo root:
|
||||
Run from the repository root:
|
||||
|
||||
```bash
|
||||
./scripts/apply-branch-protection.sh
|
||||
|
|
@ -29,7 +29,7 @@ Run from the repo root:
|
|||
|
||||
The script reads `.github/branch-protection.json`, strips the human-readable `_comment` field (the GitHub API rejects unknown keys), and PUTs to `repos/ModernRelay/omnigraph/branches/main/protection`.
|
||||
|
||||
Requires `gh` authenticated with a token that has admin permissions on the repo.
|
||||
Requires `gh` authenticated with a token that has admin permissions on the repository.
|
||||
|
||||
To preview without applying:
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ Outputs the live policy. Compare against `.github/branch-protection.json` to det
|
|||
|
||||
- **Audit trail**: `git log .github/branch-protection.json` shows every change with a reviewable diff and a merge commit.
|
||||
- **Disaster recovery**: if branch protection is accidentally removed or weakened via the UI, the JSON is the canonical recovery point.
|
||||
- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repo policy lives in the repo.
|
||||
- **Consistency**: pairs with `.github/codeowners-roles.yml` (the CODEOWNERS source of truth). Repository policy lives in the repository.
|
||||
|
||||
## What this gates
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ After branch protection is applied, every PR targeting `main` must:
|
|||
4. Have all review conversations resolved.
|
||||
5. Be squash- or rebase-merged (no merge commits).
|
||||
|
||||
Even repo admins are subject to these rules.
|
||||
Even repository admins are subject to these rules.
|
||||
|
||||
## Subsequent hardening (not in this PR)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
`.github/workflows/`:
|
||||
|
||||
- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repo PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`).
|
||||
- **ci.yml**: text-only changes skip; otherwise `cargo test --workspace --locked` on ubuntu-latest with protobuf compiler. OpenAPI-drift check that auto-commits the regenerated `openapi.json` for same-repository PRs. Also runs the AGENTS.md cross-link integrity check (`scripts/check-agents-md.sh`).
|
||||
- **AWS feature build job**: `cargo build/test -p omnigraph-server --features aws` on ubuntu-latest.
|
||||
- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_repo_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`.
|
||||
- **RustFS S3 integration**: spins up RustFS in Docker, runs `s3_storage`, `server_opens_s3_graph_directly_and_serves_snapshot_and_read`, and `local_cli_s3_end_to_end_init_load_read_flow`.
|
||||
- **release-edge.yml**: on every push to main, retags `edge`, builds Linux/macOS-Intel/macOS-arm64 archives + sha256, publishes a rolling prerelease.
|
||||
- **release.yml**: on `v*` tags, builds the 3-platform matrix and updates the Homebrew tap (`scripts/update-homebrew-formula.sh`) by pushing the regenerated formula to `ModernRelay/homebrew-tap`.
|
||||
- **package.yml**: manual ECR image build; emits two image tags per commit (`<sha>`, `<sha>-aws`) via CodeBuild.
|
||||
|
|
|
|||
|
|
@ -2,13 +2,13 @@
|
|||
|
||||
`.github/CODEOWNERS` is **generated** — not hand-edited. The source of truth is `.github/codeowners-roles.yml`, expanded by `.github/scripts/render-codeowners.py`. CI rejects drift between the two and rejects direct edits to `CODEOWNERS` that don't accompany a yml change.
|
||||
|
||||
This setup gives every role change a reviewable PR and a permanent in-repo audit trail (`git log .github/codeowners-roles.yml`).
|
||||
This setup gives every role change a reviewable PR and a permanent in-repository audit trail (`git log .github/codeowners-roles.yml`).
|
||||
|
||||
## Current roles
|
||||
|
||||
| Role | Members | Scope |
|
||||
|---|---|---|
|
||||
| `engineering` | `@aaltshuler` | All code under `crates/**`, repo infrastructure, default for unmapped paths |
|
||||
| `engineering` | `@aaltshuler` | All code under `crates/**`, repository infrastructure, default for unmapped paths |
|
||||
| `docs` | `@aaltshuler`, `@ragnorc` | `docs/**`, README.md, AGENTS.md, CLAUDE.md, SECURITY.md |
|
||||
|
||||
GitHub treats multiple owners in a CODEOWNERS line as **"any one of them satisfies the review requirement"**. For docs, either named member can approve. To require N distinct approvers on a specific path, layer a CI check on top (not currently configured).
|
||||
|
|
@ -34,4 +34,4 @@ CI fails the PR if:
|
|||
- **Audit trail**: `git log .github/codeowners-roles.yml` is the canonical record of every role change. The rendered `CODEOWNERS` is a derived artifact.
|
||||
- **Roles are first-class**: paths reference roles, not raw handles. Renaming a person or rotating a role updates one place, not every path.
|
||||
- **Future extension**: scheduled rotation (weekly on-call, quarterly leads) plugs into the same yml without changing the path mappings. Not enabled today.
|
||||
- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repo's code-owner policy follows the same "policy as reviewed code" pattern.
|
||||
- **Consistency with the product**: omnigraph itself enforces auditable Cedar policy. The repository's code-owner policy follows the same "policy as reviewed code" pattern.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Lance Docs Index (for OmniGraph agents)
|
||||
|
||||
OmniGraph sits on top of Lance. Many problems — index lifecycle, branching, transactions, fragments, compaction, vector/FTS internals — are answered upstream in Lance's docs, not in this repo.
|
||||
OmniGraph sits on top of Lance. Many problems — index lifecycle, branching, transactions, fragments, compaction, vector/FTS internals — are answered upstream in Lance's docs, not in this codebase.
|
||||
|
||||
This file is the curated entry point. **When you hit a Lance-shaped problem, find the matching topic below and fetch the listed URL(s) before guessing.** Don't grep our codebase for behavior that is documented authoritatively in Lance.
|
||||
|
||||
|
|
@ -156,13 +156,26 @@ If a future need pulls one of these into scope, add a row to the matching domain
|
|||
|
||||
When Lance ships a major release that changes any of the above (file format bump, new index type, transaction semantics change, new branching primitive), refresh this index in the same change as the omnigraph upgrade. Stale Lance pointers are worse than no pointers.
|
||||
|
||||
### Last alignment audit: 2026-05-02 (Lance 4.0.1 upstream; omnigraph pinned at 4.0.0)
|
||||
### Last alignment audit: 2026-05-22 (Lance 6.0.1 upstream; omnigraph pinned at 6.0.1)
|
||||
|
||||
A full read-through of every index page above was performed in the MR-793 cycle. Findings (no code changes required for PR #70):
|
||||
Migration from Lance 4.0.0 → 6.0.1 landed in this cycle (DataFusion 52 → 53, Arrow 57 → 58, lance-tokenizer 6.0.1 added, tantivy* removed). Direct 4 → 6 jump; v5.x was not used as an intermediate (rationale in `~/.claude/plans/shimmering-percolating-duckling.md`). Behavior-affecting findings:
|
||||
|
||||
- The MemWAL "three sub-pages" (Overview / Details / Implementation) turned out to be **anchor sections on the single existing page** at `https://lance.org/format/table/mem_wal/` — not separate URLs. Findings: MemWAL is opt-in (requires an unenforced primary key + explicit shard config; omnigraph doesn't use it), operates intra-table (LSM-tree for streaming writes into one Lance table), and does NOT overlap with MR-847's cross-table manifest-vs-Lance-HEAD recovery problem. MR-847's design is unaffected.
|
||||
- The distributed-indexing guide names Python APIs (`commit_existing_index_segments`, `merge_existing_index_segments`); the Rust analogues exist via `CreateIndexBuilder::execute_uncommitted` for scalar indices but **`build_index_metadata_from_segments` is `pub(crate)`** and blocks vector-index two-phase commits from outside the lance crate. Filed [lance-format/lance#6666](https://github.com/lance-format/lance/issues/6666) as a companion to [#6658](https://github.com/lance-format/lance/issues/6658).
|
||||
- "Stable Row ID for Index" is documented as **experimental** in lance-4.0.x. Our datasets enable stable row IDs at the dataset level (`WriteParams::enable_stable_row_ids = true`); confirming whether our created indices opt into stable-row-id mode is a follow-up worth doing before MR-848 (index reconciler) lands.
|
||||
- Fragment Reuse Index (FRI) is documented as one of three compaction strategies. omnigraph currently uses option 2 (immediate index rewrite at compaction time, via `omnigraph optimize`'s post-compaction rebuild). Adopting FRI is the explicit option for compaction-friendly index updates; relevant to MR-848.
|
||||
- **DatasetIndexExt moved** from `lance-index` to `lance::index` (Lance PR #6280, v5.0). Six import sites updated. `lance-index::IndexType` and `lance-index::is_system_index` stayed in `lance-index`. `omnigraph-cli` and `omnigraph-server` gained `lance = { workspace = true }` in their dev-dependencies.
|
||||
- **`DescribeTableResponse` gained `is_only_declared: Option<bool>`** (lance-namespace 6.0+, v5.0 PR #6186). Set to `Some(false)` in both `BranchManifestNamespace::describe_table` and `StagedTableNamespace::describe_table` — every table we return is physically materialized via `Dataset::open`, never "declared-only."
|
||||
- **`MergeInsertBuilder` execute_reader return shape preserved** `(Arc<Dataset>, MergeStats)`; the publisher CAS chain at `db/manifest/publisher.rs:370-391` works unchanged. Pinned by `tests/lance_surface_guards.rs::_compile_merge_insert_builder_method_chain`.
|
||||
- **`LanceError::TooMuchWriteContention` variant retained** in v6.0.1 (no rename). The typed publisher translation at `db/manifest/publisher.rs:417-430` continues to apply. Pinned by `lance_surface_guards.rs::lance_error_too_much_write_contention_variant_exists`.
|
||||
- **`ManifestLocation` field shape stable**: `.path: object_store::path::Path`, `.size: Option<u64>`, `.e_tag: Option<String>`, `.naming_scheme: ManifestNamingScheme`. Pinned by `lance_surface_guards.rs::manifest_location_field_shape`.
|
||||
- **`LanceFileVersion::default()` flipped V2_0 → V2_1** (v5.0). No effect — every `data_storage_version` callsite explicitly pins `Some(LanceFileVersion::V2_2)` (load-bearing for blob v2: `Blob v2 requires file version >= 2.2` enforced in `lance/src/dataset/write.rs:748`).
|
||||
- **`Dataset::checkout_version(N).await?.restore().await?`**: `restore()` takes `&mut self` and returns `Result<()>` (mutates in place, does not consume + return a new dataset). The recovery rollback hammer at `db/manifest/recovery.rs:505-522` continues to work. Pinned by `lance_surface_guards.rs::_compile_checkout_version_then_restore_signature`.
|
||||
- **`DatasetBuilder::from_namespace(...).with_branch(...).with_version(...).load()`** surface preserved (the namespace builder chain at `db/manifest/namespace.rs:162-174`). Pinned by `lance_surface_guards.rs::_compile_dataset_builder_from_namespace_signature`.
|
||||
- **`compact_files(&mut ds, CompactionOptions::default(), None)`** signature stable. `CompactionOptions` still does not expose `data_storage_version`; `compact_files` builds its own `WriteParams { ..Default::default() }`. Note: `LanceFileVersion::default()` is now V2_1 in v6, so optimize-rewritten fragments come out at V2_1 by default (was V2_0 in v4). Existing explicit V2_2 pins on creates/appends still apply.
|
||||
- **`Dataset::delete(predicate)` returns `DeleteResult { new_dataset: Arc<Dataset>, num_deleted_rows: u64 }`** — unchanged shape. Pinned by `lance_surface_guards.rs::_compile_delete_result_field_shape`. MR-A will repurpose this guard to the staged two-phase variant once `DeleteBuilder::execute_uncommitted` migration lands.
|
||||
- **File reader read methods now async** (Lance PR #6710, v6.0). No effect — omnigraph reaches Lance exclusively through `Dataset::scan` and the staged-write API.
|
||||
- **Tokenizer vendored as `lance-tokenizer`** (Lance PR #6512, v6.0). No effect — no direct tokenizer imports.
|
||||
- **Lance #6658 closed** (2026-05-14) but `DeleteBuilder::execute_uncommitted` did **not** ship in v6.0.1 — binary search across the release stream shows it first appears in `v7.0.0-beta.10` (the closing commits landed on main but were not backported to the 6.x line). Tracked as MR-A: migrate `delete_where` to staged, retire the parse-time D2 mutation rule, extend recovery sidecar coverage. **Gated on the Lance v7.x bump**, not this PR. v7.0.0-rc.1 was released on 2026-05-21.
|
||||
- **Lance #6666 still open** (`build_index_metadata_from_segments` public): vector-index two-phase blocked; inline `create_vector_index` residual retained.
|
||||
- **Lance #6877 still open** (`MergeInsertBuilder` dup-rowid): PR #109's `SourceDedupeBehavior::FirstSeen` + `check_batch_unique_by_keys` precondition stay load-bearing.
|
||||
|
||||
Surface guards added: `crates/omnigraph/tests/lance_surface_guards.rs` (8 named guards; 3 runtime + 5 compile-only). Future Lance bumps should re-run this file first as the smoke check. Two additional guards from the original plan were deferred to a follow-up (`manifest_cas_returns_row_level_contention_variant` needs a full publisher-race harness; `table_version_metadata_byte_compatible_with_v4` needs a `pub(crate)` reach extension).
|
||||
|
||||
Bump this date stanza on the next alignment pass.
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav
|
|||
| `merge_truth_table.rs` | Merge-pair truth table (MR-786): all 9×9 `(left_op, right_op)` cells from `{noop, addNode, removeNode, addEdge, removeEdge, setProperty, dropProperty, addLabel, removeLabel}`. Adding a new op to `OpVariant` forces a compile error in `build_case` until the new row + column are dispositioned. 36 executable cells run through real `branch_merge` with a structured oracle (`MergeOutcome` / `MergeConflictKind` + graph-state assert); 45 cells involving `dropProperty`/`addLabel`/`removeLabel` are recorded as `Unsupported` until the mutation grammar grows. |
|
||||
| `runs.rs` | Direct-publish writes: cancellation, concurrent-writer CAS, multi-statement atomicity, MR-794 staged-write rewire (D₂ rejection, insert+update coalesce, multi-append coalesce, partial-failure recovery, load RI/cardinality recovery) |
|
||||
| `staged_writes.rs` | TableStore staged-write primitives (`stage_append`, `stage_merge_insert`, `commit_staged`, `scan_with_staged`, `count_rows_with_staged`) — primitive-level only; engine code uses the in-memory `MutationStaging` accumulator instead |
|
||||
| `lifecycle.rs` | Repo lifecycle, schema state |
|
||||
| `lifecycle.rs` | Graph lifecycle, schema state |
|
||||
| `point_in_time.rs` | Snapshots, time travel (`snapshot_at_version`, `entity_at`) |
|
||||
| `changes.rs` | `diff_between` / `diff_commits` |
|
||||
| `consistency.rs` | Cross-table snapshot isolation, atomic publish |
|
||||
|
|
@ -31,7 +31,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav
|
|||
| `traversal.rs` | `Expand`, variable-length hops, anti-join |
|
||||
| `aggregation.rs` | `count`, `sum`, `avg`, `min`, `max` |
|
||||
| `export.rs` | NDJSON streaming export filters |
|
||||
| `s3_storage.rs` | S3-backed repo (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) |
|
||||
| `s3_storage.rs` | S3-backed graph (skipped unless `OMNIGRAPH_S3_TEST_BUCKET` is set) |
|
||||
| `lance_version_columns.rs` | Per-row `_row_last_updated_at_version` behavior |
|
||||
| `validators.rs` | Schema constraint enforcement (enum, range, unique, cardinality) across JSONL, insert, update paths |
|
||||
| `maintenance.rs` | `optimize` (compaction) + `cleanup` (version GC): empty/idempotent/no-op edges, policy validation, head preservation |
|
||||
|
|
@ -45,7 +45,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav
|
|||
|
||||
## Test helpers
|
||||
|
||||
- **Engine** — `crates/omnigraph/tests/helpers/mod.rs`: `init_and_load()` (bootstrap a temp repo + load standard fixture), `snapshot_main()`, `snapshot_branch()`, query/mutation runners, row collection and counting. Use these instead of hand-rolling.
|
||||
- **Engine** — `crates/omnigraph/tests/helpers/mod.rs`: `init_and_load()` (bootstrap a temp graph + load standard fixture), `snapshot_main()`, `snapshot_branch()`, query/mutation runners, row collection and counting. Use these instead of hand-rolling.
|
||||
- **CLI** — `crates/omnigraph-cli/tests/support/mod.rs`: `Command`-style wrapper for invoking `omnigraph`, server-process spawning, fixture resolution, output assertion helpers.
|
||||
- **Server** — no shared helpers; server tests call the `Omnigraph` engine API directly and exercise endpoints over the wire.
|
||||
|
||||
|
|
@ -63,14 +63,14 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav
|
|||
CI runs three S3-backed tests against a containerized RustFS server (`.github/workflows/ci.yml` → `rustfs_integration` job):
|
||||
|
||||
- `cargo test -p omnigraph-engine --test s3_storage`
|
||||
- `cargo test -p omnigraph-server --test server server_opens_s3_repo_directly_and_serves_snapshot_and_read`
|
||||
- `cargo test -p omnigraph-server --test server server_opens_s3_graph_directly_and_serves_snapshot_and_read`
|
||||
- `cargo test -p omnigraph-cli --test system_local local_cli_s3_end_to_end_init_load_read_flow`
|
||||
|
||||
Locally, set `OMNIGRAPH_S3_TEST_BUCKET` (and the usual `AWS_*` vars including `AWS_ENDPOINT_URL_S3` for non-AWS) before running. Without those, S3 tests skip gracefully.
|
||||
|
||||
## OpenAPI drift
|
||||
|
||||
`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json` and diffs against the checked-in copy. CI auto-commits the regeneration on same-repo PRs and otherwise runs in strict-check mode (env: `OMNIGRAPH_UPDATE_OPENAPI`).
|
||||
`crates/omnigraph-server/tests/openapi.rs` regenerates `openapi.json` and diffs against the checked-in copy. CI auto-commits the regeneration on same-repository PRs and otherwise runs in strict-check mode (env: `OMNIGRAPH_UPDATE_OPENAPI`).
|
||||
|
||||
## Examples & benches
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ Locally, set `OMNIGRAPH_S3_TEST_BUCKET` (and the usual `AWS_*` vars including `A
|
|||
|
||||
## Coverage tooling — what's missing
|
||||
|
||||
There is **no** coverage tooling in the repo today: no `tarpaulin.toml`, no `codecov.yml`, no coverage CI step. If you want to know whether your change is covered, the answer comes from reading and running the relevant integration tests, not from a tool.
|
||||
There is **no** coverage tooling in the repository today: no `tarpaulin.toml`, no `codecov.yml`, no coverage CI step. If you want to know whether your change is covered, the answer comes from reading and running the relevant integration tests, not from a tool.
|
||||
|
||||
If introducing coverage tooling is in scope for your task, the natural first step is `cargo-llvm-cov` wired into a separate CI job, and a per-crate threshold rather than a global one.
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ How to check:
|
|||
- *Existing test covers the area but not your case* → **add an assertion or a fixture row to the existing test**, don't write a new function with `init_and_load()` again.
|
||||
- *No existing coverage in any test file* → only then write a new test; put it in the file that owns the area, or open a new file only if the area itself is new.
|
||||
|
||||
Three duplicated `init_and_load() → run_query → assert_eq` blocks where one parameterized test would do is the most common form of test rot in this repo. Don't add to it.
|
||||
Three duplicated `init_and_load() → run_query → assert_eq` blocks where one parameterized test would do is the most common form of test rot in this repository. Don't add to it.
|
||||
|
||||
## Before-every-task checklist
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ When you pick up any change, walk through this:
|
|||
1. **Find existing coverage** (per the principle above). Don't just look at the first test file by name — grep for the symbol you're touching across every crate's `tests/`.
|
||||
2. **Run those tests locally before editing.** `cargo test --workspace --locked` for the broad pass; `-p <crate> --test <file>` for a focused loop. Confirm a clean baseline.
|
||||
3. **Decide extend-vs-new** explicitly. If you can extend an existing test (assertion, fixture row, parameterization), do that. Only add a new test fn or new file if no existing one owns the area.
|
||||
4. **Reuse the helpers.** `init_and_load()`, fixture files, the CLI `support` harness — re-use them. Don't bootstrap a fresh repo by hand if a helper exists.
|
||||
4. **Reuse the helpers.** `init_and_load()`, fixture files, the CLI `support` harness — re-use them. Don't bootstrap a fresh graph by hand if a helper exists.
|
||||
5. **Mind the boundary.** Per [docs/dev/invariants.md](invariants.md), test at the layer the change lives at — planner-level changes deserve planner-level tests, not just end-to-end.
|
||||
6. **For substrate-touching changes** (Lance behavior), reach for `failpoints` or fixture-driven scenarios, not stubbed-out mocks.
|
||||
7. **For server / API changes**, confirm the OpenAPI regeneration happens in `openapi.rs` and that the diff lands in `openapi.json`.
|
||||
|
|
|
|||
171
docs/releases/v0.5.0.md
Normal file
171
docs/releases/v0.5.0.md
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
# Omnigraph v0.5.0
|
||||
|
||||
Omnigraph v0.5.0 is a substrate, security, and migration-safety release. It
|
||||
jumps the storage substrate from Lance 4 to Lance 6.0.1 (DataFusion 52 → 53,
|
||||
Arrow 57 → 58), introduces engine-wide Cedar policy enforcement on every
|
||||
authoring path, and ships a structured schema-lint v1 chassis with
|
||||
code-tagged diagnostics, soft drops, and an explicit `--allow-data-loss`
|
||||
flag for destructive migrations.
|
||||
|
||||
## Highlights
|
||||
|
||||
- **Lance 6.0.1 substrate**: bump from Lance 4.0.0 → 6.0.1, DataFusion 52 →
|
||||
53, Arrow 57 → 58. New optimizer rules (vectorized `IN`-list eq kernel,
|
||||
`PhysicalExprSimplifier`, push-limit-into-hash-join, CASE-NULL shortcut)
|
||||
reach predicates that flow through the engine. `lance-tokenizer` replaces
|
||||
tantivy internally; FTS behavior preserved.
|
||||
- **Cedar policy engine**: a new `omnigraph-policy` crate wires
|
||||
`Omnigraph::enforce(action, scope, actor)` into every `_as` writer
|
||||
(`mutate_as`, `load_as`, `apply_schema_as`, `branch_create_as`,
|
||||
`branch_merge_as`, `branch_delete_as`, plus the load and change
|
||||
variants). The HTTP server defaults to deny-all when no Cedar policy is
|
||||
configured; a YAML policy file is required to enable writes. Actor
|
||||
identity comes only from signed token claims — clients cannot set actor
|
||||
identity directly.
|
||||
- **Schema lint v1 chassis**: diagnostics now carry stable codes of the form
|
||||
`OG-XXX-NNN` instead of free-form messages. `omnigraph schema plan` and
|
||||
`apply` understand soft drops on properties and types — destructive drops
|
||||
require the new `--allow-data-loss` flag (Hard mode) at the CLI and an
|
||||
equivalent JSON flag over HTTP.
|
||||
- **Structured filter pushdown**: query-language predicates lower to
|
||||
DataFusion `Expr` and push down through Lance's `Scanner::filter_expr`
|
||||
instead of being flattened to SQL strings. This unlocks `CompOp::Contains`
|
||||
pushdown (via `array_has`), which previously fell through to in-memory
|
||||
post-scan filtering, and lets the DataFusion 53 optimizer rules above act
|
||||
on our predicates.
|
||||
- **HTTP `allow_data_loss` parity**: the destructive-drop guard now exists
|
||||
on both the CLI (`--allow-data-loss`) and HTTP (`allow_data_loss: true` in
|
||||
the schema-apply request body).
|
||||
- **Inline query strings on CLI and HTTP**: `omnigraph read` /
|
||||
`omnigraph mutate` and the corresponding HTTP endpoints accept inline
|
||||
`.gq` source, not just a file path. Easier ad-hoc queries, clearer
|
||||
request logs.
|
||||
- **Browser CORS layer**: optional CORS layer on `omnigraph-server` for
|
||||
browser-based UIs, gated by `OMNIGRAPH_CORS_ORIGINS`.
|
||||
- **Merge-insert dup-rowid fix**: Lance's `MergeInsertBuilder` could surface
|
||||
spurious `"Ambiguous merge inserts"` errors on sequential merges against
|
||||
rows previously rewritten by `merge_insert`. The engine now opts into
|
||||
`SourceDedupeBehavior::FirstSeen` with a `check_batch_unique_by_keys`
|
||||
fail-fast precondition that guarantees source-side dedup happens before
|
||||
Lance sees the batch.
|
||||
- **Branch-merge error-path recovery**: a branch merge that failed
|
||||
mid-flight could leave the in-process coordinator pointing at a stale
|
||||
active branch. The error path now restores the prior coordinator,
|
||||
matching the success path's invariant.
|
||||
- **Branch merge with blob columns**: external blob URIs are now
|
||||
materialized correctly during branch merge instead of being dropped or
|
||||
pointing at the source branch.
|
||||
- **Lance API surface guards**: a new test file
|
||||
(`crates/omnigraph/tests/lance_surface_guards.rs`) pins eight specific
|
||||
Lance API surfaces (`LanceError::TooMuchWriteContention`,
|
||||
`ManifestLocation` fields, `MergeInsertBuilder` return shape,
|
||||
`WriteParams::default`, `compact_files` signature, etc.) so the next
|
||||
Lance bump fails at compile time or at runtime on any silent drift rather than
|
||||
producing wrong-state recovery in production.
|
||||
|
||||
## Behavior changes
|
||||
|
||||
- **On-disk format unchanged**: existing v0.4.2 datasets open unchanged.
|
||||
The Lance file format pin stays at V2_2 (required by Lance's blob v2
|
||||
feature).
|
||||
- **`omnigraph-server` defaults to deny-all under `--policy`**: starting a
|
||||
server with the policy feature enabled but no Cedar YAML policy
|
||||
configured rejects every write. Operators must supply a policy file to
|
||||
authorize anything.
|
||||
- **Schema-lint diagnostics carry stable codes**: messages now lead with
|
||||
`OG-XXX-NNN`. CI parsers or tooling that keyed off the v0.4.2 free-form
|
||||
text need to switch to code-based matching.
|
||||
- **Destructive schema drops require `--allow-data-loss`**: dropping a
|
||||
property or type returns a structured diagnostic by default.
|
||||
`omnigraph schema apply --allow-data-loss` (CLI) or
|
||||
`{"allow_data_loss": true}` (HTTP) opts into Hard mode.
|
||||
- **`HashJoinExec` null-aware semantics on anti-join**: a side effect of
|
||||
the DataFusion 53 bump — `NOT IN` semantics under null-valued anti-join
|
||||
columns are now correct per the SQL standard. Queries that depended on the
|
||||
prior behavior were already returning incorrect results.
|
||||
|
||||
## Upgrade Notes
|
||||
|
||||
### Migration
|
||||
|
||||
- No data migration is required. v0.4.2 graphs open directly on v0.5.0.
|
||||
|
||||
### Clients
|
||||
|
||||
- HTTP and SDK clients should switch any string-matching schema-lint
|
||||
parsing to code-based matching against the `OG-XXX-NNN` prefix.
|
||||
- Clients exercising destructive schema drops (`DropProperty`, `DropType`)
|
||||
must add the `allow_data_loss` request field (HTTP) or
|
||||
`--allow-data-loss` flag (CLI). Default is soft-drop-or-reject.
|
||||
- Client calls to `mutate_as` / `load_as` / `apply_schema_as` / branch
|
||||
authoring APIs now flow through the policy enforcer. Anything bypassing
|
||||
authorization on v0.4.2 will be rejected on v0.5.0 once a policy is
|
||||
configured.
|
||||
|
||||
### Operators
|
||||
|
||||
- Configure a Cedar policy YAML for production servers before enabling
|
||||
writes; deny-all is the new default. The `omnigraph policy validate` /
|
||||
`test` / `explain` CLI commands are unchanged.
|
||||
- Bearer tokens continue to be the actor-identity source; review the
|
||||
signed-token-claim-only invariant in `docs/dev/invariants.md` if you've
|
||||
built custom authentication.
|
||||
- If your local CI uses RustFS for S3-compatible storage testing, our CI
|
||||
pins `rustfs/rustfs:1.0.0-beta.3` (the last known-good tag before the
|
||||
upstream credentials-policy change). Mirror the pin or set
|
||||
`RUSTFS_ALLOW_INSECURE_DEFAULT_CREDENTIALS=true` for the new image
|
||||
versions.
|
||||
|
||||
## Tests added or strengthened
|
||||
|
||||
- `crates/omnigraph/tests/lance_surface_guards.rs` — 8 named guards pinning
|
||||
Lance API surfaces against silent drift on future bumps.
|
||||
- `crates/omnigraph/tests/policy_engine_chassis.rs` — engine-level policy
|
||||
enforcement coverage; complements the existing HTTP policy tests.
|
||||
- Policy chassis e2e gap-fills — branch-merge, branch-create, branch-delete
|
||||
policy paths now have explicit end-to-end tests over HTTP and CLI.
|
||||
- Merge-pair truth table — exhaustive op-variant matrix for three-way
|
||||
merge across `noop`, `addNode`, `removeNode`, `addEdge`, `removeEdge`,
|
||||
`setProperty`, `dropProperty`, `addLabel`, `removeLabel`; the build
|
||||
fails to compile when a new op variant is added without dispositioning
|
||||
every pairing.
|
||||
- Merge-insert: regression for the dup-rowid bug class on the load surface
|
||||
(`load_merge_repeated_against_overlapping_keys_succeeds`), the update
|
||||
surface (`second_sequential_update_on_same_row_succeeds`), and the
|
||||
upstream-Lance-gap canary
|
||||
(`load_merge_window_2_documents_upstream_lance_gap`).
|
||||
- Maintenance + destructive-migration coverage — `omnigraph optimize` /
|
||||
`cleanup` boundary cases, plus schema-apply soft-drop and Hard-mode
|
||||
paths.
|
||||
- Stable-row-id preservation across `stage_overwrite` — pins the invariant
|
||||
that staged overwrites carry stable row IDs through to the committed
|
||||
fragment set.
|
||||
- `CompOp::Contains` pushdown regression
|
||||
(`ir_filter_with_list_contains_pushes_down`) — pins the new structured
|
||||
Expr pushdown path that retired the in-memory fallback.
|
||||
|
||||
## Included Changes
|
||||
|
||||
- Lance 4 → 6.0.1, DataFusion 52 → 53, Arrow 57 → 58 substrate upgrade.
|
||||
- `omnigraph-policy` crate with engine-wide Cedar enforcement and
|
||||
signed-token-claim-only actor identity.
|
||||
- Schema-lint v1 chassis with `OG-XXX-NNN` codes, soft `DropProperty` /
|
||||
`DropType` semantics, and `--allow-data-loss` for Hard mode.
|
||||
- HTTP `allow_data_loss` request field parity with the CLI flag.
|
||||
- Structured DataFusion `Expr` filter pushdown via
|
||||
`Scanner::filter_expr`, with `CompOp::Contains` lowered through
|
||||
`array_has`.
|
||||
- Inline `.gq` source acceptance on CLI and HTTP read/mutate endpoints.
|
||||
- Optional CORS layer on `omnigraph-server` for browser UIs.
|
||||
- Bug fixes: merge-insert dup-rowid (FirstSeen + uniqueness precondition),
|
||||
branch-merge coordinator restore on error, blob-column materialization
|
||||
during branch merge.
|
||||
- New Lance API surface-guard test file as the canary for future Lance
|
||||
bumps.
|
||||
- Recovery-sidecar coverage extended across the four write paths
|
||||
(`MutationStaging::finalize`, `schema_apply`, `branch_merge`,
|
||||
`ensure_indices`) with failpoint regression tests.
|
||||
- CI: pinned `rustfs/rustfs:1.0.0-beta.3` after the upstream `:latest`
|
||||
introduced a credentials-policy change.
|
||||
- Version bump to `0.5.0` across workspace crates, `Cargo.lock`,
|
||||
`openapi.json`, and the `AGENTS.md` surveyed version.
|
||||
19
docs/releases/v0.6.0.md
Normal file
19
docs/releases/v0.6.0.md
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Omnigraph v0.6.0
|
||||
|
||||
## Breaking Changes
|
||||
|
||||
- Renamed the Cedar resource entity from `Omnigraph::Repo` to `Omnigraph::Graph`.
|
||||
- Renamed policy API terminology from `repo_id` to `graph_id` on `PolicyCompiler::compile` and `PolicyEngine::load`.
|
||||
- Renamed query-lint schema source JSON from `"repo"` to `"graph"` for `schema_source.kind`.
|
||||
|
||||
## User Impact
|
||||
|
||||
- No on-disk migration is required. Existing `.omni` graphs continue to open with the same storage layout.
|
||||
- Supported YAML policy authoring is unchanged because the YAML schema does not expose the Cedar entity type name.
|
||||
- Operators with unsupported raw Cedar policy files should update `Omnigraph::Repo`
|
||||
resource references to `Omnigraph::Graph`.
|
||||
|
||||
## Documentation
|
||||
|
||||
- Public docs, CLI help, examples, server docs, and test helpers now consistently use "graph" for the OmniGraph data artifact.
|
||||
- GitHub/source repository terminology remains spelled out as "repository" where needed.
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
- `_as` variants of every write API let callers override the actor: `mutate_as`, `ingest_as`, `branch_merge_as`, `apply_schema_as`, etc.
|
||||
- Actor IDs are persisted on `GraphCommit.actor_id` with split storage in `_graph_commit_actors.lance` (the commit graph is split into `_graph_commits.lance` for the linkage and `_graph_commit_actors.lance` for the actor map).
|
||||
- HTTP server uses the bearer-token actor automatically; CLI uses the local user / explicit env (no implicit actor).
|
||||
- Pre-v0.4.0 repos also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. The Run state machine was removed in MR-771; those files are inert post-v0.4.0 and reclaimed by MR-770's production sweep.
|
||||
- Pre-v0.4.0 graphs also stored actor IDs on `RunRecord.actor_id` in `_graph_runs.lance` / `_graph_run_actors.lance`. The Run state machine was removed in MR-771; those files are inert post-v0.4.0 and reclaimed by MR-770's production sweep.
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ A reference for the `omnigraph` binary's command surface and `omnigraph.yaml` sc
|
|||
|
||||
| Command | Purpose |
|
||||
|---|---|
|
||||
| `init` | `--schema <pg>` → initialize a repo (also scaffolds `omnigraph.yaml` if missing) |
|
||||
| `init` | `--schema <pg>` → initialize a graph (also scaffolds `omnigraph.yaml` if missing) |
|
||||
| `load` | bulk load a branch (`--mode overwrite\|append\|merge`) |
|
||||
| `ingest` | branch-creating transactional load (`--from <base>`) |
|
||||
| `query` (alias: `read`) | run named read query; source via `--query <path>`, `-e`/`--query-string <GQ>`, or `--alias <name>` (exactly one). `read` is the deprecated previous name and prints a one-line warning to stderr |
|
||||
|
|
@ -19,7 +19,7 @@ A reference for the `omnigraph` binary's command surface and `omnigraph.yaml` sc
|
|||
| `commit list \| show` | inspect commit graph |
|
||||
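| `run list \| show \| publish \| abort` | legacy transactional run ops — removed in MR-771; mutations and loads publish atomically, use `commit list` as the audit surface |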
|
||||
| `schema plan \| apply \| show (alias: get)` | migrations |
|
||||
| `lint` (alias: `check`) | offline / repo-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` |
|
||||
| `lint` (alias: `check`) | offline / graph-backed query validation. Replaces `query lint` / `query check`, which are kept as deprecated argv-level shims that print a one-line warning and rewrite to `omnigraph lint` |
|
||||
| `optimize` | non-destructive Lance compaction |
|
||||
| `cleanup --keep N --older-than 7d --confirm` | destructive version GC |
|
||||
| `embed` | offline JSONL embedding pipeline |
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
# CLI Guide
|
||||
|
||||
## Core Repo Flow
|
||||
## Core Graph Flow
|
||||
|
||||
```bash
|
||||
omnigraph init --schema ./schema.pg ./repo.omni
|
||||
omnigraph load --data ./data.jsonl --mode overwrite ./repo.omni
|
||||
omnigraph snapshot ./repo.omni --branch main --json
|
||||
omnigraph query --uri ./repo.omni --query ./queries.gq --name get_person --params '{"name":"Alice"}'
|
||||
omnigraph mutate --uri ./repo.omni --query ./queries.gq --name insert_person --params '{"name":"Mina","age":28}'
|
||||
omnigraph init --schema ./schema.pg ./graph.omni
|
||||
omnigraph load --data ./data.jsonl --mode overwrite ./graph.omni
|
||||
omnigraph snapshot ./graph.omni --branch main --json
|
||||
omnigraph query --uri ./graph.omni --query ./queries.gq --name get_person --params '{"name":"Alice"}'
|
||||
omnigraph mutate --uri ./graph.omni --query ./queries.gq --name insert_person --params '{"name":"Mina","age":28}'
|
||||
```
|
||||
|
||||
`omnigraph query` is the canonical read command (pairs with `POST /query`);
|
||||
|
|
@ -21,11 +21,11 @@ For ad-hoc reads and mutations (REPLs, AI agents, one-off scripts), pass the
|
|||
GQ source inline with `-e` / `--query-string` instead of a file path:
|
||||
|
||||
```bash
|
||||
omnigraph query --uri ./repo.omni \
|
||||
omnigraph query --uri ./graph.omni \
|
||||
-e 'query find($name: String) { match { $p: Person { name: $name } } return { $p.name, $p.age } }' \
|
||||
--params '{"name":"Alice"}'
|
||||
|
||||
omnigraph mutate --uri ./repo.omni \
|
||||
omnigraph mutate --uri ./graph.omni \
|
||||
-e 'query add($name: String, $age: I32) { insert Person { name: $name, age: $age } }' \
|
||||
--params '{"name":"Inline","age":42}'
|
||||
```
|
||||
|
|
@ -38,22 +38,22 @@ only the source loader changes.
|
|||
## Branching And Reviewable Data Flows
|
||||
|
||||
```bash
|
||||
omnigraph branch create --uri ./repo.omni --from main feature-x
|
||||
omnigraph branch list --uri ./repo.omni
|
||||
omnigraph branch merge --uri ./repo.omni feature-x --into main
|
||||
omnigraph branch create --uri ./graph.omni --from main feature-x
|
||||
omnigraph branch list --uri ./graph.omni
|
||||
omnigraph branch merge --uri ./graph.omni feature-x --into main
|
||||
|
||||
omnigraph ingest --data ./batch.jsonl --branch review/import-2026-04-09 ./repo.omni
|
||||
omnigraph export ./repo.omni --branch main --type Person > people.jsonl
|
||||
omnigraph commit list ./repo.omni --branch main --json
|
||||
omnigraph commit show --uri ./repo.omni <commit-id> --json
|
||||
omnigraph ingest --data ./batch.jsonl --branch review/import-2026-04-09 ./graph.omni
|
||||
omnigraph export ./graph.omni --branch main --type Person > people.jsonl
|
||||
omnigraph commit list ./graph.omni --branch main --json
|
||||
omnigraph commit show --uri ./graph.omni <commit-id> --json
|
||||
```
|
||||
|
||||
## Remote Server Mode
|
||||
|
||||
Serve a repo:
|
||||
Serve a graph:
|
||||
|
||||
```bash
|
||||
omnigraph-server ./repo.omni --bind 127.0.0.1:8080
|
||||
omnigraph-server ./graph.omni --bind 127.0.0.1:8080
|
||||
```
|
||||
|
||||
Read through the HTTP API:
|
||||
|
|
@ -73,22 +73,22 @@ and configure the matching `bearer_token_env` in `omnigraph.yaml`.
|
|||
|
||||
```bash
|
||||
omnigraph lint --query ./queries.gq --schema ./schema.pg --json
|
||||
omnigraph check --query ./queries.gq ./repo.omni --json
|
||||
omnigraph check --query ./queries.gq ./graph.omni --json
|
||||
|
||||
omnigraph schema plan --schema ./next.pg ./repo.omni --json
|
||||
omnigraph schema apply --schema ./next.pg ./repo.omni --json
|
||||
omnigraph schema plan --schema ./next.pg ./graph.omni --json
|
||||
omnigraph schema apply --schema ./next.pg ./graph.omni --json
|
||||
omnigraph policy validate --config ./omnigraph.yaml
|
||||
omnigraph policy test --config ./omnigraph.yaml
|
||||
omnigraph policy explain --config ./omnigraph.yaml --actor act-alice --action read --branch main
|
||||
|
||||
omnigraph commit list ./repo.omni --json
|
||||
omnigraph commit show --uri ./repo.omni <commit-id> --json
|
||||
omnigraph commit list ./graph.omni --json
|
||||
omnigraph commit show --uri ./graph.omni <commit-id> --json
|
||||
```
|
||||
|
||||
(The legacy `omnigraph run list/show/publish/abort` subcommands were removed in MR-771; mutations and loads publish atomically and the commit graph (`omnigraph commit list`) is the audit surface.)
|
||||
|
||||
`query lint` and `query check` are the same command surface. In v1, repo-backed
|
||||
lint uses local or `s3://` repo URIs; HTTP targets are only supported when you
|
||||
`lint` and `check` are the same command surface (`query lint` / `query check` remain only as deprecated shims). In v1, graph-backed
|
||||
lint uses local or `s3://` graph URIs; HTTP targets are only supported when you
|
||||
also pass `--schema`.
|
||||
|
||||
## Config
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ internal deploy automation.
|
|||
|
||||
Omnigraph supports two broad deployment shapes:
|
||||
|
||||
- local directory repos
|
||||
- `s3://` repos on AWS S3 or S3-compatible object stores
|
||||
- local directory graphs
|
||||
- `s3://` graphs on AWS S3 or S3-compatible object stores
|
||||
|
||||
The server binary and container image expose the same HTTP surface.
|
||||
|
||||
|
|
@ -20,18 +20,18 @@ Build or install:
|
|||
- `omnigraph`
|
||||
- `omnigraph-server`
|
||||
|
||||
Run against a local repo:
|
||||
Run against a local graph:
|
||||
|
||||
```bash
|
||||
omnigraph-server ./repo.omni --bind 0.0.0.0:8080
|
||||
omnigraph-server ./graph.omni --bind 0.0.0.0:8080
|
||||
```
|
||||
|
||||
Run against an object-store-backed repo:
|
||||
Run against an object-store-backed graph:
|
||||
|
||||
```bash
|
||||
OMNIGRAPH_SERVER_BEARER_TOKEN="change-me" \
|
||||
AWS_REGION="us-east-1" \
|
||||
omnigraph-server s3://my-bucket/repos/example/releases/2026-04-10-v0.1.0 \
|
||||
omnigraph-server s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0 \
|
||||
--bind 0.0.0.0:8080
|
||||
```
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ curl -fsSL https://raw.githubusercontent.com/ModernRelay/omnigraph/main/scripts/
|
|||
The bootstrap:
|
||||
|
||||
- starts a local RustFS-backed object store
|
||||
- creates a bucket and S3-backed Omnigraph repo
|
||||
- creates a bucket and S3-backed Omnigraph graph
|
||||
- loads the checked-in context fixture
|
||||
- starts `omnigraph-server` on `127.0.0.1:8080`
|
||||
|
||||
|
|
@ -60,8 +60,8 @@ Useful overrides:
|
|||
|
||||
- `WORKDIR=/path/to/state`
|
||||
- `BUCKET=omnigraph-local`
|
||||
- `PREFIX=repos/context`
|
||||
- `RESET_REPO=1` to delete an existing partially initialized repo prefix before recreating it
|
||||
- `PREFIX=graphs/context`
|
||||
- `RESET_REPO=1` to delete an existing partially initialized graph prefix before recreating it
|
||||
- `BIND=127.0.0.1:8080`
|
||||
- `RUSTFS_CONTAINER_NAME=omnigraph-rustfs-demo`
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ If `aws` is not installed, the script attempts a user-local AWS CLI install via
|
|||
running.
|
||||
|
||||
If a previous bootstrap left objects behind under the selected `PREFIX` but did
|
||||
not finish initializing the repo, rerun with `RESET_REPO=1` or choose a new
|
||||
not finish initializing the graph, rerun with `RESET_REPO=1` or choose a new
|
||||
`PREFIX`.
|
||||
|
||||
## Container Deployment
|
||||
|
|
@ -87,23 +87,23 @@ Build the image:
|
|||
docker build -t omnigraph-server:local .
|
||||
```
|
||||
|
||||
Run against a local repo:
|
||||
Run against a local graph:
|
||||
|
||||
```bash
|
||||
docker run --rm -p 8080:8080 \
|
||||
-v "$PWD/repo.omni:/data/repo.omni" \
|
||||
-v "$PWD/graph.omni:/data/graph.omni" \
|
||||
omnigraph-server:local \
|
||||
/data/repo.omni --bind 0.0.0.0:8080
|
||||
/data/graph.omni --bind 0.0.0.0:8080
|
||||
```
|
||||
|
||||
Run against an S3-backed repo:
|
||||
Run against an S3-backed graph:
|
||||
|
||||
```bash
|
||||
docker run --rm -p 8080:8080 \
|
||||
-e OMNIGRAPH_SERVER_BEARER_TOKEN="change-me" \
|
||||
-e AWS_REGION="us-east-1" \
|
||||
omnigraph-server:local \
|
||||
s3://my-bucket/repos/example/releases/2026-04-10-v0.1.0 \
|
||||
s3://my-bucket/graphs/example/releases/2026-04-10-v0.1.0 \
|
||||
--bind 0.0.0.0:8080
|
||||
```
|
||||
|
||||
|
|
@ -154,7 +154,7 @@ Manager secret whose `SecretString` is a JSON object of
|
|||
`{"actor_id": "token", ...}`:
|
||||
|
||||
```bash
|
||||
omnigraph-server-aws s3://my-bucket/repos/example ...
|
||||
omnigraph-server-aws s3://my-bucket/graphs/example ...
|
||||
# Environment:
|
||||
# OMNIGRAPH_SERVER_BEARER_TOKENS_AWS_SECRET=arn:aws:secretsmanager:us-east-1:123456789012:secret:omnigraph-tokens-AbCdEf
|
||||
```
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ Mark a Vector property with `@embed("source_text_property")`. At ingest, the eng
|
|||
|
||||
## CLI `omnigraph embed` (offline file pipeline)
|
||||
|
||||
Operates on **JSONL files** (not on a repo). Three modes (mutually exclusive):
|
||||
Operates on **JSONL files** (not on a graph). Three modes (mutually exclusive):
|
||||
|
||||
- (default) `fill_missing` — only embed rows whose target field is empty
|
||||
- `--reembed-all` — overwrite all
|
||||
|
|
|
|||
|
|
@ -18,11 +18,11 @@ of MRs, internal recovery mechanics, or contributor-only invariants.
|
|||
| Write queries and mutations | [query-language.md](query-language.md) |
|
||||
| Use embeddings | [embeddings.md](embeddings.md) |
|
||||
|
||||
## Operate A Repo
|
||||
## Operate A Graph
|
||||
|
||||
| Goal | Read |
|
||||
|---|---|
|
||||
| Understand repo layout and URI support | [storage.md](storage.md) |
|
||||
| Understand graph layout and URI support | [storage.md](storage.md) |
|
||||
| Work with branches, commits, and snapshots | [branches-commits.md](branches-commits.md) |
|
||||
| Coordinate multi-query workflows | [transactions.md](transactions.md) |
|
||||
| Read diffs and change feeds | [changes.md](changes.md) |
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# HTTP Server (`omnigraph-server`)
|
||||
|
||||
Axum 0.8 + tokio + utoipa-generated OpenAPI. Single repo per process; deploy multiple processes for multi-tenant.
|
||||
Axum 0.8 + tokio + utoipa-generated OpenAPI. Single graph per process; deploy multiple processes for multi-tenant.
|
||||
|
||||
## Endpoint inventory
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ See [deployment.md](deployment.md) for token-source operational details.
|
|||
|
||||
- `tower_http::TraceLayer::new_for_http()`
|
||||
- Policy decisions logged at INFO level with actor, action, branch, decision, matched rule
|
||||
- Startup logs: token source name, repo URI, bind address
|
||||
- Startup logs: token source name, graph URI, bind address
|
||||
- Graceful SIGINT shutdown
|
||||
|
||||
## Not implemented (by design or "TBD")
|
||||
|
|
@ -148,4 +148,4 @@ See [deployment.md](deployment.md) for token-source operational details.
|
|||
admission control" above). No global rate limiter is configured;
|
||||
add `tower_http::limit` if a graph-wide cap is needed.
|
||||
- Pagination — none (commits/branches return everything; export streams).
|
||||
- Multi-tenant routing — one repo per process.
|
||||
- Multi-tenant routing — one graph per process.
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ Every node type and every edge type is its own Lance dataset:
|
|||
- **Columnar Arrow storage**: each property is a column; nullable per Arrow schema.
|
||||
- **Fragments**: data is partitioned into fragments; new writes create new fragments.
|
||||
- **Manifest versioning**: every commit produces a new dataset version; old versions remain readable.
|
||||
- **Stable row IDs**: `enable_stable_row_ids: true` is set on every Lance dataset OmniGraph creates — node and edge data tables, `__manifest`, `_graph_commits.lance`, `_graph_commit_recoveries.lance`, and any future system tables. This is an architectural invariant: the flag is one-way at dataset create per Lance's row-id-lineage spec, so a future change that introduces a Lance dataset must preserve it. Consequences: `_row_created_at_version` and `_row_last_updated_at_version` are available on every dataset (load-bearing for change-feed validators); `CreateIndex × Rewrite` is not a retryable conflict, so indices survive `omnigraph optimize` without needing the Fragment Reuse Index; readers must use a Lance build that recognises the flag (our pinned 4.0.0 is fine). Pre-0.4.x repos created before this code path settled may have datasets without the flag and cannot be retrofitted in place — the supported path is dump-and-reload. The `stage_overwrite` rewrite path (used by `schema_apply`) preserves the flag through `Operation::Overwrite`; pinned by `stage_overwrite_preserves_stable_row_ids` in `crates/omnigraph/tests/staged_writes.rs`.
|
||||
- **Stable row IDs**: `enable_stable_row_ids: true` is set on every Lance dataset OmniGraph creates — node and edge data tables, `__manifest`, `_graph_commits.lance`, `_graph_commit_recoveries.lance`, and any future system tables. This is an architectural invariant: the flag is one-way at dataset create per Lance's row-id-lineage spec, so a future change that introduces a Lance dataset must preserve it. Consequences: `_row_created_at_version` and `_row_last_updated_at_version` are available on every dataset (load-bearing for change-feed validators); `CreateIndex × Rewrite` is not a retryable conflict, so indices survive `omnigraph optimize` without needing the Fragment Reuse Index; readers must use a Lance build that recognises the flag (our pinned 4.0.0 is fine). Pre-0.4.x graphs created before this code path settled may have datasets without the flag and cannot be retrofitted in place — the supported path is dump-and-reload. The `stage_overwrite` rewrite path (used by `schema_apply`) preserves the flag through `Operation::Overwrite`; pinned by `stage_overwrite_preserves_stable_row_ids` in `crates/omnigraph/tests/staged_writes.rs`.
|
||||
- **Append / delete / `merge_insert`**: native Lance write modes.
|
||||
- **Per-dataset branches** (Lance native): copy-on-write at the dataset level.
|
||||
- **Object-store agnostic**: file://, s3://, gs://, az://, http (read-only via Lance) — OmniGraph wires file:// and s3:// (`storage.rs`).
|
||||
|
|
@ -22,7 +22,7 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin
|
|||
- `edges/{fnv1a64-hex(edge_type_name)}` — one Lance dataset per edge type
|
||||
- `__manifest/` — the catalog of all sub-tables and their published versions
|
||||
- `_graph_commits.lance` / `_graph_commit_actors.lance` — the commit graph and its actor map
|
||||
- (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 repos are inert; the run state machine was removed in MR-771 and these files are cleaned up via MR-770's production sweep)
|
||||
- (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 graphs are inert; the run state machine was removed in MR-771 and these files are cleaned up via MR-770's production sweep)
|
||||
- **Manifest row schema** (`object_id, object_type, location, metadata, base_objects, table_key, table_version, table_branch, row_count`):
|
||||
- `object_type` ∈ `table | table_version | table_tombstone`
|
||||
- `table_key` ∈ `node:<TypeName> | edge:<EdgeName>`
|
||||
|
|
@ -36,7 +36,7 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin
|
|||
|
||||
The on-disk shape of `__manifest` is reconciled with the binary via a single stamp + dispatcher. `INTERNAL_MANIFEST_SCHEMA_VERSION` declares the shape this binary writes; the on-disk stamp `omnigraph:internal_schema_version` lives in the manifest dataset's schema-level metadata (Lance `update_schema_metadata`).
|
||||
|
||||
- **`init_manifest_repo`** stamps the current version at creation, so newly initialized repos never need migration.
|
||||
- **`init_manifest_graph`** stamps the current version at creation, so newly initialized graphs never need migration.
|
||||
- **Publisher open-for-write path** (`load_publish_state`) calls `migrate_internal_schema(&mut dataset)` before reading state. When the on-disk stamp matches the binary, this is a single metadata read with no writes; otherwise the dispatcher walks `match`-arm steps forward (1→2, 2→3, …) until the stamp matches, then proceeds with the publish. Reads stay side-effect-free.
|
||||
- **Forward-version protection**: a stamp *higher* than the binary's known version triggers a clear "upgrade omnigraph first" error. An old binary cannot clobber a newer schema by silently treating "unknown stamp" as "missing stamp".
|
||||
- **Idempotency**: each migration step is safe to re-run. A crash between two metadata updates inside a single step leaves the partial state; the next open re-runs the step and the second update lands. The dispatcher itself is a cheap stamp-read on the steady-state path.
|
||||
|
|
@ -50,14 +50,14 @@ Adding a new on-disk shape change is one constant bump (`INTERNAL_MANIFEST_SCHEM
|
|||
|
||||
## On-disk layout
|
||||
|
||||
A repo on disk is a directory tree of Lance datasets. Each dataset follows the standard Lance layout (`_versions/`, `data/`, `_indices/`, `_refs/`); OmniGraph adds the multi-dataset coordination by keeping `__manifest/` alongside the per-type datasets.
|
||||
A graph on disk is a directory tree of Lance datasets. Each dataset follows the standard Lance layout (`_versions/`, `data/`, `_indices/`, `_refs/`); OmniGraph adds the multi-dataset coordination by keeping `__manifest/` alongside the per-type datasets.
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
classDef l1 fill:#fef3e8,stroke:#c46900,color:#000
|
||||
classDef l2 fill:#e8f4fd,stroke:#1e6aa8,color:#000
|
||||
|
||||
repo["repo URI<br/>file:// or s3://bucket/prefix"]:::l2
|
||||
root["graph URI<br/>file:// or s3://bucket/prefix"]:::l2
|
||||
|
||||
manifest["__manifest/<br/>L2 catalog of sub-tables"]:::l2
|
||||
nodes["nodes/{fnv1a64-hex}/<br/>one dataset per node type"]:::l2
|
||||
|
|
@ -66,12 +66,12 @@ flowchart TB
|
|||
recovery["__recovery/{ulid}.json<br/>recovery sidecars (transient)"]:::l2
|
||||
refs["_refs/branches/{name}.json<br/>graph-level branches"]:::l2
|
||||
|
||||
repo --> manifest
|
||||
repo --> nodes
|
||||
repo --> edges
|
||||
repo --> cgraph
|
||||
repo --> recovery
|
||||
repo --> refs
|
||||
root --> manifest
|
||||
root --> nodes
|
||||
root --> edges
|
||||
root --> cgraph
|
||||
root --> recovery
|
||||
root --> refs
|
||||
|
||||
subgraph dataset[Inside each Lance dataset — L1]
|
||||
ds_v["_versions/{n}.manifest<br/>per-dataset versions"]:::l1
|
||||
|
|
@ -88,10 +88,10 @@ flowchart TB
|
|||
|
||||
**What's where:**
|
||||
|
||||
- **Repo root** is one directory (or S3 prefix). Everything below is part of one OmniGraph repo.
|
||||
- **Graph root** is one directory (or S3 prefix). Everything below is part of one OmniGraph graph.
|
||||
- **`__manifest/`** is a Lance dataset whose rows describe which sub-table version is published at which graph-branch. Reading a snapshot starts here.
|
||||
- **`nodes/`** and **`edges/`** are sibling directories holding one Lance dataset per declared type. Names are `fnv1a64-hex` of the type name to keep paths fixed-length and case-safe.
|
||||
- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 repos also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; MR-770 sweeps these in production.)
|
||||
- **`_graph_commits.lance`** is an L2 dataset that records the graph-level commit DAG, with a paired `_graph_commit_actors.lance` for the actor map. (Pre-v0.4.0 graphs also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; MR-770 sweeps these in production.)
|
||||
- **`_graph_commit_recoveries.lance`** — one row per recovery sweep action. Joined to `_graph_commits.lance` by `graph_commit_id`; the linked commit row carries `actor_id=omnigraph:recovery`. Operators correlate recoveries with the original mutations they rolled forward / back via this join. See `crates/omnigraph/src/db/recovery_audit.rs`.
|
||||
- **`__recovery/{ulid}.json`** — transient sidecar files written by the four migrated writers (`MutationStaging::finalize`, `schema_apply`, `branch_merge`, `ensure_indices`) before Phase B begins, deleted after Phase C succeeds. A sidecar persisting after process exit means the writer crashed in the Phase B → Phase C window; the next `Omnigraph::open` recovery sweep processes it. Steady-state directory is empty. See `crates/omnigraph/src/db/manifest/recovery.rs`.
|
||||
- **`_refs/branches/{name}.json`** is graph-level branch metadata — pointers from a branch name to the manifest version it heads.
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ query register_employee_with_team($name: String, $age: I32, $team: String) {
|
|||
|
||||
```bash
|
||||
omnigraph change --query ./mutations.gq --name register_employee_with_team \
|
||||
--params '{"name":"Alice","age":30,"team":"Acme"}' ./repo.omni
|
||||
--params '{"name":"Alice","age":30,"team":"Acme"}' ./graph.omni
|
||||
```
|
||||
|
||||
If the second statement fails (e.g. `Acme` doesn't exist), the publisher never publishes; `Alice` is not in the database. Atomic.
|
||||
|
|
@ -57,10 +57,10 @@ If the second statement fails (e.g. `Acme` doesn't exist), the publisher never p
|
|||
|
||||
```bash
|
||||
# Query 1
|
||||
omnigraph change --query ./mutations.gq --name register_employee --params '{"name":"Alice","age":30}' ./repo.omni
|
||||
omnigraph change --query ./mutations.gq --name register_employee --params '{"name":"Alice","age":30}' ./graph.omni
|
||||
|
||||
# Query 2 — runs after Query 1 has already published
|
||||
omnigraph change --query ./mutations.gq --name link_to_team --params '{"name":"Alice","team":"Acme"}' ./repo.omni
|
||||
omnigraph change --query ./mutations.gq --name link_to_team --params '{"name":"Alice","team":"Acme"}' ./graph.omni
|
||||
```
|
||||
|
||||
These are **two publishes** on `main`. If Query 2 fails, Query 1's effects are already visible. There is no `ROLLBACK` for Query 1.
|
||||
|
|
@ -75,32 +75,32 @@ The pattern when you need to run multiple queries — possibly across multiple c
|
|||
|
||||
```bash
|
||||
# Fork a working branch from main.
|
||||
omnigraph branch create --from main onboarding/2026-04-25 ./repo.omni
|
||||
omnigraph branch create --from main onboarding/2026-04-25 ./graph.omni
|
||||
|
||||
# Run any number of mutations on the branch — each one is its own publish on the branch.
|
||||
# Concurrent reads of `main` are unaffected.
|
||||
omnigraph change --branch onboarding/2026-04-25 \
|
||||
--query ./mutations.gq --name register_employee \
|
||||
--params '{"name":"Alice","age":30}' ./repo.omni
|
||||
--params '{"name":"Alice","age":30}' ./graph.omni
|
||||
|
||||
omnigraph change --branch onboarding/2026-04-25 \
|
||||
--query ./mutations.gq --name register_employee \
|
||||
--params '{"name":"Bob","age":25}' ./repo.omni
|
||||
--params '{"name":"Bob","age":25}' ./graph.omni
|
||||
|
||||
omnigraph change --branch onboarding/2026-04-25 \
|
||||
--query ./mutations.gq --name link_to_team \
|
||||
--params '{"name":"Alice","team":"Acme"}' ./repo.omni
|
||||
--params '{"name":"Alice","team":"Acme"}' ./graph.omni
|
||||
|
||||
# Inspect the branch — read queries work just like on main.
|
||||
omnigraph query --branch onboarding/2026-04-25 \
|
||||
--query ./queries.gq --name list_employees ./repo.omni
|
||||
--query ./queries.gq --name list_employees ./graph.omni
|
||||
|
||||
# Happy with what's on the branch? Merge it. This is one atomic publish:
|
||||
# `main` flips to include every commit on the branch.
|
||||
omnigraph branch merge onboarding/2026-04-25 --into main ./repo.omni
|
||||
omnigraph branch merge onboarding/2026-04-25 --into main ./graph.omni
|
||||
|
||||
# OR: not happy? Throw it away. `main` is untouched.
|
||||
# omnigraph branch delete onboarding/2026-04-25 ./repo.omni
|
||||
# omnigraph branch delete onboarding/2026-04-25 ./graph.omni
|
||||
```
|
||||
|
||||
Properties:
|
||||
|
|
@ -115,16 +115,16 @@ Two agents writing to the same graph independently:
|
|||
|
||||
```bash
|
||||
# Agent A
|
||||
omnigraph branch create --from main agent-a/work ./repo.omni
|
||||
omnigraph change --branch agent-a/work … ./repo.omni
|
||||
omnigraph branch create --from main agent-a/work ./graph.omni
|
||||
omnigraph change --branch agent-a/work … ./graph.omni
|
||||
# … many mutations …
|
||||
omnigraph branch merge agent-a/work --into main ./repo.omni
|
||||
omnigraph branch merge agent-a/work --into main ./graph.omni
|
||||
|
||||
# Agent B (running concurrently)
|
||||
omnigraph branch create --from main agent-b/work ./repo.omni
|
||||
omnigraph change --branch agent-b/work … ./repo.omni
|
||||
omnigraph branch create --from main agent-b/work ./graph.omni
|
||||
omnigraph change --branch agent-b/work … ./graph.omni
|
||||
# … many mutations …
|
||||
omnigraph branch merge agent-b/work --into main ./repo.omni
|
||||
omnigraph branch merge agent-b/work --into main ./graph.omni
|
||||
```
|
||||
|
||||
Each agent sees a consistent snapshot of `main` at the time it forked. The first merge to `main` lands as a fast-forward (or a no-op if no concurrent change). The second merge runs three-way: rows touched by both branches surface as `MergeConflict`s for the caller to resolve.
|
||||
|
|
@ -138,7 +138,7 @@ This is the workflow MR-797 / agentic loops are designed around: **branches are
|
|||
| Single query fails mid-flight | Publisher never publishes; target unchanged | Read the error, decide whether to retry |
|
||||
| Concurrent writers race the same `(table, branch)` | Publisher CAS rejects the loser with `ManifestConflictDetails::ExpectedVersionMismatch` | Refresh handle, retry the query |
|
||||
| Branch with N successful mutations, then merge fails (three-way conflict) | Each individual mutation already committed on the branch; merge surfaces `MergeConflicts` | Inspect, decide whether to keep working on the branch, abandon it (`branch_delete`), or resolve and re-merge |
|
||||
| Process crashes mid-branch-workflow | Each completed mutation on the branch is durable | Re-open the repo, continue where you left off |
|
||||
| Process crashes mid-branch-workflow | Each completed mutation on the branch is durable | Re-open the graph, continue where you left off |
|
||||
|
||||
## When to use what
|
||||
|
||||
|
|
@ -156,7 +156,7 @@ This is the workflow MR-797 / agentic loops are designed around: **branches are
|
|||
|
||||
- **Cross-query atomicity on `main` without a branch.** If you don't want to fork a branch, multiple queries on `main` publish independently. There is no implicit transaction.
|
||||
- **Long-running interactive transactions.** No `BEGIN` over a connection. Branches are the durable equivalent.
|
||||
- **Cross-graph (cross-repo) transactions.** Each repo is its own atomicity domain.
|
||||
- **Cross-graph transactions.** Each graph is its own atomicity domain.
|
||||
- **"Pessimistic" locks** that serialize writers before they reach the storage layer. Snapshot-MVCC + publisher CAS handles concurrency optimistically; the loser retries.
|
||||
|
||||
## See also
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
"name": "MIT",
|
||||
"identifier": "MIT"
|
||||
},
|
||||
"version": "0.4.2"
|
||||
"version": "0.6.0"
|
||||
},
|
||||
"paths": {
|
||||
"/branches": {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
# Requires:
|
||||
# - `gh` CLI authenticated.
|
||||
# - Repo-admin or org-admin permissions on ModernRelay/omnigraph.
|
||||
# - Repository-admin or org-admin permissions on ModernRelay/omnigraph.
|
||||
#
|
||||
# This script is idempotent: re-running applies whatever is currently
|
||||
# declared in .github/branch-protection.json. The JSON file is the
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ PY
|
|||
canonical=()
|
||||
while IFS= read -r line; do
|
||||
canonical+=("$line")
|
||||
done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' | sort)
|
||||
done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' ! -path 'docs/internal/*' | sort)
|
||||
if [[ -d docs/releases ]]; then
|
||||
canonical+=("docs/releases/")
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -291,7 +291,7 @@ ensure_bucket() {
|
|||
s3api create-bucket --bucket "$BUCKET" >/dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
repo_prefix_has_objects() {
|
||||
graph_prefix_has_objects() {
|
||||
local key_count
|
||||
key_count="$("$AWS_BIN" --endpoint-url "$AWS_ENDPOINT_URL_S3" \
|
||||
s3api list-objects-v2 \
|
||||
|
|
@ -304,27 +304,27 @@ repo_prefix_has_objects() {
|
|||
[ -n "$key_count" ] && [ "$key_count" != "None" ] && [ "$key_count" != "0" ]
|
||||
}
|
||||
|
||||
reset_repo_prefix() {
|
||||
reset_graph_prefix() {
|
||||
log "Removing existing objects under $REPO_URI"
|
||||
"$AWS_BIN" --endpoint-url "$AWS_ENDPOINT_URL_S3" \
|
||||
s3 rm "s3://$BUCKET/$PREFIX" --recursive >/dev/null
|
||||
}
|
||||
|
||||
initialize_repo() {
|
||||
initialize_graph() {
|
||||
if "$BIN_DIR/omnigraph" snapshot "$REPO_URI" --json >/dev/null 2>&1; then
|
||||
log "Reusing existing repo at $REPO_URI"
|
||||
log "Reusing existing graph at $REPO_URI"
|
||||
return
|
||||
fi
|
||||
|
||||
if repo_prefix_has_objects; then
|
||||
if graph_prefix_has_objects; then
|
||||
if [ "$RESET_REPO" = "1" ]; then
|
||||
reset_repo_prefix
|
||||
reset_graph_prefix
|
||||
else
|
||||
die "found existing objects under $REPO_URI but could not open an Omnigraph repo there. This usually means a previous bootstrap left a partially initialized prefix. Rerun with RESET_REPO=1 to delete that prefix and recreate it, or set PREFIX to a new value."
|
||||
die "found existing objects under $REPO_URI but could not open an Omnigraph graph there. This usually means a previous bootstrap left a partially initialized prefix. Rerun with RESET_REPO=1 to delete that prefix and recreate it, or set PREFIX to a new value."
|
||||
fi
|
||||
fi
|
||||
|
||||
log "Initializing repo at $REPO_URI"
|
||||
log "Initializing graph at $REPO_URI"
|
||||
"$BIN_DIR/omnigraph" init --schema "$FIXTURE_DIR/context.pg" "$REPO_URI"
|
||||
|
||||
log "Loading context fixture into $REPO_URI"
|
||||
|
|
@ -377,7 +377,7 @@ Omnigraph local RustFS demo is up.
|
|||
Server:
|
||||
$base_url
|
||||
|
||||
Repo URI:
|
||||
Graph URI:
|
||||
$REPO_URI
|
||||
|
||||
RustFS console:
|
||||
|
|
@ -414,7 +414,7 @@ main() {
|
|||
start_rustfs
|
||||
wait_for_rustfs
|
||||
ensure_bucket
|
||||
initialize_repo
|
||||
initialize_graph
|
||||
start_server
|
||||
print_summary "$(wait_for_server)"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ usage() {
|
|||
Usage: update-homebrew-formula.sh <tag> [formula_path]
|
||||
|
||||
Environment:
|
||||
REPO_SLUG GitHub repo that owns the Omnigraph release
|
||||
REPO_SLUG GitHub repository that owns the Omnigraph release
|
||||
default: ModernRelay/omnigraph
|
||||
EOF
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue